Publications – Andreas Karwath

2021

Slater, Luke T.; Williams, John A.; Karwath, Andreas; Fanning, Hilary; Ball, Simon; Schofield, Paul N.; Hoehndorf, Robert; Gkoutos, Georgios V.

Multi-faceted semantic clustering with text-derived phenotypes Journal Article

In: Computers in Biology and Medicine, 2021, ISSN: 0010-4825.

Abstract | Links | BibTeX | Tags: cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity

@comment{Journal article by Slater et al. (2021) on multi-faceted semantic clustering. Authors are stored in the unambiguous "Last, First" form. The publication date is expressed as year plus the month macro so that both classic BibTeX styles and biblatex can read it. pubstate and tppubtype are not classic BibTeX fields and are ignored by styles that do not know them; tppubtype looks like exporter metadata -- confirm before removing.}
@article{14598610a01b4af99802a4b22e67a119,
  author    = {Slater, Luke T. and Williams, John A. and Karwath, Andreas and Fanning, Hilary and Ball, Simon and Schofield, Paul N. and Hoehndorf, Robert and Gkoutos, Georgios V.},
  title     = {Multi-faceted semantic clustering with text-derived phenotypes},
  journal   = {Computers in Biology and Medicine},
  publisher = {Elsevier},
  year      = {2021},
  month     = nov,
  issn      = {0010-4825},
  doi       = {10.1016/j.compbiomed.2021.104904},
  urldate   = {2021-11-01},
  keywords  = {cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity},
  abstract  = {Identification of ontology concepts in clinical narrative text enables the creation of phenotype profiles that can be associated with clinical entities, such as patients or drugs. Constructing patient phenotype profiles using formal ontologies enables their analysis via semantic similarity, in turn enabling the use of background knowledge in clustering or classification analyses. However, traditional semantic similarity approaches collapse complex relationships between patient phenotypes into a unitary similarity scores for each pair of patients. Moreover, single scores may be based only on matching terms with the greatest information content (IC), ignoring other dimensions of patient similarity. This process necessarily leads to a loss of information in the resulting representation of patient similarity, and is especially apparent when using very large text-derived and highly multi-morbid phenotype profiles. Moreover, it renders finding a biological explanation for similarity very difficult; the black box problem. In this article, we explore the generation of multiple semantic similarity scores for patients based on different facets of their phenotypic manifestation, which we define through different sub-graphs in the Human Phenotype Ontology. We further present a new methodology for deriving sets of qualitative class descriptions for groups of entities described by ontology terms. Leveraging this strategy to obtain meaningful explanations for our semantic clusters alongside other evaluation techniques, we show that semantic clustering with ontology-derived facets enables the representation, and thus identification of, clinically relevant phenotype relationships not easily recoverable using overall clustering alone. In this way, we demonstrate the potential of faceted semantic clustering for gaining a deeper and more nuanced understanding of text-derived patient phenotypes.},
  pubstate  = {published},
  tppubtype = {article},
}

2014

Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan

Structural clustering of millions of molecular graphs Conference

Symposium on Applied Computing, SAC 2014, ACM, New York, NY, USA, 2014.

Abstract | Links | BibTeX | Tags: cluster explanation, clustering, graph mining