2021
Slater, Luke T.; Williams, John A.; Karwath, Andreas; Fanning, Hilary; Ball, Simon; Schofield, Paul N.; Hoehndorf, Robert; Gkoutos, Georgios V.
Multi-faceted semantic clustering with text-derived phenotypes Journal Article
In: Computers in Biology and Medicine, 2021, ISSN: 0010-4825.
Abstract | Links | BibTeX | Tags: cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity
% Journal article: Slater et al. (2021), faceted semantic clustering of text-derived phenotypes.
% The journal name is stored in full, title-cased form so that any style can render it as-is.
% NOTE(review): tppubtype looks like export-tool metadata rather than a standard field; unknown fields are ignored by standard styles.
@article{14598610a01b4af99802a4b22e67a119,
  title     = {Multi-faceted semantic clustering with text-derived phenotypes},
  author    = {Luke T. Slater and John A. Williams and Andreas Karwath and Hilary Fanning and Simon Ball and Paul N. Schofield and Robert Hoehndorf and Georgios V. Gkoutos},
  doi       = {10.1016/j.compbiomed.2021.104904},
  issn      = {0010-4825},
  year      = {2021},
  date      = {2021-11-01},
  urldate   = {2021-11-01},
  journal   = {Computers in Biology and Medicine},
  publisher = {Elsevier},
  abstract  = {Identification of ontology concepts in clinical narrative text enables the creation of phenotype profiles that can be associated with clinical entities, such as patients or drugs. Constructing patient phenotype profiles using formal ontologies enables their analysis via semantic similarity, in turn enabling the use of background knowledge in clustering or classification analyses. However, traditional semantic similarity approaches collapse complex relationships between patient phenotypes into a unitary similarity scores for each pair of patients. Moreover, single scores may be based only on matching terms with the greatest information content (IC), ignoring other dimensions of patient similarity. This process necessarily leads to a loss of information in the resulting representation of patient similarity, and is especially apparent when using very large text-derived and highly multi-morbid phenotype profiles. Moreover, it renders finding a biological explanation for similarity very difficult; the black box problem. In this article, we explore the generation of multiple semantic similarity scores for patients based on different facets of their phenotypic manifestation, which we define through different sub-graphs in the Human Phenotype Ontology. We further present a new methodology for deriving sets of qualitative class descriptions for groups of entities described by ontology terms. Leveraging this strategy to obtain meaningful explanations for our semantic clusters alongside other evaluation techniques, we show that semantic clustering with ontology-derived facets enables the representation, and thus identification of, clinically relevant phenotype relationships not easily recoverable using overall clustering alone. In this way, we demonstrate the potential of faceted semantic clustering for gaining a deeper and more nuanced understanding of text-derived patient phenotypes.},
  keywords  = {cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity},
  pubstate  = {published},
  tppubtype = {article}
}
Karwath, Andreas; Bunting, Karina V; Gill, Simrat K; Tica, Otilia; Pendleton, Samantha; Aziz, Furqan; Barsky, Andrey D; Chernbumroong, Saisakul; Duan, Jinming; Mobley, Alastair R; Cardoso, Victor Roth; Slater, Luke; Williams, John A; Bruce, Emma-Jane; Wang, Xiaoxia; Flather, Marcus D; Coats, Andrew J S; Gkoutos, Georgios V; Kotecha, Dipak
Redefining beta-blocker response in heart failure patients with sinus rhythm and atrial fibrillation: a machine learning cluster analysis Journal Article
In: The Lancet, 2021.
Abstract | Links | BibTeX | Tags: artificial intelligence, clustering, crossvalidation, deep learning, EHR, health data science, phenotypes, validation
% Journal article: Karwath et al. (2021), The Lancet, machine learning cluster analysis of beta-blocker response.
% The url field holds the DOI resolver link in plain form: percent-encoding is unnecessary here,
% and a raw percent sign would act as a comment character if the value reached LaTeX unescaped.
% The abstract is intentionally multi-line (Background / Methods / Findings / Interpretation / Funding).
@article{Karwath_2021,
  title     = {Redefining beta-blocker response in heart failure patients with sinus rhythm and atrial fibrillation: a machine learning cluster analysis},
  author    = {Andreas Karwath and Karina V Bunting and Simrat K Gill and Otilia Tica and Samantha Pendleton and Furqan Aziz and Andrey D Barsky and Saisakul Chernbumroong and Jinming Duan and Alastair R Mobley and Victor Roth Cardoso and Luke Slater and John A Williams and Emma-Jane Bruce and Xiaoxia Wang and Marcus D Flather and Andrew J S Coats and Georgios V Gkoutos and Dipak Kotecha},
  url       = {https://doi.org/10.1016/s0140-6736(21)01638-x},
  doi       = {10.1016/s0140-6736(21)01638-x},
  year      = {2021},
  date      = {2021-08-01},
  urldate   = {2021-08-01},
  journal   = {The Lancet},
  publisher = {Elsevier BV},
  abstract  = {Background
Mortality remains unacceptably high in patients with heart failure and reduced left ventricular ejection fraction (LVEF) despite advances in therapeutics. We hypothesised that a novel artificial intelligence approach could better assess multiple and higher-dimension interactions of comorbidities, and define clusters of β-blocker efficacy in patients with sinus rhythm and atrial fibrillation.
Methods
Neural network-based variational autoencoders and hierarchical clustering were applied to pooled individual patient data from nine double-blind, randomised, placebo-controlled trials of β blockers. All-cause mortality during median 1·3 years of follow-up was assessed by intention to treat, stratified by electrocardiographic heart rhythm. The number of clusters and dimensions was determined objectively, with results validated using a leave-one-trial-out approach. This study was prospectively registered with ClinicalTrials.gov (NCT00832442) and the PROSPERO database of systematic reviews (CRD42014010012).
Findings
15 659 patients with heart failure and LVEF of less than 50% were included, with median age 65 years (IQR 56–72) and LVEF 27% (IQR 21–33). 3708 (24%) patients were women. In sinus rhythm (n=12 822), most clusters demonstrated a consistent overall mortality benefit from β blockers, with odds ratios (ORs) ranging from 0·54 to 0·74. One cluster in sinus rhythm of older patients with less severe symptoms showed no significant efficacy (OR 0·86, 95% CI 0·67–1·10; p=0·22). In atrial fibrillation (n=2837), four of five clusters were consistent with the overall neutral effect of β blockers versus placebo (OR 0·92, 0·77–1·10; p=0·37). One cluster of younger atrial fibrillation patients at lower mortality risk but similar LVEF to average had a statistically significant reduction in mortality with β blockers (OR 0·57, 0·35–0·93; p=0·023). The robustness and consistency of clustering was confirmed for all models (p<0·0001 vs random), and cluster membership was externally validated across the nine independent trials.
Interpretation
An artificial intelligence-based clustering approach was able to distinguish prognostic response from β blockers in patients with heart failure and reduced LVEF. This included patients in sinus rhythm with suboptimal efficacy, as well as a cluster of patients with atrial fibrillation where β blockers did reduce mortality.
Funding
Medical Research Council, UK, and EU/EFPIA Innovative Medicines Initiative BigData@Heart.},
  keywords  = {artificial intelligence, clustering, crossvalidation, deep learning, EHR, health data science, phenotypes, validation},
  pubstate  = {published},
  tppubtype = {article}
}
Background
Mortality remains unacceptably high in patients with heart failure and reduced left ventricular ejection fraction (LVEF) despite advances in therapeutics. We hypothesised that a novel artificial intelligence approach could better assess multiple and higher-dimension interactions of comorbidities, and define clusters of β-blocker efficacy in patients with sinus rhythm and atrial fibrillation.
Methods
Neural network-based variational autoencoders and hierarchical clustering were applied to pooled individual patient data from nine double-blind, randomised, placebo-controlled trials of β blockers. All-cause mortality during median 1·3 years of follow-up was assessed by intention to treat, stratified by electrocardiographic heart rhythm. The number of clusters and dimensions was determined objectively, with results validated using a leave-one-trial-out approach. This study was prospectively registered with ClinicalTrials.gov (NCT00832442) and the PROSPERO database of systematic reviews (CRD42014010012).
Findings
15 659 patients with heart failure and LVEF of less than 50% were included, with median age 65 years (IQR 56–72) and LVEF 27% (IQR 21–33). 3708 (24%) patients were women. In sinus rhythm (n=12 822), most clusters demonstrated a consistent overall mortality benefit from β blockers, with odds ratios (ORs) ranging from 0·54 to 0·74. One cluster in sinus rhythm of older patients with less severe symptoms showed no significant efficacy (OR 0·86, 95% CI 0·67–1·10; p=0·22). In atrial fibrillation (n=2837), four of five clusters were consistent with the overall neutral effect of β blockers versus placebo (OR 0·92, 0·77–1·10; p=0·37). One cluster of younger atrial fibrillation patients at lower mortality risk but similar LVEF to average had a statistically significant reduction in mortality with β blockers (OR 0·57, 0·35–0·93; p=0·023). The robustness and consistency of clustering was confirmed for all models (p<0·0001 vs random), and cluster membership was externally validated across the nine independent trials.
Interpretation
An artificial intelligence-based clustering approach was able to distinguish prognostic response from β blockers in patients with heart failure and reduced LVEF. This included patients in sinus rhythm with suboptimal efficacy, as well as a cluster of patients with atrial fibrillation where β blockers did reduce mortality.
Funding
Medical Research Council, UK, and EU/EFPIA Innovative Medicines Initiative BigData@Heart.
2014
Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan
Structural clustering of millions of molecular graphs Conference
Symposium on Applied Computing, SAC 2014, ACM, New York, NY, USA, 2014.
Abstract | Links | BibTeX | Tags: cluster explanation, clustering, graph mining
% Conference paper: Seeland, Karwath and Kramer (2014), Symposium on Applied Computing (SAC 2014).
% Uses the inproceedings entry type (conference is only an alias for it) and a double-hyphen page range.
% The organization field was dropped because it repeated the publisher (ACM), which renders the name twice.
% NOTE(review): the abstract below discusses support vector machines and pattern mining rather than
% structural clustering of molecular graphs; verify that it belongs to this paper.
@inproceedings{seeland2014a,
  title     = {Structural clustering of millions of molecular graphs},
  author    = {Madeleine Seeland and Andreas Karwath and Stefan Kramer},
  url       = {http://doi.acm.org/10.1145/2554850.2555063},
  doi       = {10.1145/2554850.2555063},
  year      = {2014},
  date      = {2014-03-24},
  urldate   = {2014-03-24},
  booktitle = {Symposium on Applied Computing, SAC 2014},
  pages     = {121--128},
  publisher = {ACM},
  address   = {New York, NY, USA},
  abstract  = {Statistical machine learning algorithms building on patterns found by pattern mining algorithms have to cope with large solution sets and thus the high dimensionality of the feature space. Vice versa, pattern mining algorithms are frequently applied to irrelevant instances, thus causing noise in the output. Solution sets of pattern mining algorithms also typically grow with increasing input datasets. The paper proposes an approach to overcome these limitations. The approach extracts information from trained support vector machines, in particular their support vectors and their relevance according to their coefficients. It uses the support vectors along with their coefficients as input to pattern mining algorithms able to handle weighted instances. Our experiments in the domain of graph mining and molecular graphs show that the resulting models are not significantly less accurate than models trained on the full datasets, yet require only a fraction of the time using much smaller sets of patterns.},
  keywords  = {cluster explanation, clustering, graph mining},
  pubstate  = {published},
  tppubtype = {conference}
}
2012
Seeland, Madeleine; Karwath, Andreas; Kramer, Stefan
A structural cluster kernel for learning on graphs Conference
The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012, ACM, New York, NY, USA, 2012, ISBN: 978-1-4503-1462-6.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, data mining, kernels, QSAR, support vector machines
% Conference paper: Seeland, Karwath and Kramer (2012), KDD 2012, structural cluster kernel for graphs.
% Uses the inproceedings entry type (conference is only an alias for it) and a double-hyphen page range.
% The organization field was dropped because it repeated the publisher (ACM), which renders the name twice.
% NOTE(review): the crossref target DBLP:conf/kdd/2012 is not defined in the visible part of this file;
% confirm the parent proceedings entry exists (and, for classic BibTeX, that it appears after this entry).
@inproceedings{seeland2012,
  title     = {A structural cluster kernel for learning on graphs},
  author    = {Madeleine Seeland and Andreas Karwath and Stefan Kramer},
  url       = {http://doi.acm.org/10.1145/2339530.2339614},
  doi       = {10.1145/2339530.2339614},
  isbn      = {978-1-4503-1462-6},
  year      = {2012},
  date      = {2012-08-12},
  booktitle = {The 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2012},
  pages     = {516--524},
  publisher = {ACM},
  address   = {New York, NY, USA},
  crossref  = {DBLP:conf/kdd/2012},
  abstract  = {In recent years, graph kernels have received considerable interest within the machine learning and data mining community. Here, we introduce a novel approach enabling kernel methods to utilize additional information hidden in the structural neighborhood of the graphs under consideration. Our novel structural cluster kernel (SCK) incorporates similarities induced by a structural clustering algorithm to improve state-of-the-art graph kernels. The approach taken is based on the idea that graph similarity can not only be described by the similarity between the graphs themselves, but also by the similarity they possess with respect to their structural neighborhood. We applied our novel kernel in a supervised and a semi-supervised setting to regression and classification problems on a number of real-world datasets of molecular graphs.
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.},
  keywords  = {cheminformatics, clustering, data mining, kernels, QSAR, support vector machines},
  pubstate  = {published},
  tppubtype = {conference}
}
Our results show that the structural cluster similarity information can indeed leverage the prediction performance of the base kernel, particularly when the dataset is structurally sparse and consequently structurally diverse. By additionally taking into account a large number of unlabeled instances the performance of the structural cluster kernel can further be improved.
Gütlein, Martin; Karwath, Andreas; Kramer, Stefan
CheS-Mapper - Chemical Space Mapping and Visualization in 3D Journal Article
In: Journal of Cheminformatics, vol. 4, pp. 7, 2012.
Abstract | Links | BibTeX | Tags: cheminformatics, clustering, dimensionality reduction, QSAR, visualization
% Journal article: Gütlein, Karwath and Kramer (2012), CheS-Mapper chemical space visualisation tool.
% The journal name is stored in full rather than abbreviated; the style decides on abbreviation.
% Braces around CheS-Mapper and 3D protect their capitalisation from styles that sentence-case titles.
% The pages field holds the single value 7 for volume 4, as in the original entry.
@article{gutlein2012,
  title     = {{CheS-Mapper} - Chemical Space Mapping and Visualization in {3D}},
  author    = {Martin Gütlein and Andreas Karwath and Stefan Kramer},
  url       = {http://dx.doi.org/10.1186/1758-2946-4-7},
  doi       = {10.1186/1758-2946-4-7},
  year      = {2012},
  date      = {2012-03-17},
  journal   = {Journal of Cheminformatics},
  volume    = {4},
  pages     = {7},
  abstract  = {Analyzing chemical datasets is a challenging task for scientific researchers in the field of chemoinformatics. It is important, yet difficult to understand the relationship between the structure of chemical compounds, their physico-chemical properties, and biological or toxic effects. To that respect, visualization tools can help to better comprehend the underlying correlations. Our recently developed 3D molecular viewer CheS-Mapper (Chemical Space Mapper) divides large datasets into clusters of similar compounds and consequently arranges them in 3D space, such that their spatial proximity reflects their similarity. The user can indirectly determine similarity, by selecting which features to employ in the process. The tool can use and calculate different kind of features, like structural fragments as well as quantitative chemical descriptors. These features can be highlighted within CheS-Mapper, which aids the chemist to better understand patterns and regularities and relate the observations to established scientific knowledge. As a final function, the tool can also be used to select and export specific subsets of a given dataset for further analysis.},
  keywords  = {cheminformatics, clustering, dimensionality reduction, QSAR, visualization},
  pubstate  = {published},
  tppubtype = {article}
}