2022
Slater, Luke T; Russell, Sophie; Makepeace, Silver; Carberry, Alexander; Karwath, Andreas; Williams, John A; Fanning, Hilary; Ball, Simon; Hoehndorf, Robert; Gkoutos, Georgios V
Evaluating semantic similarity methods for comparison of text-derived phenotype profiles Journal Article
In: BMC Medical Informatics and Decision Making, vol. 22, no. 1, 2022, ISSN: 1472-6947.
Abstract | Links | BibTeX | Tags: differential diagnosis, health data science, MIMIC-III, ontology, semantic similarity, semantic web
@comment{Journal article record (BMC Medical Informatics and Decision Making, vol. 22, no. 1, 2022). The citation key is left unchanged so existing citations still resolve. Author names use the unambiguous "Last, First" form.}
@article{6b64a2f714094b7abb9373ccb6d527e0,
  title = {Evaluating semantic similarity methods for comparison of text-derived phenotype profiles},
  author = {Slater, Luke T and Russell, Sophie and Makepeace, Silver and Carberry, Alexander and Karwath, Andreas and Williams, John A and Fanning, Hilary and Ball, Simon and Hoehndorf, Robert and Gkoutos, Georgios V},
  doi = {10.1186/s12911-022-01770-4},
  issn = {1472-6947},
  year = {2022},
  date = {2022-02-05},
  urldate = {2022-02-05},
  journal = {BMC Medical Informatics and Decision Making},
  volume = {22},
  number = {1},
  publisher = {Springer},
  abstract = {BACKGROUND: Semantic similarity is a valuable tool for analysis in biomedicine. When applied to phenotype profiles derived from clinical text, they have the capacity to enable and enhance 'patient-like me' analyses, automated coding, differential diagnosis, and outcome prediction. While a large body of work exists exploring the use of semantic similarity for multiple tasks, including protein interaction prediction, and rare disease differential diagnosis, there is less work exploring comparison of patient phenotype profiles for clinical tasks. Moreover, there are no experimental explorations of optimal parameters or better methods in the area. METHODS: We develop a platform for reproducible benchmarking and comparison of experimental conditions for patient phenotype similarity. Using the platform, we evaluate the task of ranking shared primary diagnosis from uncurated phenotype profiles derived from all text narrative associated with admissions in the medical information mart for intensive care (MIMIC-III). RESULTS: 300 semantic similarity configurations were evaluated, as well as one embedding-based approach. On average, measures that did not make use of an external information content measure performed slightly better, however the best-performing configurations when measured by area under receiver operating characteristic curve and Top Ten Accuracy used term-specificity and annotation-frequency measures. CONCLUSION: We identified and interpreted the performance of a large number of semantic similarity configurations for the task of classifying diagnosis from text-derived phenotype profiles in one setting. We also provided a basis for further research on other settings and related tasks in the area.},
  keywords = {differential diagnosis, health data science, MIMIC-III, ontology, semantic similarity, semantic web},
  pubstate = {published},
  tppubtype = {article}
}
2021
Slater, Luke T.; Williams, John A.; Karwath, Andreas; Fanning, Hilary; Ball, Simon; Schofield, Paul N.; Hoehndorf, Robert; Gkoutos, Georgios V.
Multi-faceted semantic clustering with text-derived phenotypes Journal Article
In: Computers in Biology and Medicine, 2021, ISSN: 0010-4825.
Abstract | Links | BibTeX | Tags: cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity
@comment{Journal article record (Computers in Biology and Medicine, 2021). The citation key is left unchanged so existing citations still resolve. The journal name uses the same spelling as the other records for this journal in this file.}
@article{14598610a01b4af99802a4b22e67a119,
  title = {Multi-faceted semantic clustering with text-derived phenotypes},
  author = {Slater, Luke T. and Williams, John A. and Karwath, Andreas and Fanning, Hilary and Ball, Simon and Schofield, Paul N. and Hoehndorf, Robert and Gkoutos, Georgios V.},
  doi = {10.1016/j.compbiomed.2021.104904},
  issn = {0010-4825},
  year = {2021},
  date = {2021-11-01},
  urldate = {2021-11-01},
  journal = {Computers in Biology and Medicine},
  publisher = {Elsevier},
  abstract = {Identification of ontology concepts in clinical narrative text enables the creation of phenotype profiles that can be associated with clinical entities, such as patients or drugs. Constructing patient phenotype profiles using formal ontologies enables their analysis via semantic similarity, in turn enabling the use of background knowledge in clustering or classification analyses. However, traditional semantic similarity approaches collapse complex relationships between patient phenotypes into a unitary similarity score for each pair of patients. Moreover, single scores may be based only on matching terms with the greatest information content (IC), ignoring other dimensions of patient similarity. This process necessarily leads to a loss of information in the resulting representation of patient similarity, and is especially apparent when using very large text-derived and highly multi-morbid phenotype profiles. Moreover, it renders finding a biological explanation for similarity very difficult; the black box problem. In this article, we explore the generation of multiple semantic similarity scores for patients based on different facets of their phenotypic manifestation, which we define through different sub-graphs in the Human Phenotype Ontology. We further present a new methodology for deriving sets of qualitative class descriptions for groups of entities described by ontology terms. Leveraging this strategy to obtain meaningful explanations for our semantic clusters alongside other evaluation techniques, we show that semantic clustering with ontology-derived facets enables the representation, and thus identification of, clinically relevant phenotype relationships not easily recoverable using overall clustering alone. In this way, we demonstrate the potential of faceted semantic clustering for gaining a deeper and more nuanced understanding of text-derived patient phenotypes.},
  keywords = {cluster explanation, clustering, health data science, MIMIC-III, ontology, semantic similarity},
  pubstate = {published},
  tppubtype = {article}
}
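Pendleton, Samantha C.; Slater, Luke T.; Karwath, Andreas; Gilbert, Rose M.; Davis, Nicola; Pesudovs, Konrad; Liu, Xiaoxuan; Denniston, Alastair K.; Gkoutos, Georgios V.; Braithwaite, Tasanee
Development and application of the ocular immune-mediated inflammatory diseases ontology enhanced with synonyms from online patient support forum conversation Journal Article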
In: Computers in Biology and Medicine, vol. 135, pp. 104542, 2021.
Links | BibTeX | Tags: health data science, NLP, semantic similarity
@comment{Journal article record (Computers in Biology and Medicine, vol. 135, 2021). The url field is the resolver link for the doi field, written with a literal slash rather than a percent-encoded one so that no raw percent sign reaches the typesetter.}
@article{Pendleton_2021,
  title = {Development and application of the ocular immune-mediated inflammatory diseases ontology enhanced with synonyms from online patient support forum conversation},
  author = {Pendleton, Samantha C. and Slater, Luke T. and Karwath, Andreas and Gilbert, Rose M. and Davis, Nicola and Pesudovs, Konrad and Liu, Xiaoxuan and Denniston, Alastair K. and Gkoutos, Georgios V. and Braithwaite, Tasanee},
  url = {https://doi.org/10.1016/j.compbiomed.2021.104542},
  doi = {10.1016/j.compbiomed.2021.104542},
  year = {2021},
  date = {2021-08-01},
  urldate = {2021-08-01},
  journal = {Computers in Biology and Medicine},
  volume = {135},
  pages = {104542},
  publisher = {Elsevier BV},
  keywords = {health data science, NLP, semantic similarity},
  pubstate = {published},
  tppubtype = {article}
}
Slater, Luke T; Karwath, Andreas; Williams, John A; Russell, Sophie; Makepeace, Silver; Carberry, Alexander; Hoehndorf, Robert; Gkoutos, Georgios V
Towards similarity-based differential diagnostics for common diseases Journal Article
In: Computers in Biology and Medicine, vol. 133, pp. 104360, 2021.
Links | BibTeX | Tags: artificial intelligence, health data science, NLP, semantic similarity
@comment{Journal article record (Computers in Biology and Medicine, vol. 133, 2021). The url field is the resolver link for the doi field, written with a literal slash rather than a percent-encoded one so that no raw percent sign reaches the typesetter.}
@article{Slater_2021,
  title = {Towards similarity-based differential diagnostics for common diseases},
  author = {Slater, Luke T and Karwath, Andreas and Williams, John A and Russell, Sophie and Makepeace, Silver and Carberry, Alexander and Hoehndorf, Robert and Gkoutos, Georgios V},
  url = {https://doi.org/10.1016/j.compbiomed.2021.104360},
  doi = {10.1016/j.compbiomed.2021.104360},
  year = {2021},
  date = {2021-06-01},
  urldate = {2021-06-01},
  journal = {Computers in Biology and Medicine},
  volume = {133},
  pages = {104360},
  publisher = {Elsevier BV},
  keywords = {artificial intelligence, health data science, NLP, semantic similarity},
  pubstate = {published},
  tppubtype = {article}
}