diff --git a/content/publication/ieee2024/index.md b/content/publication/ieee2024/index.md new file mode 100644 index 0000000..b2e0853 --- /dev/null +++ b/content/publication/ieee2024/index.md @@ -0,0 +1,69 @@ +--- +# Documentation: https://wowchemy.com/docs/managing-content/ + +title: "Large Language Models for Clinical Text Cleansing Enhance Medical Concept Normalization" +authors: [Akhila Abdulnazar, Roland Roller, Stefan Schulz, Markus Kreuzthaler] +date: 2024-09-25T00:00:00+00:00 +doi: "" + +# Schedule page publish date (NOT publication's date). +publishDate: 2024-09-25T00:00:00+00:00 + +# Publication type. +# Legend: 0 = Uncategorized; 1 = Conference paper; 2 = Journal article; +# 3 = Preprint / Working Paper; 4 = Report; 5 = Book; 6 = Book section; +# 7 = Thesis; 8 = Patent +publication_types: ["2"] + +# Publication name and optional abbreviated publication name. +publication: "IEEE Access" +publication_short: "" + +abstract: "Most clinical information is only available as free text. Large language models (LLMs) are increasingly applied to clinical data to streamline communication, enhance the accuracy of clinical documentation, and ultimately improve healthcare delivery. This study focuses on a corpus of anonymized clinical narratives in German. On the one hand it evaluates the use of ChatGPT for text cleaning, i.e., the automatic rephrasing of raw text into a more readable and standardized form, and on the other hand for retrieval-augmented generation (RAG). In both tasks, the final goal was medical concept normalization (MCN), i.e., the annotation of text segments with codes from a controlled vocabulary using natural language processing.We found that ChatGPT (GPT-4) significantly improves precision and recall compared to simple dictionary matching. For all scenarios, the importance of the underlying terminological basis was also demonstrated. Maximum F1 scores of 0.607, 0.735 and 0.754 (i.e, for top 1, 5 and 10 matches) were achieved through a pipeline including document cleaning, bi-encoder-based term matching based on a large domain dictionary linked to SNOMED CT, and finally re-ranking using RAG." + +# Summary. An optional shortened abstract. +summary: "" + +tags: [] +categories: [] +featured: false + +# Custom links (optional). +# Uncomment and edit lines below to show custom links. +# links: +# - name: Follow +# url: https://twitter.com +# icon_pack: fab +# icon: twitter + +url_pdf: "[https://journals.sagepub.com/doi/full/10.1177/20552076241288681](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10703053)" +url_code: +url_dataset: +url_poster: +url_project: +url_slides: +url_source: +url_video: + +# Featured image +# To use, add an image named `featured.jpg/png` to your page's folder. +# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight. +image: + caption: "" + focal_point: "" + preview_only: false + +# Associated Projects (optional). +# Associate this publication with one or more of your projects. +# Simply enter your project's folder or file name without extension. +# E.g. `internal-project` references `content/project/internal-project/index.md`. +# Otherwise, set `projects: []`. +projects: [] + +# Slides (optional). +# Associate this publication with Markdown slides. +# Simply enter your slide deck's filename without extension. +# E.g. `slides: "example"` references `content/slides/example/index.md`. +# Otherwise, set `slides: ""`. +slides: "" +---