diff --git a/content/post/inlg2024/index.md b/content/post/inlg2024/index.md new file mode 100644 index 00000000..6557678f --- /dev/null +++ b/content/post/inlg2024/index.md @@ -0,0 +1,35 @@ +--- +# Documentation: https://wowchemy.com/docs/managing-content/ + +title: "Two papers accepted to INLG 2024" +subtitle: "" +summary: "" +authors: [] +tags: [] +categories: [] +date: 2024-08-22T09:24:01+02:00 +lastmod: 2024-08-22T09:24:01+02:00 +featured: false +draft: false + +# Featured image +# To use, add an image named `featured.jpg/png` to your page's folder. +# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight. +image: + caption: "" + focal_point: "" + preview_only: false + +# Projects (optional). +# Associate this post with one or more of your projects. +# Simply enter your project's folder or file name without extension. +# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`. +# Otherwise, set `projects = []`. +projects: [] +--- + +Two papers from DFKI NLP researchers have been accepted at the [17th International Natural Language Generation Conference (INLG 2024)](https://inlg2024.github.io/) that will take place September 23-27 in Tokyo, Japan. One paper presents a case study on using large language models to produce customer-friendly help page contents from more technical text, and includes a text quality evaluation by experienced editors. The other paper analyzes echo chamber effects in LLM-based chatbots in political conversations. 
+ +{{< cite page="/publication/gabryszak-etal-2024-enhancing" view="4" >}} +{{< cite page="/publication/bleick-etal-2024-german" view="4" >}} + diff --git a/content/project/TRAILS/featured.png b/content/project/TRAILS/featured.png new file mode 100644 index 00000000..01ee05a3 Binary files /dev/null and b/content/project/TRAILS/featured.png differ diff --git a/content/project/TRAILS/index.md b/content/project/TRAILS/index.md new file mode 100644 index 00000000..9e9d7dc9 --- /dev/null +++ b/content/project/TRAILS/index.md @@ -0,0 +1,44 @@ +--- +# Documentation: https://wowchemy.com/docs/managing-content/ + +title: "TRAILS - Trustworthy and Inclusive Machines" +summary: "Natural language processing (NLP) has demonstrated impressive performance in some human tasks. To achieve such performance, current neural models need to be pre-trained on huge amounts of raw text data. This dependence on uncurated data has at least four indirect and unintended consequences: 1) Uncurated data tends to be linguistically and culturally non-diverse due to the statistical dominance of major languages and dialects in online texts (English vs. North Frisian, US English vs. UK English, etc.). 2) Pre-trained neural models such as the ubiquitous pre-trained language models (PLM) reproduce the features present in the data, including human biases. 3) Rare phenomena (or languages) in the 'long tail' are often not sufficiently taken into account in model evaluation, leading to an underestimation of model performance, especially in real-world application scenarios. 4) The focus on achieving state-of-the-art results through the use of transfer learning with giant PLMs such as GPT4 or mT5 often underestimates alternative methods that are more accessible, efficient and sustainable. 
+ +As inclusion and trust are undermined by these problems, in TRAILS we focus on three main research directions to address such problems: (i) inclusion of underrepresented languages and cultures through multilingual and culturally sensitive NLP, (ii) robustness and fairness with respect to long-tail phenomena and classes and 'trustworthy content', and (iii) robust and efficient NLP models that enable training and deployment of models for (i) and (ii). We also partially address economic inequality by aiming for more efficient models (objective (iii)), which directly translates into a lower resource/cost footprint." + +authors: [leonhard-hennig] +tags: [Bias, Evaluation, Large Language Models] +categories: [] +date: 2024-08-01T11:16:31+01:00 + +# Optional external URL for project (replaces project detail page). +external_link: "https://trails-dfki.github.io/" + +# Featured image +# To use, add an image named `featured.jpg/png` to your page's folder. +# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight. +image: + caption: "" + focal_point: "Smart" + preview_only: false + +# Custom links (optional). +# Uncomment and edit lines below to show custom links. +# links: +# - name: Follow +# url: https://twitter.com +# icon_pack: fab +# icon: twitter + +url_code: "" +url_pdf: "" +url_slides: "" +url_video: "" + +# Slides (optional). +# Associate this project with Markdown slides. +# Simply enter your slide deck's filename without extension. +# E.g. `slides = "example-slides"` references `content/slides/example-slides.md`. +# Otherwise, set `slides = ""`. 
+slides: "" +--- diff --git a/content/publication/bleick-etal-2024-german/cite.bib b/content/publication/bleick-etal-2024-german/cite.bib new file mode 100644 index 00000000..5cc9ea64 --- /dev/null +++ b/content/publication/bleick-etal-2024-german/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{bleick-etal-2024-german, + abstract = {}, + address = {Tokyo, Japan}, + author = {Bleick, Maximilian and +Feldhus, Nils and +Burchardt, Aljoscha and +M{\"o}ller, Sebastian}, + booktitle = {Proceedings of the 17th International Natural Language Generation Conference}, + doi = {}, + month = {September}, + pages = {}, + publisher = {Association for Computational Linguistics}, + title = {German Voter Personas can Radicalize LLM Chatbots via the Echo Chamber Effect}, + url = {}, + year = {2024} +} diff --git a/content/publication/bleick-etal-2024-german/index.md b/content/publication/bleick-etal-2024-german/index.md new file mode 100644 index 00000000..6dca4ada --- /dev/null +++ b/content/publication/bleick-etal-2024-german/index.md @@ -0,0 +1,19 @@ +--- +title: 'German Voter Personas can Radicalize LLM Chatbots via the Echo Chamber Effect' +authors: +- Maximilian Bleick +- Nils Feldhus +- Aljoscha Burchardt +- Sebastian Möller +date: '2024-09-21' +publishDate: '2024-08-21T13:10:18.998131Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 17th International Natural Language Generation Conference*' +abstract: +links: +- name: URL + url: '' +projects: +- Trails +--- diff --git a/content/publication/gabryszak-etal-2024-enhancing/cite.bib b/content/publication/gabryszak-etal-2024-enhancing/cite.bib new file mode 100644 index 00000000..77888743 --- /dev/null +++ b/content/publication/gabryszak-etal-2024-enhancing/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{gabryszak-etal-2024-enhancing, + abstract = {In this paper, we investigate the use of large language models (LLMs) to enhance the editorial process of rewriting customer help pages. 
We introduce a German-language dataset comprising Frequently Asked Question-Answer pairs, presenting both raw drafts and their revisions by professional editors. On this dataset, we evaluate the performance of four large language models (LLM) through diverse prompts tailored for the rewriting task. We conduct automatic evaluations of content and text quality using ROUGE, BERTScore, and ChatGPT. Furthermore, we let professional editors assess the helpfulness of automatically generated FAQ revisions for editorial enhancement. Our findings indicate that LLMs can produce FAQ reformulations beneficial to the editorial process. We observe minimal performance discrepancies among LLMs for this task, and our survey on helpfulness underscores the subjective nature of editors' perspectives on editorial refinement.}, + address = {Tokyo, Japan}, + author = {Gabryszak, Aleksandra and +R{\"o}der, Daniel and +Binder, Arne and +Sion, Luca and +Hennig, Leonhard}, + booktitle = {Proceedings of the 17th International Natural Language Generation Conference}, + doi = {}, + month = {September}, + pages = {}, + publisher = {Association for Computational Linguistics}, + title = {Enhancing Editorial Tasks: A Case Study on Rewriting Customer Help Page Contents Using Large Language Models}, + url = {}, + year = {2024} +} diff --git a/content/publication/gabryszak-etal-2024-enhancing/index.md b/content/publication/gabryszak-etal-2024-enhancing/index.md new file mode 100644 index 00000000..251d43c1 --- /dev/null +++ b/content/publication/gabryszak-etal-2024-enhancing/index.md @@ -0,0 +1,32 @@ +--- +title: 'Enhancing Editorial Tasks: A Case Study on Rewriting Customer Help Page Contents + Using Large Language Models' +authors: +- Aleksandra Gabryszak +- Daniel Röder +- Arne Binder +- Luca Sion +- Leonhard Hennig +date: '2024-09-01' +publishDate: '2024-08-21T13:10:18.998131Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 17th International Natural Language Generation 
Conference*' +abstract: In this paper, we investigate the use of large language models (LLMs) to + enhance the editorial process of rewriting customer help pages. We introduce a German-language + dataset comprising Frequently Asked Question-Answer pairs, presenting both raw drafts + and their revisions by professional editors. On this dataset, we evaluate the performance + of four large language models (LLM) through diverse prompts tailored for the rewriting + task. We conduct automatic evaluations of content and text quality using ROUGE, + BERTScore, and ChatGPT. Furthermore, we let professional editors assess the helpfulness + of automatically generated FAQ revisions for editorial enhancement. Our findings + indicate that LLMs can produce FAQ reformulations beneficial to the editorial process. + We observe minimal performance discrepancies among LLMs for this task, and our survey + on helpfulness underscores the subjective nature of editors' perspectives on editorial + refinement. +links: +- name: URL + url: '' +projects: +- Trails +---