Showing 2 changed files with 77 additions and 0 deletions.
@inproceedings{courtois2024symmetric,
  title={Symmetric Dot-Product Attention for Efficient Training of {BERT} Language Models},
  author={Courtois, Martin and Ostendorff, Malte and Hennig, Leonhard and Rehm, Georg},
  booktitle={Findings of the Association for Computational Linguistics: ACL 2024},
  year={2024},
  url={https://openreview.net/forum?id=FwcO26xy8E},
  eprint={2406.06366},
  archiveprefix={arXiv}
}
---
# Documentation: https://wowchemy.com/docs/managing-content/

title: "Symmetric Dot-Product Attention for Efficient Training of BERT Language Models"
authors: [Martin Courtois, Malte Ostendorff, Leonhard Hennig, Georg Rehm]
date: 2024-05-16T00:00:00+00:00
doi: ""

# Schedule page publish date (NOT publication's date).
publishDate: 2024-05-16T00:00:00+00:00

# Publication type.
# Legend: 0 = Uncategorized; 1 = Conference paper; 2 = Journal article;
# 3 = Preprint / Working Paper; 4 = Report; 5 = Book; 6 = Book section;
# 7 = Thesis; 8 = Patent
publication_types: ["1"]

# Publication name and optional abbreviated publication name.
publication: "Findings of the Association for Computational Linguistics: ACL 2024"
publication_short: "ACL 2024"

abstract: "Initially introduced as a machine translation model, the Transformer architecture has now become the foundation for modern deep learning architectures, with applications in a wide range of fields, from computer vision to natural language processing. Nowadays, to tackle increasingly complex tasks, Transformer-based models are stretched to enormous sizes, requiring ever larger training datasets and an unsustainable amount of compute resources. The ubiquitous nature of the Transformer and its core component, the attention mechanism, are thus prime targets for efficiency research. In this work, we propose an alternative compatibility function for the self-attention mechanism introduced by the Transformer architecture. This compatibility function exploits an overlap in the learned representations of the traditional scaled dot-product attention, leading to a symmetric dot-product attention with pairwise coefficients. When applied to the pre-training of BERT-like models, this new symmetric attention mechanism reaches a score of 79.36 on the GLUE benchmark against 78.74 for the traditional implementation, leads to a 6% reduction in the number of trainable parameters, and halves the number of training steps required before convergence."

# Summary. An optional shortened abstract.
summary: ""

tags: []
categories: []
featured: false

# Custom links (optional).
# Uncomment and edit lines below to show custom links.
# links:
# - name: Follow
#   url: https://twitter.com
#   icon_pack: fab
#   icon: twitter

url_pdf: "https://arxiv.org/pdf/2406.06366"
url_code: "https://github.com/mcrts/ACL2024-SymmetricAttentionBert"
url_dataset:
url_poster:
url_project:
url_slides:
url_source:
url_video:

# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
  caption: ""
  focal_point: ""
  preview_only: false

# Associated Projects (optional).
# Associate this publication with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `internal-project` references `content/project/internal-project/index.md`.
# Otherwise, set `projects: []`.
projects: []

# Slides (optional).
# Associate this publication with Markdown slides.
# Simply enter your slide deck's filename without extension.
# E.g. `slides: "example"` references `content/slides/example/index.md`.
# Otherwise, set `slides: ""`.
slides: ""
---
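
For readers curious how the symmetric compatibility function described in the abstract might look in code, below is a minimal sketch based only on our reading of the abstract, not the authors' implementation (see url_code above for that). The idea as sketched: query and key share a single projection, so the score matrix S = (XW)(XW)^T / sqrt(d) is symmetric by construction, and a learned pairwise coefficient then modulates each score. The function names, parameter shapes, and the exact placement of the pairwise term are all assumptions.

# Hypothetical sketch of symmetric dot-product attention with pairwise
# coefficients, inferred from the abstract; the paper's actual formulation
# lives in the linked repository.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def symmetric_attention(x, w_shared, w_value, pairwise_coeff):
    """Self-attention whose score matrix is symmetric by construction.

    x:              (seq_len, d_model) input representations
    w_shared:       (d_model, d_head) one projection replacing separate W_Q / W_K
    w_value:        (d_model, d_head) value projection, as in standard attention
    pairwise_coeff: (seq_len, seq_len) learned coefficients modulating each score
                    (assumed placement; the paper may define this differently)
    """
    h = x @ w_shared                                   # shared Q = K projection
    scores = (h @ h.T) / np.sqrt(h.shape[-1])          # symmetric: scores == scores.T
    scores = pairwise_coeff * scores                   # pairwise modulation (assumption)
    return softmax(scores, axis=-1) @ (x @ w_value)    # usual weighted sum of values

# Tiny smoke test with random weights.
rng = np.random.default_rng(0)
seq, d_model, d_head = 4, 8, 8
x = rng.standard_normal((seq, d_model))
out = symmetric_attention(
    x,
    w_shared=rng.standard_normal((d_model, d_head)),
    w_value=rng.standard_normal((d_model, d_head)),
    pairwise_coeff=np.ones((seq, seq)),
)
print(out.shape)  # (4, 8)

If the shared projection indeed replaces separate W_Q and W_K matrices, roughly one projection matrix per attention layer is saved, which is consistent in spirit with the ~6% parameter reduction the abstract reports.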