refactor and fixing test isolation issues
djsaunde committed Dec 21, 2024
1 parent 98368be commit 938b627
Showing 7 changed files with 156 additions and 149 deletions.
4 changes: 3 additions & 1 deletion src/axolotl/cli/integrations/convert_diff_transformer.py
@@ -1,4 +1,5 @@
"""CLI to convert a transformers model's attns to diff attns."""
"""CLI to convert a transformers model's attention layers to differential attention layers."""

import logging
import warnings
from pathlib import Path
@@ -127,6 +128,7 @@ def convert_diff_transformer(cfg, cli_args, config_path):
else:
modified_cfg["plugins"] = [plugin_class]

# Write out the updated axolotl config while preserving original ordering / formatting
dump_yaml_preserved_order(
data=modified_cfg,
reference_yaml_path=config_path,
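For context, a minimal sketch of the plugin-registration step shown in the hunk above; the dotted plugin path and the stand-in config dict are placeholders, not the names the integration actually registers:

```python
# Sketch of the plugins-list update; plugin_class below is a placeholder path.
plugin_class = "axolotl.integrations.diff_transformer.DifferentialTransformerPlugin"

modified_cfg = {"base_model": "some/model", "plugins": None}  # stand-in for the loaded config
if modified_cfg.get("plugins"):
    modified_cfg["plugins"].append(plugin_class)  # keep any plugins already configured
else:
    modified_cfg["plugins"] = [plugin_class]      # otherwise start the list
```

The updated config is then written back with `dump_yaml_preserved_order`, which keeps the key ordering of the original YAML file (see the `src/axolotl/utils/yaml.py` changes below).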
18 changes: 5 additions & 13 deletions src/axolotl/common/cli.py
@@ -12,14 +12,12 @@
from axolotl.utils.models import load_model, load_tokenizer

configure_logging()
LOG = logging.getLogger("axolotl.common.cli")
LOG = logging.getLogger(__name__)


@dataclass
class PreprocessCliArgs:
"""
dataclass with arguments for preprocessing only
"""
"""dataclass with arguments for preprocessing only"""

debug: bool = field(default=False)
debug_text_only: bool = field(default=False)
@@ -30,9 +28,7 @@ class PreprocessCliArgs:

@dataclass
class TrainerCliArgs:
"""
dataclass with various non-training arguments
"""
"""dataclass with various non-training arguments"""

debug: bool = field(default=False)
debug_text_only: bool = field(default=False)
@@ -45,9 +41,7 @@ class TrainerCliArgs:

@dataclass
class EvaluateCliArgs:
"""
dataclass with various evaluation arguments
"""
"""dataclass with various evaluation arguments"""

debug: bool = field(default=False)
debug_text_only: bool = field(default=False)
@@ -56,9 +50,7 @@ class EvaluateCliArgs:

@dataclass
class ConvertDiffTransformerCliArgs:
"""
dataclass with arguments for convert-diff-transformer CLI
"""
"""dataclass with arguments for convert-diff-transformer CLI"""

debug: bool = field(default=False)
zero_init: bool = field(default=False)
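These CLI-args containers are plain `dataclasses` of boolean flags. One common way such containers are parsed from the command line (a sketch only, not necessarily how axolotl's CLI wires them up) is via `transformers.HfArgumentParser`; the example below uses only the two fields visible in the diff:

```python
from dataclasses import dataclass, field

from transformers import HfArgumentParser


@dataclass
class ConvertDiffTransformerCliArgs:
    """dataclass with arguments for convert-diff-transformer CLI"""

    debug: bool = field(default=False)
    zero_init: bool = field(default=False)


parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
(cli_args,) = parser.parse_args_into_dataclasses(args=["--zero_init", "true"])
print(cli_args.debug, cli_args.zero_init)  # False True
```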
10 changes: 7 additions & 3 deletions src/axolotl/integrations/diff_transformer/convert.py
@@ -98,9 +98,13 @@ def convert_module(module):

# Iterate through module children, convert any attn layers to diff attn
for name, child in module.named_children():
if isinstance(child, tuple(ATTENTION_MAPPING.keys())):
# Choose appropriate differential attention class
attention_class = ATTENTION_MAPPING[type(child)]
child_class_name = type(child).__name__
if child_class_name in [k.__name__ for k in ATTENTION_MAPPING]:
# Find matching attention class by name
for orig_class, diff_class in ATTENTION_MAPPING.items():
if orig_class.__name__ == child_class_name:
attention_class = diff_class
break

layer_type = type(child).__name__
logger.info(
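The switch from `isinstance` checks to matching on `__name__` is what makes the conversion robust when modules are reloaded between tests: the class objects change identity, but their names do not. A standalone sketch of the lookup, using a placeholder mapping rather than the real `ATTENTION_MAPPING`:

```python
from typing import Optional, Type

from torch import nn

# Placeholder mapping for illustration; the real ATTENTION_MAPPING pairs the model's
# attention classes with their differential-attention replacements.
ATTENTION_MAPPING = {nn.MultiheadAttention: nn.Identity}


def find_replacement(child: nn.Module) -> Optional[Type[nn.Module]]:
    """Return the replacement class whose original shares the child's class name."""
    child_class_name = type(child).__name__
    for orig_class, diff_class in ATTENTION_MAPPING.items():
        if orig_class.__name__ == child_class_name:
            return diff_class
    return None
```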
3 changes: 2 additions & 1 deletion src/axolotl/integrations/diff_transformer/diff_attn.py
@@ -21,7 +21,6 @@


def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
"""torch.repeat_interleave(x, dim=1, repeats=n_rep)"""
batch_size, n_kv_heads, slen, head_dim = x.shape
if n_rep == 1:
return x
@@ -249,6 +248,7 @@ def forward(
class LlamaDifferentialSdpaAttention(DifferentialAttentionBase):
"""SDPA-based implementation of differential attention."""

# pylint: disable=duplicate-code
def forward(
self,
hidden_states: torch.Tensor,
@@ -312,6 +312,7 @@ def forward(
class LlamaDifferentialFlashAttention2(DifferentialAttentionBase):
"""Flash Attention 2-based implementation of differential attention."""

# pylint: disable=duplicate-code
def forward(
self,
hidden_states: torch.Tensor,
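The removed one-line docstring on `repeat_kv` pointed at `torch.repeat_interleave`; for readers of the diff, the usual expand/reshape form of this helper (a sketch consistent with the visible lines, not a verbatim copy of the file) looks like:

```python
import torch


def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
    # Equivalent to torch.repeat_interleave(x, dim=1, repeats=n_rep): repeat each
    # KV head n_rep times so grouped-query KV heads line up with the query heads.
    batch_size, n_kv_heads, slen, head_dim = x.shape
    if n_rep == 1:
        return x
    return (
        x[:, :, None, :, :]
        .expand(batch_size, n_kv_heads, n_rep, slen, head_dim)
        .reshape(batch_size, n_kv_heads * n_rep, slen, head_dim)
    )
```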
8 changes: 7 additions & 1 deletion src/axolotl/utils/yaml.py
@@ -84,6 +84,11 @@ class OrderedDumper(yaml.SafeDumper):
"""Custom YAML dumper that maintains dictionary order."""


def represent_none(self, _):
"""Represent None values as empty fields."""
return self.represent_scalar("tag:yaml.org,2002:null", "")


def ordered_dict_representer(dumper: OrderedDumper, data: Dict) -> Any:
"""Custom representer for dictionaries that maintains order."""
return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
@@ -121,7 +126,8 @@ def dump_yaml_preserved_order(
# Reorder the data
ordered_data = reorder_dict(data, tracker.structure)

# Register the custom representer
# Register the custom representers
OrderedDumper.add_representer(type(None), represent_none)
OrderedDumper.add_representer(dict, ordered_dict_representer)
OrderedDumper.add_representer(OrderedDict, ordered_dict_representer)

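Registering `represent_none` alongside the ordered-dict representer is what keeps unset config keys rendering as empty fields rather than `null`. A self-contained sketch of the same registration against a plain `SafeDumper` subclass (the example data is made up):

```python
import yaml


class OrderedDumper(yaml.SafeDumper):
    """Custom YAML dumper that maintains dictionary order."""


def represent_none(dumper, _):
    """Represent None values as empty fields."""
    return dumper.represent_scalar("tag:yaml.org,2002:null", "")


def ordered_dict_representer(dumper, data):
    """Custom representer for dictionaries that maintains order."""
    return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())


OrderedDumper.add_representer(type(None), represent_none)
OrderedDumper.add_representer(dict, ordered_dict_representer)

print(yaml.dump({"val_set_size": None, "plugins": ["x"]}, Dumper=OrderedDumper))
# val_set_size is written as an empty field instead of "null", and key order is preserved.
```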
4 changes: 2 additions & 2 deletions tests/e2e/integrations/convert_diff_transformer/conftest.py
@@ -4,7 +4,7 @@
from click.testing import CliRunner


@pytest.fixture()
@pytest.fixture(scope="class")
def base_config():
"""Basic config for testing."""
return {
@@ -26,6 +26,6 @@ def base_config():
}


@pytest.fixture
@pytest.fixture(scope="class")
def cli_runner():
return CliRunner()
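
Moving these fixtures to `scope="class"` means pytest builds them once per test class and shares that instance across the class's tests, instead of rebuilding them per test function. A minimal illustration (the test class and method names below are made up):

```python
import pytest
from click.testing import CliRunner


@pytest.fixture(scope="class")
def cli_runner():
    return CliRunner()


class TestConvertDiffTransformer:
    """Both tests receive the same CliRunner instance, built once for the class."""

    _seen_ids = []

    def test_first(self, cli_runner):
        self._seen_ids.append(id(cli_runner))

    def test_second(self, cli_runner):
        assert id(cli_runner) == self._seen_ids[0]  # reused, not rebuilt per test
```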
