
Commit

ruff formatting
benjaminye committed Apr 3, 2024
1 parent 36688e0 commit c9d0bc8
Showing 10 changed files with 51 additions and 154 deletions.
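The hunks below collapse black-style multi-line wrapping into single lines, which is what ruff's formatter does when a statement fits within the configured line length. As a rough sketch of how such a pass is typically run — the config value and commands are illustrative assumptions, not taken from this repository or this commit:

# pyproject.toml (hypothetical; the repo's actual line-length setting is not shown here,
# but the collapsed lines suggest something well above the default 88)
[tool.ruff]
line-length = 120

# then, from the repository root:
ruff check --fix .   # apply lint autofixes
ruff format .        # reformat code in place
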
8 changes: 2 additions & 6 deletions llmtune/data/dataset_generator.py
@@ -61,12 +61,8 @@ def _format_one_prompt(self, example, is_test: bool = False):
         return example
 
     def _format_prompts(self):
-        self.dataset["train"] = self.dataset["train"].map(
-            partial(self._format_one_prompt, is_test=False)
-        )
-        self.dataset["test"] = self.dataset["test"].map(
-            partial(self._format_one_prompt, is_test=True)
-        )
+        self.dataset["train"] = self.dataset["train"].map(partial(self._format_one_prompt, is_test=False))
+        self.dataset["test"] = self.dataset["test"].map(partial(self._format_one_prompt, is_test=True))
 
     def get_dataset(self) -> Tuple[Dataset, Dataset]:
         self._train_test_split()
4 changes: 1 addition & 3 deletions llmtune/data/ingestor.py
@@ -13,9 +13,7 @@ def get_ingestor(data_type: str):
     elif data_type == "huggingface":
         return HuggingfaceIngestor
     else:
-        raise ValueError(
-            f"'type' must be one of 'json', 'csv', or 'huggingface', you have {data_type}"
-        )
+        raise ValueError(f"'type' must be one of 'json', 'csv', or 'huggingface', you have {data_type}")
 
 
 class Ingestor(ABC):
4 changes: 1 addition & 3 deletions llmtune/finetune/lora.py
@@ -94,9 +94,7 @@ def _inject_lora(self):
         self.model = get_peft_model(self.model, self._lora_config)
 
         if not self.config.accelerate:
-            self.optimizer = bnb.optim.Adam8bit(
-                self.model.parameters(), lr=self._training_args.learning_rate
-            )
+            self.optimizer = bnb.optim.Adam8bit(self.model.parameters(), lr=self._training_args.learning_rate)
             self.lr_scheduler = torch.optim.lr_scheduler.ConstantLR(self.optimizer)
         if self.config.accelerate:
             self.model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
30 changes: 7 additions & 23 deletions llmtune/inference/lora.py
@@ -33,9 +33,7 @@ def __init__(
         self.device_map = self.config.model.device_map
         self._weights_path = dir_helper.save_paths.weights
 
-        self.model, self.tokenizer = self._get_merged_model(
-            dir_helper.save_paths.weights
-        )
+        self.model, self.tokenizer = self._get_merged_model(dir_helper.save_paths.weights)
 
     def _get_merged_model(self, weights_path: str):
         # purge VRAM
@@ -45,20 +43,14 @@ def _get_merged_model(self, weights_path: str):
         dtype = (
             torch.float16
             if self.config.training.training_args.fp16
-            else (
-                torch.bfloat16
-                if self.config.training.training_args.bf16
-                else torch.float32
-            )
+            else (torch.bfloat16 if self.config.training.training_args.bf16 else torch.float32)
         )
 
         self.model = AutoPeftModelForCausalLM.from_pretrained(
             weights_path,
             torch_dtype=dtype,
             device_map=self.device_map,
-            quantization_config=(
-                BitsAndBytesConfig(**self.config.model.bitsandbytes.model_dump())
-            ),
+            quantization_config=(BitsAndBytesConfig(**self.config.model.bitsandbytes.model_dump())),
         )
 
         """TODO: figure out multi-gpu
@@ -68,9 +60,7 @@ def _get_merged_model(self, weights_path: str):
 
         model = self.model.merge_and_unload()
 
-        tokenizer = AutoTokenizer.from_pretrained(
-            self._weights_path, device_map=self.device_map
-        )
+        tokenizer = AutoTokenizer.from_pretrained(self._weights_path, device_map=self.device_map)
 
         return model, tokenizer
 
@@ -81,9 +71,7 @@ def infer_all(self):
 
         # inference loop
         for idx, (prompt, label) in enumerate(zip(prompts, labels)):
-            RichUI.inference_ground_truth_display(
-                f"Generating on test set: {idx+1}/{len(prompts)}", prompt, label
-            )
+            RichUI.inference_ground_truth_display(f"Generating on test set: {idx+1}/{len(prompts)}", prompt, label)
 
             try:
                 result = self.infer_one(prompt)
@@ -101,9 +89,7 @@ def infer_all(self):
                 writer.writerow(row)
 
     def infer_one(self, prompt: str) -> str:
-        input_ids = self.tokenizer(
-            prompt, return_tensors="pt", truncation=True
-        ).input_ids.cuda()
+        input_ids = self.tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
 
         # stream processor
         streamer = TextIteratorStreamer(
@@ -113,9 +99,7 @@ def infer_one(self, prompt: str) -> str:
             timeout=60,  # 60 sec timeout for generation; to handle OOM errors
         )
 
-        generation_kwargs = dict(
-            input_ids=input_ids, streamer=streamer, **self.config.inference.model_dump()
-        )
+        generation_kwargs = dict(input_ids=input_ids, streamer=streamer, **self.config.inference.model_dump())
 
         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()
71 changes: 19 additions & 52 deletions llmtune/pydantic_models/config_model.py
@@ -7,20 +7,15 @@
 # TODO: Refactor this into multiple files...
 HfModelPath = str
 
 
 class QaConfig(BaseModel):
-    llm_tests: Optional[List[str]] = Field([], description = "list of tests that needs to be connected")
+    llm_tests: Optional[List[str]] = Field([], description="list of tests that needs to be connected")
 
+
 class DataConfig(BaseModel):
-    file_type: Literal["json", "csv", "huggingface"] = Field(
-        None, description="File type"
-    )
-    path: Union[FilePath, HfModelPath] = Field(
-        None, description="Path to the file or HuggingFace model"
-    )
-    prompt: str = Field(
-        None, description="Prompt for the model. Use {} brackets for column name"
-    )
+    file_type: Literal["json", "csv", "huggingface"] = Field(None, description="File type")
+    path: Union[FilePath, HfModelPath] = Field(None, description="Path to the file or HuggingFace model")
+    prompt: str = Field(None, description="Prompt for the model. Use {} brackets for column name")
     prompt_stub: str = Field(
         None,
         description="Stub for the prompt; this is injected during training. Use {} brackets for column name",
@@ -47,9 +42,7 @@ class DataConfig(BaseModel):
 
 
 class BitsAndBytesConfig(BaseModel):
-    load_in_8bit: Optional[bool] = Field(
-        False, description="Enable 8-bit quantization with LLM.int8()"
-    )
+    load_in_8bit: Optional[bool] = Field(False, description="Enable 8-bit quantization with LLM.int8()")
     llm_int8_threshold: Optional[float] = Field(
         6.0, description="Outlier threshold for outlier detection in 8-bit quantization"
     )
@@ -60,9 +53,7 @@ class DataConfig(BaseModel):
         False,
         description="Enable splitting model parts between int8 on GPU and fp32 on CPU",
     )
-    llm_int8_has_fp16_weight: Optional[bool] = Field(
-        False, description="Run LLM.int8() with 16-bit main weights"
-    )
+    llm_int8_has_fp16_weight: Optional[bool] = Field(False, description="Run LLM.int8() with 16-bit main weights")
 
     load_in_4bit: Optional[bool] = Field(
         True,
@@ -85,14 +76,10 @@ class ModelConfig(BaseModel):
         "NousResearch/Llama-2-7b-hf",
         description="Path to the model (huggingface repo or local path)",
     )
-    device_map: Optional[str] = Field(
-        "auto", description="device onto which to load the model"
-    )
+    device_map: Optional[str] = Field("auto", description="device onto which to load the model")
 
     quantize: Optional[bool] = Field(False, description="Flag to enable quantization")
-    bitsandbytes: BitsAndBytesConfig = Field(
-        None, description="Bits and Bytes configuration"
-    )
+    bitsandbytes: BitsAndBytesConfig = Field(None, description="Bits and Bytes configuration")
 
     # @validator("hf_model_ckpt")
     # def validate_model(cls, v, **kwargs):
@@ -115,22 +102,12 @@ def set_device_map_to_none(cls, v, values, **kwargs):
 
 class LoraConfig(BaseModel):
     r: Optional[int] = Field(8, description="Lora rank")
-    task_type: Optional[str] = Field(
-        "CAUSAL_LM", description="Base Model task type during training"
-    )
+    task_type: Optional[str] = Field("CAUSAL_LM", description="Base Model task type during training")
 
-    lora_alpha: Optional[int] = Field(
-        16, description="The alpha parameter for Lora scaling"
-    )
-    bias: Optional[str] = Field(
-        "none", description="Bias type for Lora. Can be 'none', 'all' or 'lora_only'"
-    )
-    lora_dropout: Optional[float] = Field(
-        0.1, description="The dropout probability for Lora layers"
-    )
-    target_modules: Optional[List[str]] = Field(
-        None, description="The names of the modules to apply Lora to"
-    )
+    lora_alpha: Optional[int] = Field(16, description="The alpha parameter for Lora scaling")
+    bias: Optional[str] = Field("none", description="Bias type for Lora. Can be 'none', 'all' or 'lora_only'")
+    lora_dropout: Optional[float] = Field(0.1, description="The dropout probability for Lora layers")
+    target_modules: Optional[List[str]] = Field(None, description="The names of the modules to apply Lora to")
     fan_in_fan_out: Optional[bool] = Field(
         False,
         description="Flag to indicate if the layer to replace stores weight like (fan_in, fan_out)",
@@ -139,9 +116,7 @@ class LoraConfig(BaseModel):
         None,
         description="List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint",
     )
-    layers_to_transform: Optional[Union[List[int], int]] = Field(
-        None, description="The layer indexes to transform"
-    )
+    layers_to_transform: Optional[Union[List[int], int]] = Field(None, description="The layer indexes to transform")
     layers_pattern: Optional[str] = Field(None, description="The layer pattern name")
     # rank_pattern: Optional[Dict[str, int]] = Field(
     #     {}, description="The mapping from layer names or regexp expression to ranks"
@@ -154,15 +129,9 @@ class LoraConfig(BaseModel):
 # TODO: Get comprehensive Args!
 class TrainingArgs(BaseModel):
     num_train_epochs: Optional[int] = Field(1, description="Number of training epochs")
-    per_device_train_batch_size: Optional[int] = Field(
-        1, description="Batch size per training device"
-    )
-    gradient_accumulation_steps: Optional[int] = Field(
-        1, description="Number of steps for gradient accumulation"
-    )
-    gradient_checkpointing: Optional[bool] = Field(
-        True, description="Flag to enable gradient checkpointing"
-    )
+    per_device_train_batch_size: Optional[int] = Field(1, description="Batch size per training device")
+    gradient_accumulation_steps: Optional[int] = Field(1, description="Number of steps for gradient accumulation")
+    gradient_checkpointing: Optional[bool] = Field(True, description="Flag to enable gradient checkpointing")
     optim: Optional[str] = Field("paged_adamw_32bit", description="Optimizer")
     logging_steps: Optional[int] = Field(100, description="Number of logging steps")
     learning_rate: Optional[float] = Field(2.0e-4, description="Learning rate")
@@ -171,9 +140,7 @@ class TrainingArgs(BaseModel):
     fp16: Optional[bool] = Field(False, description="Flag to enable fp16")
     max_grad_norm: Optional[float] = Field(0.3, description="Maximum gradient norm")
     warmup_ratio: Optional[float] = Field(0.03, description="Warmup ratio")
-    lr_scheduler_type: Optional[str] = Field(
-        "constant", description="Learning rate scheduler type"
-    )
+    lr_scheduler_type: Optional[str] = Field("constant", description="Learning rate scheduler type")
 
 
 # TODO: Get comprehensive Args!
17 changes: 4 additions & 13 deletions llmtune/qa/generics.py
@@ -14,9 +14,7 @@ def test_name(self) -> str:
         pass
 
     @abstractmethod
-    def get_metric(
-        self, prompt: str, grount_truth: str, model_pred: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, grount_truth: str, model_pred: str) -> Union[float, int, bool]:
         pass
 
 
@@ -45,7 +43,6 @@ def __init__(
         ground_truths: List[str],
         model_preds: List[str],
     ) -> None:
-
         self.tests = tests
         self.prompts = prompts
         self.ground_truths = ground_truths
@@ -57,9 +54,7 @@ def run_tests(self) -> Dict[str, List[Union[float, int, bool]]]:
         test_results = {}
         for test in zip(self.tests):
             metrics = []
-            for prompt, ground_truth, model_pred in zip(
-                self.prompts, self.ground_truths, self.model_preds
-            ):
+            for prompt, ground_truth, model_pred in zip(self.prompts, self.ground_truths, self.model_preds):
                 metrics.append(test.get_metric(prompt, ground_truth, model_pred))
             test_results[test.test_name] = metrics
 
@@ -74,14 +69,10 @@ def print_test_results(self):
         result_dictionary = self.test_results()
         column_data = {key: list(result_dictionary[key]) for key in result_dictionary}
         mean_values = {key: statistics.mean(column_data[key]) for key in column_data}
-        median_values = {
-            key: statistics.median(column_data[key]) for key in column_data
-        }
+        median_values = {key: statistics.median(column_data[key]) for key in column_data}
         stdev_values = {key: statistics.stdev(column_data[key]) for key in column_data}
         # Use the RichUI class to display the table
-        RichUI.display_table(
-            result_dictionary, mean_values, median_values, stdev_values
-        )
+        RichUI.display_table(result_dictionary, mean_values, median_values, stdev_values)
 
     def save_test_results(self, path: str):
         # TODO: save these!
40 changes: 10 additions & 30 deletions llmtune/qa/qa_tests.py
@@ -27,9 +27,7 @@ class LengthTest(LLMQaTest):
     def test_name(self) -> str:
         return "summary_length"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> Union[float, int, bool]:
         return abs(len(ground_truth) - len(model_prediction))
 
 
@@ -39,9 +37,7 @@ class JaccardSimilarityTest(LLMQaTest):
     def test_name(self) -> str:
         return "jaccard_similarity"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> Union[float, int, bool]:
         set_ground_truth = set(ground_truth.lower())
         set_model_prediction = set(model_prediction.lower())
 
@@ -64,14 +60,10 @@ def _encode_sentence(self, sentence):
         outputs = model(**tokens)
         return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> Union[float, int, bool]:
         embedding_ground_truth = self._encode_sentence(ground_truth)
         embedding_model_prediction = self._encode_sentence(model_prediction)
-        dot_product_similarity = np.dot(
-            embedding_ground_truth, embedding_model_prediction
-        )
+        dot_product_similarity = np.dot(embedding_ground_truth, embedding_model_prediction)
         return dot_product_similarity
 
 
@@ -81,9 +73,7 @@ class RougeScoreTest(LLMQaTest):
     def test_name(self) -> str:
         return "rouge_score"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> Union[float, int, bool]:
         scorer = rouge_scorer.RougeScorer(["rouge1"], use_stemmer=True)
         scores = scorer.score(model_prediction, ground_truth)
         return float(scores["rouge1"].precision)
@@ -101,9 +91,7 @@ def _remove_stopwords(self, text: str) -> str:
         filtered_text = [word for word in word_tokens if word.lower() not in stop_words]
         return " ".join(filtered_text)
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> Union[float, int, bool]:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> Union[float, int, bool]:
         cleaned_model_prediction = self._remove_stopwords(model_prediction)
         cleaned_ground_truth = self._remove_stopwords(ground_truth)
 
@@ -130,12 +118,8 @@ class VerbPercent(PosCompositionTest):
     def test_name(self) -> str:
         return "verb_percent"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> float:
-        return self._get_pos_percent(
-            model_prediction, ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
-        )
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> float:
+        return self._get_pos_percent(model_prediction, ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"])
 
 
 @TestRegistry.register("adjective_percent")
@@ -144,9 +128,7 @@ class AdjectivePercent(PosCompositionTest):
     def test_name(self) -> str:
         return "adjective_percent"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> float:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> float:
         return self._get_pos_percent(model_prediction, ["JJ", "JJR", "JJS"])
 
 
 @TestRegistry.register("noun_percent")
@@ -156,9 +138,7 @@ class NounPercent(PosCompositionTest):
     def test_name(self) -> str:
         return "noun_percent"
 
-    def get_metric(
-        self, prompt: str, ground_truth: str, model_prediction: str
-    ) -> float:
+    def get_metric(self, prompt: str, ground_truth: str, model_prediction: str) -> float:
         return self._get_pos_percent(model_prediction, ["NN", "NNS", "NNP", "NNPS"])
 
