Commit 171bc46 (1 parent: f4838ef)
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 7 changed files with 208 additions and 736 deletions.
@@ -0,0 +1,41 @@
name: Publish Docker image

on:
  push:
    branches:
      - main
      - migrate-to-github-registry-for-docker-images

jobs:

  container:
    runs-on: ubuntu-latest

    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ghcr.io/kyryl-opens-ml/fine-tune-llm-in-2024

      - name: Build and push Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          push: true
          tags: |
            ${{ steps.meta.outputs.tags }}
            ghcr.io/kyryl-opens-ml/fine-tune-llm-in-2024:latest
          labels: ${{ steps.meta.outputs.labels }}
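The workflow above builds the repository image and pushes it to the GitHub Container Registry. As a rough local check (not part of this commit), the sketch below pulls the published image and prints its labels; the ":main" tag is an assumption based on metadata-action's default branch tagging, and a working Docker CLI is assumed.

# Illustrative smoke test (not in this commit): pull the image the workflow
# publishes and print its labels. Assumes a local Docker CLI and that
# metadata-action produced the branch tag ":main" for pushes to main.
import subprocess

IMAGE = "ghcr.io/kyryl-opens-ml/fine-tune-llm-in-2024:main"

def smoke_test(image: str = IMAGE) -> None:
    # Pull the published image from GHCR.
    subprocess.run(["docker", "pull", image], check=True)
    # Show the labels written by the workflow's "labels:" input.
    subprocess.run(
        ["docker", "inspect", "--format", "{{json .Config.Labels}}", image],
        check=True,
    )

if __name__ == "__main__":
    smoke_test()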
@@ -0,0 +1 @@
/app/test.txt
@@ -1,37 +1,105 @@
import transformers
import torch

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
print(outputs[0]["generated_text"][len(prompt):])
# import transformers
# import torch

# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# pipeline = transformers.pipeline(
#     "text-generation",
#     model=model_id,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     device="cuda",
# )

# messages = [
#     {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
#     {"role": "user", "content": "Who are you?"},
# ]

# prompt = pipeline.tokenizer.apply_chat_template(
#     messages,
#     tokenize=False,
#     add_generation_prompt=True
# )

# terminators = [
#     pipeline.tokenizer.eos_token_id,
#     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
# ]

# outputs = pipeline(
#     prompt,
#     max_new_tokens=256,
#     eos_token_id=terminators,
#     do_sample=True,
#     temperature=0.6,
#     top_p=0.9,
# )
# print(outputs[0]["generated_text"][len(prompt):])

def end2end_test():
    # create_text_to_sql_dataset = create_text_to_sql_dataset(config=DataConfig())
    create_text_to_sql_dataset = {'train_path': 'train_dataset-sql.json', 'test_path': 'test_dataset-sql.json'}
    train_data = load_dataset(
        "json", data_files=create_text_to_sql_dataset["train_path"], split="train"
    )
    test_data = load_dataset(
        "json", data_files=create_text_to_sql_dataset["test_path"], split="train"
    )

    # train_data = train_data.select(range(100))
    # test_data = train_data
    config = ModelTrainingConfig(peft_model_id='duckdb-text2sql-llama-3-8B-sql-full-lora')

    # trained_model = trained_model()


import os
import zipfile
import io

def create_and_zip_folder():
    # Define the folder and files to create
    folder_path = 'example_folder'
    file_names = ['file1.txt', 'file2.txt', 'file3.txt']

    # Create the folder
    os.makedirs(folder_path, exist_ok=True)

    # Create some example files in the folder
    for file_name in file_names:
        with open(os.path.join(folder_path, file_name), 'w') as f:
            f.write(f"Contents of {file_name}")

    # Create a zip file in memory
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for file_name in file_names:
            zip_file.write(os.path.join(folder_path, file_name), arcname=file_name)

    # Clean up the folder after zipping (optional)
    for file_name in file_names:
        os.remove(os.path.join(folder_path, file_name))
    os.rmdir(folder_path)

    # Return the bytes of the zip file
    zip_buffer.seek(0)
    return zip_buffer.getvalue()

def main_function():
    # Get the zip bytes
    zip_bytes = create_and_zip_folder()

    # Read the zip from bytes
    zip_buffer = io.BytesIO(zip_bytes)

    # Define the directory where to unzip
    output_dir = 'unzipped_content'
    os.makedirs(output_dir, exist_ok=True)

    # Unzip the content
    with zipfile.ZipFile(zip_buffer, 'r') as zip_file:
        zip_file.extractall(path=output_dir)

# Calling the main function to execute
if __name__ == '__main__':
    main_function()
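Note that end2end_test above uses load_dataset, DataConfig, and ModelTrainingConfig without importing them in this hunk, so they presumably come from elsewhere in the module or repository. The sketch below is one hedged guess at what those names could look like: load_dataset is assumed to be the Hugging Face datasets loader, while DataConfig and ModelTrainingConfig are hypothetical stand-ins whose fields are illustrative only, not the project's actual definitions.

# Minimal sketch of the names end2end_test assumes are in scope.
# load_dataset is the Hugging Face `datasets` loader; DataConfig and
# ModelTrainingConfig are hypothetical stand-ins for this repo's config classes.
from dataclasses import dataclass
from datasets import load_dataset  # pip install datasets

@dataclass
class DataConfig:
    # Illustrative fields only; the real class likely differs.
    train_path: str = "train_dataset-sql.json"
    test_path: str = "test_dataset-sql.json"

@dataclass
class ModelTrainingConfig:
    # peft_model_id matches the keyword argument used in end2end_test above.
    peft_model_id: str = "duckdb-text2sql-llama-3-8B-sql-full-lora"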