From a8ec3e427fd4ef3df92c1f9c840013e32ec59b4d Mon Sep 17 00:00:00 2001 From: Ipadeola Ladipo Ezekiel <105759894+rileydrizzy@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:03:58 +0100 Subject: [PATCH] merge (#9) * update * updates * updates * [add] updates * [add] updates * updates * updates * [add] updates * merge --- .github/workflows/run_units_test.yml | 13 +- .gitignore | 6 +- .gitpod.yml | 10 - linguify_yb/data/.gitkeep => Dockerfile | 0 Makefile | 10 +- README.md | 24 +- linguify_yb/src/config/config.yaml => app.py | 0 test_inference.py => inference.py | 0 linguify_yb/README.md | 23 +- linguify_yb/data/dev_samples.json | 9 - linguify_yb/src/benchmark.py | 65 ----- linguify_yb/src/config.py | 6 - linguify_yb/src/dataset/dataset_loader.py | 119 --------- linguify_yb/src/dataset/preprocess.py | 28 --- linguify_yb/src/main.py | 101 -------- linguify_yb/src/trainer.py | 137 ---------- linguify_yb/src/utils/logger_util.py | 2 +- pyproject.toml | 4 +- requirements.txt | 171 ++++++------- run_setup.sh | 7 + set_environment_variables_template.sh | 2 + signa2text/README.md | 25 +- .../metrics.py => signa2text/data/.gitkeep | 0 .../data/dataset_paths.json | 0 signa2text/data/dev_samples.json | 9 + .../development/code_dev.ipynb | 0 .../development/data_dev.ipynb | 0 .../development/dev.ipynb | 0 .../development/trans_dev.ipynb | 0 .../notebooks/analyasis.ipynb | 0 signa2text/run_train.sh | 8 + signa2text/src/benchmark.py | 117 +++++++++ signa2text/src/config.py | 6 + .../src/config/config.yaml | 0 signa2text/src/dataset/dataset_loader.py | 237 ++++++++++++++++++ signa2text/src/dataset/dataset_paths.py | 46 ++++ .../src/dataset/frames_config.py | 56 ++--- signa2text/src/dataset/preprocess.py | 61 +++++ {linguify_yb => signa2text}/src/dev_data.py | 6 +- .../src/evalute.py | 0 signa2text/src/main.py | 95 +++++++ .../__init__.py => signa2text/src/metrics.py | 0 signa2text/src/{utils => models}/__init__.py | 0 .../src/models/baseline_transformer.py | 108 ++++---- .../src/models/model_loader.py | 12 +- .../src/models/static_transfromer.py | 0 signa2text/src/tests/__init__.py | 0 .../src/tests/test_data_ingestion.py | 36 ++- .../src/tests/test_model.py | 0 signa2text/src/tests/test_pipeline.py | 0 signa2text/src/trainer.py | 132 ++++++++++ signa2text/src/utils/logger_util.py | 3 +- {linguify_yb => signa2text}/src/utils/util.py | 10 +- version.txt | 1 + 54 files changed, 987 insertions(+), 718 deletions(-) delete mode 100644 .gitpod.yml rename linguify_yb/data/.gitkeep => Dockerfile (100%) rename linguify_yb/src/config/config.yaml => app.py (100%) rename test_inference.py => inference.py (100%) delete mode 100644 linguify_yb/data/dev_samples.json delete mode 100644 linguify_yb/src/benchmark.py delete mode 100644 linguify_yb/src/config.py delete mode 100644 linguify_yb/src/dataset/dataset_loader.py delete mode 100644 linguify_yb/src/dataset/preprocess.py delete mode 100644 linguify_yb/src/main.py delete mode 100644 linguify_yb/src/trainer.py create mode 100644 run_setup.sh rename linguify_yb/src/metrics.py => signa2text/data/.gitkeep (100%) rename {linguify_yb => signa2text}/data/dataset_paths.json (100%) create mode 100644 signa2text/data/dev_samples.json rename {linguify_yb => signa2text}/development/code_dev.ipynb (100%) rename {linguify_yb => signa2text}/development/data_dev.ipynb (100%) rename {linguify_yb => signa2text}/development/dev.ipynb (100%) rename {linguify_yb => signa2text}/development/trans_dev.ipynb (100%) rename {linguify_yb => signa2text}/notebooks/analyasis.ipynb (100%) create 
mode 100644 signa2text/run_train.sh
create mode 100644 signa2text/src/benchmark.py
create mode 100644 signa2text/src/config.py
rename linguify_yb/src/models/__init__.py => signa2text/src/config/config.yaml (100%)
create mode 100644 signa2text/src/dataset/dataset_loader.py
create mode 100644 signa2text/src/dataset/dataset_paths.py
rename {linguify_yb => signa2text}/src/dataset/frames_config.py (58%)
create mode 100644 signa2text/src/dataset/preprocess.py
rename {linguify_yb => signa2text}/src/dev_data.py (96%)
rename linguify_yb/src/models/static_transfromer.py => signa2text/src/evalute.py (100%)
create mode 100644 signa2text/src/main.py
rename linguify_yb/src/tests/__init__.py => signa2text/src/metrics.py (100%)
rename signa2text/src/{utils => models}/__init__.py (100%)
rename {linguify_yb => signa2text}/src/models/baseline_transformer.py (78%)
rename {linguify_yb => signa2text}/src/models/model_loader.py (68%)
rename linguify_yb/src/tests/test_pipeline.py => signa2text/src/models/static_transfromer.py (100%)
create mode 100644 signa2text/src/tests/__init__.py
rename {linguify_yb => signa2text}/src/tests/test_data_ingestion.py (70%)
rename {linguify_yb => signa2text}/src/tests/test_model.py (100%)
create mode 100644 signa2text/src/tests/test_pipeline.py
create mode 100644 signa2text/src/trainer.py
rename {linguify_yb => signa2text}/src/utils/util.py (90%)
diff --git a/.github/workflows/run_units_test.yml b/.github/workflows/run_units_test.yml
index 43fa0cb2..034dd036 100644
--- a/.github/workflows/run_units_test.yml
+++ b/.github/workflows/run_units_test.yml
@@ -1,5 +1,3 @@
-name: Units Tests
-
 on:
   push:
     branches:
@@ -22,8 +20,14 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.10
+      - name: Install dependencies
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
 
-      - name: Pytest
+      - name: Run Pytest
        run: |
-          cd linguify
-          pytest
\ No newline at end of file
+          cd signa2text
+          pytest
diff --git a/.gitignore b/.gitignore
index aa4ae93d..7b0f4fb0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -90,9 +90,14 @@ target/
 # pytest cache
 .pytest_cache/
 
+# misc
+.gitpod.yml
+poetry.lock
+
 # Data and models
 data/*/*
 models/*
+kaggle
 !.gitkeep
 !dataset_paths.json
 !dev_samples.json
@@ -114,7 +119,6 @@ yb2audio/data/*/*
 
 # Development Enviroment
 dev.py
-#development
 dev_env.txt
 
 # Keys
diff --git a/.gitpod.yml b/.gitpod.yml
deleted file mode 100644
index 63a3e4fd..00000000
--- a/.gitpod.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-# This configuration file was automatically generated by Gitpod.
-# Please adjust to your needs (see https://www.gitpod.io/docs/introduction/learn-gitpod/gitpod-yaml)
-# and commit this file to your remote git repository to share the goodness with others.
-
-# Learn more from ready-to-use templates: https://www.gitpod.io/docs/introduction/getting-started/quickstart
-
-tasks:
-  - init: make
-
-
diff --git a/linguify_yb/data/.gitkeep b/Dockerfile
similarity index 100%
rename from linguify_yb/data/.gitkeep
rename to Dockerfile
diff --git a/Makefile b/Makefile
index 44a6076c..fc07bc95 100644
--- a/Makefile
+++ b/Makefile
@@ -13,16 +13,16 @@ setup:
	poetry install
	poetry add pre-commit
	python pre-commit install
-	@echo "Environment setup complete"
+	@echo "Environment setup complete"
 
 precommit:
	@echo "Running precommit on all files"
	python pre-commit run --all-files
 
-export:
+export_:
	@echo "Exporting dependencies to requirements file"
	poetry export --without-hashes -f requirements.txt --output requirements.txt
 
-backup: # To push to Github without running precommit
-	git commit --no-verify -m "updates"
-	git push origin main
+run_container:
+	@echo "Running Docker Container"
+
diff --git a/README.md b/README.md
index 320d59a3..82fd2a3d 100644
--- a/README.md
+++ b/README.md
@@ -40,25 +40,29 @@ Effective communication is a cornerstone of societal cohesion, and this project
 
 ```bash
 # Clone this repository
-$ git clone
+$ git clone
 
 # Go into the repository
 $ cd
 
 # Install dependencies
-$ make setup
+$ . ./run_setup.sh
 
-# activate virtual enviroment
-$ source $(poetry env info --path)/bin/activate
 ```
 
 ### Project Roadmap
 
 Here's a glimpse of the exciting features we plan to implement in the coming weeks:
 
-- [x] Add project's documentation
-- [] Develop a Proof of Concept System
-- [] Deployment of Proof of Concept System
+| Feature                  | Description                                       | Status      |
+| ------------------------ | ------------------------------------------------- | ----------- |
+| SignText Model           | Implement the training of the SignText model      | In Progress |
+| Deployment of the System | Develop and deploy the system to Google Cloud.    | Planned     |
+| User Interface           | Develop a friendly and functional user interface  | Planned     |
+
+## How to Contribute
+
+We welcome contributions from the community. If you're interested in contributing, please refer to the [Contributing Guidelines](CONTRIBUTING.md).
## Acknowledgments @@ -68,9 +72,13 @@ I would like to acknowledge the outstanding contributions of : **Email:** **GitHub:** [@tejuafonja](https://github.com/tejuafonja) -## Contact +## Support and Contact + +If you have questions or need assistance, feel free to reach out to: **Name:** **Ipadeola Ezekiel Ladipo** **Email:** **GitHub:** [@rileydrizzy](https://github.com/rileydrizzy) **Linkdeln:** [Ipadeola Ladipo](https://www.linkedin.com/in/ladipo-ipadeola/) + +--- diff --git a/linguify_yb/src/config/config.yaml b/app.py similarity index 100% rename from linguify_yb/src/config/config.yaml rename to app.py diff --git a/test_inference.py b/inference.py similarity index 100% rename from test_inference.py rename to inference.py diff --git a/linguify_yb/README.md b/linguify_yb/README.md index a5a84d35..d427164d 100644 --- a/linguify_yb/README.md +++ b/linguify_yb/README.md @@ -1,30 +1,11 @@ -# Linguify-YB +# Signa-Text [![LICENSE](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE) [![Python](https://img.shields.io/badge/python-3.6-blue.svg?style=flat-square)](https://www.python.org/) [![PyTorch](https://img.shields.io/badge/PyTorch-1.7.0-orange)](https://pytorch.org/) -![image/gif](https://github.com/rileydrizzy/Cohort8-Ransom-Kuti-Ladipo/blob/main/images/sign%20lang.gif) +![image/gif]() ## Project description ***Overview:*** \ - -## Project Roadmap - -- **[Month Year]:** Project Initiation -- **[Month Year]:** Core Functionality Completion -- **[Month Year]:** User Interface Design Completion -- **[Month Year]:** Data Integration Completion -- **[Month Year]:** Testing and Quality Assurance Completion -- **[Month Year]:** Deployment to Production - -## How to Contribute - -We welcome contributions from the community. If you're interested in contributing, please refer to the [Contributing Guidelines](CONTRIBUTING.md). - -## Support and Contact - -If you have questions or need assistance, feel free to reach out to [Your Contact Information]. 
- ---- diff --git a/linguify_yb/data/dev_samples.json b/linguify_yb/data/dev_samples.json deleted file mode 100644 index 93702d14..00000000 --- a/linguify_yb/data/dev_samples.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "train_files": [ - "data/asl-fingerspelling/train_landmarks/1019715464.parquet", - "data/asl-fingerspelling/train_landmarks/1021040628.parquet" - ], - "valid_files": [ - "data/asl-fingerspelling/train_landmarks/105143404.parquet" - ] -} \ No newline at end of file diff --git a/linguify_yb/src/benchmark.py b/linguify_yb/src/benchmark.py deleted file mode 100644 index 9b83888a..00000000 --- a/linguify_yb/src/benchmark.py +++ /dev/null @@ -1,65 +0,0 @@ -"""doc -""" -from torchprofile import profile_macs -from torch import nn - - -Byte = 8 -KiB = 1024 * Byte -MiB = 1024 * KiB -GiB = 1024 * MiB - - -class BenchMarker: - """_summary_""" - - def __init__(self) -> None: - pass - - def get_model_macs(self, model, inputs=None) -> int: - """ - calculate the MACS of a model - """ - return profile_macs(model, inputs) - - def get_model_sparsity(self, model: nn.Module) -> float: - """ - calculate the sparsity of the given model - sparsity = #zeros / #elements = 1 - #nonzeros / #elements - """ - num_nonzeros, num_elements = 0, 0 - for param in model.parameters(): - num_nonzeros += param.count_nonzero() - num_elements += param.numel() - return 1 - float(num_nonzeros) / num_elements - - def get_num_parameters(self, model: nn.Module, count_nonzero_only=False) -> int: - """ - calculate the total number of parameters of model - :param count_nonzero_only: only count nonzero weights - """ - num_counted_elements = 0 - for param in model.parameters(): - if count_nonzero_only: - num_counted_elements += param.count_nonzero() - else: - num_counted_elements += param.numel() - return num_counted_elements - - def get_model_size( - self, model: nn.Module, data_width=32, count_nonzero_only=False - ) -> int: - """ - calculate the model size in bits - :param data_width: #bits per element - :param count_nonzero_only: only count nonzero weights - """ - return self.get_num_parameters(model, count_nonzero_only) * data_width - - def runner(self, model): - model_macs = self.get_model_macs(model) - model_sparsity = self.get_model_sparsity(model) - model_num_params = self.get_num_parameters(model) - model_size = self.get_model_size(model) - - return diff --git a/linguify_yb/src/config.py b/linguify_yb/src/config.py deleted file mode 100644 index 98a1d336..00000000 --- a/linguify_yb/src/config.py +++ /dev/null @@ -1,6 +0,0 @@ -"""doc -""" -from pydantic import BaseModel - -class Data(BaseModel): - \ No newline at end of file diff --git a/linguify_yb/src/dataset/dataset_loader.py b/linguify_yb/src/dataset/dataset_loader.py deleted file mode 100644 index afaa302f..00000000 --- a/linguify_yb/src/dataset/dataset_loader.py +++ /dev/null @@ -1,119 +0,0 @@ -"""doc -""" - -import json - -import numpy as np -import pandas as pd -import pyarrow.parquet as pq -import torch -from torch.nn import functional as F -from torch.utils.data import DataLoader, Dataset - -from dataset.frames_config import FEATURE_COLUMNS, FRAME_LEN, LHAND_IDX, RHAND_IDX -from dataset.preprocess import clean_frames_process - -PHRASE_PATH = "/kaggle/input/asl-fingerspelling/character_to_prediction_index.json" -METADATA = "/kaggle/input/asl-fingerspelling/train.csv" - -with open(PHRASE_PATH, "r", encoding="utf-8") as f: - character_to_num = json.load(f) - -PAD_TOKEN = "P" -START_TOKEN = "<" -END_TOKEN = ">" -PAD_TOKEN_IDX = 59 -START_TOKEN_IDX = 60 
-END_TOKEN_IDX = 61 - -character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX -character_to_num[START_TOKEN] = START_TOKEN_IDX -character_to_num[END_TOKEN] = END_TOKEN_IDX -num_to_character = {j: i for i, j in character_to_num.items()} - - -class TokenHashTable: - def __init__( - self, word2index_mapping=character_to_num, index2word_mapping=num_to_character - ): - self.word2index = word2index_mapping - self.index2word = index2word_mapping - - def _indexesfromsentence(self, sentence): - return [self.word2index[word] for word in sentence] - - def sentence_to_tensor(self, sentence): - indexes = self._indexesfromsentence(sentence) - return torch.tensor(indexes, dtype=torch.long) - - def index_to_sentence(self, indexes_list): - if torch.is_tensor(indexes_list): - indexes_list = indexes_list.tolist() - words = [self.index2word[idx] for idx in indexes_list] - return words - - -def read_file(file, file_id, landmarks_metadata_path): - phrase_list = [] - frames_list = [] - metadata_train_dataframe = pd.read_csv(landmarks_metadata_path) - file_id_df = metadata_train_dataframe.loc[ - metadata_train_dataframe["file_id"] == file_id - ] - saved_parueat_df = pq.read_table( - file, columns=["sequence_id"] + FEATURE_COLUMNS - ).to_pandas() - for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase): - frames = saved_parueat_df[saved_parueat_df.index == seq_id].to_numpy() - # NaN - frames_list.append(torch.tensor(frames)) - phrase_list.append(phrase) - return (frames_list, phrase_list) - - -class LandmarkDataset(Dataset): - def __init__(self, file_path, file_id, table, transform=True): - self.landmarks_metadata_path = METADATA - self.frames, self.labels = read_file( - file_path, file_id, self.landmarks_metadata_path - ) - self.trans = transform - self.table = table - - def _label_pre(self, label_sample): - sample = START_TOKEN + label_sample + END_TOKEN - new_phrase = self.table.tensorfromsentence(list(sample)) - ans = F.pad( - input=new_phrase, - pad=[0, 64 - new_phrase.shape[0]], - mode="constant", - value=PAD_TOKEN_IDX, - ) - return ans - - def __len__(self): - return len(self.labels) - - def __getitem__(self, idx): - if torch.is_tensor(idx): - idx = idx.tolist() - phrase = self.labels[idx] - frames = self.frames[idx] - - if self.trans: - phrase = self._label_pre(phrase) - frames = clean_frames_process(frames) - return frames, phrase - - -def get_dataloader(file_path, file_id, batch_size=32, num_workers_=1): - lookup_table = TokenHashTable(character_to_num, num_to_character) - dataset = LandmarkDataset(file_path, file_id, lookup_table, transform=True) - - dataloader = DataLoader( - dataset, - batch_size=batch_size, - num_workers=num_workers_, - pin_memory=True, - ) - return dataloader diff --git a/linguify_yb/src/dataset/preprocess.py b/linguify_yb/src/dataset/preprocess.py deleted file mode 100644 index 192dd73a..00000000 --- a/linguify_yb/src/dataset/preprocess.py +++ /dev/null @@ -1,28 +0,0 @@ -"""doc -""" -import torch -from torch.nn import functional as F - -# TODO Clean up code, add comments and docs -# TODO remove print and debug statements - - -def clean_frames_process( - x, max_frame_len=128, n_hand_landmarks=21, n_pose_landmarks=33, n_face_landmarks=40 -): - x = x[:max_frame_len] - x = torch.where(torch.isnan(x), torch.zeros_like(x), x) - n_frames = x.size(0) - lhand = x[:, 0:63].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) - rhand = x[:, 63:126].view(n_frames, 3, n_hand_landmarks).transpose(1, 2) - pose = x[:, 126:225].view(n_frames, 3, n_pose_landmarks).transpose(1, 2) - face = x[:, 
225:345].view(n_frames, 3, n_face_landmarks).transpose(1, 2) - - x = torch.cat([lhand, rhand, pose, face], axis=1) - x = x.view(n_frames, 345) - if n_frames < max_frame_len: - # Calculate the padding on the first dimension from the bottom - padding_bottom = max(0, max_frame_len - x.size(0)) - # Pad the tensor along the first dimension from the bottom - x = F.pad(x, (0, 0, 0, padding_bottom)) - return x diff --git a/linguify_yb/src/main.py b/linguify_yb/src/main.py deleted file mode 100644 index ac12cfc8..00000000 --- a/linguify_yb/src/main.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -doc - -# Usage: -# python -m src/train.py \ -# --epochs 10 \ -# --batch 512 \ -""" -# TODO Complete and refactor code for distributed training - -import os -import json - -import numpy as np -import torch -import wandb -from torch import nn - -from utils.util import get_device_strategy, parse_args, set_seed -from utils.logger_util import logger -from models.model_loader import ModelLoader -from dataset.dataset_loader import get_dataloader -import trainer - -try: - dataset_paths = "data/dev_samples.json" # On kaggle replace with "data/dataset_paths.json" to train on full data - with open(dataset_paths, "r", encoding="utf-8") as json_file: - data_dict = json.load(json_file) - LANDMARK_DIR = "/kaggle/input/asl-fingerspelling/train_landmarks" - MODEL_DIR = "model.pt" - - # Training dataset - train_dataset = data_dict["train_files"] - train_file_ids = [os.path.basename(file) for file in train_dataset] - train_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in train_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed import of Train files path " - TRAIN_DS_FILES = list(zip(train_dataset, train_file_ids)) - - # Validation dataset - valid_dataset = data_dict["valid_files"] - valid_file_ids = [os.path.basename(file) for file in valid_dataset] - valid_file_ids = [ - int(file_name.replace(".parquet", "")) for file_name in valid_file_ids - ] - assert len(train_dataset) == len( - train_file_ids - ), "Failed Import of Valid Files path" - VALID_DS_FILES = list(zip(valid_dataset, valid_file_ids)) -except AssertionError as asset_error: - logger.exception(f"failed {asset_error}") - - -def main(arg): - logger.info(f"Starting training on {arg.model}") - # To ensure reproducibility of the training process - set_seed() - DEVICE = get_device_strategy(tpu=arg.tpu) - logger.info(f"Training on {DEVICE} for {arg.epochs} epochs.") - - model = ModelLoader().get_model(arg.model) - - optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) - criterion = nn.CrossEntropyLoss(label_smoothing=0.1) - - # Optimizes given model/function using TorchDynamo and specified backend - torch.compile(model) - - logger.info("training") - wandb.init( - project="ASL-project", - config={ - "learning_rate": 0.01, - "architecture": "Test Model", - "dataset": "Google ASL Landmarks", - "epochs": 12, - }, - ) - - wandb.watch(model) - try: - train( - model=arg.model, - optim=optimizer, - loss_func=criterion, - n_epochs=arg.epochs, - batch=arg.batch, - device=DEVICE, - ) - logger.success(f"Training completed: {arg.epochs} epochs on {DEVICE}.") - - except Exception as error: - logger.exception(f"Training failed due to an {error}.") - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/linguify_yb/src/trainer.py b/linguify_yb/src/trainer.py deleted file mode 100644 index 3634140c..00000000 --- a/linguify_yb/src/trainer.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -doc - -# Usage: -# python -m 
src/train.py \ -# --epochs 10 \ -# --batch 512 \ -""" -# TODO Complete and refactor code for distributed training - -import os -import json - -import numpy as np -import torch -import wandb -from torch import nn - -from utils.logger_util import logger - - -def train(model, optim, loss_func, n_epochs, batch, device,): - - model.to(device) - - train_losses = [] - val_losses = [] - val_dataloader = # get_dataloader(TRAIN_FILES[0][0], TRAIN_FILES[0][1], batch_size=batch) - for epoch in range(n_epochs): - logger.info(f"Training on epoch {epoch}.") - total_epochs = epoch - file_train_loss = [] - for file, file_id in TRAIN_DS_FILES: - train_dataloader = # get_dataloader(file, file_id, batch_size=batch) - - # Performs training using mini-batches - train_loss = mini_batch( - model, train_dataloader, optim, loss_func, device, validation=False - ) - file_train_loss.append(train_loss) - train_loss = np.mean(file_train_loss) - train_losses.append(train_loss) - - # Performs evaluation using mini-batches - logger.info("Starting validation.") - with torch.no_grad(): - val_loss = mini_batch( - model, val_dataloader, optim, loss_func, device, validation=True - ) - val_losses.append(val_loss) - - wandb.log( - { - "train_loss": train_loss, - "val_loss": val_loss, - "epoch": epoch, - } - ) - - if epoch // 2 == 0: - logger.info("Initiating checkpoint. Saving model and optimizer states.") - save_checkpoint( - MODEL_DIR, model, optim, total_epochs, train_losses, val_losses - ) - - -def mini_batch( - model, dataloader, mini_batch_optim, loss_func, device, validation=False -): - # The mini-batch can be used with both loaders - # The argument `validation`defines which loader and - # corresponding step function is going to be used - if validation: - step_func = val_step_func(model, loss_func) - else: - step_func = train_step_func(model, mini_batch_optim, loss_func) - - # Once the data loader and step function, this is the same - # mini-batch loop we had before - mini_batch_losses = [] - for x_batch, y_batch in dataloader: - x_batch = x_batch.to(device) - y_batch = y_batch.to(device) - loss = step_func(x=x_batch, y=y_batch) - mini_batch_losses.append(loss) - loss = np.mean(mini_batch_losses) - return loss - - -def train_step_func(model, optim_, loss_func): - def perform_train_step_fn(x, y): - model.train() - preds = model(x) - loss = loss_func(preds, y) - loss.backward() - optim_.step() - optim_.zero_grad() - return loss.item() - - return perform_train_step_fn - - -def val_step_func(model, loss_func): - def perform_val_step_fn(x, y): - model.eval() - preds = model(x) - loss = loss_func(preds, y) - return loss.item() - - return perform_val_step_fn - - -def save_checkpoint(filename, model, optimizer, total_epochs, train_losses, val_losses): - # Builds dictionary with all elements for resuming training - checkpoint = { - "epoch": total_epochs, - "model_state_dict": model.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - "loss": train_losses, - "val_loss": val_losses, - } - - torch.save(checkpoint, filename) - - -def load_checkpoint(model, optimizer, filename): - # Loads dictionary - checkpoint = torch.load(filename) - - # Restore state for model and optimizer - model.load_state_dict(checkpoint["model_state_dict"]) - optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) - - total_epochs = checkpoint["epoch"] - losses = checkpoint["loss"] - val_losses = checkpoint["val_loss"] - return model diff --git a/linguify_yb/src/utils/logger_util.py b/linguify_yb/src/utils/logger_util.py index 
a7e057e6..618d207b 100644
--- a/linguify_yb/src/utils/logger_util.py
+++ b/linguify_yb/src/utils/logger_util.py
@@ -10,7 +10,7 @@
     - `logger.warning("Warning message")`
     - `logger.error("Error message")`
     - `logger.critical("Critical message")`
-    - `logger.success("success messgae")`
+    - `logger.success("success message")`
 """
 from pathlib import Path
diff --git a/pyproject.toml b/pyproject.toml
index 524497fc..7220de66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,6 @@ loguru = "^0.7.2"
 wandb = "^0.15.12"
 transformers = "^4.34.0"
 pandas = "^2.1.1"
-ray = {extras = ["data", "serve", "train", "tune"], version = "^2.7.1"}
 hydra-core = "^1.3.2"
 tensorflow = "^2.14.0"
 torch = "^2.1.0"
@@ -22,6 +21,9 @@ torchvision = "^0.16.0"
 ipykernel = "^6.26.0"
 opencv-python = "^4.8.1.78"
 torchprofile = "^0.0.4"
+pydantic = "^2.5.2"
+pytest = "^7.4.3"
+pyarrow = "^14.0.1"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/requirements.txt b/requirements.txt
index 69a12e79..f4610364 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,162 +1,143 @@
 absl-py==2.0.0 ; python_version >= "3.10" and python_version < "3.12"
-aiohttp-cors==0.7.0 ; python_version >= "3.10" and python_version < "3.12"
-aiohttp==3.8.6 ; python_version >= "3.10" and python_version < "3.12"
-aiorwlock==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
-aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
-ansicon==1.89.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Windows"
+annotated-types==0.6.0 ; python_version >= "3.10" and python_version < "3.12"
 antlr4-python3-runtime==4.9.3 ; python_version >= "3.10" and python_version < "3.12"
-anyio==3.7.1 ; python_version >= "3.10" and python_version < "3.12"
 appdirs==1.4.4 ; python_version >= "3.10" and python_version < "3.12"
-appnope==0.1.3 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Darwin" or sys_platform == "darwin")
-asttokens==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
+appnope==0.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Darwin"
+asttokens==2.4.1 ; python_version >= "3.10" and python_version < "3.12"
 astunparse==1.6.3 ; python_version >= "3.10" and python_version < "3.12"
-async-timeout==4.0.3 ; python_version >= "3.10" and python_version < "3.12"
-attrs==23.1.0 ; python_version >= "3.10" and python_version < "3.12"
-backcall==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
 bleach==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
-blessed==1.20.0 ; python_version >= "3.10" and python_version < "3.12"
-cachetools==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
-certifi==2023.7.22 ; python_version >= "3.10" and python_version < "3.12"
+cachetools==5.3.2 ; python_version >= "3.10" and python_version < "3.12"
+certifi==2023.11.17 ; python_version >= "3.10" and python_version < "3.12"
 cffi==1.16.0 ; python_version >= "3.10" and python_version < "3.12" and implementation_name == "pypy"
 cfgv==3.4.0 ; python_version >= "3.10" and python_version < "3.12"
-charset-normalizer==3.3.0 ; python_version >= "3.10" and python_version < "3.12"
+charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
 click==8.1.7 ; python_version >= "3.10" and python_version < "3.12"
 colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (sys_platform == "win32" or platform_system == "Windows")
-colorful==0.5.5 ; python_version >= "3.10" and python_version < "3.12"
-comm==0.1.4 ; python_version >= "3.10" and python_version < "3.12" +comm==0.2.0 ; python_version >= "3.10" and python_version < "3.12" debugpy==1.8.0 ; python_version >= "3.10" and python_version < "3.12" decorator==5.1.1 ; python_version >= "3.10" and python_version < "3.12" -distlib==0.3.7 ; python_version >= "3.10" and python_version < "3.12" +distlib==0.3.8 ; python_version >= "3.10" and python_version < "3.12" docker-pycreds==0.4.0 ; python_version >= "3.10" and python_version < "3.12" -exceptiongroup==1.1.3 ; python_version >= "3.10" and python_version < "3.11" -executing==2.0.0 ; python_version >= "3.10" and python_version < "3.12" -fastapi==0.103.2 ; python_version >= "3.10" and python_version < "3.12" -filelock==3.12.4 ; python_version >= "3.10" and python_version < "3.12" +exceptiongroup==1.2.0 ; python_version >= "3.10" and python_version < "3.11" +executing==2.0.1 ; python_version >= "3.10" and python_version < "3.12" +filelock==3.13.1 ; python_version >= "3.10" and python_version < "3.12" flatbuffers==23.5.26 ; python_version >= "3.10" and python_version < "3.12" -frozenlist==1.4.0 ; python_version >= "3.10" and python_version < "3.12" -fsspec==2023.9.2 ; python_version >= "3.10" and python_version < "3.12" +fsspec==2023.12.2 ; python_version >= "3.10" and python_version < "3.12" gast==0.5.4 ; python_version >= "3.10" and python_version < "3.12" -gitdb==4.0.10 ; python_version >= "3.10" and python_version < "3.12" -gitpython==3.1.38 ; python_version >= "3.10" and python_version < "3.12" -google-api-core==2.12.0 ; python_version >= "3.10" and python_version < "3.12" -google-auth-oauthlib==1.0.0 ; python_version >= "3.10" and python_version < "3.12" -google-auth==2.23.3 ; python_version >= "3.10" and python_version < "3.12" +gitdb==4.0.11 ; python_version >= "3.10" and python_version < "3.12" +gitpython==3.1.40 ; python_version >= "3.10" and python_version < "3.12" +google-auth-oauthlib==1.2.0 ; python_version >= "3.10" and python_version < "3.12" +google-auth==2.25.2 ; python_version >= "3.10" and python_version < "3.12" google-pasta==0.2.0 ; python_version >= "3.10" and python_version < "3.12" -googleapis-common-protos==1.61.0 ; python_version >= "3.10" and python_version < "3.12" -gpustat==1.1.1 ; python_version >= "3.10" and python_version < "3.12" -grpcio==1.59.0 ; python_version >= "3.10" and python_version < "3.12" -h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12" +grpcio==1.60.0 ; python_version >= "3.10" and python_version < "3.12" h5py==3.10.0 ; python_version >= "3.10" and python_version < "3.12" -huggingface-hub==0.17.3 ; python_version >= "3.10" and python_version < "3.12" +huggingface-hub==0.19.4 ; python_version >= "3.10" and python_version < "3.12" hydra-core==1.3.2 ; python_version >= "3.10" and python_version < "3.12" -identify==2.5.30 ; python_version >= "3.10" and python_version < "3.12" -idna==3.4 ; python_version >= "3.10" and python_version < "3.12" -ipykernel==6.26.0 ; python_version >= "3.10" and python_version < "3.12" -ipython==8.16.1 ; python_version >= "3.10" and python_version < "3.12" +identify==2.5.33 ; python_version >= "3.10" and python_version < "3.12" +idna==3.6 ; python_version >= "3.10" and python_version < "3.12" +iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "3.12" +ipykernel==6.27.1 ; python_version >= "3.10" and python_version < "3.12" +ipython==8.18.1 ; python_version >= "3.10" and python_version < "3.12" jedi==0.19.1 ; python_version >= "3.10" and python_version < "3.12" jinja2==3.1.2 ; 
python_version >= "3.10" and python_version < "3.12" -jinxed==1.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Windows" -jsonschema-specifications==2023.7.1 ; python_version >= "3.10" and python_version < "3.12" -jsonschema==4.19.1 ; python_version >= "3.10" and python_version < "3.12" -jupyter-client==8.4.0 ; python_version >= "3.10" and python_version < "3.12" -jupyter-core==5.4.0 ; python_version >= "3.10" and python_version < "3.12" +jupyter-client==8.6.0 ; python_version >= "3.10" and python_version < "3.12" +jupyter-core==5.5.0 ; python_version >= "3.10" and python_version < "3.12" kaggle==1.5.16 ; python_version >= "3.10" and python_version < "3.12" -keras==2.14.0 ; python_version >= "3.10" and python_version < "3.12" +keras==2.15.0 ; python_version >= "3.10" and python_version < "3.12" libclang==16.0.6 ; python_version >= "3.10" and python_version < "3.12" loguru==0.7.2 ; python_version >= "3.10" and python_version < "3.12" -markdown==3.5 ; python_version >= "3.10" and python_version < "3.12" +markdown==3.5.1 ; python_version >= "3.10" and python_version < "3.12" markupsafe==2.1.3 ; python_version >= "3.10" and python_version < "3.12" matplotlib-inline==0.1.6 ; python_version >= "3.10" and python_version < "3.12" ml-dtypes==0.2.0 ; python_version >= "3.10" and python_version < "3.12" mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12" -msgpack==1.0.7 ; python_version >= "3.10" and python_version < "3.12" -multidict==6.0.4 ; python_version >= "3.10" and python_version < "3.12" nest-asyncio==1.5.8 ; python_version >= "3.10" and python_version < "3.12" -networkx==3.2 ; python_version >= "3.10" and python_version < "3.12" +networkx==3.2.1 ; python_version >= "3.10" and python_version < "3.12" nodeenv==1.8.0 ; python_version >= "3.10" and python_version < "3.12" -numpy==1.26.1 ; python_version >= "3.10" and python_version < "3.12" -nvidia-ml-py==12.535.108 ; python_version >= "3.10" and python_version < "3.12" +numpy==1.26.2 ; python_version >= "3.10" and python_version < "3.12" +nvidia-cublas-cu12==12.1.3.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cuda-cupti-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cuda-nvrtc-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cuda-runtime-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cudnn-cu12==8.9.2.26 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cufft-cu12==11.0.2.54 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-curand-cu12==10.3.2.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cusolver-cu12==11.4.5.107 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-cusparse-cu12==12.1.0.106 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-nccl-cu12==2.18.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and 
python_version < "3.12" +nvidia-nvjitlink-cu12==12.3.101 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +nvidia-nvtx-cu12==12.1.105 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" oauthlib==3.2.2 ; python_version >= "3.10" and python_version < "3.12" omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12" -opencensus-context==0.1.3 ; python_version >= "3.10" and python_version < "3.12" -opencensus==0.11.3 ; python_version >= "3.10" and python_version < "3.12" opencv-python==4.8.1.78 ; python_version >= "3.10" and python_version < "3.12" opendatasets==0.1.22 ; python_version >= "3.10" and python_version < "3.12" opt-einsum==3.3.0 ; python_version >= "3.10" and python_version < "3.12" packaging==23.2 ; python_version >= "3.10" and python_version < "3.12" -pandas==2.1.1 ; python_version >= "3.10" and python_version < "3.12" +pandas==2.1.4 ; python_version >= "3.10" and python_version < "3.12" parso==0.8.3 ; python_version >= "3.10" and python_version < "3.12" pathtools==0.1.2 ; python_version >= "3.10" and python_version < "3.12" -pexpect==4.8.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "win32" -pickleshare==0.7.5 ; python_version >= "3.10" and python_version < "3.12" +pexpect==4.9.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "win32" pillow==10.1.0 ; python_version >= "3.10" and python_version < "3.12" platformdirs==3.11.0 ; python_version >= "3.10" and python_version < "3.12" -pre-commit==3.5.0 ; python_version >= "3.10" and python_version < "3.12" -prometheus-client==0.17.1 ; python_version >= "3.10" and python_version < "3.12" -prompt-toolkit==3.0.39 ; python_version >= "3.10" and python_version < "3.12" -protobuf==4.24.4 ; python_version >= "3.10" and python_version < "3.12" +pluggy==1.3.0 ; python_version >= "3.10" and python_version < "3.12" +pre-commit==3.6.0 ; python_version >= "3.10" and python_version < "3.12" +prompt-toolkit==3.0.42 ; python_version >= "3.10" and python_version < "3.12" +protobuf==4.23.4 ; python_version >= "3.10" and python_version < "3.12" psutil==5.9.6 ; python_version >= "3.10" and python_version < "3.12" ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "win32" pure-eval==0.2.2 ; python_version >= "3.10" and python_version < "3.12" -py-spy==0.3.14 ; python_version >= "3.10" and python_version < "3.12" -pyarrow==13.0.0 ; python_version >= "3.10" and python_version < "3.12" +pyarrow==14.0.1 ; python_version >= "3.10" and python_version < "3.12" pyasn1-modules==0.3.0 ; python_version >= "3.10" and python_version < "3.12" -pyasn1==0.5.0 ; python_version >= "3.10" and python_version < "3.12" +pyasn1==0.5.1 ; python_version >= "3.10" and python_version < "3.12" pycparser==2.21 ; python_version >= "3.10" and python_version < "3.12" and implementation_name == "pypy" -pydantic==1.10.13 ; python_version >= "3.10" and python_version < "3.12" -pygments==2.16.1 ; python_version >= "3.10" and python_version < "3.12" +pydantic-core==2.14.5 ; python_version >= "3.10" and python_version < "3.12" +pydantic==2.5.2 ; python_version >= "3.10" and python_version < "3.12" +pygments==2.17.2 ; python_version >= "3.10" and python_version < "3.12" +pytest==7.4.3 ; python_version >= "3.10" and python_version < "3.12" python-dateutil==2.8.2 ; python_version >= "3.10" and python_version < "3.12" python-slugify==8.0.1 ; 
python_version >= "3.10" and python_version < "3.12" pytz==2023.3.post1 ; python_version >= "3.10" and python_version < "3.12" pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.10" and python_version < "3.12" pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "3.12" -pyzmq==25.1.1 ; python_version >= "3.10" and python_version < "3.12" -ray[data,serve,train,tune]==2.7.1 ; python_version >= "3.10" and python_version < "3.12" -referencing==0.30.2 ; python_version >= "3.10" and python_version < "3.12" +pyzmq==25.1.2 ; python_version >= "3.10" and python_version < "3.12" regex==2023.10.3 ; python_version >= "3.10" and python_version < "3.12" requests-oauthlib==1.3.1 ; python_version >= "3.10" and python_version < "3.12" requests==2.31.0 ; python_version >= "3.10" and python_version < "3.12" -rpds-py==0.10.6 ; python_version >= "3.10" and python_version < "3.12" rsa==4.9 ; python_version >= "3.10" and python_version < "3.12" -safetensors==0.4.0 ; python_version >= "3.10" and python_version < "3.12" -sentry-sdk==1.32.0 ; python_version >= "3.10" and python_version < "3.12" +safetensors==0.4.1 ; python_version >= "3.10" and python_version < "3.12" +sentry-sdk==1.39.0 ; python_version >= "3.10" and python_version < "3.12" setproctitle==1.3.3 ; python_version >= "3.10" and python_version < "3.12" -setuptools==68.2.2 ; python_version >= "3.10" and python_version < "3.12" +setuptools==69.0.2 ; python_version >= "3.10" and python_version < "3.12" six==1.16.0 ; python_version >= "3.10" and python_version < "3.12" -smart-open==6.4.0 ; python_version >= "3.10" and python_version < "3.12" smmap==5.0.1 ; python_version >= "3.10" and python_version < "3.12" -sniffio==1.3.0 ; python_version >= "3.10" and python_version < "3.12" stack-data==0.6.3 ; python_version >= "3.10" and python_version < "3.12" -starlette==0.27.0 ; python_version >= "3.10" and python_version < "3.12" sympy==1.12 ; python_version >= "3.10" and python_version < "3.12" -tensorboard-data-server==0.7.1 ; python_version >= "3.10" and python_version < "3.12" -tensorboard==2.14.1 ; python_version >= "3.10" and python_version < "3.12" -tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12" -tensorflow-estimator==2.14.0 ; python_version >= "3.10" and python_version < "3.12" +tensorboard-data-server==0.7.2 ; python_version >= "3.10" and python_version < "3.12" +tensorboard==2.15.1 ; python_version >= "3.10" and python_version < "3.12" +tensorflow-estimator==2.15.0 ; python_version >= "3.10" and python_version < "3.12" tensorflow-io-gcs-filesystem==0.34.0 ; python_version >= "3.10" and python_version < "3.12" -tensorflow==2.14.0 ; python_version >= "3.10" and python_version < "3.12" -termcolor==2.3.0 ; python_version >= "3.10" and python_version < "3.12" +tensorflow==2.15.0 ; python_version >= "3.10" and python_version < "3.12" +termcolor==2.4.0 ; python_version >= "3.10" and python_version < "3.12" text-unidecode==1.3 ; python_version >= "3.10" and python_version < "3.12" -tokenizers==0.14.1 ; python_version >= "3.10" and python_version < "3.12" -torch==2.1.0 ; python_version >= "3.10" and python_version < "3.12" -torchaudio==2.1.0 ; python_version >= "3.10" and python_version < "3.12" +tokenizers==0.15.0 ; python_version >= "3.10" and python_version < "3.12" +tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.11" +torch==2.1.1 ; python_version >= "3.10" and python_version < "3.12" +torchaudio==2.1.1 ; python_version >= "3.10" and python_version 
< "3.12" torchprofile==0.0.4 ; python_version >= "3.10" and python_version < "3.12" -torchvision==0.16.0 ; python_version >= "3.10" and python_version < "3.12" -tornado==6.3.3 ; python_version >= "3.10" and python_version < "3.12" +torchvision==0.16.1 ; python_version >= "3.10" and python_version < "3.12" +tornado==6.4 ; python_version >= "3.10" and python_version < "3.12" tqdm==4.66.1 ; python_version >= "3.10" and python_version < "3.12" -traitlets==5.11.2 ; python_version >= "3.10" and python_version < "3.12" -transformers==4.34.1 ; python_version >= "3.10" and python_version < "3.12" -typing-extensions==4.8.0 ; python_version >= "3.10" and python_version < "3.12" +traitlets==5.14.0 ; python_version >= "3.10" and python_version < "3.12" +transformers==4.36.0 ; python_version >= "3.10" and python_version < "3.12" +triton==2.1.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "3.12" +typing-extensions==4.9.0 ; python_version >= "3.10" and python_version < "3.12" tzdata==2023.3 ; python_version >= "3.10" and python_version < "3.12" -urllib3==2.0.7 ; python_version >= "3.10" and python_version < "3.12" -uvicorn==0.23.2 ; python_version >= "3.10" and python_version < "3.12" +urllib3==2.1.0 ; python_version >= "3.10" and python_version < "3.12" virtualenv==20.21.0 ; python_version >= "3.10" and python_version < "3.12" wandb==0.15.12 ; python_version >= "3.10" and python_version < "3.12" -watchfiles==0.21.0 ; python_version >= "3.10" and python_version < "3.12" -wcwidth==0.2.8 ; python_version >= "3.10" and python_version < "3.12" +wcwidth==0.2.12 ; python_version >= "3.10" and python_version < "3.12" webencodings==0.5.1 ; python_version >= "3.10" and python_version < "3.12" -werkzeug==3.0.0 ; python_version >= "3.10" and python_version < "3.12" -wheel==0.41.2 ; python_version >= "3.10" and python_version < "3.12" +werkzeug==3.0.1 ; python_version >= "3.10" and python_version < "3.12" +wheel==0.42.0 ; python_version >= "3.10" and python_version < "3.12" win32-setctime==1.1.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32" wrapt==1.14.1 ; python_version >= "3.10" and python_version < "3.12" -yarl==1.9.2 ; python_version >= "3.10" and python_version < "3.12" diff --git a/run_setup.sh b/run_setup.sh new file mode 100644 index 00000000..2744c0b9 --- /dev/null +++ b/run_setup.sh @@ -0,0 +1,7 @@ +echo "Installing..." 
+curl -sSL https://install.python-poetry.org | python -
+echo "Activating virtual environment"
+poetry install
+poetry shell
+pre-commit install
+echo "Environment setup complete"
diff --git a/set_environment_variables_template.sh b/set_environment_variables_template.sh
index 2c80793d..488d881a 100644
--- a/set_environment_variables_template.sh
+++ b/set_environment_variables_template.sh
@@ -5,3 +5,5 @@ export KAGGLE_USERNAME=username
 export KAGGLE_KEY=xxxxxxxxxxxxxx
 #replace with WANDB key
 export WANDB_API_KEY=xxxxxxxxxxxxxx
+#
+export GOOGLE=
diff --git a/signa2text/README.md b/signa2text/README.md
index d427164d..6cc7a3e5 100644
--- a/signa2text/README.md
+++ b/signa2text/README.md
@@ -1,11 +1,30 @@
-# Signa-Text
+# Linguify-YB
 
 [![LICENSE](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
 [![Python](https://img.shields.io/badge/python-3.6-blue.svg?style=flat-square)](https://www.python.org/)
 [![PyTorch](https://img.shields.io/badge/PyTorch-1.7.0-orange)](https://pytorch.org/)
 
-![image/gif]()
+![image/gif](https://github.com/rileydrizzy/Cohort8-Ransom-Kuti-Ladipo/blob/main/images/sign%20lang.gif)
 
 ## Project description
 
-***Overview:*** \
+***Overview:***
+
+
+## Project Roadmap
+
+
+## How to Contribute
+
+We welcome contributions from the community. If you're interested in contributing, please refer to the [Contributing Guidelines](CONTRIBUTING.md).
+
+## Support and Contact
+
+If you have questions or need assistance, feel free to reach out to:
+
+**Name:** **Ipadeola Ezekiel Ladipo**
+**Email:**
+**GitHub:** [@rileydrizzy](https://github.com/rileydrizzy)
+**LinkedIn:** [Ipadeola Ladipo](https://www.linkedin.com/in/ladipo-ipadeola/)
+
+---
diff --git a/linguify_yb/src/metrics.py b/signa2text/data/.gitkeep
similarity index 100%
rename from linguify_yb/src/metrics.py
rename to signa2text/data/.gitkeep
diff --git a/linguify_yb/data/dataset_paths.json b/signa2text/data/dataset_paths.json
similarity index 100%
rename from linguify_yb/data/dataset_paths.json
rename to signa2text/data/dataset_paths.json
diff --git a/signa2text/data/dev_samples.json b/signa2text/data/dev_samples.json
new file mode 100644
index 00000000..a1f8506d
--- /dev/null
+++ b/signa2text/data/dev_samples.json
@@ -0,0 +1,9 @@
+{
+    "train_files": [
+        "kaggle/input/asl-fingerspelling/train_landmarks/1019715464.parquet",
+        "kaggle/input/asl-fingerspelling/train_landmarks/1021040628.parquet"
+    ],
+    "valid_files": [
+        "kaggle/input/asl-fingerspelling/train_landmarks/105143404.parquet"
+    ]
+}
\ No newline at end of file
diff --git a/linguify_yb/development/code_dev.ipynb b/signa2text/development/code_dev.ipynb
similarity index 100%
rename from linguify_yb/development/code_dev.ipynb
rename to signa2text/development/code_dev.ipynb
diff --git a/linguify_yb/development/data_dev.ipynb b/signa2text/development/data_dev.ipynb
similarity index 100%
rename from linguify_yb/development/data_dev.ipynb
rename to signa2text/development/data_dev.ipynb
diff --git a/linguify_yb/development/dev.ipynb b/signa2text/development/dev.ipynb
similarity index 100%
rename from linguify_yb/development/dev.ipynb
rename to signa2text/development/dev.ipynb
diff --git a/linguify_yb/development/trans_dev.ipynb b/signa2text/development/trans_dev.ipynb
similarity index 100%
rename from linguify_yb/development/trans_dev.ipynb
rename to signa2text/development/trans_dev.ipynb
diff --git a/linguify_yb/notebooks/analyasis.ipynb b/signa2text/notebooks/analyasis.ipynb
similarity index 100%
rename from
linguify_yb/notebooks/analyasis.ipynb rename to signa2text/notebooks/analyasis.ipynb diff --git a/signa2text/run_train.sh b/signa2text/run_train.sh new file mode 100644 index 00000000..9504319f --- /dev/null +++ b/signa2text/run_train.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Display a header with script information +echo "=== Running Train Script ===" + +torchrun --standalone --nproc_per_node=1 src/main.py --model_name test_model --epoch 2 +#torchrun --standalone --nproc_per_node=1 src/main.py +#--epochs 10 --batch 512 \ No newline at end of file diff --git a/signa2text/src/benchmark.py b/signa2text/src/benchmark.py new file mode 100644 index 00000000..02a70d52 --- /dev/null +++ b/signa2text/src/benchmark.py @@ -0,0 +1,117 @@ +""" +Module for benchmarking a PyTorch model. + +This module provides a `BenchMarker` class for analyzing model metrics such as +Multiply-Accumulates(MACs), sparsity, the number of parameters, and model size. + +Classes: +- BenchMarker: A class for benchmarking a PyTorch model. + +Functions: +- get_model_macs: Calculate the MACs (Multiply-Accumulates) of a model. +- get_model_sparsity: Calculate the sparsity of a model. +- get_num_parameters: Calculate the total number of parameters of a model. +- get_model_size: Calculate the size of a model in bits. + + +""" +from torchprofile import profile_macs +from torch import nn + +Byte = 8 +KiB = 1024 * Byte +MiB = 1024 * KiB +GiB = 1024 * MiB + + +class BenchMarker: + """ + Benchmarking class to analyze model metrics such as MACs, + sparsity, number of parameters, and model size. + """ + + def __init__(self) -> None: + pass + + def get_model_macs(self, model, inputs=None) -> int: + """ + Calculate the Multiply-Accumulates (MACs) of a model. + + Parameters: + - model: The PyTorch model. + - inputs: The input tensor to the model. + + Returns: + - int: The number of MACs. + """ + return profile_macs(model, inputs) + + def get_model_sparsity(self, model: nn.Module) -> float: + """ + Calculate the sparsity of the given model. + + Sparsity is defined as 1 - (number of non-zeros / total number of elements). + + Parameters: + - model: The PyTorch model. + + Returns: + - float: The sparsity of the model. + """ + num_nonzeros, num_elements = 0, 0 + for param in model.parameters(): + num_nonzeros += param.count_nonzero() + num_elements += param.numel() + return 1 - float(num_nonzeros) / num_elements + + def get_num_parameters(self, model: nn.Module, count_nonzero_only=False) -> int: + """ + Calculate the total number of parameters of the model. + + Parameters: + - model: The PyTorch model. + - count_nonzero_only: If True, count only nonzero weights. + + Returns: + - int: The total number of parameters. + """ + num_counted_elements = 0 + for param in model.parameters(): + if count_nonzero_only: + num_counted_elements += param.count_nonzero() + else: + num_counted_elements += param.numel() + return num_counted_elements + + def get_model_size( + self, model: nn.Module, data_width=32, count_nonzero_only=False + ) -> int: + """ + Calculate the model size in bits. + + Parameters: + - model: The PyTorch model. + - data_width: Number of bits per element. + - count_nonzero_only: If True, count only nonzero weights. + + Returns: + - int: The model size in bits. + """ + return self.get_num_parameters(model, count_nonzero_only) * data_width + + def runner(self, model): + """ + Run the benchmark on the given model. + + Parameters: + - model: The PyTorch model. 
+ + Returns: + - tuple: A tuple containing the model metrics + """ + model_macs = self.get_model_macs(model) + model_sparsity = self.get_model_sparsity(model) + model_num_params = self.get_num_parameters(model) + model_size = self.get_model_size(model) + + return model_macs, model_sparsity, model_num_params, model_size diff --git a/signa2text/src/config.py b/signa2text/src/config.py new file mode 100644 index 00000000..34ab84f8 --- /dev/null +++ b/signa2text/src/config.py @@ -0,0 +1,6 @@ +"""doc +""" +from pydantic import BaseModel + +class Arg_type(BaseModel): + save_every: int diff --git a/linguify_yb/src/models/__init__.py b/signa2text/src/config/config.yaml similarity index 100% rename from linguify_yb/src/models/__init__.py rename to signa2text/src/config/config.yaml diff --git a/signa2text/src/dataset/dataset_loader.py b/signa2text/src/dataset/dataset_loader.py new file mode 100644 index 00000000..de7cc4eb --- /dev/null +++ b/signa2text/src/dataset/dataset_loader.py @@ -0,0 +1,237 @@ +""" +Module to define datasets and dataloaders for ASL Fingerspelling project. + +Classes: +- TokenHashTable: A class for handling token-to-index and index-to-token mappings. +- LandmarkDataset: A dataset class for ASL Fingerspelling frames,\ + including methods for processing and cleaning frames. + +Functions: +- read_file: Read data from file based on file_id_list and landmarks_metadata_path. +- get_dataset: Create a dataset with token-to-index mapping. +- prepare_dataloader: Prepare a dataloader with distributed sampling. +""" + + +import json +import pandas as pd +import pyarrow.parquet as pq +import torch +from torch.nn import functional as F +from torch.utils.data import DataLoader, Dataset +from torch.utils.data.distributed import DistributedSampler +from dataset.frames_config import FEATURE_COLUMNS +from dataset.preprocess import clean_frames_process + +# File paths for metadata and phrase-to-index mapping +PHRASE_PATH = "/kaggle/input/asl-fingerspelling/character_to_prediction_index.json" +METADATA = "/kaggle/input/asl-fingerspelling/train.csv" + +# Load phrase-to-index mapping +with open(PHRASE_PATH, "r", encoding="utf-8") as f: + character_to_num = json.load(f) + +# Define special tokens and their corresponding indices +PAD_TOKEN = "P" +START_TOKEN = "<" +END_TOKEN = ">" +PAD_TOKEN_IDX = 59 +START_TOKEN_IDX = 60 +END_TOKEN_IDX = 61 + +# Add special tokens to the mapping +character_to_num[PAD_TOKEN] = PAD_TOKEN_IDX +character_to_num[START_TOKEN] = START_TOKEN_IDX +character_to_num[END_TOKEN] = END_TOKEN_IDX + +# Create a mapping from index to character +num_to_character = {j: i for i, j in character_to_num.items()} + + +class TokenHashTable: + def __init__( + self, word2index_mapping=character_to_num, index2word_mapping=num_to_character + ): + """ + Initialize a TokenHashTable to handle token-to-index and index-to-token mapping. + + Parameters: + word2index_mapping (dict): Mapping from word to index. + index2word_mapping (dict): Mapping from index to word. + """ + self.word2index = word2index_mapping + self.index2word = index2word_mapping + + def _indexesfromsentence(self, sentence): + """ + Convert a sentence into a list of corresponding indices. + + Parameters: + sentence (list): List of words in a sentence. + + Returns: + list: List of indices corresponding to words in the sentence. + """ + return [self.word2index[word] for word in sentence] + + def tensorfromsentence(self, sentence): + """ + Convert a sentence into a tensor of indices. 
+
+        Parameters:
+            sentence (list): List of words in a sentence.
+
+        Returns:
+            torch.Tensor: Tensor of indices.
+        """
+        indexes = self._indexesfromsentence(sentence)
+        return torch.tensor(indexes, dtype=torch.long)
+
+    def indexes_to_sentence(self, indexes_list):
+        """
+        Convert a list of indices into a list of corresponding words.
+
+        Parameters:
+            indexes_list (list or torch.Tensor): List or tensor of indices.
+
+        Returns:
+            list: List of words corresponding to the indices.
+        """
+        if torch.is_tensor(indexes_list):
+            indexes_list = indexes_list.tolist()
+        words = [self.index2word[idx] for idx in indexes_list]
+        return words
+
+
+def read_file(file_id_list, landmarks_metadata_path):
+    """
+    Read data from file based on file_id_list and landmarks_metadata_path.
+
+    Parameters:
+        file_id_list (list): List of tuples containing file paths and corresponding file_ids.
+        landmarks_metadata_path (str): Path to the metadata file.
+
+    Returns:
+        tuple: A tuple containing lists of frames and phrases.
+    """
+    phrase_list = []
+    frames_list = []
+    for file, file_id in file_id_list:
+        metadata_train_dataframe = pd.read_csv(landmarks_metadata_path)
+        file_id_df = metadata_train_dataframe.loc[
+            metadata_train_dataframe["file_id"] == file_id
+        ]
+        saved_parquet_df = pq.read_table(
+            file, columns=["sequence_id"] + FEATURE_COLUMNS
+        ).to_pandas()
+        for seq_id, phrase in zip(file_id_df.sequence_id, file_id_df.phrase):
+            frames = saved_parquet_df[saved_parquet_df.index == seq_id].to_numpy()
+            # Handle NaN values
+            frames_list.append(torch.tensor(frames))
+            phrase_list.append(phrase)
+    return frames_list, phrase_list
+
+
+class LandmarkDataset(Dataset):
+    def __init__(self, file_path, table, transform=True):
+        """
+        Initialize a LandmarkDataset.
+
+        Parameters:
+        - file_path (list): List of (file_path, file_id) tuples to load.
+        - table (TokenHashTable): Token lookup table for encoding phrases.
+        - transform (bool, optional): Whether to preprocess frames and labels, by default True.
+        """
+        self.landmarks_metadata_path = METADATA
+        self.frames, self.labels = read_file(file_path, self.landmarks_metadata_path)
+        self.trans = transform
+        self.table = table
+
+    def _label_pre(self, label_sample):
+        """
+        Preprocess label samples.
+
+        Parameters:
+        - label_sample (str): Phrase to encode.
+
+        Returns:
+        - torch.Tensor: Token indices wrapped in start/end tokens and padded to length 64.
+        """
+        sample = START_TOKEN + label_sample + END_TOKEN
+        new_phrase = self.table.tensorfromsentence(list(sample))
+        ans = F.pad(
+            input=new_phrase,
+            pad=[0, 64 - new_phrase.shape[0]],
+            mode="constant",
+            value=PAD_TOKEN_IDX,
+        )
+        return ans
+
+    def __len__(self):
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+        phrase = self.labels[idx]
+        frames = self.frames[idx]
+
+        if self.trans:
+            phrase = self._label_pre(phrase)
+            frames = clean_frames_process(frames)
+        return frames, phrase
+
+
+def get_dataset(file_path):
+    """
+    Create a dataset with token-to-index mapping.
+
+    Parameters:
+    - file_path (list): List of (file_path, file_id) tuples to load.
+
+    Returns:
+    - LandmarkDataset: Dataset over the given files.
+    """
+    lookup_table = TokenHashTable(character_to_num, num_to_character)
+    dataset = LandmarkDataset(file_path, lookup_table, transform=True)
+    return dataset
+
+
+def prepare_dataloader(dataset: Dataset, batch_size: int, num_workers_: int = 1):
+    """
+    Prepare a dataloader with distributed sampling.
+
+    Parameters:
+        dataset (Dataset): The dataset to load.
+        batch_size (int): Number of samples per batch.
+        num_workers_ (int, optional): Number of workers for data loading, by default 1.
+
+    Returns:
+        DataLoader: A DataLoader instance for the specified dataset.
+ """ + return DataLoader( + dataset, + batch_size=batch_size, + pin_memory=True, + num_workers=num_workers_, + sampler=DistributedSampler(dataset), + ) + + +#! A dataset class for debugging the train pipeline +class TestDataset(Dataset): + def __init__(self, size): + self.size = size + self.data = [(torch.rand(20), torch.rand(1)) for _ in range(size)] + + def __len__(self): + return self.size + + def __getitem__(self, index): + return self.data[index] + + +#! Function to get a test dataset for debugging train pipeline +def get_test_dataset(): + dataset = TestDataset + return dataset diff --git a/signa2text/src/dataset/dataset_paths.py b/signa2text/src/dataset/dataset_paths.py new file mode 100644 index 00000000..3ea6fb0f --- /dev/null +++ b/signa2text/src/dataset/dataset_paths.py @@ -0,0 +1,46 @@ +"""doc +""" +import os +import json +from utils.logger_util import logger + + +def get_dataset_paths(): + """_summary_ + + Returns + ------- + _type_ + _description_ + """ + try: + # On kaggle replace with "data/dataset_paths.json" to train on full data + dataset_paths = "data/dev_samples.json" + with open(dataset_paths, "r", encoding="utf-8") as json_file: + dataset_paths_dict = json.load(json_file) + + # Training dataset + train_dataset_dict = dataset_paths_dict["train_files"] + train_file_ids = [os.path.basename(file) for file in train_dataset_dict] + train_file_ids = [ + int(file_name.replace(".parquet", "")) for file_name in train_file_ids + ] + assert len(train_dataset_dict) == len( + train_file_ids + ), "Failed getting Train files path" + train_ds_files = list(zip(train_dataset_dict, train_file_ids)) + + # Validation dataset + valid_dataset_dict = dataset_paths_dict["valid_files"] + valid_file_ids = [os.path.basename(file) for file in valid_dataset_dict] + valid_file_ids = [ + int(file_name.replace(".parquet", "")) for file_name in valid_file_ids + ] + assert len(train_dataset_dict) == len( + train_file_ids + ), "Failed getting of Valid Files path" + valid_ds_files = list(zip(valid_dataset_dict, valid_file_ids)) + + return train_ds_files, valid_ds_files + except AssertionError as asset_error: + logger.exception(f"Failed due to {asset_error}") diff --git a/linguify_yb/src/dataset/frames_config.py b/signa2text/src/dataset/frames_config.py similarity index 58% rename from linguify_yb/src/dataset/frames_config.py rename to signa2text/src/dataset/frames_config.py index 2f277968..a60ef371 100644 --- a/linguify_yb/src/dataset/frames_config.py +++ b/signa2text/src/dataset/frames_config.py @@ -1,51 +1,22 @@ -"""doc +""" +Module to define constants and lists related to ASL Fingerspelling frame features. 
+Variables:
+- FRAME_LEN: number of frames each sequence is clipped or padded to.
+- LIP: face-mesh indices of the lip landmarks.
+- FEATURE_COLUMNS: ordered list of landmark feature column names.
 """
-
+# Number of frames each sequence is clipped or padded to
 FRAME_LEN = 128
+
+# Indices corresponding to lip features
 LIP = [
-    61,
-    185,
-    40,
-    39,
-    37,
-    0,
-    267,
-    269,
-    270,
-    409,
-    291,
-    146,
-    91,
-    181,
-    84,
-    17,
-    314,
-    405,
-    321,
-    375,
-    78,
-    191,
-    80,
-    81,
-    82,
-    13,
-    312,
-    311,
-    310,
-    415,
-    95,
-    88,
-    178,
-    87,
-    14,
-    317,
-    402,
-    318,
-    324,
-    308,
+    61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 146, 91, 181, 84, 17, 314, 405, 321,
+    375, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 95, 88, 178, 87, 14, 317, 402, 318,
+    324, 308,
 ]
+# Feature names for different body parts
 FRAME = ["frame"]
 N_LHAND = (
     [f"x_left_hand_{i}" for i in range(21)]
@@ -69,4 +40,5 @@
     + [f"z_face_{i}" for i in LIP]
 )
+# Combined list of feature columns
 FEATURE_COLUMNS = FRAME + N_LHAND + N_RHAND + N_POSE + N_FACE
diff --git a/signa2text/src/dataset/preprocess.py b/signa2text/src/dataset/preprocess.py
new file mode 100644
index 00000000..7e927b9f
--- /dev/null
+++ b/signa2text/src/dataset/preprocess.py
@@ -0,0 +1,61 @@
+"""
+Module to define a function for cleaning and processing ASL Fingerspelling frames.
+
+Functions:
+- clean_frames_process: clean, reshape, and pad a frame tensor to a fixed length.
+"""
+
+import torch
+from torch.nn import functional as F
+
+
+def clean_frames_process(
+    frames,
+    max_frame_len=128,
+    n_hand_landmarks=21,
+    n_pose_landmarks=33,
+    n_face_landmarks=40,
+):
+    """Clean and process ASL Fingerspelling frames.
+
+    Parameters
+    ----------
+    frames : torch.Tensor
+        Input tensor containing frames.
+    max_frame_len : int, optional
+        Maximum length of frames, by default 128
+    n_hand_landmarks : int, optional
+        Number of hand landmarks, by default 21
+    n_pose_landmarks : int, optional
+        Number of pose landmarks, by default 33
+    n_face_landmarks : int, optional
+        Number of face landmarks, by default 40
+
+    Returns
+    -------
+    torch.Tensor
+        Cleaned and processed frames tensor.
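+
+    Example
+    -------
+    Illustrative shapes only: a raw (num_frames, 345) landmark tensor is
+    clipped or padded to (max_frame_len, 345).
+
+    >>> clean_frames_process(torch.rand(160, 345)).shape
+    torch.Size([128, 345])
+    >>> clean_frames_process(torch.rand(40, 345)).shape
+    torch.Size([128, 345])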
+ """ + # Clip frames to the maximum length + frames = frames[:max_frame_len] + # Replace NaN values with zeros + frames = torch.where(torch.isnan(frames), torch.zeros_like(frames), frames) + + # Split the tensor into different body part landmarks + lhand = frames[:, 0:63].view(frames.size(0), 3, n_hand_landmarks).transpose(1, 2) + rhand = frames[:, 63:126].view(frames.size(0), 3, n_hand_landmarks).transpose(1, 2) + pose = frames[:, 126:225].view(frames.size(0), 3, n_pose_landmarks).transpose(1, 2) + face = frames[:, 225:345].view(frames.size(0), 3, n_face_landmarks).transpose(1, 2) + + # Concatenate the landmarks along the specified axis + frames = torch.cat([lhand, rhand, pose, face], axis=1) + # Reshape the tensor + frames = frames.view(frames.size(0), 345) + + if frames.size(0) < max_frame_len: + # Calculate the padding on the first dimension from the bottom + padding_bottom = max(0, max_frame_len - frames.size(0)) + # Pad the tensor along the first dimension from the bottom + frames = F.pad(frames, (0, 0, 0, padding_bottom)) + + return frames diff --git a/linguify_yb/src/dev_data.py b/signa2text/src/dev_data.py similarity index 96% rename from linguify_yb/src/dev_data.py rename to signa2text/src/dev_data.py index a861d8b0..6f15731a 100644 --- a/linguify_yb/src/dev_data.py +++ b/signa2text/src/dev_data.py @@ -16,7 +16,7 @@ from utils.logger_util import logger -DATA_DIR = "data/asl-fingerspelling/" +DATA_DIR = "kaggle/input/asl-fingerspelling/" data_files = ["train.csv", "character_to_prediction_index.json"] train_landmarks = ["1019715464.parquet", "1021040628.parquet", "105143404.parquet"] TRAIN_LANDMARKS_DIR = "train_landmarks/" @@ -30,7 +30,7 @@ "-f", "FILE", "-p", - "data/raw/", + f"{DATA_DIR}", ] @@ -98,7 +98,7 @@ def main(): try: logger.info(f"Current Available space {check_storage()}GB") for file in data_files: - logger.info(f"Downloading{file} in {DATA_DIR}") + logger.info(f"Downloading {file} in {DATA_DIR}") COMMAND[6] = file unzipfile_path = DATA_DIR + file + ".zip" downlaod_file(COMMAND, unzipfile_path, DATA_DIR) diff --git a/linguify_yb/src/models/static_transfromer.py b/signa2text/src/evalute.py similarity index 100% rename from linguify_yb/src/models/static_transfromer.py rename to signa2text/src/evalute.py diff --git a/signa2text/src/main.py b/signa2text/src/main.py new file mode 100644 index 00000000..bdfeb63b --- /dev/null +++ b/signa2text/src/main.py @@ -0,0 +1,95 @@ +""" +Module for distributed training with PyTorch using Distributed Data Parallel (DDP). + +""" + +# TODO cleanup and complete documentation +# TODO Complete and refactor code for distributed training +# TODO remove test model and test data + +import torch + +from torch import nn + +from utils.util import parse_args, set_seed +from utils.logger_util import logger +from models.model_loader import ModelLoader +from dataset.dataset_loader import get_dataset, prepare_dataloader, get_test_dataset +from dataset.dataset_paths import get_dataset_paths +from trainer import Trainer, ddp_setup +from torch.distributed import destroy_process_group + + +def load_train_objs(model_name, files=None): + """ + Load training objects, including the model, optimizer, dataset, and criterion. + + Parameters: + - model_name (str): Name of the model to be loaded. + - files: Optional parameter for specifying files. + + Returns: + - model: The loaded model. + - optimizer_: The optimizer for training. + - dataset: The training dataset. + - criterion: The loss criterion for training. 
+ """ + model = ModelLoader().get_model(model_name) + + # Optimizes given model/function using TorchDynamo and specified backend + torch.compile(model) + optimizer_ = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + criterion = nn.CrossEntropyLoss(label_smoothing=0.1) + dataset = get_test_dataset() # get_dataset(files) + return model, optimizer_, dataset, criterion + + +def main(model_name: str, save_every: int, total_epochs: int, batch_size: int): + """ + Main function for training a model. + + Parameters: + - model_name (str): Name of the model to be trained. + - save_every (int): Frequency of saving the model during training. + - total_epochs (int): Total number of training epochs. + - batch_size (int): Batch size for training. + """ + logger.info(f"Starting training on {model_name}, epoch -> {total_epochs}") + logger.info(f"Batch Size -> {batch_size}, model saved every -> {save_every} epoch") + + # To ensure reproducibility of the training process + set_seed() + + try: + # train, valid = get_dataset_paths() + ddp_setup() + dataset, model, optimizer, criterion = load_train_objs(model_name) + train_dataset = prepare_dataloader( + dataset, + batch_size, + ) + trainer = Trainer( + model=model, + train_data=train_dataset, + optimizer=optimizer, + save_every=save_every, + loss_func=criterion, + ) + + trainer.train(total_epochs) + destroy_process_group() + + logger.success(f"Training completed: {total_epochs} epochs on.") + except Exception as error: + logger.exception(f"Training failed due to {error}.") + + +if __name__ == "__main__": + arg = parse_args() + logger.info(f"{arg.model_name}") + main( + model_name=arg.model_name, + save_every=arg.save_every, + total_epochs=arg.epochs, + batch_size=arg.batch, + ) diff --git a/linguify_yb/src/tests/__init__.py b/signa2text/src/metrics.py similarity index 100% rename from linguify_yb/src/tests/__init__.py rename to signa2text/src/metrics.py diff --git a/signa2text/src/utils/__init__.py b/signa2text/src/models/__init__.py similarity index 100% rename from signa2text/src/utils/__init__.py rename to signa2text/src/models/__init__.py diff --git a/linguify_yb/src/models/baseline_transformer.py b/signa2text/src/models/baseline_transformer.py similarity index 78% rename from linguify_yb/src/models/baseline_transformer.py rename to signa2text/src/models/baseline_transformer.py index 5fc151e2..9147d431 100644 --- a/linguify_yb/src/models/baseline_transformer.py +++ b/signa2text/src/models/baseline_transformer.py @@ -18,41 +18,46 @@ class TokenEmbedding(nn.Module): - """Embed the tokens with postion encoding""" + """Embed the tokens with position encoding""" def __init__(self, num_vocab, maxlen, embedding_dim): - """_summary_ - + """ Parameters ---------- num_vocab : int - number of vocabulary + number of character vocabulary maxlen : int - maximuin length of sequence + maximum length of sequence embedding_dim : int embedding output dimension """ super().__init__() self.token_embed_layer = nn.Embedding(num_vocab, embedding_dim) - self.postion_embed_layer = nn.Embedding(maxlen, embedding_dim) + self.position_embed_layer = nn.Embedding(maxlen, embedding_dim) def forward(self, x): - """_summary_ - + """ Parameters ---------- x : tensors - _description_ + input tensor with shape (batch_size, sequence_length) Returns ------- tensors - _description_ + embedded tensor with shape (batch_size, sequence_length, embedding_dim) """ - maxlen = x.size(-1) + batch_size, maxlen = x.size() + + # Token embedding x = self.token_embed_layer(x) + + # Positional 
encoding positions = torch.arange(0, maxlen).to(x.device) - positions = self.postion_embed_layer(positions) + positions = ( + self.position_embed_layer(positions).unsqueeze(0).expand(batch_size, -1, -1) + ) + return x + positions @@ -97,27 +102,27 @@ def forward(self, x): class TransformerEncoder(nn.Module): - """_summary_""" + """Transformer Encoder Module""" def __init__( self, embedding_dim, num_heads, feed_forward_dim, - rate=0.1, + dropout_rate=0.1, ): - """_summary_ + """Initialize the Transformer Encoder Parameters ---------- - embedding_dim : _type_ - _description_ - num_heads : _type_ - _description_ - feed_forward_dim : _type_ - _description_ - rate : float, optional - _description_, by default 0.1 + embedding_dim : int + Dimension of input embeddings + num_heads : int + Number of attention heads in the multi-head attention layer + feed_forward_dim : int + Dimension of the feed-forward layer + dropout_rate : float, optional + Dropout rate, by default 0.1 """ super().__init__() self.multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) @@ -129,31 +134,55 @@ def __init__( self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) - self.dropout1 = nn.Dropout(rate) - self.dropout2 = nn.Dropout(rate) + self.dropout1 = nn.Dropout(dropout_rate) + self.dropout2 = nn.Dropout(dropout_rate) def forward(self, inputs_x): + # Multi-head attention multi_attention_out, _ = self.multi_attention(inputs_x, inputs_x, inputs_x) multi_attention_out = self.dropout1(multi_attention_out) + + # Residual connection and layer normalization out1 = self.layernorm1(inputs_x + multi_attention_out) + # Feed-forward layer ffn_out = self.ffn(out1) ffn_out = self.dropout2(ffn_out) + + # Residual connection and layer normalization x = self.layernorm2(out1 + ffn_out) + return x class TransformerDecoder(nn.Module): - """_summary_""" + """Transformer Decoder Module""" def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1): + """Initialize the Transformer Decoder + + Parameters + ---------- + embedding_dim : int + Dimension of input embeddings + num_heads : int + Number of attention heads in the multi-head attention layer + feed_forward_dim : int + Dimension of the feed-forward layer + dropout_rate : float, optional + Dropout rate, by default 0.1 + """ super().__init__() self.num_heads_ = num_heads self.layernorm1 = nn.LayerNorm(embedding_dim, eps=1e-6) self.layernorm2 = nn.LayerNorm(embedding_dim, eps=1e-6) self.layernorm3 = nn.LayerNorm(embedding_dim, eps=1e-6) - self.decoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) - self.encoder_multi_attention = nn.MultiheadAttention(embedding_dim, num_heads) + self.decoder_multi_attention = nn.MultiheadAttention( + embedding_dim, num_heads, batch_first=True + ) + self.encoder_multi_attention = nn.MultiheadAttention( + embedding_dim, num_heads, batch_first=True + ) self.decoder_dropout = nn.Dropout(0.5) self.encoder_dropout = nn.Dropout(dropout_rate) self.ffn_dropout = nn.Dropout(dropout_rate) @@ -163,7 +192,7 @@ def __init__(self, embedding_dim, num_heads, feed_forward_dim, dropout_rate=0.1) nn.Linear(feed_forward_dim, embedding_dim), ) - def _causal_attention_mask(self, sequence_length, batch_size=1, device=None): + def _causal_attention_mask(self, sequence_length, batch_size, device=None): mask = torch.triu(torch.ones(sequence_length, sequence_length), diagonal=1).to( device ) @@ -172,15 +201,10 @@ def _causal_attention_mask(self, sequence_length, batch_size=1, 
device=None):
         )
         return mask
 
-    def forward(
-        self,
-        encoder_out,
-        src_target_,
-    ):
-        input_shape = src_target_.size()
-        batch_size = 1  # input_shape[0]
-        seq_len = input_shape[0]
-        x_device = src_target_.device
+    def forward(self, encoder_out, src_target):
+        input_shape = src_target.size()
+        batch_size, seq_len, _ = input_shape
+        x_device = src_target.device
 
         # Mask
         causal_mask = self._causal_attention_mask(
@@ -188,12 +212,9 @@ def forward(
         )
 
         target_att, _ = self.decoder_multi_attention(
-            src_target_, src_target_, src_target_, attn_mask=causal_mask
-        )
-        target_norm_out = self.layernorm1(
-            src_target_ + self.decoder_dropout(target_att)
+            src_target, src_target, src_target, attn_mask=causal_mask
         )
-
+        target_norm_out = self.layernorm1(src_target + self.decoder_dropout(target_att))
         encoder_out, _ = self.encoder_multi_attention(
             target_norm_out, encoder_out, encoder_out
         )
@@ -201,6 +222,7 @@ def forward(
 
         ffn_out = self.ffn(enc_out_norm)
         ffn_out_norm = self.layernorm3(enc_out_norm + self.ffn_dropout(ffn_out))
+
         return ffn_out_norm
diff --git a/linguify_yb/src/models/model_loader.py b/signa2text/src/models/model_loader.py
similarity index 68%
rename from linguify_yb/src/models/model_loader.py
rename to signa2text/src/models/model_loader.py
index aabc97b7..529690f8 100644
--- a/linguify_yb/src/models/model_loader.py
+++ b/signa2text/src/models/model_loader.py
@@ -3,15 +3,21 @@
 """
 from models.baseline_transformer import ASLTransformer
+import torch
+
+
+def test_model():
+    # A tiny stand-in network for smoke-testing the training loop;
+    # nn.Sequential takes modules as positional arguments, not a list
+    model = torch.nn.Sequential(
+        torch.nn.Linear(20, 100), torch.nn.Linear(100, 10), torch.nn.Linear(10, 5)
+    )
+    return model
 
 
 class ModelLoader:
     """Model Loader"""
 
     def __init__(self):
-        self.models = {
-            "asl_transfomer": ASLTransformer(),
-        }
+        self.models = {"asl_transformer": ASLTransformer(), "test_model": test_model()}
 
     def get_model(self, model_name):
         """build and retrieve the model instance
diff --git a/linguify_yb/src/tests/test_pipeline.py b/signa2text/src/models/static_transfromer.py
similarity index 100%
rename from linguify_yb/src/tests/test_pipeline.py
rename to signa2text/src/models/static_transfromer.py
diff --git a/signa2text/src/tests/__init__.py b/signa2text/src/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/linguify_yb/src/tests/test_data_ingestion.py b/signa2text/src/tests/test_data_ingestion.py
similarity index 70%
rename from linguify_yb/src/tests/test_data_ingestion.py
rename to signa2text/src/tests/test_data_ingestion.py
index aaa5eedf..ab9d97f5 100644
--- a/linguify_yb/src/tests/test_data_ingestion.py
+++ b/signa2text/src/tests/test_data_ingestion.py
@@ -3,7 +3,6 @@
 
 import pytest
 import torch
-from torch.utils.data import DataLoader
 from src.dataset.frames_config import FRAME_LEN
 from src.dataset.preprocess import clean_frames_process
 from src.dataset.dataset_loader import TokenHashTable
@@ -31,15 +30,34 @@ def test_token_hash_table():
     token_table = TokenHashTable()
     sample_sentence = "this is a test run"
     sample_sentence_len = len(sample_sentence)
-    sample_sentence_token = [60,51,39,40,50,0,40,50,0,32,0,51,36,50,51,0,49,52,45,61]
-    # Padding the token
-    sample_sentence_token = sample_sentence_token + (
-        [59] * (64 - len(sample_sentence_token))
-    )
-    sample_sentence_token = torch.tensor(sample_sentence_token)
+    # Expected token indices for the raw sentence (no start/end/pad tokens)
+    sample_sentence_token = [
+        51, 39, 40, 50, 0, 40, 50, 0, 32, 0, 51, 36, 50, 51, 0, 49, 52, 45,
+    ]
+    sample_sentence_token = torch.tensor(sample_sentence_token, dtype=torch.long)
     tokenize_result = 
token_table.sentence_to_tensor(sample_sentence)
+    assert sample_sentence_len == len(tokenize_result)
+    # The encoded indices should match the expected tokens exactly
+    assert torch.equal(sample_sentence_token, tokenize_result)
 
-    # Assert that clean_frames is a PyTorch tensor
+    # Assert that tokenize_result is a PyTorch tensor
     assert torch.is_tensor(tokenize_result), "is not PyTorch tensor"
diff --git a/linguify_yb/src/tests/test_model.py b/signa2text/src/tests/test_model.py
similarity index 100%
rename from linguify_yb/src/tests/test_model.py
rename to signa2text/src/tests/test_model.py
diff --git a/signa2text/src/tests/test_pipeline.py b/signa2text/src/tests/test_pipeline.py
new file mode 100644
index 00000000..e69de29b
diff --git a/signa2text/src/trainer.py b/signa2text/src/trainer.py
new file mode 100644
index 00000000..c7c9587e
--- /dev/null
+++ b/signa2text/src/trainer.py
@@ -0,0 +1,132 @@
+"""
+Module for distributed training with PyTorch using Distributed Data Parallel (DDP).
+
+Classes:
+- Trainer: A class for training neural network models in a distributed setup.
+
+Functions:
+- ddp_setup: Setup Distributed Data Parallel (DDP) for training.
+"""
+
+# TODO Complete and refactor code for distributed training
+
+import os
+
+import torch
+from torch.utils.data import DataLoader
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group
+from utils.logger_util import logger
+
+
+def ddp_setup():
+    """
+    Setup Distributed Data Parallel (DDP) for training.
+    """
+    init_process_group(backend="nccl")
+    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
+
+
+class Trainer:
+    def __init__(
+        self,
+        model: torch.nn.Module,
+        train_data: DataLoader,
+        optimizer: torch.optim.Optimizer,
+        save_every: int,
+        loss_func,
+    ):
+        """
+        Initialize a Trainer instance.
+
+        Parameters:
+        - model (torch.nn.Module): The neural network model.
+        - train_data (DataLoader): The DataLoader for training data.
+        - optimizer (torch.optim.Optimizer): The optimizer for training.
+        - save_every (int): Save a snapshot of the model every `save_every` epochs.
+        - loss_func: The loss function for training.
+        """
+        self.gpu_id = int(os.environ["LOCAL_RANK"])
+        self.model = model.to(self.gpu_id)
+        self.train_data = train_data
+        self.optimizer = optimizer
+        self.loss_func = loss_func
+        self.save_every = save_every
+        self.epochs_run = 0
+        self.snapshot_path = "snapshot.pt"
+        if os.path.exists(self.snapshot_path):
+            logger.info("Loading snapshot")
+            self._load_snapshot(self.snapshot_path)
+
+        self.model = DDP(self.model, device_ids=[self.gpu_id])
+
+    def _load_snapshot(self, snapshot_path):
+        """
+        Load a snapshot of the model.
+
+        Parameters:
+        - snapshot_path (str): Path to the snapshot file.
+        """
+        loc = f"cuda:{self.gpu_id}"
+        snapshot = torch.load(snapshot_path, map_location=loc)
+        self.model.load_state_dict(snapshot["MODEL_STATE"])
+        self.epochs_run = snapshot["EPOCHS_RUN"]
+        logger.info(f"Resuming training from snapshot at Epoch {self.epochs_run}")
+
+    def _run_batch(self, source, targets):
+        """
+        Run a training batch.
+
+        Parameters:
+        - source (torch.Tensor): Input batch moved to this process's GPU.
+        - targets (torch.Tensor): Target batch moved to this process's GPU.
+        """
+        self.optimizer.zero_grad()
+        output = self.model(source)
+        loss = self.loss_func(output, targets)
+        loss.backward()
+        self.optimizer.step()
+
+    def _run_epoch(self, epoch):
+        """
+        Run a training epoch.
+
+        Parameters:
+        - epoch (int): The current epoch.
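+
+        Note: the sampler's set_epoch(epoch) must be called before iterating
+        so that DistributedSampler reshuffles the data each epoch.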
+ """ + b_sz = len(next(iter(self.train_data))[0]) + logger.info( + f"[GPU{self.gpu_id}] Epoch {epoch} | Batchsize: {b_sz} | Steps: {len(self.train_data)}" + ) + self.train_data.sampler.set_epoch(epoch) + for source, targets in self.train_data: + source = source.to(self.gpu_id) + targets = targets.to(self.gpu_id) + self._run_batch(source, targets) + + def _save_snapshot(self, epoch): + """ + Save a snapshot of the model. + + Parameters: + - epoch (int): The current epoch. + """ + snapshot = { + "MODEL_STATE": self.model.module.state_dict(), + "EPOCHS_RUN": epoch, + } + torch.save(snapshot, self.snapshot_path) + logger.info(f"Epoch {epoch} | Training snapshot saved at {self.snapshot_path}") + + def train(self, max_epochs: int): + """ + Train the model for a specified number of epochs. + + Parameters: + - max_epochs (int): The maximum number of epochs to train. + """ + for epoch in range(self.epochs_run, max_epochs): + self._run_epoch(epoch) + if self.gpu_id == 0 and epoch % self.save_every == 0: + self._save_snapshot(epoch) diff --git a/signa2text/src/utils/logger_util.py b/signa2text/src/utils/logger_util.py index 618d207b..f81d88f9 100644 --- a/signa2text/src/utils/logger_util.py +++ b/signa2text/src/utils/logger_util.py @@ -10,7 +10,8 @@ - `logger.warning("Warning message")` - `logger.error("Error message")` - `logger.critical("Critical message")` - - `logger.succues("success messgae")` + - `logger.exception(" ")` + - `logger.success("success messgae")` """ from pathlib import Path diff --git a/linguify_yb/src/utils/util.py b/signa2text/src/utils/util.py similarity index 90% rename from linguify_yb/src/utils/util.py rename to signa2text/src/utils/util.py index c4371b7b..cd27952f 100644 --- a/linguify_yb/src/utils/util.py +++ b/signa2text/src/utils/util.py @@ -39,8 +39,8 @@ def parse_args(): ) parser.add_argument( - "--model", - default="asl_transfomer", + "--model_name", + default="baseline_transfomer", type=str, metavar="N", help="name of model to train", @@ -72,6 +72,12 @@ def parse_args(): type=bool, help="Path to the checkpoint for resuming training", ) + parser.add_argument( + "--save_every", + default= 2, + type=int, + help="", + ) args = parser.parse_args() return args diff --git a/version.txt b/version.txt index e69de29b..8a9ecc2e 100644 --- a/version.txt +++ b/version.txt @@ -0,0 +1 @@ +0.0.1 \ No newline at end of file