Skip to content

Commit

Permalink
Fix bugs
Browse files Browse the repository at this point in the history
Make startup faster, preinstall apt packages in docker
Revert SMT model zip from tar.gz to zip
  • Loading branch information
johnml1135 committed Jun 6, 2024
1 parent 5e7b246 commit 8f7360b
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 32 deletions.
4 changes: 3 additions & 1 deletion dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ RUN apt-get update && \
apt-get install --no-install-recommends -y \
curl \
python$PYTHON_VERSION \
python$PYTHON_VERSION-distutils && \
python$PYTHON_VERSION-distutils \
# these are needed for ClearML
git libsm6 libxext6 libxrender-dev libglib2.0-0 && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean

Expand Down
5 changes: 5 additions & 0 deletions dockerfile.cpu_only
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ RUN poetry export --with=gpu --without-hashes -f requirements.txt > requirements
FROM python:$PYTHON_VERSION
WORKDIR /root

# these are needed for ClearML
# NOTE: the original line ended with a dangling "&& \" continuation and nothing
# after it, which breaks the docker build; terminate the chain properly and
# clean the apt lists (matching the GPU dockerfile) to keep the layer small.
RUN apt-get update && \
    apt-get install --no-install-recommends -y \
    git libsm6 libxext6 libxrender-dev libglib2.0-0 && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

COPY --from=builder /src/requirements.txt .
RUN --mount=type=cache,target=/root/.cache \
pip install --no-cache-dir -r requirements.txt && rm requirements.txt
Expand Down
3 changes: 1 addition & 2 deletions machine/jobs/nmt_engine_build_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def run(
) as model_trainer:
model_trainer.train(progress=phase_progress, check_canceled=check_canceled)
model_trainer.save()
train_corpus_size = model_trainer.stats.train_corpus_size
else:
logger.info("No matching entries in the source and target corpus - skipping training")

Expand Down Expand Up @@ -99,7 +98,7 @@ def run(
logger.info("Saving model")
model_path = self._nmt_model_factory.save_model()
self._shared_file_service.save_model(model_path, self._config.save_model + "".join(model_path.suffixes))
return train_corpus_size
return parallel_corpus_size


def _translate_batch(
Expand Down
47 changes: 20 additions & 27 deletions machine/jobs/smt_engine_build_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,11 @@ def run(

with self._shared_file_service.get_source_pretranslations() as src_pretranslations:
inference_step_count = sum(1 for _ in src_pretranslations)
if inference_step_count > 0:
phases = [
Phase(message="Training SMT model", percentage=0.85),
Phase(message="Training truecaser", percentage=0.05),
Phase(message="Pretranslating segments", percentage=0.1),
]
else:
phases = [
Phase(message="Training SMT model", percentage=0.95),
Phase(message="Training truecaser", percentage=0.05),
]
phases = [
Phase(message="Training SMT model", percentage=0.85),
Phase(message="Training truecaser", percentage=0.05),
Phase(message="Pretranslating segments", percentage=0.1),
]
progress_reporter = PhasedProgressReporter(progress, phases)

if check_canceled is not None:
Expand All @@ -73,23 +67,22 @@ def run(
if check_canceled is not None:
check_canceled()

if inference_step_count > 0:
with ExitStack() as stack:
detokenizer = self._smt_model_factory.create_detokenizer()
truecaser = self._smt_model_factory.create_truecaser()
phase_progress = stack.enter_context(progress_reporter.start_next_phase())
engine = stack.enter_context(self._smt_model_factory.create_engine(tokenizer, detokenizer, truecaser))
src_pretranslations = stack.enter_context(self._shared_file_service.get_source_pretranslations())
writer = stack.enter_context(self._shared_file_service.open_target_pretranslation_writer())
current_inference_step = 0
with ExitStack() as stack:
detokenizer = self._smt_model_factory.create_detokenizer()
truecaser = self._smt_model_factory.create_truecaser()
phase_progress = stack.enter_context(progress_reporter.start_next_phase())
engine = stack.enter_context(self._smt_model_factory.create_engine(tokenizer, detokenizer, truecaser))
src_pretranslations = stack.enter_context(self._shared_file_service.get_source_pretranslations())
writer = stack.enter_context(self._shared_file_service.open_target_pretranslation_writer())
current_inference_step = 0
phase_progress(ProgressStatus.from_step(current_inference_step, inference_step_count))
batch_size = self._config["pretranslation_batch_size"]
for pi_batch in batch(src_pretranslations, batch_size):
if check_canceled is not None:
check_canceled()
_translate_batch(engine, pi_batch, writer)
current_inference_step += len(pi_batch)
phase_progress(ProgressStatus.from_step(current_inference_step, inference_step_count))
batch_size = self._config["pretranslation_batch_size"]
for pi_batch in batch(src_pretranslations, batch_size):
if check_canceled is not None:
check_canceled()
_translate_batch(engine, pi_batch, writer)
current_inference_step += len(pi_batch)
phase_progress(ProgressStatus.from_step(current_inference_step, inference_step_count))

if "save_model" in self._config and self._config.save_model is not None:
logger.info("Saving model")
Expand Down
4 changes: 2 additions & 2 deletions machine/jobs/thot/thot_smt_model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def create_truecaser(self) -> Truecaser:
return UnigramTruecaser(model_path=self._model_dir / "unigram-casing-model.txt")

def save_model(self) -> Path:
    """Archive the trained SMT model directory as a zip file and return the archive path.

    The archive base path is ``<data_dir>/<shared_file_folder>/builds/<build_id>/model``;
    ``shutil.make_archive`` appends the ``.zip`` suffix, so the returned path ends in
    ``model.zip``.
    """
    # NOTE(review): the diff residue interleaved the old tar.gz ("gztar") variant with
    # the new zip variant; this is the committed final state (zip, per the commit
    # message "Revert SMT model zip from tar.gz to zip").
    zip_file_basename = Path(
        self._config.data_dir, self._config.shared_file_folder, "builds", self._config.build_id, "model"
    )
    return Path(shutil.make_archive(str(zip_file_basename), "zip", self._model_dir))

@property
def _model_dir(self) -> Path:
Expand Down

0 comments on commit 8f7360b

Please sign in to comment.