From 4dba5a30811e4cef2627e72af5d2c2df8c9b3bde Mon Sep 17 00:00:00 2001
From: Michael Denkowski
Date: Sun, 11 Dec 2022 05:11:32 -0600
Subject: [PATCH] Code cleanup: refactoring, type checking, and formatting (#1076)

---
 CHANGELOG.md                       |  8 ++++
 sockeye/__init__.py                |  2 +-
 sockeye/arguments.py               | 62 ++++++++++++++++--------------
 sockeye/beam_search.py             |  1 -
 sockeye/constants.py               |  7 +---
 sockeye/convert_deepspeed.py       |  2 +-
 sockeye/data_io.py                 |  7 ++--
 sockeye/device.py                  | 25 ------------
 sockeye/encoder.py                 |  3 +-
 sockeye/evaluate.py                | 12 +++---
 sockeye/generate_decoder_states.py | 12 +++---
 sockeye/inference.py               |  1 -
 sockeye/knn.py                     | 14 ++++---
 sockeye/score.py                   |  5 +--
 sockeye/train.py                   |  3 +-
 sockeye/translate.py               |  6 +--
 sockeye/utils.py                   | 34 +++++++++++++++-
 typechecked-files                  | 10 ++++-
 18 files changed, 117 insertions(+), 97 deletions(-)
 delete mode 100644 sockeye/device.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3d73856b..76ccadf97 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,14 @@ Note that Sockeye has checks in place to not translate with an old model that wa
 
 Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.
 
+## [3.1.29]
+
+### Changed
+
+- Running `sockeye-evaluate` no longer applies text tokenization for TER (same behavior as other metrics).
+- Turned on type checking for all `sockeye` modules except `test_utils` and addressed resulting type issues.
+- Refactored code in various modules without changing user-level behavior.
+
 ## [3.1.28]
 
 ### Added
diff --git a/sockeye/__init__.py b/sockeye/__init__.py
index 78bb654be..537052cce 100644
--- a/sockeye/__init__.py
+++ b/sockeye/__init__.py
@@ -11,4 +11,4 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
-__version__ = '3.1.28'
+__version__ = '3.1.29'
diff --git a/sockeye/arguments.py b/sockeye/arguments.py
index 6a613ea08..13403e30f 100644
--- a/sockeye/arguments.py
+++ b/sockeye/arguments.py
@@ -138,7 +138,8 @@ def check_regular_directory(value_to_check):
 
 def int_greater_or_equal(threshold: int) -> Callable:
     """
-    Returns a method that can be used in argument parsing to check that the int argument is greater or equal to `threshold`.
+    Returns a method that can be used in argument parsing to check that the int argument is greater or equal to
+    `threshold`.
 
     :param threshold: The threshold that we assume the cli argument value is greater or equal to.
     :return: A method that can be used as a type in argparse.
@@ -155,7 +156,8 @@ def check_greater_equal(value: str):
 
 def float_greater_or_equal(threshold: float) -> Callable:
     """
-    Returns a method that can be used in argument parsing to check that the float argument is greater or equal to `threshold`.
+    Returns a method that can be used in argument parsing to check that the float argument is greater or equal to
+    `threshold`.
 
     :param threshold: The threshold that we assume the cli argument value is greater or equal to.
     :return: A method that can be used as a type in argparse.
@@ -571,7 +573,8 @@ def add_device_args(params):
     device_params.add_argument('--tf32',
                                type=bool_str(),
                                default=True,
-                               help='Globally enable transparent tf32 acceleration of float32 at the cost of reducing precision to 10 bits')
+                               help='Globally enable transparent tf32 acceleration of float32 at the cost of reducing '
+                                    'precision to 10 bits. Default: %(default)s.')
 
 
 def add_vocab_args(params):
@@ -829,22 +832,20 @@ def add_batch_args(params, default_batch_size=4096, default_batch_type=C.BATCH_T
 
 
 def add_nvs_train_parameters(params):
-    params.add_argument(
-        '--bow-task-weight',
-        type=float_greater_or_equal(0.0),
-        default=1.0,
-        help=
-        'The weight of the auxiliary Bag-of-word (BOW) loss when --neural-vocab-selection is enabled. Default %(default)s.'
-    )
-
-    params.add_argument(
-        '--bow-task-pos-weight',
-        type=float_greater_or_equal(0.0),
-        default=10,
-        help='The weight of the positive class (the set of words present on the target side) for the BOW loss '
-        'when --neural-vocab-selection is set as x * num_negative_class / num_positive_class where x is the '
-        '--bow-task-pos-weight. Higher values will bias more towards recall, resulting in larger vocabularies '
-        'at test time trading off larger vocabularies for higher translation quality. Default %(default)s.')
+    params.add_argument('--bow-task-weight',
+                        type=float_greater_or_equal(0.0),
+                        default=1.0,
+                        help='The weight of the auxiliary Bag-of-word (BOW) loss when --neural-vocab-selection is '
+                             'enabled. Default %(default)s.')
+
+    params.add_argument('--bow-task-pos-weight',
+                        type=float_greater_or_equal(0.0),
+                        default=10,
+                        help='The weight of the positive class (the set of words present on the target side) for the '
+                             'BOW loss when --neural-vocab-selection is set as x * num_negative_class / '
+                             'num_positive_class where x is the --bow-task-pos-weight. Higher values will bias more '
+                             'towards recall, resulting in larger vocabularies at test time trading off larger '
+                             'vocabularies for higher translation quality. Default %(default)s.')
 
 
 def add_training_args(params):
@@ -866,8 +867,9 @@ def add_training_args(params):
                               type=str,
                               default=None,
                               choices=[C.LENGTH_TASK_RATIO, C.LENGTH_TASK_LENGTH],
-                              help='If specified, adds an auxiliary task during training to predict source/target length ratios '
-                                   '(mean squared error loss), or absolute lengths (Poisson) loss. Default %(default)s.')
+                              help='If specified, adds an auxiliary task during training to predict source/target '
+                                   'length ratios (mean squared error loss), or absolute lengths (Poisson) loss. '
+                                   'Default %(default)s.')
     train_params.add_argument('--length-task-weight',
                               type=float_greater_or_equal(0.0),
                               default=1.0,
@@ -875,7 +877,8 @@ def add_training_args(params):
     train_params.add_argument('--length-task-layers',
                               type=int_greater_or_equal(1),
                               default=1,
-                              help='Number of fully-connected layers for predicting the length ratio. Default %(default)s.')
+                              help='Number of fully-connected layers for predicting the length ratio. '
+                                   'Default %(default)s.')
 
     add_nvs_train_parameters(train_params)
 
@@ -1088,7 +1091,8 @@ def add_training_args(params):
 
     train_params.add_argument('--keep-initializations',
                               action="store_true",
-                              help='In addition to keeping the last n params files, also keep params from checkpoint 0.')
+                              help='In addition to keeping the last n params files, also keep params from checkpoint '
+                                   '0.')
 
     train_params.add_argument('--cache-last-best-params',
                               required=False,
@@ -1349,7 +1353,8 @@ def add_inference_args(params):
 
     decode_params.add_argument('--skip-nvs',
                                action='store_true',
-                               help='Manually turn off Neural Vocabulary Selection (NVS) to do a softmax over the full target vocabulary.',
+                               help='Manually turn off Neural Vocabulary Selection (NVS) to do a softmax over the full '
+                                    'target vocabulary.',
                                default=False)
 
     decode_params.add_argument('--nvs-thresh',
@@ -1406,13 +1411,14 @@ def add_brevity_penalty_args(params):
     params.add_argument('--brevity-penalty-weight',
                         default=1.0,
                         type=float_greater_or_equal(0.0),
-                        help='Scaler for the brevity penalty in beam search: weight * log(BP) + score. Default: %(default)s')
+                        help='Scaler for the brevity penalty in beam search: weight * log(BP) + score. '
+                             'Default: %(default)s')
     params.add_argument('--brevity-penalty-constant-length-ratio',
                         default=0.0,
                         type=float_greater_or_equal(0.0),
-                        help='Has effect if --brevity-penalty-type is set to \'constant\'. If positive, overrides the length '
-                             'ratio, used for brevity penalty calculation, for all inputs. If zero, uses the average of length '
-                             'ratios from the training data over all models. Default: %(default)s.')
+                        help='Has effect if --brevity-penalty-type is set to \'constant\'. If positive, overrides the '
+                             'length ratio, used for brevity penalty calculation, for all inputs. If zero, uses the '
+                             'average of length ratios from the training data over all models. Default: %(default)s.')
 
 
 def add_clamp_to_dtype_arg(params):
diff --git a/sockeye/beam_search.py b/sockeye/beam_search.py
index a82ef83c4..6d2bfd77d 100644
--- a/sockeye/beam_search.py
+++ b/sockeye/beam_search.py
@@ -678,7 +678,6 @@ def __init__(self,
         self.output_vocab_size = inference.model_output_vocab_size
         self.output_factor_vocab_size = inference.model_output_factor_vocab_size
         self._inference = inference
-        self.global_avoid_trie = None
         assert inference._skip_softmax, "skipping softmax must be enabled for GreedySearch"
         self.work_block = GreedyTop1()
 
diff --git a/sockeye/constants.py b/sockeye/constants.py
index 670c24b48..552378f77 100644
--- a/sockeye/constants.py
+++ b/sockeye/constants.py
@@ -138,7 +138,6 @@
 JSON_RESTRICT_LEXICON_KEY = "restrict_lexicon"
 JSON_CONSTRAINTS_KEY = "constraints"
 JSON_AVOID_KEY = "avoid"
-JSON_ENCODING = "utf-8"
 
 VERSION_NAME = "version"
 CONFIG_NAME = "config"
@@ -285,7 +284,6 @@
 DTYPE_BF16 = 'bfloat16'
 DTYPE_FP16 = 'float16'
 DTYPE_FP32 = 'float32'
-DTYPE_TF32 = 'tf32'
 DTYPE_INT8 = 'int8'
 DTYPE_INT16 = 'int16'
 DTYPE_INT32 = 'int32'
@@ -364,7 +362,6 @@
 # sequence length count types
 SEQ_LEN_IN_CHARACTERS = "char"
 SEQ_LEN_IN_TOKENS = "token"
-SEQ_LEN_IN_WORDS = "word"  # use case: merge sub-words to original word before counting
 
 # scoring
 SCORING_TYPE_NEGLOGPROB = 'neglogprob'
@@ -383,7 +380,7 @@
 BREVITY_PENALTY_LEARNED = 'learned'
 BREVITY_PENALTY_NONE = 'none'
 
-# k-nn 
+# k-nn
 KNN_STATE_DATA_STORE_NAME = "keys.npy"
 KNN_WORD_DATA_STORE_NAME = "vals.npy"
 KNN_WORD_DATA_STORE_DTYPE = DTYPE_INT32
@@ -391,4 +388,4 @@
 KNN_INDEX_NAME = "key_index"
 KNN_EPSILON = 1e-6
 DEFAULT_DATA_STORE_BLOCK_SIZE = 1024 * 1024
-DEFAULT_KNN_LAMBDA = 0.8
\ No newline at end of file
+DEFAULT_KNN_LAMBDA = 0.8
diff --git a/sockeye/convert_deepspeed.py b/sockeye/convert_deepspeed.py
index 211ea26bb..7642ce23d 100644
--- a/sockeye/convert_deepspeed.py
+++ b/sockeye/convert_deepspeed.py
@@ -67,7 +67,7 @@ def convert_checkpoint_to_params(model_config_fname: str, checkpoint_dirname: st
         model_config = model.SockeyeModel.load_config(model_config_fname)
         sockeye_model = model.SockeyeModel(model_config)
         # Gather the float32 params on CPU
-        state_dict = get_fp32_state_dict_from_zero1_checkpoint(checkpoint_dirname)
+        state_dict = dict(get_fp32_state_dict_from_zero1_checkpoint(checkpoint_dirname))
         # Strip the first prefix from each param name to match the SockeyeModel
         # Ex: 'model.encoder.layers...' -> 'encoder.layers...'
         state_dict = {name[name.find('.') + 1:]: param for (name, param) in state_dict.items()}
diff --git a/sockeye/data_io.py b/sockeye/data_io.py
index db2917ac1..e0724fd2c 100644
--- a/sockeye/data_io.py
+++ b/sockeye/data_io.py
@@ -363,8 +363,8 @@ def create_shards(source_fnames: List[str],
     :param target_fnames: The path to the target text (and optional token-parallel factor files).
     :param num_shards: The total number of shards.
     :param output_prefix: The prefix under which the shard files will be created.
-    :return: List of tuples of source (and source factor) file names and target (and target factor) file names for each shard
-        and a flag of whether the returned file names are temporary and can be deleted.
+    :return: List of tuples of source (and source factor) file names and target (and target factor) file names for each
+        shard and a flag of whether the returned file names are temporary and can be deleted.
""" if num_shards == 1: return [(tuple(source_fnames), tuple(target_fnames))], True @@ -595,7 +595,8 @@ def prepare_data(source_fnames: List[str], pool: multiprocessing.pool.Pool = None, shards: List[Tuple[Tuple[str, ...], Tuple[str, ...]]] = None): """ - :param shards: List of num_shards shards of parallel source and target tuples which in turn contain tuples to shard data factor file paths. + :param shards: List of num_shards shards of parallel source and target tuples which in turn contain tuples to shard + data factor file paths. """ logger.info("Preparing data.") # write vocabularies to data folder diff --git a/sockeye/device.py b/sockeye/device.py deleted file mode 100644 index 976b70cb2..000000000 --- a/sockeye/device.py +++ /dev/null @@ -1,25 +0,0 @@ -import torch -import argparse -from typing import Optional - -def init_device(args: argparse.Namespace, logger=None, local_rank : Optional[int] = None): - """ - return requested torch device, optionally enabling tf32 - - :param args "Device Parameters". args.use_cpu will be set if cuda is not available - :param logger optional logger.info(msg) - :param local_rank optional int LOCAL_RANK env for multiple GPU training - """ - if not torch.cuda.is_available(): - if logger is not None: - logger.info("CUDA not available, using cpu") - args.use_cpu = True - device = torch.device('cpu') if args.use_cpu else torch.device('cuda', args.device_id if local_rank is None else local_rank) - if not args.use_cpu: - # Ensure that GPU operations use the correct device by default - torch.cuda.set_device(device) - if args.tf32: - if logger is not None: - logger.info("CUDA: allow tf32 (float32 but with 10 bits precision)") - torch.backends.cuda.matmul.allow_tf32 = True - return device diff --git a/sockeye/encoder.py b/sockeye/encoder.py index 4f4630743..86838fb03 100644 --- a/sockeye/encoder.py +++ b/sockeye/encoder.py @@ -208,7 +208,8 @@ def forward(self, data: pt.Tensor, valid_length: pt.Tensor) -> Tuple[pt.Tensor, _, max_len, __ = data.size() # length_mask for source attention masking. Shape: (batch_size, max_len) - single_head_att_mask = layers.prepare_source_length_mask(valid_length, self.config.attention_heads, max_length=max_len, expand=False) + single_head_att_mask = layers.prepare_source_length_mask(valid_length, self.config.attention_heads, + max_length=max_len, expand=False) # Shape: (batch_size, max_len) -> (batch_size * heads, 1, max_len) att_mask = single_head_att_mask.unsqueeze(1).expand(-1, self.config.attention_heads, -1).reshape((-1, max_len)).unsqueeze(1) att_mask = att_mask.expand(-1, max_len, -1) diff --git a/sockeye/evaluate.py b/sockeye/evaluate.py index ecf5a3783..5e5c18ce2 100644 --- a/sockeye/evaluate.py +++ b/sockeye/evaluate.py @@ -47,7 +47,7 @@ def raw_corpus_bleu(hypotheses: Iterable[str], references: Iterable[str], :param offset: Smoothing constant. :return: BLEU score as float between 0 and 1. """ - return sacrebleu.raw_corpus_bleu(hypotheses, [references], smooth_value=offset).score / 100.0 + return sacrebleu.raw_corpus_bleu(hypotheses, [references], smooth_value=offset).score / 100.0 # type: ignore def raw_corpus_chrf(hypotheses: Iterable[str], references: Iterable[str]) -> float: @@ -58,7 +58,7 @@ def raw_corpus_chrf(hypotheses: Iterable[str], references: Iterable[str]) -> flo :param references: Reference stream. :return: chrF score as float between 0 and 1. 
""" - return sacrebleu.corpus_chrf(hypotheses, [references]).score + return sacrebleu.corpus_chrf(hypotheses, [references]).score # type: ignore def raw_corpus_ter(hypotheses: Iterable[str], references: Iterable[str]) -> float: @@ -69,8 +69,8 @@ def raw_corpus_ter(hypotheses: Iterable[str], references: Iterable[str]) -> floa :param references: Reference stream. :return: TER score as float between 0 and 1. """ - ter = sacrebleu.metrics.TER(argparse.Namespace()) - return ter.corpus_score(hypotheses, [references]).score + ter = sacrebleu.metrics.TER() + return ter.corpus_score(hypotheses, [references]).score # type: ignore def raw_corpus_rouge1(hypotheses: Iterable[str], references: Iterable[str]) -> float: @@ -186,8 +186,8 @@ def _print_mean_std_score(metrics: List[Tuple[str, Callable]], scores: Dict[str, scores_mean_std = [] # type: List[str] for name, _ in metrics: if len(scores[name]) > 1: - score_mean = np.item(np.mean(scores[name])) - score_std = np.item(np.std(scores[name], ddof=1)) + score_mean = np.mean(scores[name]).item() + score_std = np.std(scores[name], ddof=1).item() scores_mean_std.append("%.3f\t%.3f" % (score_mean, score_std)) else: score = scores[name][0] diff --git a/sockeye/generate_decoder_states.py b/sockeye/generate_decoder_states.py index 8df008f77..2c636767a 100644 --- a/sockeye/generate_decoder_states.py +++ b/sockeye/generate_decoder_states.py @@ -14,7 +14,7 @@ import argparse import logging import os -from typing import Dict, List +from typing import List, Optional import numpy as np import torch as pt @@ -50,7 +50,7 @@ def __init__(self, self.num_dim = num_dim # dimension of a single entry self.dtype = dtype self.block_size = -1 - self.mmap = None + self.mmap = None # type: Optional[np.memmap] self.tail_idx = 0 # where the next entry should be inserted self.size = 0 # size of storage already assigned @@ -120,12 +120,12 @@ def __init__(self, self.max_seq_len_target = max_seq_len_target self.output_dir = output_dir - self.state_store_file = None - self.words_store_file = None + self.state_store_file = None # type: Optional[NumpyMemmapStorage] + self.words_store_file = None # type: Optional[NumpyMemmapStorage] # info for KNNConfig self.num_states = 0 - self.dimension = None + self.dimension = None # type: Optional[int] self.state_data_type = utils.get_numpy_dtype(state_data_type) self.word_data_type = utils.get_numpy_dtype(word_data_type) @@ -186,7 +186,7 @@ def generate_states_and_store(self, trace_inputs = {'get_decoder_states': model_inputs} self.traced_model = pt.jit.trace_module(self.model, trace_inputs, strict=False) # shape: (batch, seq_len, hidden_dim) - decoder_states = self.traced_model.get_decoder_states(*model_inputs) + decoder_states = self.traced_model.get_decoder_states(*model_inputs) # type: ignore # flatten batch and seq_len dimensions, remove pads on the target pad_mask = (batch.target != C.PAD_ID)[:, :, 0] # shape: (batch, seq_len) diff --git a/sockeye/inference.py b/sockeye/inference.py index f1cc993e5..25e6480c2 100644 --- a/sockeye/inference.py +++ b/sockeye/inference.py @@ -110,7 +110,6 @@ def get_max_output_length(input_length: int): return max_input_len, get_max_output_length -BeamHistory = Dict[str, List] Tokens = List[str] TokenIds = List[List[int]] # each token id may contain multiple factors SentenceId = Union[int, str] diff --git a/sockeye/knn.py b/sockeye/knn.py index dbad4a38e..ce345183f 100755 --- a/sockeye/knn.py +++ b/sockeye/knn.py @@ -82,14 +82,14 @@ def init_faiss_index(self, train_sample: Optional[np.memmap] = None): return 
 
-    def add_items(self, index, keys: np.array):
+    def add_items(self, index, keys: np.ndarray):
         """Add items to the index (must call `init_faiss_index` first)."""
         item_count, key_dim = keys.shape
         assert key_dim == self.config.dimension
 
         index.add(keys.astype(np.float32))  # unfortunately, faiss index only supports float32
 
-    def block_add_items(self, index, keys: np.array, block_size: int = C.DEFAULT_DATA_STORE_BLOCK_SIZE):
+    def block_add_items(self, index, keys: np.ndarray, block_size: int = C.DEFAULT_DATA_STORE_BLOCK_SIZE):
         """Add items to the index in blocks -- used for a large number of items (must call `init_faiss_index` first)."""
         item_count, key_dim = keys.shape
         assert key_dim == self.config.dimension
@@ -106,7 +106,7 @@ def block_add_items(self, index, keys: np.array, block_size: int = C.DEFAULT_DAT
             index.add(keys[start:item_count].astype(np.float32))  # unfortunately, faiss index only supports float32
 
     @staticmethod
-    def build_train_sample(keys: np.array, sample_size: int):
+    def build_train_sample(keys: np.ndarray, sample_size: int):
         """Randomly sample `sample_size` keys as training sample."""
         item_count, _ = keys.shape
         assert 0 < sample_size <= item_count
@@ -119,7 +119,7 @@ def build_train_sample(keys: np.array, sample_size: int):
 
         return train_sample
 
-    def build_faiss_index(self, keys: np.array, train_sample: Optional[np.memmap] = None):
+    def build_faiss_index(self, keys: np.ndarray, train_sample: Optional[np.memmap] = None):
         """
         Top-level function of the class to build faiss index for a set of keys, optionally with samples for training.
         """
@@ -149,8 +149,10 @@ def get_config_path(dir):
 
 
 def build_knn_index_package(args):
-    """Top-level function that builds a kNN index package (kNN index and config file)
-    from an existing state and word store."""
+    """
+    Top-level function that builds a kNN index package (kNN index and config file) from an existing state and word
+    store.
+    """
     state_store_filename = get_state_store_path(args.input_dir)
     word_store_filename = get_word_store_path(args.input_dir)
     config_filename = get_config_path(args.input_dir)
diff --git a/sockeye/score.py b/sockeye/score.py
index 552df3b24..12eef0147 100644
--- a/sockeye/score.py
+++ b/sockeye/score.py
@@ -18,14 +18,11 @@
 import logging
 import os
 
-import torch as pt
-
 from . import arguments
 from . import constants as C
 from . import data_io
 from . import utils
 from .beam_search import CandidateScorer
-from .device import init_device
 from .log import setup_main_logger
 from .model import load_model
 from .output_handler import get_output_handler
@@ -51,7 +48,7 @@ def score(args: argparse.Namespace):
 
     utils.log_basic_info(args)
 
-    device = init_device(args, logger)
+    device = utils.init_device(args)
     logger.info(f"Scoring device: {device}")
 
     model, source_vocabs, target_vocabs = load_model(args.model, device=device, dtype=args.dtype)
diff --git a/sockeye/train.py b/sockeye/train.py
index ac9a9fbe6..57100b690 100644
--- a/sockeye/train.py
+++ b/sockeye/train.py
@@ -57,7 +57,6 @@
 from . import utils
 from . import vocab
 from .config import Config
-from .device import init_device
 from .log import setup_main_logger
 from .utils import check_condition
 
@@ -998,7 +997,7 @@ def train(args: argparse.Namespace, custom_metrics_logger: Optional[Callable] = 
     logger.info("Adjusting maximum length to reserve space for a BOS/EOS marker. New maximum length: (%d, %d)",
                 max_seq_len_source, max_seq_len_target)
 
-    device = init_device(args, logger, utils.get_local_rank() if utils.is_distributed() else None)
+    device = utils.init_device(args)
     logger.info(f'Training Device: {device}')
     utils.seed_rngs(args.seed)
diff --git a/sockeye/translate.py b/sockeye/translate.py
index f336c3e72..17989ac49 100644
--- a/sockeye/translate.py
+++ b/sockeye/translate.py
@@ -22,9 +22,6 @@
 from contextlib import ExitStack
 from typing import Dict, Generator, List, Optional, Union
 
-import torch as pt
-
-from .device import init_device
 from sockeye.lexicon import load_restrict_lexicon, RestrictLexicon
 from sockeye.log import setup_main_logger
 from sockeye.model import load_models
@@ -33,6 +30,7 @@
 from . import arguments
 from . import constants as C
 from . import inference
+from . import utils
 
 logger = logging.getLogger(__name__)
 
@@ -67,7 +65,7 @@ def run_translate(args: argparse.Namespace):
 
     output_handler = get_output_handler(args.output_type, args.output)
 
-    device = init_device(args, logger)
+    device = utils.init_device(args)
     logger.info(f"Translate Device: {device}")
 
     models, source_vocabs, target_vocabs = load_models(device=device,
diff --git a/sockeye/utils.py b/sockeye/utils.py
index 1e1e981aa..8d2219746 100644
--- a/sockeye/utils.py
+++ b/sockeye/utils.py
@@ -14,6 +14,7 @@
 """
 A set of utility methods.
 """
+import argparse
 import binascii
 import gzip
 import itertools
@@ -324,7 +325,8 @@ def shift_prefix_factors(prefix_factors: pt.Tensor) -> pt.Tensor:
     :return new prefix_factors_shift (batch size, length + 1, num of factors)
     """
     prefix_factors_sizes = prefix_factors.size()
-    prefix_factors_shift = pt.zeros(prefix_factors_sizes[0], prefix_factors_sizes[1] + 1, prefix_factors_sizes[2], dtype=prefix_factors.dtype, device=prefix_factors.device)
+    prefix_factors_shift = pt.zeros(prefix_factors_sizes[0], prefix_factors_sizes[1] + 1, prefix_factors_sizes[2],
+                                    dtype=prefix_factors.dtype, device=prefix_factors.device)
     prefix_factors_shift[:, 1:] = prefix_factors
     return prefix_factors_shift
 
@@ -785,3 +787,33 @@ def compute_isometric_score(hypothesis: str, hypothesis_score: float, source: st
     isometric_score = pred_sub_score + synchrony_sub_score
 
     return isometric_score
+
+
+def init_device(args: argparse.Namespace) -> pt.device:
+    """
+    Select Torch device based on CLI args:
+    - When CUDA is not available, the device defaults to CPU.
+    - When using CUDA, tf32 is enabled if specified.
+    - When running distributed training, the CUDA device is determined by local
+      rank instead of CLI args.
+
+    :param args: Parsed CLI args including device parameters.
+
+    :return: Torch device.
+ """ + + use_cpu = args.use_cpu + if not use_cpu and not pt.cuda.is_available(): + logger.info('CUDA not available, defaulting to CPU device') + use_cpu = True + if use_cpu: + return pt.device('cpu') + + device = pt.device('cuda', get_local_rank() if is_distributed() else args.device_id) + # Ensure that GPU operations use the correct device by default + pt.cuda.set_device(device) + if args.tf32: + pt.backends.cuda.matmul.allow_tf32 = True + logger.info('CUDA: allow tf32 (float32 but with 10 bits precision)') + + return device diff --git a/typechecked-files b/typechecked-files index 7e6a578a7..e0218c892 100644 --- a/typechecked-files +++ b/typechecked-files @@ -1,24 +1,31 @@ sockeye/__init__.py sockeye/arguments.py sockeye/average.py +sockeye/beam_search.py sockeye/checkpoint_decoder.py sockeye/config.py sockeye/constants.py -sockeye/beam_search.py +sockeye/convert_deepspeed.py sockeye/data_io.py sockeye/decoder.py sockeye/embeddings.py sockeye/encoder.py +sockeye/evaluate.py +sockeye/generate_decoder_states.py sockeye/inference.py +sockeye/initial_setup.py +sockeye/knn.py sockeye/layers.py sockeye/lexicon.py sockeye/log.py sockeye/loss.py sockeye/lr_scheduler.py sockeye/model.py +sockeye/nvs.py sockeye/optimizers.py sockeye/output_handler.py sockeye/prepare_data.py +sockeye/quantize.py sockeye/rerank.py sockeye/score.py sockeye/scoring.py @@ -28,4 +35,3 @@ sockeye/transformer.py sockeye/translate.py sockeye/utils.py sockeye/vocab.py -sockeye/initial_setup.py