diff --git a/README.md b/README.md index 20401ea..57f4c49 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ git clone https://github.com/Yale-LILY/AutoACU cd AutoACU pip install . ``` -The necessary dependencies include PyTorch and HuggingFace's Transformers. +The necessary dependencies include PyTorch and HuggingFace's Transformers. If you use the T5 tokenizer from Transformers, you will also need to install the SentencePiece package. It should be compatible with any of the recent versions of PyTorch and Transformers. However, to make sure that the dependencies are compatible, you may run the following command: @@ -42,17 +42,20 @@ Below is an example of using A2CU to evaluate the similarity between two text se ```python from autoacu import A2CU candidates, references = ["This is a test"], ["This is a test"] -a2cu = A2CU(device=0) # the GPU device to use +a2cu = A2CU() recall_scores, prec_scores, f1_scores = a2cu.score( references=references, candidates=candidates, generation_batch_size=2, # the batch size for ACU generation - matching_batch_size=16 # the batch size for ACU matching - output_path=None # the path to save the evaluation results - recall_only=False # whether to only compute the recall score + matching_batch_size=16, # the batch size for ACU matching + output_path=None, # the path to save the evaluation results + recall_only=False, # whether to only compute the recall score acu_path=None # the path to save the generated ACUs ) print(f"Recall: {recall_scores[0]:.4f}, Precision {prec_scores[0]:.4f}, F1: {f1_scores[0]:.4f}") + +# Sample Output: +# Recall: 0.1250, Precision 0.1250, F1: 0.1250 ``` ### A3CU @@ -61,12 +64,15 @@ Below is an example of using A3CU to evaluate the similarity between two text se ```python from autoacu import A3CU candidates, references = ["This is a test"], ["This is a test"] -a3cu = A3CU(device=0) # the GPU device to use +a3cu = A3CU() recall_scores, prec_scores, f1_scores = a3cu.score( 
references=references, candidates=candidates, - batch_size=16 # the batch size for ACU generation + batch_size=16, # the batch size for ACU generation output_path=None # the path to save the evaluation results ) print(f"Recall: {recall_scores[0]:.4f}, Precision {prec_scores[0]:.4f}, F1: {f1_scores[0]:.4f}") + +# Sample Output: +# Recall: 0.8007, Precision 0.8007, F1: 0.8007 ``` diff --git a/autoacu/a2cu.py b/autoacu/a2cu.py index 4aee6af..63dfe6b 100644 --- a/autoacu/a2cu.py +++ b/autoacu/a2cu.py @@ -13,7 +13,8 @@ class A2CU(): Automatic ACU Generation and Matching """ def __init__(self, generation_pt: str="Yale-LILY/a2cu-generator", - matching_pt: str="Yale-LILY/a2cu-classifier", device: int=0, no_ref: bool=True): + matching_pt: str="Yale-LILY/a2cu-classifier", no_ref: bool=True, + device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")): """ Args: generation_pt: path to the HuggingFace model for generation @@ -165,8 +166,8 @@ def data_loader(): segs = torch.tensor(segs) input_ids.append(src_input_ids) token_type_ids.append(segs) - input_ids = pad(input_ids, tok.pad_token_id).to(f"cuda:{gpuid}") - token_type_ids = pad(token_type_ids, 0).to(f"cuda:{gpuid}") + input_ids = pad(input_ids, tok.pad_token_id).to(self.device) + token_type_ids = pad(token_type_ids, 0).to(self.device) attenion_mask = (input_ids != tok.pad_token_id) yield { "input_ids": input_ids, diff --git a/autoacu/a3cu.py b/autoacu/a3cu.py index 0957eb7..0399577 100644 --- a/autoacu/a3cu.py +++ b/autoacu/a3cu.py @@ -23,7 +23,8 @@ class A3CU(): """ Efficient and Interpretable Automatic Summarization Evaluation Metrics """ - def __init__(self, model_pt: str="Yale-LILY/a3cu", device: int=0, max_len: int=254, cpu: bool=False): + def __init__(self, model_pt: str="Yale-LILY/a3cu", max_len: int=254, cpu: bool=False, + device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")): """ Args: model_pt: path to the HuggingFace model @@ -31,10 +32,8 @@ def __init__(self, model_pt: 
str="Yale-LILY/a3cu", device: int=0, max_len: int=2 max_len: max length of the input cpu: use CPU instead of GPU """ - if cpu: - self.device = "cpu" - else: - self.device = device + + self.device = "cpu" if cpu else device self.model = BertClassifier.from_pretrained(model_pt).to(self.device) self.tok = BertTokenizer.from_pretrained(model_pt) self.model.eval() @@ -164,4 +163,4 @@ def score_example(self, reference: str, candidate: str): - \ No newline at end of file +