Skip to content

Commit

Permalink
Update import paths
Browse files: browse the repository at this point in the history
  • Loading branch information
matgille committed Apr 19, 2024
1 parent ae787a9 commit 1bbc4f9
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 13 deletions.
4 changes: 2 additions & 2 deletions aquilign/align/bertalign/Bertalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
__author__ = "Jason ([email protected])"
__version__ = "1.1.0"

from bertalign.encoder import Encoder
from aquilign.align.bertalign.encoder import Encoder
# from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
# See other cross-lingual embedding models at
# https://www.sbert.net/docs/pretrained_models.html
Expand All @@ -19,4 +19,4 @@
# nb = input(f'Choose a model:')
model = Encoder(models[int(1)])

from bertalign.aligner import Bertalign
from aquilign.align.bertalign.aligner import Bertalign
6 changes: 3 additions & 3 deletions aquilign/align/bertalign/aligner.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import numpy as np

from bertalign.Bertalign import model
import bertalign.corelib as core
import bertalign.utils as utils
from aquilign.align.bertalign.Bertalign import model
import aquilign.align.bertalign.corelib as core
import aquilign.align.bertalign.utils as utils
import torch.nn as nn
import torch

Expand Down
2 changes: 1 addition & 1 deletion aquilign/align/bertalign/encoder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from bertalign.utils import yield_overlaps
from aquilign.align.bertalign.utils import yield_overlaps
# from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline


Expand Down
4 changes: 2 additions & 2 deletions aquilign/align/bertalign/syntactic_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
import sys
import langid
import bertalign.utils as utils
import aquilign.align.bertalign.utils as utils

def syntactic_tokenization(path, corpus_limit=None, use_punctuation=True):
name = path.split("/")[-1].split(".")[0]
Expand All @@ -16,7 +16,7 @@ def syntactic_tokenization(path, corpus_limit=None, use_punctuation=True):
codelang, _ = langid.classify(text[:300])
print(text)
print(codelang)
with open("bertalign/delimiters.json", "r") as input_json:
with open("aquilign/align/bertalign/delimiters.json", "r") as input_json:
dictionary = json.load(input_json)
# Il ne reconnaît pas toujours le castillan
if codelang == "an":
Expand Down
14 changes: 9 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import sys
import numpy as np
# import collatex
import graph_merge
import bertalign.utils as utils
import bertalign.syntactic_tokenization as syntactic_tokenization
from bertalign.Bertalign import Bertalign
import aquilign.align.graph_merge as graph_merge
import aquilign.align.bertalign.utils as utils
import aquilign.align.bertalign.syntactic_tokenization as syntactic_tokenization
from aquilign.align.bertalign.Bertalign import Bertalign
import pandas as pd
import argparse
import glob
Expand Down Expand Up @@ -81,6 +81,10 @@ def __init__(self, corpus_size:None,
self.use_punctiation = use_punctuation
self.prefix = prefix

try:
os.mkdir(f"result_dir")
except FileExistsError:
pass
try:
os.mkdir(f"result_dir/{self.out_dir}/")
except FileExistsError:
Expand Down Expand Up @@ -219,7 +223,7 @@ def run_alignments():
prefix = args.prefix
use_punctuation = args.use_punctuation
print(f"Punctuation for tokenization: {use_punctuation}")
MyAligner = Aligner(corpus_size=None, max_align=3, out_dir=out_dir, use_punctuation=use_punctuation, input_dir=input_dir, main_wit=main_wit, prefix=prefix)
MyAligner = Aligner(corpus_size=100, max_align=3, out_dir=out_dir, use_punctuation=use_punctuation, input_dir=input_dir, main_wit=main_wit, prefix=prefix)
MyAligner.parallel_align()
utils.write_json(f"result_dir/{out_dir}/alignment_dict.json", MyAligner.alignment_dict)
align_dict = utils.read_json(f"result_dir/{out_dir}/alignment_dict.json")
Expand Down

0 comments on commit 1bbc4f9

Please sign in to comment.