Skip to content

Commit

Permalink
Merge pull request bfsujason#3 from ProMeText/update_imports
Browse files Browse the repository at this point in the history
Update imports
  • Loading branch information
Jean-Baptiste-Camps authored Apr 19, 2024
2 parents 6a637d8 + 42a8ab3 commit 5e90057
Show file tree
Hide file tree
Showing 29 changed files with 5,352 additions and 12 deletions.
11 changes: 11 additions & 0 deletions Bertalign.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Metadata-Version: 2.1
Name: Bertalign
Version: 0.1.0
Summary: An automatic multilingual sentence aligner.
Home-page: https://github.com/bfsujason/bertalign
License: UNKNOWN
Platform: UNKNOWN
License-File: LICENCE

UNKNOWN

7 changes: 7 additions & 0 deletions Bertalign.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
LICENCE
README.md
setup.py
Bertalign.egg-info/PKG-INFO
Bertalign.egg-info/SOURCES.txt
Bertalign.egg-info/dependency_links.txt
Bertalign.egg-info/top_level.txt
1 change: 1 addition & 0 deletions Bertalign.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions Bertalign.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

4 changes: 2 additions & 2 deletions aquilign/align/bertalign/Bertalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
__author__ = "Jason ([email protected])"
__version__ = "1.1.0"

from bertalign.encoder import Encoder
from aquilign.align.bertalign.encoder import Encoder
# from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
# See other cross-lingual embedding models at
# https://www.sbert.net/docs/pretrained_models.html
Expand All @@ -19,4 +19,4 @@
# nb = input(f'Choose a model:')
model = Encoder(models[int(1)])

from bertalign.aligner import Bertalign
from aquilign.align.bertalign.aligner import Bertalign
6 changes: 3 additions & 3 deletions aquilign/align/bertalign/aligner.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import numpy as np

from bertalign.Bertalign import model
import bertalign.corelib as core
import bertalign.utils as utils
from aquilign.align.bertalign.Bertalign import model
import aquilign.align.bertalign.corelib as core
import aquilign.align.bertalign.utils as utils
import torch.nn as nn
import torch

Expand Down
2 changes: 1 addition & 1 deletion aquilign/align/bertalign/encoder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from bertalign.utils import yield_overlaps
from aquilign.align.bertalign.utils import yield_overlaps
# from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline


Expand Down
4 changes: 2 additions & 2 deletions aquilign/align/bertalign/syntactic_tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
import sys
import langid
import bertalign.utils as utils
import aquilign.align.bertalign.utils as utils

def syntactic_tokenization(path, corpus_limit=None, use_punctuation=True):
name = path.split("/")[-1].split(".")[0]
Expand All @@ -16,7 +16,7 @@ def syntactic_tokenization(path, corpus_limit=None, use_punctuation=True):
codelang, _ = langid.classify(text[:300])
print(text)
print(codelang)
with open("bertalign/delimiters.json", "r") as input_json:
with open("aquilign/align/bertalign/delimiters.json", "r") as input_json:
dictionary = json.load(input_json)
# It does not always recognise Castilian
if codelang == "an":
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1,261 changes: 1,261 additions & 0 deletions data/extraitsLancelot/ii-61/fr111-ii-61.txt

Large diffs are not rendered by default.

1,556 changes: 1,556 additions & 0 deletions data/extraitsLancelot/ii-61/fr751-ii-61.txt

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2,499 changes: 2,499 additions & 0 deletions data/extraitsLancelot/iv-75/fr111-iv-75.txt

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 8 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import sys
import numpy as np
# import collatex
import graph_merge
import bertalign.utils as utils
import bertalign.syntactic_tokenization as syntactic_tokenization
from bertalign.Bertalign import Bertalign
import aquilign.align.graph_merge as graph_merge
import aquilign.align.bertalign.utils as utils
import aquilign.align.bertalign.syntactic_tokenization as syntactic_tokenization
from aquilign.align.bertalign.Bertalign import Bertalign
import pandas as pd
import argparse
import glob
Expand Down Expand Up @@ -81,6 +81,10 @@ def __init__(self, corpus_size:None,
self.use_punctiation = use_punctuation
self.prefix = prefix

try:
os.mkdir(f"result_dir")
except FileExistsError:
pass
try:
os.mkdir(f"result_dir/{self.out_dir}/")
except FileExistsError:
Expand Down

0 comments on commit 5e90057

Please sign in to comment.