Revert "update"
This reverts commit 4c2f32f.
ArthurZucker committed Aug 7, 2024
1 parent 944f6e5 · commit 16e6798
Showing 4 changed files with 11 additions and 18 deletions.
15 changes: 4 additions & 11 deletions bindings/python/src/normalizers.rs
@@ -347,23 +347,16 @@ pub struct PySequence {}
 #[pymethods]
 impl PySequence {
     #[new]
-    #[pyo3(text_signature = "(self)")]
+    #[pyo3(text_signature = None)]
     fn new(normalizers: &Bound<'_, PyList>) -> PyResult<(Self, PyNormalizer)> {
         let mut sequence = Vec::with_capacity(normalizers.len());
         for n in normalizers.iter() {
             let normalizer: PyRef<PyNormalizer> = n.extract()?;
             match &normalizer.normalizer {
-                PyNormalizerTypeWrapper::Sequence(inner) => {
-                    println!("sequence:{:?}", inner);
-                    sequence.extend(inner.iter().cloned())
-                }
-                PyNormalizerTypeWrapper::Single(inner) => {
-                    println!("dingle {:?}", inner);
-                    sequence.push(inner.clone())
-                }
+                PyNormalizerTypeWrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
+                PyNormalizerTypeWrapper::Single(inner) => sequence.push(inner.clone()),
             }
         }
-        println!("The sequence: {:?}", sequence);
         Ok((
             PySequence {},
             PyNormalizer::new(PyNormalizerTypeWrapper::Sequence(sequence)),

@@ -419,7 +412,7 @@ impl PyStrip {
     #[new]
     #[pyo3(signature = (left = true, right = true), text_signature = "(self, left=True, right=True)")]
     fn new(left: bool, right: bool) -> (Self, PyNormalizer) {
-        (PyStrip {}, StripNormalizer::new(left, right).into())
+        (PyStrip {}, Strip::new(left, right).into())
     }
 }
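
In the first hunk above, the revert drops the debug `println!` calls and keeps the flattening behaviour: a nested sequence contributes its inner normalizers directly instead of being pushed as a single element. The snippet below is a minimal, standalone sketch of that pattern with made-up names (`Wrapper`, `flatten`); it is not the actual pyo3 binding code.

```rust
// Standalone sketch of the flattening pattern restored above; `Wrapper` and
// `flatten` are stand-ins for the real normalizer wrapper types.
#[derive(Clone, Debug)]
enum Wrapper {
    Single(String),
    Sequence(Vec<String>),
}

fn flatten(items: &[Wrapper]) -> Vec<String> {
    let mut sequence = Vec::with_capacity(items.len());
    for item in items {
        match item {
            // A nested sequence is spliced into the flat list...
            Wrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
            // ...while a single normalizer is pushed as one element.
            Wrapper::Single(inner) => sequence.push(inner.clone()),
        }
    }
    sequence
}

fn main() {
    let items = vec![
        Wrapper::Single("NFD".to_string()),
        Wrapper::Sequence(vec!["Lowercase".to_string(), "StripAccents".to_string()]),
    ];
    // Prints: ["NFD", "Lowercase", "StripAccents"]
    println!("{:?}", flatten(&items));
}
```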

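In the second hunk, only the constructor call changes (`StripNormalizer::new` back to `Strip::new`); the unchanged `signature`/`text_signature` attributes around it are what give the Python class its keyword defaults and its reported signature. Below is a standalone pyo3 sketch of that mechanism, assuming a recent pyo3 with the `Bound` API; the class and module names are invented for illustration.

```rust
use pyo3::prelude::*;

// Hypothetical class, not the real binding in this repo.
#[pyclass]
struct StripExample {
    #[pyo3(get)]
    left: bool,
    #[pyo3(get)]
    right: bool,
}

#[pymethods]
impl StripExample {
    // `signature` supplies the Python-side defaults; `text_signature` is the
    // string reported through `__text_signature__` / `help()`.
    #[new]
    #[pyo3(signature = (left = true, right = true), text_signature = "(self, left=True, right=True)")]
    fn new(left: bool, right: bool) -> Self {
        StripExample { left, right }
    }
}

// Hypothetical module name, for the sake of a complete example.
#[pymodule]
fn strip_example(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<StripExample>()
}
```
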
6 changes: 3 additions & 3 deletions tokenizers/src/normalizers/bert.rs
@@ -1,5 +1,5 @@
 use crate::tokenizer::{NormalizedString, Normalizer, Result};
-use crate::utils::macro_rules_attribute;
+
 use serde::{Deserialize, Serialize};
 use unicode_categories::UnicodeCategories;

@@ -47,8 +47,8 @@ fn is_chinese_char(c: char) -> bool {
     )
 }

-#[derive(Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
+#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
+#[serde(tag = "type")]
 #[non_exhaustive]
 pub struct BertNormalizer {
     /// Whether to do the bert basic cleaning:
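
Here the revert swaps the `impl_serde_type!` helper attribute for explicit serde derives with an internal `type` tag; the same swap appears in strip.rs and utils.rs below. As a rough illustration of what `#[serde(tag = "type")]` produces, here is a minimal standalone sketch (serde and serde_json assumed as dependencies; the field list is abbreviated, not the crate's full definition):

```rust
use serde::{Deserialize, Serialize};

// Abbreviated stand-in for the real struct; only two example fields.
#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub struct BertNormalizer {
    pub clean_text: bool,
    pub lowercase: bool,
}

fn main() -> serde_json::Result<()> {
    let n = BertNormalizer { clean_text: true, lowercase: false };
    // The struct name is emitted under the "type" key, e.g.:
    // {"type":"BertNormalizer","clean_text":true,"lowercase":false}
    println!("{}", serde_json::to_string(&n)?);
    Ok(())
}
```

That `"type"` field is the shape the serialized tokenizer JSON uses to identify each normalizer.
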
4 changes: 2 additions & 2 deletions tokenizers/src/normalizers/strip.rs
@@ -3,8 +3,8 @@ use crate::utils::macro_rules_attribute;
 use serde::{Deserialize, Serialize};
 use unicode_normalization_alignments::char::is_combining_mark;

-#[derive(Copy, Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
+#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
+#[serde(tag = "type")]
 #[non_exhaustive]
 pub struct Strip {
     pub strip_left: bool,
4 changes: 2 additions & 2 deletions tokenizers/src/normalizers/utils.rs
@@ -4,8 +4,8 @@ use crate::normalizers::NormalizerWrapper;
 use crate::tokenizer::{NormalizedString, Normalizer, Result};
 use crate::utils::macro_rules_attribute;

-#[derive(Clone, Debug)]
-#[macro_rules_attribute(impl_serde_type!)]
+#[derive(Clone, Deserialize, Debug, Serialize)]
+#[serde(tag = "type")]
 /// Allows concatenating multiple other Normalizer as a Sequence.
 /// All the normalizers run in sequence in the given order against the same NormalizedString.
 pub struct Sequence {
