Skip to content

Commit

Permalink
modif metadata validity + tests + presentation filtres CL dans inform…
Browse files Browse the repository at this point in the history
…ations"
  • Loading branch information
Juliettejns committed Sep 17, 2024
1 parent c6c0a15 commit db60b6a
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 42 deletions.
1 change: 1 addition & 0 deletions app/main/views/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def tokens_correct_single(corpus_id, token_id):
token, change_record = WordToken.update(
user_id=current_user.id,
token_id=token_id, corpus_id=corpus_id,
form = string_to_none(request.form.get("form")),
lemma=string_to_none(request.form.get("lemma")),
POS=string_to_none(request.form.get("POS")),
morph=string_to_none(request.form.get("morph"))
Expand Down
73 changes: 42 additions & 31 deletions app/models/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,7 @@ def get_like(filter_id, form, group_by, type_like="lemma", allowed_list=False):
return query

@staticmethod
def is_valid(lemma, POS, morph, corpus):
def is_valid(lemma, POS, morph, corpus, form):
""" Check if a token is valid for a given corpus
:param lemma: Lemma value of the token to validate
Expand All @@ -1125,46 +1125,53 @@ def is_valid(lemma, POS, morph, corpus):
"morph": True
}
allowed_column = corpus.displayed_columns_by_name
if (lemma # If we changed the lemma
current_controlList = corpus.control_lists
print(form)
print(current_controlList.filter_metadata)
print(re.match(current_controlList.re_filter_metadata, form))
if form and current_controlList.filter_metadata and re.match(current_controlList.re_filter_metadata, form):
pass
else:
if (lemma # If we changed the lemma
and "lemma" in allowed_column # And if the lemma is a column known to the project
and allowed_lemma.count() # And if we have a list of accepted lemma,
):
# then we check for lemma validity
current_controlList = corpus.control_lists
regex_liste = []
if current_controlList:
if current_controlList.filter_ignore:
regex_liste.append(ControlLists.re_filter_ignore)
if current_controlList.filter_punct:
regex_liste.append(ControlLists.re_filter_punct)
if current_controlList.filter_numeral:
regex_liste.append(ControlLists.re_filter_numeral)
ignored_by_regex = False

for regex in regex_liste:
if re.match(regex, lemma) is not None:
ignored_by_regex = True
if (
ignored_by_regex is False and
corpus.has_custom_dictionary_value("lemma", lemma) is False and
corpus.get_allowed_values("lemma", label=lemma).count() == 0
):
# then we check for lemma validity

regex_liste = []
if current_controlList:
if current_controlList.filter_ignore:
regex_liste.append(ControlLists.re_filter_ignore)
if current_controlList.filter_punct:
regex_liste.append(ControlLists.re_filter_punct)
if current_controlList.filter_numeral:
regex_liste.append(ControlLists.re_filter_numeral)
ignored_by_regex = False

for regex in regex_liste:
if re.match(regex, lemma) is not None:
ignored_by_regex = True
if (
ignored_by_regex is False and
corpus.has_custom_dictionary_value("lemma", lemma) is False and
corpus.get_allowed_values("lemma", label=lemma).count() == 0
):

statuses["lemma"] = False
statuses["lemma"] = False

if POS is not None \
if POS is not None \
and "POS" in allowed_column \
and allowed_POS.count() > 0 \
and corpus.get_allowed_values("POS", label=POS).count() == 0:
if not corpus.has_custom_dictionary_value("POS", POS):
statuses["POS"] = False
if not corpus.has_custom_dictionary_value("POS", POS):
statuses["POS"] = False

if morph is not None \
if morph is not None \
and "morph" in allowed_column \
and allowed_morph.count() > 0 \
and corpus.get_allowed_values("morph", label=morph).count() == 0:
if not corpus.has_custom_dictionary_value("morph", morph):
statuses["morph"] = False
if not corpus.has_custom_dictionary_value("morph", morph):
statuses["morph"] = False

return statuses

Expand Down Expand Up @@ -1322,7 +1329,7 @@ def to_input_format(query):
return csv_file.getvalue()

@staticmethod
def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, form=None):
""" Update a given token with lemma, POS and morph value
:param user_id: ID of the user who performs the update
Expand All @@ -1331,6 +1338,8 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
:type corpus_id: int
:param token_id: Id of the token
:type token_id: int
:param form: Form
:type form: str
:param lemma: Lemma
:type lemma: str
:param POS: PartOfSpeech
Expand All @@ -1344,6 +1353,7 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
corpus = Corpus.query.filter_by(**{"id": corpus_id}).first_or_404()
token = WordToken.query.filter_by(**{"id": token_id, "corpus": corpus_id}).first_or_404()
# Strip if things are not None
form = strip_or_none(form)
lemma = strip_or_none(lemma)
POS = strip_or_none(POS)
morph = strip_or_none(morph)
Expand All @@ -1354,7 +1364,7 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
error.msg = "No value where changed"
raise error
# Check if values are correct regarding allowed values
validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus)
validity = WordToken.is_valid(form=form, lemma=lemma, POS=POS, morph=morph, corpus=corpus)
if False in list(validity.values()):
error_msg = "Invalid value in {}".format(
", ".join([key for key in validity.keys() if validity[key] is False])
Expand Down Expand Up @@ -1700,6 +1710,7 @@ def apply_changes_to(self, user_id, token_ids):
).all():
apply = {"user_id": user_id, "token_id": token.id, "corpus_id": token.corpus}
apply.update({attr: val[1] for attr, val in watch.items() if val[0] == getattr(token, attr)})
print(apply)
WordToken.update(**apply)
changed.append(token)
return changed
Expand Down
8 changes: 8 additions & 0 deletions app/templates/control_lists/information_read.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,12 @@ <h3>{{ _('Bibliography') }}</h3>
<p>{{control_list.bibliography | markdown}}</p>
{% endif %}

<h3>{{ _('Filters') }}</h3>
{# Filter flags configured on this control list; each value is rendered as stored. #}
<ul>
<li>{{ _('Numeral') }}: {{control_list.filter_numeral}}</li>
<li>{{ _('Punctuation') }}: {{control_list.filter_punct}}</li>
<li>{{ _('Metadata') }}: {{control_list.filter_metadata}}</li>
<li>{{ _('Ignore') }}: {{control_list.filter_ignore}}</li>
</ul>

{% endblock %}
15 changes: 9 additions & 6 deletions app/templates/main/corpus_new.html
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ <h1>{{ _('Create a new corpus') }}</h1>
the academic community. You will be able to propose new values to the administrators of control lists.') }}
</p>
</div>
<div>

<div class="col-md-9" id="use_public">
<select class="form-control" id="control_list_select" name="control_list_select">
Expand Down Expand Up @@ -215,11 +214,7 @@ <h1>{{ _('Create a new corpus') }}</h1>
<small id="allowed_morph_help" class="form-text text-muted">{{ _('The TSV should at least have the header : label and could have a <code>readable</code> column for human') }}</small>
<textarea aria-describedby="allowed_morph_help" class="form-control" id="allowed_morph" name="allowed_morph">{% if allowed_morph %}{{allowed_morph}}{%endif%}</textarea>
</div>
</div>


</div>
<div class="form-group row">
<div class="form-group row">
<div class="col">
<label for="ignoreforms" id="ignoreforms" class="form-text text-muted">
Ignore Elements in Control List:
Expand All @@ -239,6 +234,10 @@ <h1>{{ _('Create a new corpus') }}</h1>
Numeral
</label>
</li>
</ul>
</div>
<div class="col">
<ul>
<li>
<label>
<input type="checkbox" name="metadata" value="metadata">
Expand All @@ -254,7 +253,11 @@ <h1>{{ _('Create a new corpus') }}</h1>
</ul>
</div>
</div>
</div>


</div>

</fieldset>
<button type="submit" id="submit" class="btn btn-primary">Submit</button>
<script type="text/javascript">
Expand Down
4 changes: 2 additions & 2 deletions tests/test_models/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_similar_lemma_single_change(self):
token, change_record = WordToken.update(
user_id=1,
token_id=1, corpus_id=1,
lemma="cil", morph="smn", POS="p"
form = "Cil", lemma="cil", morph="smn", POS="p"
)
self.assertEqual(
(token.lemma, token.morph, token.POS),
Expand Down Expand Up @@ -97,7 +97,7 @@ def test_similar_lemma_double_change(self):
token, change_record = WordToken.update(
user_id=1,
token_id=1, corpus_id=1,
lemma="cil", morph="smn", POS="u")
form = "Cil", lemma="cil", morph="smn", POS="u")

self.assertEqual(
(token.lemma, token.morph, token.POS),
Expand Down
14 changes: 11 additions & 3 deletions tests/test_models/test_regex_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_filter_allowed_lemma(self):
token, change_record = WordToken.update(
user_id=1,
token_id=1, corpus_id=1,
lemma="#", morph="smn", POS="u")
form = "Cil", lemma="#", morph="smn", POS="u")

def test_combinatory_regex(self):
self.load_fixtures()
Expand All @@ -64,8 +64,7 @@ def test_combinatory_regex(self):
self.db.session.refresh(token)
self.db.session.refresh(corpus)
for category, filtre in tests:
validity = WordToken.is_valid(lemma=category, POS=token.POS, morph=token.morph, corpus=corpus)["lemma"]
print(combi, category, filtre, validity)
validity = WordToken.is_valid(form = token.form, lemma=category, POS=token.POS, morph=token.morph, corpus=corpus)["lemma"]
if filtre and filtre in combi or 'celui' in category:
self.assertTrue(validity, f"Filters are not working. `{category}` should be matched by `{filtre}` in {', '.join(combi) or 'absence of filters'}")
else:
Expand All @@ -83,4 +82,13 @@ def test_metadata_filter(self):
self.db.session.refresh(corpus)
token = corpus.get_unallowed().first()
self.assertNotEqual(token.form, "[METADATA:blabla]", f'Metadata filter is not working. [METADATA:blabla] should not be considered as unallowed.')

token = WordToken.query.get(2)
validity_lemma = WordToken.is_valid(form = token.form, lemma="blabla", POS=token.POS, morph=token.morph,corpus=corpus)["lemma"]
self.assertTrue(validity_lemma, f"Filter metadata is not working for lemma")
validity_pos = WordToken.is_valid(form = token.form, lemma=token.lemma, POS="blabla", morph=token.morph,corpus=corpus)["POS"]
validity_morph = WordToken.is_valid(form = token.form, lemma=token.lemma, POS=token.POS, morph="blabla",corpus=corpus)["morph"]




67 changes: 67 additions & 0 deletions tests/test_selenium/download_temp/wauchier.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
<fileDesc>
<titleStmt>
<title></title>
</titleStmt>
<publicationStmt><p></p></publicationStmt>
<sourceDesc><p></p></sourceDesc>
</fileDesc>

</teiHeader>
<text xml:lang="fr">
<body xml:lang="fro">
<div>
<ab>

<w xml:id="t1" n="1" lemma="de" type="POS=PRE" >De</w>
<w xml:id="t2" n="2" lemma="saint" type="POS=ADJqua" >seint</w>
</ab>
<ab>
<w xml:id="t4" n="4" lemma="martin" type="POS=NOMpro" >Martin</w>
<w xml:id="t5" n="5" lemma="mout" type="POS=ADVgen" >mout</w>
</ab>
<ab>
<w xml:id="t7" n="7" lemma="devoir" type="POS=VERcjg" >doit</w>
<w xml:id="t8" n="8" lemma="un" type="POS=PRE" >on</w>
</ab>
<ab>
<w xml:id="t10" n="10" lemma="doucement" type="POS=ADVgen" >doucement</w>
<w xml:id="t11" n="11" lemma="et" type="POS=CONcoo" >et</w>
</ab>
<ab>
<w xml:id="t13" n="13" lemma="volentiers" type="POS=ADVgen" >volentiers</w>
<w xml:id="t14" n="14" lemma="le" type="POS=DETdef" >le</w>
</ab>
<ab>
<w xml:id="t16" n="16" lemma="bien" type="POS=ADVgen" >bien</w>
<w xml:id="t17" n="17" lemma="öir" type="POS=VERinf" >oïr</w>
</ab>
<ab>
<w xml:id="t19" n="19" lemma="et" type="POS=CONcoo" >et</w>
<w xml:id="t20" n="20" lemma="entendre" type="POS=VERinf" >entendre</w>
</ab>
<ab>
<w xml:id="t22" n="22" lemma="," type="POS=PONfbl" >,</w>
<w xml:id="t23" n="23" lemma="car" type="POS=CONcoo" >car</w>
</ab>
<ab>
<w xml:id="t25" n="25" lemma="par" type="POS=PRE" >par</w>
<w xml:id="t26" n="26" lemma="le" type="POS=DETdef" >le</w>
</ab>
<ab>
<w xml:id="t28" n="28" lemma="bien" type="POS=ADVgen" >bien</w>
<w xml:id="t29" n="29" lemma="savoir" type="POS=VERinf" >savoir</w>
</ab>
<ab>
<w xml:id="t31" n="31" lemma="et" type="POS=CONcoo" >et</w>
<w xml:id="t32" n="32" lemma="retenir" type="POS=VERinf" >retenir</w>
</ab>
<ab>
<w xml:id="t34" n="34" lemma="pöoir" type="POS=VERcjg" >puet</w>
<w xml:id="t35" n="35" lemma="il" type="POS=PROper" >l</w></ab>
</div>
</body>
</text>
</TEI>

0 comments on commit db60b6a

Please sign in to comment.