modif metadata validity + tests + presentation filtres CL dans informations
hipster-philology · Sep 17, 2024 · db60b6a · db60b6a
1 parent c6c0a15
commit db60b6a
Show file tree

Hide file tree

Showing 7 changed files with 140 additions and 42 deletions.
diff --git a/app/main/views/tokens.py b/app/main/views/tokens.py
@@ -133,6 +133,7 @@ def tokens_correct_single(corpus_id, token_id):
         token, change_record = WordToken.update(
             user_id=current_user.id,
             token_id=token_id, corpus_id=corpus_id,
+            form = string_to_none(request.form.get("form")),
             lemma=string_to_none(request.form.get("lemma")),
             POS=string_to_none(request.form.get("POS")),
             morph=string_to_none(request.form.get("morph"))

diff --git a/app/models/corpus.py b/app/models/corpus.py
@@ -1101,7 +1101,7 @@ def get_like(filter_id, form, group_by, type_like="lemma", allowed_list=False):
         return query
 
     @staticmethod
-    def is_valid(lemma, POS, morph, corpus):
+    def is_valid(lemma, POS, morph, corpus, form):
         """ Check if a token is valid for a given corpus
 
         :param lemma: Lemma value of the token to validate
@@ -1125,46 +1125,53 @@ def is_valid(lemma, POS, morph, corpus):
             "morph": True
         }
         allowed_column = corpus.displayed_columns_by_name
-        if (lemma  # If we changed the lemma
+        current_controlList = corpus.control_lists
+        print(form)
+        print(current_controlList.filter_metadata)
+        print(re.match(current_controlList.re_filter_metadata, form))
+        if form and current_controlList.filter_metadata and re.match(current_controlList.re_filter_metadata, form):
+            pass
+        else:
+            if (lemma  # If we changed the lemma
                 and "lemma" in allowed_column  # And if the lemma is a column known to the project
                 and allowed_lemma.count()  # And if we have a list of accepted lemma,
-        ):
-            # then we check for lemma validity
-            current_controlList = corpus.control_lists
-            regex_liste = []
-            if current_controlList:
-                if current_controlList.filter_ignore:
-                    regex_liste.append(ControlLists.re_filter_ignore)
-                if current_controlList.filter_punct:
-                    regex_liste.append(ControlLists.re_filter_punct)
-                if current_controlList.filter_numeral:
-                    regex_liste.append(ControlLists.re_filter_numeral)
-            ignored_by_regex = False
-
-            for regex in regex_liste:
-                if re.match(regex, lemma) is not None:
-                    ignored_by_regex = True
-            if (
-                    ignored_by_regex is False and
-                    corpus.has_custom_dictionary_value("lemma", lemma) is False and
-                    corpus.get_allowed_values("lemma", label=lemma).count() == 0
                 ):
+            # then we check for lemma validity
+
+                    regex_liste = []
+                    if current_controlList:
+                        if current_controlList.filter_ignore:
+                            regex_liste.append(ControlLists.re_filter_ignore)
+                        if current_controlList.filter_punct:
+                            regex_liste.append(ControlLists.re_filter_punct)
+                        if current_controlList.filter_numeral:
+                            regex_liste.append(ControlLists.re_filter_numeral)
+                    ignored_by_regex = False
+
+                    for regex in regex_liste:
+                        if re.match(regex, lemma) is not None:
+                            ignored_by_regex = True
+                    if (
+                        ignored_by_regex is False and
+                        corpus.has_custom_dictionary_value("lemma", lemma) is False and
+                        corpus.get_allowed_values("lemma", label=lemma).count() == 0
+                        ):
 
-                    statuses["lemma"] = False
+                        statuses["lemma"] = False
 
-        if POS is not None \
+            if POS is not None \
                 and "POS" in allowed_column \
                 and allowed_POS.count() > 0 \
                 and corpus.get_allowed_values("POS", label=POS).count() == 0:
-            if not corpus.has_custom_dictionary_value("POS", POS):
-                statuses["POS"] = False
+                if not corpus.has_custom_dictionary_value("POS", POS):
+                    statuses["POS"] = False
 
-        if morph is not None \
+            if morph is not None \
                 and "morph" in allowed_column \
                 and allowed_morph.count() > 0 \
                 and corpus.get_allowed_values("morph", label=morph).count() == 0:
-            if not corpus.has_custom_dictionary_value("morph", morph):
-                statuses["morph"] = False
+                if not corpus.has_custom_dictionary_value("morph", morph):
+                    statuses["morph"] = False
 
         return statuses
 
@@ -1322,7 +1329,7 @@ def to_input_format(query):
         return csv_file.getvalue()
 
     @staticmethod
-    def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
+    def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None, form=None):
         """ Update a given token with lemma, POS and morph value
 
         :param user_id: ID of the user who performs the update
@@ -1331,6 +1338,8 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
         :type corpus_id: int
         :param token_id: Id of the token
         :type token_id: int
+        :param form: Form
+        :type form: str
         :param lemma: Lemma
         :type lemma: str
         :param POS: PartOfSpeech
@@ -1344,6 +1353,7 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
         corpus = Corpus.query.filter_by(**{"id": corpus_id}).first_or_404()
         token = WordToken.query.filter_by(**{"id": token_id, "corpus": corpus_id}).first_or_404()
         # Strip if things are not None
+        form = strip_or_none(form)
         lemma = strip_or_none(lemma)
         POS = strip_or_none(POS)
         morph = strip_or_none(morph)
@@ -1354,7 +1364,7 @@ def update(user_id, corpus_id, token_id, lemma=None, POS=None, morph=None):
             error.msg = "No value where changed"
             raise error
         # Check if values are correct regarding allowed values
-        validity = WordToken.is_valid(lemma=lemma, POS=POS, morph=morph, corpus=corpus)
+        validity = WordToken.is_valid(form=form, lemma=lemma, POS=POS, morph=morph, corpus=corpus)
         if False in list(validity.values()):
             error_msg = "Invalid value in {}".format(
                 ", ".join([key for key in validity.keys() if validity[key] is False])
@@ -1700,6 +1710,7 @@ def apply_changes_to(self, user_id, token_ids):
         ).all():
             apply = {"user_id": user_id, "token_id": token.id, "corpus_id": token.corpus}
             apply.update({attr: val[1] for attr, val in watch.items() if val[0] == getattr(token, attr)})
+            print(apply)
             WordToken.update(**apply)
             changed.append(token)
         return changed

diff --git a/app/templates/control_lists/information_read.html b/app/templates/control_lists/information_read.html
@@ -16,4 +16,12 @@ <h3>{{ _('Bibliography') }}</h3>
 <p>{{control_list.bibliography | markdown}}</p>
 {% endif %}
 
+<h3>{{ _('Filters') }}</h3>
+<ul>
+  <li>Numeral : {{control_list.filter_numeral}}</li>
+  <li>Punctuation: {{control_list.filter_punct}}</li>
+  <li>Metadata: {{control_list.filter_metadata}}</li>
+  <li>Ignore: {{control_list.filter_ignore}}</li>
+</ul>
+
 {% endblock %}
diff --git a/app/templates/main/corpus_new.html b/app/templates/main/corpus_new.html
@@ -147,7 +147,6 @@ <h1>{{ _('Create a new corpus') }}</h1>
                     the academic community. You will be able to propose new values to the administrators of control lists.') }}
                 </p>
             </div>
-                    <div>
 
             <div class="col-md-9" id="use_public">
               <select class="form-control" id="control_list_select" name="control_list_select">
@@ -215,11 +214,7 @@ <h1>{{ _('Create a new corpus') }}</h1>
                     <small id="allowed_morph_help" class="form-text text-muted">{{ _('The TSV should at least have the header : label and could have a <code>readable</code> column for human') }}</small>
                     <textarea aria-describedby="allowed_morph_help" class="form-control" id="allowed_morph" name="allowed_morph">{% if allowed_morph %}{{allowed_morph}}{%endif%}</textarea>
                 </div>
-            </div>
-
-
-        </div>
-<div class="form-group row">
+                <div class="form-group row">
             <div class="col">
                 <label for="ignoreforms" id="ignoreforms" class="form-text text-muted">
                     Ignore Elements in Control List:
@@ -239,6 +234,10 @@ <h1>{{ _('Create a new corpus') }}</h1>
                         Numeral
                     </label>
                 </li>
+            </ul>
+    </div>
+    <div class="col">
+        <ul>
             <li>
                     <label>
                         <input type="checkbox" name="metadata" value="metadata">
@@ -254,7 +253,11 @@ <h1>{{ _('Create a new corpus') }}</h1>
         </ul>
         </div>
         </div>
+            </div>
+
+
         </div>
+
     </fieldset>
 <button type="submit" id="submit" class="btn btn-primary">Submit</button>
 <script type="text/javascript">

diff --git a/tests/test_models/test_record.py b/tests/test_models/test_record.py
@@ -66,7 +66,7 @@ def test_similar_lemma_single_change(self):
         token, change_record = WordToken.update(
             user_id=1,
             token_id=1, corpus_id=1,
-            lemma="cil", morph="smn", POS="p"
+            form = "Cil", lemma="cil", morph="smn", POS="p"
         )
         self.assertEqual(
             (token.lemma, token.morph, token.POS),
@@ -97,7 +97,7 @@ def test_similar_lemma_double_change(self):
         token, change_record = WordToken.update(
             user_id=1,
             token_id=1, corpus_id=1,
-            lemma="cil", morph="smn", POS="u")
+            form = "Cil", lemma="cil", morph="smn", POS="u")
 
         self.assertEqual(
             (token.lemma, token.morph, token.POS),

diff --git a/tests/test_models/test_regex_filter.py b/tests/test_models/test_regex_filter.py
@@ -37,7 +37,7 @@ def test_filter_allowed_lemma(self):
             token, change_record = WordToken.update(
                 user_id=1,
                 token_id=1, corpus_id=1,
-                lemma="#", morph="smn", POS="u")
+                form = "Cil", lemma="#", morph="smn", POS="u")
 
     def test_combinatory_regex(self):
         self.load_fixtures()
@@ -64,8 +64,7 @@ def test_combinatory_regex(self):
                 self.db.session.refresh(token)
                 self.db.session.refresh(corpus)
                 for category, filtre in tests:
-                    validity = WordToken.is_valid(lemma=category, POS=token.POS, morph=token.morph, corpus=corpus)["lemma"]
-                    print(combi, category, filtre, validity)
+                    validity = WordToken.is_valid(form = token.form, lemma=category, POS=token.POS, morph=token.morph, corpus=corpus)["lemma"]
                     if filtre and filtre in combi or 'celui' in category:
                         self.assertTrue(validity, f"Filters are not working. `{category}` should be matched by `{filtre}` in {', '.join(combi) or 'absence of filters'}")
                     else:
@@ -83,4 +82,13 @@ def test_metadata_filter(self):
         self.db.session.refresh(corpus)
         token = corpus.get_unallowed().first()
         self.assertNotEqual(token.form, "[METADATA:blabla]", f'Metadata filter is not working. [METADATA:blabla] should not be considered as unallowed.')
+
+        token = WordToken.query.get(2)
+        validity_lemma = WordToken.is_valid(form = token.form, lemma="blabla", POS=token.POS, morph=token.morph,corpus=corpus)["lemma"]
+        self.assertTrue(validity_lemma, f"Filter metadata is not working for lemma")
+        validity_pos = WordToken.is_valid(form = token.form, lemma=token.lemma, POS="blabla", morph=token.morph,corpus=corpus)["POS"]
+        validity_morph = WordToken.is_valid(form = token.form, lemma=token.lemma, POS=token.POS, morph="blabla",corpus=corpus)["morph"]
+
+
+
 
diff --git a/tests/test_selenium/download_temp/wauchier.xml b/tests/test_selenium/download_temp/wauchier.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<TEI xmlns="http://www.tei-c.org/ns/1.0">
+    <teiHeader>
+        <fileDesc>
+            <titleStmt>
+                <title></title>
+            </titleStmt>
+            <publicationStmt><p></p></publicationStmt>
+            <sourceDesc><p></p></sourceDesc>
+        </fileDesc>
+
+    </teiHeader>
+    <text xml:lang="fr">
+        <body xml:lang="fro">
+            <div>
+                <ab>
+
+                        <w xml:id="t1" n="1" lemma="de" type="POS=PRE" >De</w>
+                        <w xml:id="t2" n="2" lemma="saint" type="POS=ADJqua" >seint</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t4" n="4" lemma="martin" type="POS=NOMpro" >Martin</w>
+                        <w xml:id="t5" n="5" lemma="mout" type="POS=ADVgen" >mout</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t7" n="7" lemma="devoir" type="POS=VERcjg" >doit</w>
+                        <w xml:id="t8" n="8" lemma="un" type="POS=PRE" >on</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t10" n="10" lemma="doucement" type="POS=ADVgen" >doucement</w>
+                        <w xml:id="t11" n="11" lemma="et" type="POS=CONcoo" >et</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t13" n="13" lemma="volentiers" type="POS=ADVgen" >volentiers</w>
+                        <w xml:id="t14" n="14" lemma="le" type="POS=DETdef" >le</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t16" n="16" lemma="bien" type="POS=ADVgen" >bien</w>
+                        <w xml:id="t17" n="17" lemma="öir" type="POS=VERinf" >oïr</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t19" n="19" lemma="et" type="POS=CONcoo" >et</w>
+                        <w xml:id="t20" n="20" lemma="entendre" type="POS=VERinf" >entendre</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t22" n="22" lemma="," type="POS=PONfbl" >,</w>
+                        <w xml:id="t23" n="23" lemma="car" type="POS=CONcoo" >car</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t25" n="25" lemma="par" type="POS=PRE" >par</w>
+                        <w xml:id="t26" n="26" lemma="le" type="POS=DETdef" >le</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t28" n="28" lemma="bien" type="POS=ADVgen" >bien</w>
+                        <w xml:id="t29" n="29" lemma="savoir" type="POS=VERinf" >savoir</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t31" n="31" lemma="et" type="POS=CONcoo" >et</w>
+                        <w xml:id="t32" n="32" lemma="retenir" type="POS=VERinf" >retenir</w>
+                        </ab>
+                        <ab>
+                        <w xml:id="t34" n="34" lemma="pöoir" type="POS=VERcjg" >puet</w>
+                        <w xml:id="t35" n="35" lemma="il" type="POS=PROper" >l</w></ab>
+            </div>
+       </body>
+     </text>
+</TEI>