Skip to content

Commit

Permalink
Add routine to both analyse and generate missing lemmas
Browse files Browse the repository at this point in the history
  • Loading branch information
Trondtr committed Nov 19, 2023
1 parent 9a0ca2a commit 91af61f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 40 deletions.
49 changes: 31 additions & 18 deletions test/src/morphology/generate-adjective-lemmas.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,14 @@ for f in $fsttype; do
sed 's/$/+A+Der\/Comp+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix | \
cut -f2 | grep -v "A+" | grep -v "^$" | sort -u >> $generated_lemmas.$f.txt


# Generer lemmaer i adjectives med +A+Attr, lagre i generated-adjectives
sed 's/$/+A+Attr/' $lemmas | $lookup_tool $generator_file.$suffix | \
cut -f2 | grep -v "A+" | grep -v "^$" | sort -u >> $generated_lemmas.$f.txt

# Generer lemmaer i adjectives med +A+ABBR, lagre i generated-adjectives
sed 's/$/+A+ABBR/' $lemmas | $lookup_tool $generator_file.$suffix | \
cut -f2 | grep -v "A+" | grep -v "^$" | sort -u >> $generated_lemmas.$f.txt

# Generer plurale lemmaer i filtered-adjectives med +N+Pl+Nom (som ikke lar seg generere med +A+Sg+Nom).
# Lagre dem i generated-adjectives
sed 's/$/+A+Sg+Nom/' $lemmas | $lookup_tool $generator_file.$suffix | \
Expand All @@ -130,26 +133,36 @@ for f in $fsttype; do
sort -u -o $lemmas $lemmas
sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt

# Sammenlikne: Former som er i lemmalista men ikke i lista med genererte former,
# lagre i missing_adjectives_lemmas.txt. Formene genereres med +A+Sg+Nom for enklere debugging.
comm -23 $lemmas $generated_lemmas.$f.txt |\
grep -v '^$' | sed 's/$/+A+Sg+Nom/' |\
$lookup_tool $generator_file.$suffix > $result_file.$f.txt

# Open the diff file in SubEthaEdit (if there is a diff):
if [ -s $result_file.$f.txt ]; then
# Only open the failed lemmas in see if @SEE@ is defined:
if [ "$EXTEDITOR" ]; then
$EXTEDITOR $result_file.$f.txt
# Open the diff file in SubEthaEdit (if there is a diff):
LC_ALL=no_NO.UTF-8 comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt

if [ -s $result_file.$f.txt ]; then
grep -v '^$' $result_file.$f.txt \
| sed 's/$/+A+Sg+Nom/' \
| $lookup_tool $generator_file.$suffix \
> $gen_result_file.$f.txt
# If we have an analyser, analyse the missing lemmas as well:
if test -e $analyser_file.$suffix ; then
grep -v '^$' $result_file.$f.txt \
| $lookup_tool $analyser_file.$suffix \
> $ana_result_file.$f.txt
fi
# Only open the failed lemmas in the external editor if $EXTEDITOR is set:
if [ "$EXTEDITOR" ]; then
$EXTEDITOR $result_file.$f.txt
$EXTEDITOR $gen_result_file.$f.txt
$EXTEDITOR $ana_result_file.$f.txt
else
echo "There were problem lemmas. Details in:"
echo "* $result_file.$f.txt "
fi
Fail=1
echo "$f - FAIL"
continue
fi
echo "$f - PASS"
echo "* $gen_result_file.$f.txt"
echo "* $ana_result_file.$f.txt"
fi
Fail=1
echo "$f - FAIL"
continue
fi
echo "$f - PASS"
fi
done

Expand Down
54 changes: 32 additions & 22 deletions test/src/morphology/generate-verb-lemmas.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -109,31 +109,41 @@ for f in $fsttype; do
sed 's/$/+V+Inf/' $lemmas | \
$lookup_tool $generator_file.$suffix | \
cut -f2 | grep -v "V+" | grep -v "^$" | sort -u > $generated_lemmas.$f.txt

# Sorter, unifiser
sort -u -o $lemmas $lemmas
sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt

# Sammenlikne: Former som er i lemmalista men ikke i lista med genererte former,
# lagre i missingverbLemmas.txt. Formene genereres med +V+Inf for enklere debugging.
comm -23 $lemmas $generated_lemmas.$f.txt |\
grep -v '^$' | sed 's/$/+V+Inf/' |\
$lookup_tool $generator_file.$suffix > $result_file.$f.txt

# Open the diff file in SubEthaEdit (if there is a diff):
if [ -s $result_file.$f.txt ]; then
# Only open the failed lemmas in see if @SEE@ is defined:
if [ "$EXTEDITOR" ]; then
$EXTEDITOR $result_file.$f.txt

###### Collect results, and generate debug info if FAIL: #######
# Sort and compare original input with resulting output - the diff is
# used to generate lemmas which are opened in SEE:
sort -u -o $generated_lemmas.$f.txt $generated_lemmas.$f.txt
comm -23 $lemmas $generated_lemmas.$f.txt > $result_file.$f.txt

# Open the diff file in SubEthaEdit (if there is a diff):
if [ -s $result_file.$f.txt ]; then
grep -v '^$' $result_file.$f.txt \
| sed 's/$/+V+Inf/' \
| $lookup_tool $generator_file.$suffix \
> $gen_result_file.$f.txt
# If we have an analyser, analyse the missing lemmas as well:
if test -e $analyser_file.$suffix ; then
grep -v '^$' $result_file.$f.txt \
| $lookup_tool $analyser_file.$suffix \
> $ana_result_file.$f.txt
fi
# Only open the failed lemmas in the external editor if $EXTEDITOR is set:
if [ "$EXTEDITOR" ]; then
$EXTEDITOR $result_file.$f.txt
$EXTEDITOR $gen_result_file.$f.txt
$EXTEDITOR $ana_result_file.$f.txt
else
echo "There were problem lemmas. Details in:"
echo "* $result_file.$f.txt "
fi
Fail=1
echo "$f - FAIL"
continue
fi
echo "$f - PASS"
echo "* $gen_result_file.$f.txt"
echo "* $ana_result_file.$f.txt"
fi
Fail=1
echo "$f - FAIL"
continue
fi
echo "$f - PASS"
fi
done

Expand Down

0 comments on commit 91af61f

Please sign in to comment.