From 76bb4016fd85696081743cdf03b3cd41c5d1e71f Mon Sep 17 00:00:00 2001 From: Jack Rueter Date: Mon, 18 Nov 2024 11:04:33 +0200 Subject: [PATCH] more words according to freq missing missing: 2716 --- src/fst/morphology/affixes/nouns.lexc | 11 +++++++--- src/fst/morphology/root.lexc | 18 ++++++++------- src/fst/morphology/stems/adpositions.lexc | 4 +++- src/fst/morphology/stems/adverbs.lexc | 11 ++++++++++ src/fst/morphology/stems/nouns_newwords.lexc | 22 ++++++++++++++++++- .../stems/propernouns_newwords.lexc | 2 +- src/fst/morphology/stems/verbs_newwords.lexc | 1 + 7 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/fst/morphology/affixes/nouns.lexc b/src/fst/morphology/affixes/nouns.lexc index 1c0cb405..a6712c81 100644 --- a/src/fst/morphology/affixes/nouns.lexc +++ b/src/fst/morphology/affixes/nouns.lexc @@ -369,6 +369,7 @@ LEXICON N_SEIBAZ ! seibaz:seib :az SG-NOM-SUF ; :as SG-PAR-SUF_t ; +Sg+Ill:haze K ; ++Sg+Ter1:hazesai K ; :ha SG-OBLIQUE_NO_DERIV/ILL/PAR ; R ; ! xxx check ! Plural @@ -440,6 +441,7 @@ LEXICON N_VEDEKAZ ! vedekaz:vedeka :z SG-NOM-SUF ; :s SG-PAR-SUF_t ; +Sg+Ill:haze K ; ++Sg+Ter1:hazesai K ; :ha SG-OBLIQUE_NO_DERIV/ILL/PAR ; R ; ! xxx check ! Plural @@ -1056,6 +1058,7 @@ LEXICON SG-OBLIQUE +Der+Der/Toi+A:%>to A_KENGATOI ; SG-ILL-SUF ; SG-PAR-SUF ; + SG-TER1-SUF ; LEXICON SG-OBLIQUE_NO_DERIV/ILL/PAR SG-ABE-SUF ; @@ -1074,7 +1077,6 @@ LEXICON SG-OBLIQUE_NO_DERIV/ILL/PAR SG-GEN-SUF ; SG-INE-SUF ; SG-PRL-SUF ; - SG-TER1-SUF ; SG-TER2-SUF ; SG-TER3-SUF ; SG-TRA-SUF ; @@ -1355,15 +1357,18 @@ LEXICON POSS_SUF !based on kin terminology, reflexive pronoun, personal pronouns +Sg+Acc+PxSg1:%>in K ;!tatain +Sg+Gen+PxSg1:%>in K ;!tatain +Sg+All+PxSg2:%>leiž K ;!Tataleiž -+Sg+All+Px3:%>leze K ;!mamaleze ++Sg+All+PxSg3:%>leze K ;!mamaleze ++Sg+All+PxPl3:%>leze K ;!mamaleze +Sg+Ela+PxSg1:%>sainpäi K ; +Sg+Ela+PxSg2:%>saižpäi K ; +Sg+Ela+PxSg3:%>sazepäi K ; ++Sg+Ela+PxPl3:%>sazepäi K ; +Sg+Ine+PxSg1:%>sain K ; +Sg+Ine+PxSg2:%>saiž K ; +Sg+Ine+PxSg3:%>saze K ; +Sg+Nom+PxSg2:%>iž K ;!mamaiž -+Sg+Nom+Px3:%>ze K ;!mamaze ++Sg+Nom+PxSg3:%>ze K ;!mamaze ++Sg+Nom+PxPl3:%>ze K ;!mamaze +Sg+Nom+PxPl1:%>m K ;!Tatam diff --git a/src/fst/morphology/root.lexc b/src/fst/morphology/root.lexc index c82388fb..64d0634b 100644 --- a/src/fst/morphology/root.lexc +++ b/src/fst/morphology/root.lexc @@ -126,7 +126,8 @@ Multichar_Symbols +Cond !!≈ * **@CODE@** = conditional +Ind !!≈ * **@CODE@** = indicative - +Imprt !!≈ * **@CODE@** = imperative + +Imprt !!≈ * **@CODE@** = imperative + +Pot !!≈ * **@CODE@** = potential linne- !! ##### Tenses +Prs !!≈ * **@CODE@** = @@ -208,12 +209,13 @@ Multichar_Symbols !! ##### Possessive suffixes: - +PxSg1 !!≈ * **@CODE@** = -+PxSg2 !!≈ * **@CODE@** = -+PxSg3 !!≈ * **@CODE@** = - +PxPl1 !!≈ * **@CODE@** = -+PxPl2 !!≈ * **@CODE@** = -+PxPl3 !!≈ * **@CODE@** = + +PxSg1 !!≈ * **@CODE@** = -in ++PxSg2 !!≈ * **@CODE@** = -iž ++PxSg3 !!≈ * **@CODE@** = -ze + +PxPl1 !!≈ * **@CODE@** = -moi ++PxPl2 !!≈ * **@CODE@** = -toi ++PxPl3 !!≈ * **@CODE@** = -ze + !! ##### Comparative tags: @@ -275,7 +277,7 @@ Multichar_Symbols +Err/Orth-no-pal !!≈ * **@CODE@** = palatalization mark missing +Use/-Spell !!≈ * **@CODE@** = - + +Cmp/SgNom !!≈ * **@CODE@** = compound words !! #### Semtags +Sem/Mal !!≈ * **@CODE@** diff --git a/src/fst/morphology/stems/adpositions.lexc b/src/fst/morphology/stems/adpositions.lexc index 4dffd77d..1c041184 100644 --- a/src/fst/morphology/stems/adpositions.lexc +++ b/src/fst/morphology/stems/adpositions.lexc @@ -7,9 +7,11 @@ edel+Pr:edel PR_ ; edes+Po:edes PO_INE ; jälʼghe+Po:jälʼghe PO_ ; kalʼt+Po:kalʼt PO_ ; -keskes+Po+Ine:keskes PO_INE ; +keskes+Po:keskes PO_INE ; +keskhe+Po:keskhe PO_ILL ; mödhe+Po:mödhe PO_ILL ; polhe+Po:polhe PO_ILL ; +taga+Po:taga PO_ ; tagut+Po:tagut PO_ ; täht+Po:täht PO_ ; diff --git a/src/fst/morphology/stems/adverbs.lexc b/src/fst/morphology/stems/adverbs.lexc index 8c373fb4..de83b737 100644 --- a/src/fst/morphology/stems/adverbs.lexc +++ b/src/fst/morphology/stems/adverbs.lexc @@ -1,6 +1,17 @@ LEXICON adverbs +ümbri+Adv:ümbri ADV_ ; +ühthe+Adv:ühthe ADV_ ; +üht+Adv:üht ADV_ ; +päliči+Adv:päliči ADV_ ; +edelpäi+Adv:edelpäi ADV_ ; +siriči+Adv:siriči ADV_ ; +läbi+Adv:läbi ADV_ ; +eriži+Adv:eriži ADV_ ; +ende+Adv:ende ADV_ ; +külläks+Adv:külläks ADV_ ; +ninga+Adv:ninga ADV_ ; alahaks+Adv:alahaks ADV_ ; dai+Adv:dai ADV_ ; edehe+Adv:edehe ADV_ ; diff --git a/src/fst/morphology/stems/nouns_newwords.lexc b/src/fst/morphology/stems/nouns_newwords.lexc index e6a2a3d8..06cc07a7 100644 --- a/src/fst/morphology/stems/nouns_newwords.lexc +++ b/src/fst/morphology/stems/nouns_newwords.lexc @@ -436,4 +436,24 @@ varaidai+N:varaida N_KACUI ; vajehtai+N:vajehta N_KACUI ; vestinkandai+N:vestinkanda N_KACUI ; viha+N:vih N_SANA ; -voikai+N:voika N_KACUI ; \ No newline at end of file +voikai+N:voika N_KACUI ; +rist+N:rist N_0/an/ad/ha/id ; +surm+N:surm N_0/an/ad/ha/id ; +bok+N:bok N_0/an/ad/ha/id ; +tusk+N:tusk N_0/an/ad/ha/id ; +mär+N:mär N_0/an/ad/aha/id ; +kaiv+N:kaiv N_0/on/od/ho/oid ; +leskiak+N:leski#ak N_MARJ ; +jälʼgestuz+N:jälʼgestuz N_ALUZ ; +künduz+N:künduz N_ALUZ ; +elʼgenduz+N:elʼgenduz N_ALUZ ; +londuz+N:londuz N_ALUZ ; +šuhaiduz+N:šuhaiduz N_ALUZ ; +kalʼlʼonduz+N:kalʼlʼonduz N_ALUZ ; +surmankünduz+N:surmankünduz N_ALUZ ; +püduz+N:püduz N_ALUZ ; +oksenduz+N:oksenduz N_ALUZ ; +nagranduz+N:nagranduz N_ALUZ ; +kokaiduz+N:kokaiduz N_ALUZ ; +kargaiduz+N:kargaiduz N_ALUZ ; +buraiduz+N:buraiduz N_ALUZ ; \ No newline at end of file diff --git a/src/fst/morphology/stems/propernouns_newwords.lexc b/src/fst/morphology/stems/propernouns_newwords.lexc index 9ade9e00..f023036b 100644 --- a/src/fst/morphology/stems/propernouns_newwords.lexc +++ b/src/fst/morphology/stems/propernouns_newwords.lexc @@ -51,7 +51,7 @@ Jumal+N+Prop:Jumal N_MARJ ; Ižand+N+Prop:Ižand N_POIG ; Hristos+N+Prop:Hristos N_MARJ ; David+N+Prop:David N_MARJ ; -Jerusalim+N+Prop:Jerusalim N_POIG ; +Jerusalim+N+Prop:Jerusalim N_0/an/ad/ha/id ; Petr+N+Prop:Petr N_MARJ ; Joann+N+Prop:Joann N_MARJ ; Saul+N+Prop:Saul N_MARJ ; diff --git a/src/fst/morphology/stems/verbs_newwords.lexc b/src/fst/morphology/stems/verbs_newwords.lexc index 2594fb94..59b04e03 100644 --- a/src/fst/morphology/stems/verbs_newwords.lexc +++ b/src/fst/morphology/stems/verbs_newwords.lexc @@ -503,6 +503,7 @@ harjata+V:harj V_ata/ab/oi/akaha ; hapata+V:hap V_ata/neb/ni/akaha ; !#V_ata/tab/si/Q ;_ezipaukata+V:ezipauk V_ata/tab/si/Q ; sugida+V:sugi V_da/b/0/gaha ; +lajida+V:laji V_da/b/0/gaha ;!CHECKME aidoida+V:aidoi V_da/b/0/gaha ; annuliruida+V:annulirui V_da/b/0/gaha ; arboida+V:arboi V_da/b/0/gaha ;