Skip to content

Commit

Permalink
All the phrase translation tags (so far)
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Nov 20, 2024
1 parent d08ac5e commit 85264f2
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 0 deletions.
31 changes: 31 additions & 0 deletions src/stoeng/resources/phrase_translate/noun_passthrough_tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"0": [
# word class
"+N",
"+A",
"+I",
"+D"
],
"2": [
# N: Possessives
"+Px1Sg",
"+Px2Sg",
"+Px3Sg",
"+Px1Pl",
"+Px2Pl",
# "+Px12Pl", # Needs to be recoded: 21 -> 12
"+Px3Pl",
"+Px4Sg/Pl",
"+PxX",
"+PxXPl"
],
"3": [
# N: number
"+Sg",
"+Pl",
"+Obv",
"+Loc",
"+Distr"
]
}

5 changes: 5 additions & 0 deletions src/stoeng/resources/phrase_translate/noun_tag_map.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
["+Dim", "+Der/Dim", 2],
["+Px21Pl", "+Px12Pl", 2],
["+PxXPl", "+PxX", 2]
]
1 change: 1 addition & 0 deletions src/stoeng/resources/phrase_translate/noun_tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["+N", "+A", "+I", "+D"]
25 changes: 25 additions & 0 deletions src/stoeng/resources/phrase_translate/noun_wordform_to_phrase.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
["+N", null, 0],
["+A", null, 0],
["+I", null, 0],
["+D", null, 0],
# Number
["+Sg", "TagMap.COPY_TAG_NAME", 1],
["+Pl", "TagMap.COPY_TAG_NAME", 1],
["+Obv", "TagMap.COPY_TAG_NAME", 1],
["+Loc", "TagMap.COPY_TAG_NAME", 1],
["+Distr", "TagMap.COPY_TAG_NAME", 1],
# Diminutive
["+Dim", "TagMap.COPY_TAG_NAME", 2],
["+Der/Dim", "Dim+", 2],
# Possessives
["+Px1Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px2Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px3Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px1Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px2Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px12Pl", "TagMap.COPY_TAG_NAME", 3], # Maybe needs to be recoded with 12 -> 21
["+Px3Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px4Sg/Pl", "TagMap.COPY_TAG_NAME", 3],
["+PxX", "PxXPl+", 3]
]
36 changes: 36 additions & 0 deletions src/stoeng/resources/phrase_translate/verb_passthrough_tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"0": [
# word class
"+II",
"+TI"
],
"2": [
# V: Person - subject
"+1Sg",
"+2Sg",
"+3Sg",
"+1Pl",
# "+12Pl", # Needs to be recoded: 21 -> 12
"+2Pl",
"+3Pl",
"+4Sg/Pl",
"+5Sg/Pl",
"+X",
"+XPl"
],
"3": [
# V: Person - object
"+1SgO",
"+2SgO",
"+3SgO",
"+1PlO",
# "+21PlO", # Needs to be recoded: 21 -> 12
"+2PlO",
"+3PlO",
"+4Pl",
"+4Sg",
"+4Sg/PlO",
"+5Sg/PlO",
"+XPlO"
]
}
39 changes: 39 additions & 0 deletions src/stoeng/resources/phrase_translate/verb_tag_map.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[
# Tense
["+Prt", ["PV/ki+", "+Ind"], 1], # Preterite aka simple past
["+Cond", ["+Irr"], 1], # Future conditional
["+Imm", ["+Imp", "+Imm"], 1], # Immediate imperative
["+Del", ["+Imp", "+Del"], 1], # Delayed imperative
["+Fut", ["PV/wi+", "+Ind"], 1], # Future
# TODO: also handle ["+Fut", "PV/wi+", 1] # Also accept PV/wi without independent as future?
# Note that these crk features are disjoint, but both are needed for the eng feature
["+Def", ["PV/ka+", "+Ind"], 1],
["+Inf", ["+Irr"], 1],
["TagMap.DEFAULT", [], 3],
# Person - see https://github.com/UAlbertaALTLab/morphodict/issues/891
["+0Sg", "+3Sg", 2],
[
"+21Pl",
"+12Pl",
2
], # see https://github.com/UAlbertaALTLab/morphodict/issues/1005
["+XPl", "+X", 2],
# Person - object
["+0SgO", [], 3],
[
"+21PlO",
"+12PlO",
3
],
["+XPlO", "+XO", 3],
["+V", [], 3],
["+TA", ["+Dec"], 3],
["+AI", ["+Dec"], 3],
["+XO", [], 3],
[
"+PV/pimi",
["PV/pimi+"],
3
] # see https://github.com/UAlbertaALTLab/morphodict/issues/1005
# TODO: also handle "+Inf": ["PV/ta+", "+Cnj"] # future definite?
]
48 changes: 48 additions & 0 deletions src/stoeng/resources/phrase_translate/verb_wordform_to_phrase.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[
["+V", null, 0],
["+TA", null, 0],
["+AI", null, 0],
["+II", null, 0],
["+TI", null, 0],
# Tense/Aspect
["PV/ki+", "Prt+", 1], # Preterite aka simple past
[["PV/ki+", "+Ind"], "Prt+", 1], # Preterite aka simple past
[["+Fut", "+Cond"], "Cond+", 1], # Future conditional
[["+Imp", "+Imm"], "Imm+", 1], # Immediate imperative
[["+Imp", "+Del"], "Del+", 1], # Delayed imperative
[["PV/wi+", "+Ind"], "Fut+", 1], # Future
["PV/wi+", "Fut+", 1], # Also accept PV/wi without indicative as future
[["PV/e+", "+Cnj"], null, 1], # conjunctive marker
# Note that these crk features are disjoint, but both are needed for the eng feature
[["PV/ka+", "+Ind"], "Def+", 1],
[["PV/ka+", "+Cnj"], "Inf+", 1],
[["PV/ta+", "+Cnj"], "Inf+", 1], # future definite
["+Ind", "Prs+", 1],
["TagMap.DEFAULT", "Prs+", 1], # default to present tense
["+Dec", "Prs+", 1],
# ["+Irr", null, 1], # Replace null with the corresponding phrase translation tag
# Person - Subject
["+1Sg", "TagMap.COPY_TAG_NAME", 2],
["+2Sg", "TagMap.COPY_TAG_NAME", 2],
["+3Sg", "TagMap.COPY_TAG_NAME", 2],
["+1Pl", "TagMap.COPY_TAG_NAME", 2],
["+12Pl", "21Pl+", 2],
["+2Pl", "TagMap.COPY_TAG_NAME", 2],
["+3Pl", "TagMap.COPY_TAG_NAME", 2],
["+4Sg/Pl", "TagMap.COPY_TAG_NAME", 2],
["+5Sg/Pl", "TagMap.COPY_TAG_NAME", 2],
["+X", "XPl+", 2],
# Person - Object
["+1SgO", "TagMap.COPY_TAG_NAME", 3],
["+2SgO", "TagMap.COPY_TAG_NAME", 3],
["+3SgO", "TagMap.COPY_TAG_NAME", 3],
["+1PlO", "TagMap.COPY_TAG_NAME", 3],
["+12PlO", "21PlO+", 3],
["+2PlO", "TagMap.COPY_TAG_NAME", 3],
["+3PlO", "TagMap.COPY_TAG_NAME", 3],
["+4Pl", "TagMap.COPY_TAG_NAME", 3],
["+4Sg", "TagMap.COPY_TAG_NAME", 3],
["+4Sg/PlO", "TagMap.COPY_TAG_NAME", 3],
["+5Sg/PlO", "TagMap.COPY_TAG_NAME", 3],
["+XO", "XPlO+", 3]
]

0 comments on commit 85264f2

Please sign in to comment.