-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
All the phrase translation tags (so far)
- Loading branch information
Showing
7 changed files
with
185 additions
and
0 deletions.
There are no files selected for viewing
31 changes: 31 additions & 0 deletions
31
src/stoeng/resources/phrase_translate/noun_passthrough_tags.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"0": [ | ||
# word class | ||
"+N", | ||
"+A", | ||
"+I", | ||
"+D" | ||
], | ||
"2": [ | ||
# N: Possessives | ||
"+Px1Sg", | ||
"+Px2Sg", | ||
"+Px3Sg", | ||
"+Px1Pl", | ||
"+Px2Pl", | ||
# "+Px12Pl", # Needs to be recoded: 21 -> 12 | ||
"+Px3Pl", | ||
"+Px4Sg/Pl", | ||
"+PxX", | ||
"+PxXPl" | ||
], | ||
"3": [ | ||
# N: number | ||
"+Sg", | ||
"+Pl", | ||
"+Obv", | ||
"+Loc", | ||
"+Distr" | ||
] | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[ | ||
["+Dim", "+Der/Dim", 2], | ||
["+Px21Pl", "+Px12Pl", 2], | ||
["+PxXPl", "+PxX", 2] | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
["+N", "+A", "+I", "+D"] |
25 changes: 25 additions & 0 deletions
25
src/stoeng/resources/phrase_translate/noun_wordform_to_phrase.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[ | ||
["+N", null, 0], | ||
["+A", null, 0], | ||
["+I", null, 0], | ||
["+D", null, 0], | ||
# Number | ||
["+Sg", "TagMap.COPY_TAG_NAME", 1], | ||
["+Pl", "TagMap.COPY_TAG_NAME", 1], | ||
["+Obv", "TagMap.COPY_TAG_NAME", 1], | ||
["+Loc", "TagMap.COPY_TAG_NAME", 1], | ||
["+Distr", "TagMap.COPY_TAG_NAME", 1], | ||
# Diminutive | ||
["+Dim", "TagMap.COPY_TAG_NAME", 2], | ||
["+Der/Dim", "Dim+", 2], | ||
# Possessives | ||
["+Px1Sg", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px2Sg", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px3Sg", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px1Pl", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px2Pl", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px12Pl", "TagMap.COPY_TAG_NAME", 3], # Maybe needs to be recoded with 12 -> 21 | ||
["+Px3Pl", "TagMap.COPY_TAG_NAME", 3], | ||
["+Px4Sg/Pl", "TagMap.COPY_TAG_NAME", 3], | ||
["+PxX", "PxXPl+", 3] | ||
] |
36 changes: 36 additions & 0 deletions
36
src/stoeng/resources/phrase_translate/verb_passthrough_tags.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"0": [ | ||
# word class | ||
"+II", | ||
"+TI" | ||
], | ||
"2": [ | ||
# V: Person - subject | ||
"+1Sg", | ||
"+2Sg", | ||
"+3Sg", | ||
"+1Pl", | ||
# "+12Pl", # Needs to be recoded: 21 -> 12 | ||
"+2Pl", | ||
"+3Pl", | ||
"+4Sg/Pl", | ||
"+5Sg/Pl", | ||
"+X", | ||
"+XPl" | ||
], | ||
"3": [ | ||
# V: Person - object | ||
"+1SgO", | ||
"+2SgO", | ||
"+3SgO", | ||
"+1PlO", | ||
# "+21PlO", # Needs to be recoded: 21 -> 12 | ||
"+2PlO", | ||
"+3PlO", | ||
"+4Pl", | ||
"+4Sg", | ||
"+4Sg/PlO", | ||
"+5Sg/PlO", | ||
"+XPlO" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
[ | ||
# Tense | ||
["+Prt", ["PV/ki+", "+Ind"], 1], # Preterite aka simple past | ||
["+Cond", ["+Irr"], 1], # Future conditional | ||
["+Imm", ["+Imp", "+Imm"], 1], # Immediate imperative | ||
["+Del", ["+Imp", "+Del"], 1], # Delayed imperative | ||
["+Fut", ["PV/wi+", "+Ind"], 1], # Future | ||
# TODO: also handle ["+Fut", "PV/wi+", 1] # Also accept PV/wi without independent as future? | ||
# Note that these crk features are disjoint, but both are needed for the eng feature | ||
["+Def", ["PV/ka+", "+Ind"], 1], | ||
["+Inf", ["+Irr"], 1], | ||
["TagMap.DEFAULT", [], 3], | ||
# Person - see https://github.com/UAlbertaALTLab/morphodict/issues/891 | ||
["+0Sg", "+3Sg", 2], | ||
[ | ||
"+21Pl", | ||
"+12Pl", | ||
2 | ||
], # see https://github.com/UAlbertaALTLab/morphodict/issues/1005 | ||
["+XPl", "+X", 2], | ||
# Person - object | ||
["+0SgO", [], 3], | ||
[ | ||
"+21PlO", | ||
"+12PlO", | ||
3 | ||
], | ||
["+XPlO", "+XO", 3], | ||
["+V", [], 3], | ||
["+TA", ["+Dec"], 3], | ||
["+AI", ["+Dec"], 3], | ||
["+XO", [], 3], | ||
[ | ||
"+PV/pimi", | ||
["PV/pimi+"], | ||
3 | ||
] # see https://github.com/UAlbertaALTLab/morphodict/issues/1005 | ||
# TODO: also handle "+Inf": ["PV/ta+", "+Cnj"] # future definite? | ||
] |
48 changes: 48 additions & 0 deletions
48
src/stoeng/resources/phrase_translate/verb_wordform_to_phrase.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
[ | ||
["+V", null, 0], | ||
["+TA", null, 0], | ||
["+AI", null, 0], | ||
["+II", null, 0], | ||
["+TI", null, 0], | ||
# Tense/Aspect | ||
["PV/ki+", "Prt+", 1], # Preterite aka simple past | ||
[["PV/ki+", "+Ind"], "Prt+", 1], # Preterite aka simple past | ||
[["+Fut", "+Cond"], "Cond+", 1], # Future conditional | ||
[["+Imp", "+Imm"], "Imm+", 1], # Immediate imperative | ||
[["+Imp", "+Del"], "Del+", 1], # Delayed imperative | ||
[["PV/wi+", "+Ind"], "Fut+", 1], # Future | ||
["PV/wi+", "Fut+", 1], # Also accept PV/wi without indicative as future | ||
[["PV/e+", "+Cnj"], null, 1], # conjunctive marker | ||
# Note that these crk features are disjoint, but both are needed for the eng feature | ||
[["PV/ka+", "+Ind"], "Def+", 1], | ||
[["PV/ka+", "+Cnj"], "Inf+", 1], | ||
[["PV/ta+", "+Cnj"], "Inf+", 1], # future definite | ||
["+Ind", "Prs+", 1], | ||
["TagMap.DEFAULT", "Prs+", 1], # default to present tense | ||
["+Dec", "Prs+", 1], | ||
# ["+Irr", null, 1], # Replace null with the corresponding phrase translation tag | ||
# Person - Subject | ||
["+1Sg", "TagMap.COPY_TAG_NAME", 2], | ||
["+2Sg", "TagMap.COPY_TAG_NAME", 2], | ||
["+3Sg", "TagMap.COPY_TAG_NAME", 2], | ||
["+1Pl", "TagMap.COPY_TAG_NAME", 2], | ||
["+12Pl", "21Pl+", 2], | ||
["+2Pl", "TagMap.COPY_TAG_NAME", 2], | ||
["+3Pl", "TagMap.COPY_TAG_NAME", 2], | ||
["+4Sg/Pl", "TagMap.COPY_TAG_NAME", 2], | ||
["+5Sg/Pl", "TagMap.COPY_TAG_NAME", 2], | ||
["+X", "XPl+", 2], | ||
# Person - Object | ||
["+1SgO", "TagMap.COPY_TAG_NAME", 3], | ||
["+2SgO", "TagMap.COPY_TAG_NAME", 3], | ||
["+3SgO", "TagMap.COPY_TAG_NAME", 3], | ||
["+1PlO", "TagMap.COPY_TAG_NAME", 3], | ||
["+12PlO", "21PlO+", 3], | ||
["+2PlO", "TagMap.COPY_TAG_NAME", 3], | ||
["+3PlO", "TagMap.COPY_TAG_NAME", 3], | ||
["+4Pl", "TagMap.COPY_TAG_NAME", 3], | ||
["+4Sg", "TagMap.COPY_TAG_NAME", 3], | ||
["+4Sg/PlO", "TagMap.COPY_TAG_NAME", 3], | ||
["+5Sg/PlO", "TagMap.COPY_TAG_NAME", 3], | ||
["+XO", "XPlO+", 3] | ||
] |