Skip to content

Commit

Permalink
Merge pull request #2 from delph-in/olzama-dev
Browse files Browse the repository at this point in the history
Prepare for initial release with Freeling 4.0
  • Loading branch information
olzama authored Mar 6, 2023
2 parents c4d325b + 9984b2e commit ad35726
Show file tree
Hide file tree
Showing 2,549 changed files with 1,857,469 additions and 182,021 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ See the wiki: https://github.com/delph-in/docs/wiki/SrgTop

The Spanish Resource Grammar was developed originally by Montserrat Marimon and her svn repository is http://svn.emmtee.net/trunk/upf/srg .

The LICENSE is the Lesser General Public License For Linguistic Resources.
The LICENSE is the MIT License.

128 changes: 127 additions & 1 deletion ace/config.tdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,95 @@ version := "../Version.lsp".

irregular-forms := ../irregs.tab.

quickcheck-code := "../ace/ace-qc.txt".
;quickcheck-code := "../ace/ace-qc.txt".

; Quickcheck path set, defined inline as the instance `qc_unif_set` and
; selected by the `quickcheck-instance` setting that follows the instance
; section (the file-based `quickcheck-code` setting above is commented out).
; Each feature path is paired with a string index, "0" through "79".
; The number inside each #| ... |# block comment is non-increasing down the
; list -- presumably a per-path unification-failure count from the profiling
; run that generated this table (TODO confirm against the ACE documentation).
:begin :instance.

qc_unif_set := *top* &
[ ARGS.SYNSEM.LOCAL.CAT.HEAD "0" #| 1062708 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS "1" #| 762446 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD "2" #| 547322 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SUBJ "3" #| 335982 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.KEYS.KEY "4" #| 318034 |#,
ARGS.SYNSEM.LOCAL.CAT.MC "5" #| 316814 |#,
ARGS.INFLECTED "6" #| 287620 |#,
ARGS.SYNSEM.LOCAL.CONT.HOOK.INDEX "7" #| 165854 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.CLTS "8" #| 152099 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SPR "9" #| 130597 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH.LIST "10" #| 120750 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD.FIRST.LOCAL.CAT.HEAD "11" #| 119531 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH "12" #| 112842 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD.FIRST.LOCAL "13" #| 110827 |#,
ARGS.SYNSEM "14" #| 109174 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD.FIRST.LOCAL.CAT.VAL.SPR "15" #| 100989 |#,
ARGS.SYNSEM.LOCAL.AGR.PNG.PN "16" #| 92179 |#,
ARGS "17" #| 86641 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.REST.FIRST.PRED "18" #| 81556 |#,
ARGS.SYNSEM.LOCAL.COORD-STRAT "19" #| 77483 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.FIRST "20" #| 75468 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.VFORM "21" #| 75277 |#,
ARGS.SYNSEM.LOCAL.COORD "22" #| 51355 |#,
ARGS.SYNSEM.NON-LOCAL.REL "23" #| 50937 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.FIRST.PRED "24" #| 44912 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.KEYS.ALTKEY "25" #| 40093 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH.LAST "26" #| 35079 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.LOCAL "27" #| 33524 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.REST "28" #| 33108 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.TAM.MOOD "29" #| 33010 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH.LIST.FIRST.CAT.HEAD "30" #| 25870 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.INV "31" #| 25009 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.LOCAL.CAT.MC "32" #| 21633 |#,
ARGS.SYNSEM.LIGHT "33" #| 18786 |#,
ARGS.SYNSEM.LKEYS.KEYREL "34" #| 17935 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.OPT "35" #| 17913 |#,
ARGS.SYNSEM.LOCAL.CAT.POSTHEAD "36" #| 16988 |#,
ARGS.SYNSEM.PUNCT.RPUNCT "37" #| 16080 |#,
ARGS.SYNSEM.NON-LOCAL.QUE "38" #| 15197 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH.LIST.FIRST.CAT.HEAD.KEYS.KEY "39" #| 11901 |#,
ARGS.SYNSEM.LOCAL.CONT.HOOK.INDEX.SORT "40" #| 9742 |#,
ARGS.SYNSEM.LKEYS.KEYREL.PRED "41" #| 8621 |#,
ARGS.SYNSEM.LOCAL.AGR.PNG.GEN "42" #| 6062 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SUBJ.FIRST "43" #| 5783 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SUBJ.FIRST.OPT "44" #| 5574 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.REST.REST.FIRST.PRED "45" #| 5134 |#,
ARGS.SYNSEM.LKEYS.KEYREL.ARG0 "46" #| 4956 |#,
ARGS.SYNSEM.LOCAL.CONT.HOOK.INDEX.PNG.PN "47" #| 4586 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SPR.FIRST.OPT "48" #| 4384 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.CLTCZD "49" #| 3876 |#,
ARGS.ALTS.VCALT "50" #| 3858 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.REST.FIRST.ARG0.SORT "51" #| 3809 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.REST.FIRST.ARG0.PNG.PN "52" #| 2857 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.REST.FIRST.LOCAL "53" #| 2557 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.CLTS.REST "54" #| 2222 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.NON-LOCAL.SLASH "55" #| 2216 |#,
ARGS.ALTS "56" #| 2197 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.KEYS.ALT2KEY "57" #| 2068 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.NON-LOCAL.SLASH.LIST "58" #| 1988 |#,
ARGS.SYNSEM.NON-LOCAL.SLASH.LIST.FIRST.CAT.VAL.SPR "59" #| 1953 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SPR.FIRST.LOCAL.CAT.HEAD.KEYS.KEY "60" #| 1860 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SUBJ.FIRST.NON-LOCAL.SLASH.LIST "61" #| 1855 |#,
ARGS.SYNSEM.LOCAL.STR.HEADING "62" #| 1827 |#,
ARGS.SYNSEM.MODIFIED "63" #| 1721 |#,
ARGS.SYNSEM.LOCAL.AGR.DIVISIBLE "64" #| 1208 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.VOICE "65" #| 1180 |#,
ARGS.SYNSEM.LOCAL.CONT.HOOK.INDEX.E.TENSE "66" #| 1018 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD.FIRST.LOCAL.CONT.HOOK.XARG.PNG.PN "67" #| 1008 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.COMPS.FIRST.NON-LOCAL.SLASH.LAST "68" #| 978 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SPEC.FIRST.LOCAL.CAT.HEAD.KEYS.ALTKEY "69" #| 745 |#,
ARGS.ALTS.CAUS "70" #| 701 |#,
ARGS.SYNSEM.LOCAL.CONT.RELS.LIST.REST.REST.FIRST.ARG0.PNG.GEN "71" #| 518 |#,
ARGS.ALTS.IMPERS "72" #| 367 |#,
ARGS.SYNSEM.LOCAL.CONT.HOOK.INDEX.E.MOOD "73" #| 315 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL.SPR.FIRST.LOCAL.CONT.RELS.LIST.REST "74" #| 201 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.AUX "75" #| 104 |#,
ARGS.SYNSEM.LKEYS.ALTKEYREL.PRED "76" #| 76 |#,
ARGS.SYNSEM.PUNCT.LPUNCT "77" #| 29 |#,
ARGS.SYNSEM.LOCAL.CAT.VAL "78" #| 26 |#,
ARGS.SYNSEM.LOCAL.CAT.HEAD.MOD.FIRST.PUNCT.RPUNCT "79" #| 26 |# ].

:end :instance.

quickcheck-instance := qc_unif_set.

;post-model-path := "english-postagger.hmm".

Expand All @@ -32,6 +120,7 @@ lex-pred-path := SYNSEM LKEYS KEYREL PRED.
rule-rels-path := C-CONT RELS.

parsing-roots := root_vp_inf root_s root_vpnom root_np root_nbar root_pp root_ap root_rp root_c root_i.
;parsing-roots := root_s.

;generation-roots := root_strict root_frag.
;generation-roots := root_strict.
Expand All @@ -44,6 +133,43 @@ cons-type := cons.
null-type := null.
diff-list-type := diff-list.

;;;Token Mapping

;;;|| token-mapping || enabled (normal) or disabled (legacy; not well supported) ||
;;;|| token-type || type of a token feature structure ||
;;;|| lexicon-tokens-path || path to the list of tokens that license a lexeme ||
;;;|| lexicon-last-token-path || path to the LAST pointer for the diff-list-like token list ||
;;;|| token-form-path || path within a token to the surface form string ||
;;;|| token-from-path || path within a token to the CFROM (character start position) field ||
;;;|| token-to-path || path within a token to the CTO (character end position) field ||
;;;|| token-id-path || path within a token to the ID ||
;;;|| token-postags-path || path within a token to the POS tag list ||
;;;|| token-posprobs-path || path within a token to the POS tag probability list ||
;; token settings
token-mapping := enabled.

lexicon-tokens-path := TOKENS +LIST.
lexicon-last-token-path := TOKENS +LAST.
token-type := token.
token-form-path := +FORM. ; [required] string for lexical lookup
token-id-path := +ID. ; [optional] list of external ids
token-from-path := +FROM. ; [optional] surface start position
token-to-path := +TO. ; [optional] surface end position
token-postags-path := +POS +TAGS. ; [optional] list of POS tags
token-posprobs-path := +POS +PRBS. ; [optional] list of POS probabilities


;;;|| lattice-mapping-input-path || path within a lattice mapping rule to the input list ||
;;;|| lattice-mapping-output-path || path within a lattice mapping rule to the output list ||
;;;|| lattice-mapping-context-path || path within a lattice mapping rule to the context list ||
;;;|| lattice-mapping-position-path || path within a lattice mapping rule to the positional constraints ||
;; lattice mapping settings
lattice-mapping-input-path := +INPUT.
lattice-mapping-output-path := +OUTPUT.
lattice-mapping-context-path := +CONTEXT.
lattice-mapping-position-path := +POSITION.


deleted-daughters :=
ARGS HEAD-DTR NON-HEAD-DTR.

Expand Down
3 changes: 3 additions & 0 deletions debug-yy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@



Empty file added debug.txt
Empty file.
32 changes: 32 additions & 0 deletions freeling/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

=== SPPP FreeLing interface for the SRG ===

This directory contains the configuration and data files for the SPPP interface
for the LKB Spanish Resource Grammar, which allows it to use the low-level
language processors in the FreeLing library.

Contents of this directory:

sppp.dat: File with rules that control the interface behaviour, in order
to make it meet the needs of your LKB grammar.

You can:
- Specify how FreeLing output (form, lemma, PoS) is mapped
to SPPP fields (stem, rule_id, form)
- List forms for which all analyses are to be retained (i.e.
ignoring PoS tagger decisions)
- List forms for which FreeLing output is to be
changed by another user-specified list of tags.
- List tag combinations that are to be changed to a single tag

See the comments in sppp.dat to find out more about
its possibilities.

logon.cfg: Configuration file for FreeLing. You can configure the
kind of output (morphological analysis, PoS disambiguation...),
the linguistic resources used (e.g. form dictionary, multiword
lists, enabled/disabled modules, etc.).
See FreeLing documentation for details on the available options.

data: Subdirectory containing linguistic data (different than FreeLing defaults)
which are used in sppp.dat.
Loading

0 comments on commit ad35726

Please sign in to comment.