-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdelims.tdl
151 lines (131 loc) · 6.04 KB
/
delims.tdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
;; Delimiters (strong and weak brackets)
;; These constraints enable the propagation and discharge of strong and weak
;; delimiters, where the strong ones come from some external source for
;; (optionally labeled) bracketing, and the weak ones are introduced in
;; token mapping for hyphens as in
;; |Santa Clara-based|.
;; Add features in ORTH for left and right delimiters (RB, LB for (strong)
;; brackets), with diff-list values.
;;
;; Extend `orthog' with four delimiter features, each of type bracket_list:
;; the left/right pair LB, RB and the left/right pair LD, RD.
orthog :+
  [ LB bracket_list, RB bracket_list,
    LD bracket_list, RD bracket_list ].
;; On `root_orth', all four delimiter features are constrained to bracket_null.
root_orth :+
  [ LB bracket_null,
    RB bracket_null,
    LD bracket_null,
    RD bracket_null ].
;; DPF 2013-06-20 - We have added support for introducing bracketing
;; and syntactic dependency constraints onto tokens in preprocessing,
;; recorded in their +TRAIT feature. These constraints are propagated
;; here onto attributes of the feature structure which is visible to the
;; syntactic rules.
;;
;; The +LB (left bracket) and +RB (right bracket) attributes take as
;; value a (possibly empty) list of ctypes, elements in the hierarchy
;; of construction types defined in the file `ctypes.tdl'. The basic
;; idea is that a well-formed phrase cannot cross brackets, and each
;; construction will unify its own ctype with that required by a
;; bracket pair (where the bracket ctype can be more or less
;; construction-specific). Such bracket constraints could be supplied
;; by a preprocessor such as a named-entity recognizer or a chunk
;; parser. At present we have two types of token-mapping rules that
;; introduce brackets: (1) weak brackets to keep hyphenated expressions such
;; as |snow-covered| together, avoiding spurious analyses that would
;; associate the left and right sides of the hyphen to different
;; phrases; and (2) some placeholder token-mapping rules for
;; named entities such as |[New York] [Stock Exchange]|, where eventually
;; we would expect to get these labeled bracketed expressions from some
;; external resource.
;;
;; Propagate delimiters from tokens to ORTH, both for strong and weak brackets:
;; (1) strong brackets (LB, RB) let us constrain constructions over a phrase
;; based on external guidance via the insertion in the input string of the
;; special tokens ⌊(⌋ and ⌊)⌋ as in
;; |We admire ⌊(⌋ the cat ⌊)⌋ in the garden.|
;;
;;
;; DPF 2019-11-28 - With punct clitics, for multi-words, need to propagate
;; +LB from last token, since that is where the token-mapping stamps a nonempty
;; bracket for a following punct token.
;; DPF 2020-02-15 - Similarly need to keep +RB from first token, for left
;; punct marks as in |(such as|
;;
;; Words: identify the delimiter values on ORTH with the +TRAIT values of the
;; word's tokens.  The left-side values (+LB, +LD) are taken from the first
;; element of TOKENS.+LIST, the right-side values (+RB, +RD) from TOKENS.+LAST.
;; NOTE(review): the DPF 2019-11-28 and 2020-02-15 remarks preceding this
;; definition speak of +LB coming from the last token and +RB from the first,
;; which is the reverse of what is written here -- confirm which is intended.
basic_word :+
  [ TOKENS [ +LIST.FIRST.+TRAIT [ +LB #lbrk,
                                  +LD #ldel ],
             +LAST.+TRAIT [ +RB #rbrk,
                            +RD #rdel ] ],
    ORTH [ LB #lbrk,
           RB #rbrk,
           LD #ldel,
           RD #rdel ] ].
;; Lexical rules: identify delimiter constraints on dtr and mother
;; The mother of a lexical rule shares all four delimiter values with the
;; ORTH of its first (ARGS.FIRST) daughter.
lex_rule_supermost :+
  [ ORTH [ LB #lbrk,
           RB #rbrk,
           LD #ldel,
           RD #rdel ],
    ARGS.FIRST.ORTH [ LB #lbrk,
                      RB #rbrk,
                      LD #ldel,
                      RD #rdel ] ].
;; All phrases: the LD and RD delimiter values on ORTH are constrained to
;; bracket_null.  (This addendum previously appeared twice with identical
;; content; a single copy carries the same constraint.)
phrase :+
  [ ORTH [ LD bracket_null,
           RD bracket_null ] ].

;; Unary phrases: identify delimiter constraints on dtr and mother.
;; Mother and its single daughter share LB, RB, LD and RD.
basic_basic_unary_phrase :+
  [ ORTH [ LB #lbrk,
           RB #rbrk,
           LD #ldel,
           RD #rdel ],
    ARGS < [ ORTH [ LB #lbrk,
                    RB #rbrk,
                    LD #ldel,
                    RD #rdel ] ] > ].
;; Mother and its single daughter share LB, RB, LD and RD.
n-adj_phrase :+
  [ ORTH [ LB #lbrk,
           RB #rbrk,
           LD #ldel,
           RD #rdel ],
    ARGS < [ ORTH [ LB #lbrk,
                    RB #rbrk,
                    LD #ldel,
                    RD #rdel ] ] > ].
;; Mother and its single daughter share LB, RB, LD and RD.
basic_npadv_mod_phrase :+
  [ ORTH [ LB #lbrk,
           RB #rbrk,
           LD #ldel,
           RD #rdel ],
    ARGS < [ ORTH [ LB #lbrk,
                    RB #rbrk,
                    LD #ldel,
                    RD #rdel ] ] > ].
;; Normal headed binary phrases: block crossing of delimiter brackets, and
;; propagate from dtrs to mother.
;; Two-daughter sign in which no delimiter may sit at the boundary between the
;; daughters: the first daughter's right-side values (RB, RD) and the second
;; daughter's left-side values (LB, LD) must be bracket_null.  The mother takes
;; its left-side values from the first daughter and its right-side values
;; from the second.
no_inner_delim_phrase := sign &
  [ ORTH [ LB #outer-lb,
           RB #outer-rb,
           LD #outer-ld,
           RD #outer-rd ],
    ARGS < [ ORTH [ LB #outer-lb,
                    LD #outer-ld,
                    RB bracket_null,
                    RD bracket_null ] ],
           [ ORTH [ LB bracket_null,
                    LD bracket_null,
                    RB #outer-rb,
                    RD #outer-rd ] ] > ].
;; Phrase types that take on the no_inner_delim_phrase constraints.
basic_head_initial               :+ no_inner_delim_phrase.
binary_headed_phrase             :+ no_inner_delim_phrase.
basic_head_filler_phrase         :+ no_inner_delim_phrase.
basic_head_comp_or_marker_phrase :+ no_inner_delim_phrase.
basic_head_spec_phrase           :+ no_inner_delim_phrase.
subjh_rule_basic                 :+ no_inner_delim_phrase.
basic_head_mod_phrase_simple     :+ no_inner_delim_phrase.
;n_n_cmpnd_phr                   :+ no_inner_delim_phrase.
basic_n_n_cmpnd_phr              :+ no_inner_delim_phrase.
fw_sequence_left_phrase          :+ no_inner_delim_phrase.
fw_sequence_right_phrase         :+ no_inner_delim_phrase.
basic_run_on_rule                :+ no_inner_delim_phrase.
min_coord_phr                    :+ no_inner_delim_phrase.
np_pred_phr                      :+ no_inner_delim_phrase.
binary_frag_rule                 :+ no_inner_delim_phrase.
bridge_binary_phrase             :+ no_inner_delim_phrase.
basic_appos_phr                  :+ no_inner_delim_phrase.
binary_measure_np_phrase         :+ no_inner_delim_phrase.
punct_marker_hfinal_nobrk_rule   :+ no_inner_delim_phrase.
punct_marker_hinit_nobrk_rule    :+ no_inner_delim_phrase.
;; Special head_final binary phrases: Pull delimiters on dtrs outward to
;; left and right perimeters of mother, by appending values of dtrs.
;; DPF 2018-06-12 - Exclude internal strong brackets.
;; Two-daughter phrase whose delimiter values are the concatenation of its
;; daughters' values.  For each of LB, RB, LD and RD, the mother's LIST is the
;; first daughter's LIST, the first daughter's LAST is the second daughter's
;; LIST, and the second daughter's LAST is the mother's LAST.
perimeter_delims_phrase := phrase &
  [ ORTH [ LB [ LIST #lbfront, LAST #lbend ],
           RB [ LIST #rbfront, LAST #rbend ],
           LD [ LIST #ldfront, LAST #ldend ],
           RD [ LIST #rdfront, LAST #rdend ] ],
    ARGS < [ ORTH [ LB [ LIST #lbfront, LAST #lbjoin ],
                    RB [ LIST #rbfront, LAST #rbjoin ],
                    LD [ LIST #ldfront, LAST #ldjoin ],
                    RD [ LIST #rdfront, LAST #rdjoin ] ] ],
           [ ORTH [ LB [ LIST #lbjoin, LAST #lbend ],
                    RB [ LIST #rbjoin, LAST #rbend ],
                    LD [ LIST #ldjoin, LAST #ldend ],
                    RD [ LIST #rdjoin, LAST #rdend ] ] ] > ].
#|
basic_np_name_cmpnd_phr :+ perimeter_delims_phrase.
basic_n_v-or-adj_cmpnd_phr :+ perimeter_delims_phrase.
adj_v_cmpnd_phr :+ perimeter_delims_phrase.
binary_measure_np_phrase :+ perimeter_delims_phrase.
num_noun_sequence_phrase :+ perimeter_delims_phrase.
|#
;; Compound and number-noun sequence phrases also take on the
;; no_inner_delim_phrase constraints.
basic_basic_n_v-or-adj_cmpnd_phr :+ no_inner_delim_phrase.
adj_v_cmpnd_phr                  :+ no_inner_delim_phrase.
adv_v_cmpnd_phr                  :+ no_inner_delim_phrase.
num_noun_sequence_phrase         :+ no_inner_delim_phrase.
;; Note: Also added constraints in roots.tdl, which cannot be included here.