forked from kobotoolbox/kpi
-
Notifications
You must be signed in to change notification settings - Fork 1
/
query_parser.py
145 lines (119 loc) · 4.88 KB
/
query_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# coding: utf-8
import json
from django.db.models import Q
from .canopy_autogenerated_parser import parse as grammar_parse
"""
This is a utility for parsing a Boolean, Whoosh-like query string and
translating it into a Django Q object, which can then be used to filter a
queryset in the ORM.
Syntax examples:
* `name:term` returns any object whose `name` field exactly matches
`term` (case sensitive)
* `owner__username:meg` traverses the `owner` relationship, returning
any object whose `owner` field matches an object whose `username` field
exactly matches `meg` (case sensitive)
* `color:orange NOT (type:fruit OR type:chair)` returns anything
whose color is orange so long as it is not a fruit or a chair. AND, OR,
and NOT operators are supported. They must be in ALL CAPS.
Special notes:
* If no field is specified in the query,
`SEARCH_DEFAULT_FIELD_LOOKUP` is assumed. For example, if that
constant is `summary__icontains`, then the query `term` returns any
object whose `summary` field contains `term` (case insensitive)
* The value `null` in a query is translated to `None`, e.g. `parent:null`
effectively becomes the ORM `filter(parent=None)`
"""
# ORM field lookup that `QueryParseActions.term` falls back to when a query
# term is given without an explicit `field:` prefix.
SEARCH_DEFAULT_FIELD_LOOKUP = 'summary__icontains'
class QueryParseActions(object):
    """
    Actions for the parser to take when it encounters certain identifiers
    (see the file grammar.peg)

    Apart from `process_value`, every action takes `(text, a, b, elements)`.
    `text[a:b]` is the slice of the input covered by the match (see `word`
    and `name`). `elements` is indexed positionally by each action;
    presumably it holds the matched sub-nodes, with the results of their own
    actions substituted where one applies -- TODO confirm against the
    generated parser.
    """

    @staticmethod
    def process_value(field, value):
        """
        Convert the raw value of a query term into the value given to the ORM.

        * For `summary` (or any field/lookup starting with `summary_`), the
          value is JSON-escaped and returned without its enclosing quotes.
        * Otherwise the magic string `null` becomes `None`.
        * Anything else is returned unchanged.
        """
        # If all we're doing when we have a type mismatch with a field
        # is returning an empty set, then we don't need to do type validation.
        # Django compares between field values and string versions just fine.
        # But there's no magic string for null, so we're adding one.
        # TODO: Use Django or DRF machinery (or JSON parsing?) to handle types
        # that need special treatment, like dates

        # The `summary` text-based JSONField uses `\u` escaping, e.g. `prémier`
        # is saved in the database as `pr\u00e9mier`. Let's handle this only
        # for `summary` to make people's lives easier. If they try to search
        # another text-based JSONField, tough luck.
        # TODO: Remove after converting `summary` to a JSONBField
        if field == 'summary' or field.startswith('summary_'):
            encoded = json.dumps(value)
            # A JSON string literal is wrapped in exactly one pair of double
            # quotes, so remove only that pair. `strip('"')` would also eat
            # the quote of a trailing `\"` escape and leave a dangling
            # backslash when the value itself ends with a double quote.
            if encoded.startswith('"'):
                return encoded[1:-1]
            # Non-string encodings never start or end with a quote
            return encoded

        if value == 'null':
            return None
        return value

    @staticmethod
    def query(text, a, b, elements):
        """
        Top-level action: return the expression found at `elements[1]`, or
        an empty `Q()` (which filters nothing out) when the query is empty.
        """
        exp = elements[1]
        if hasattr(exp, 'text') and exp.text == '':
            # Handle the empty query case with an empty Q object, returning all
            return Q()
        # fallthrough
        return exp

    @staticmethod
    def orexp(text, a, b, elements):
        """
        Combine `elements[0]` with the `expr` of every repeated clause in
        `elements[1].elements` using `|`.
        """
        # fallthrough if singular
        if elements[1].text == '':
            return elements[0]
        # else, combine full sequence of ORs into flattened expression,
        # starting with the first Q object
        orgroup = elements[0]
        # Loop through the repeated clauses and OR the subexpressions.
        for clause in elements[1].elements:
            orgroup |= clause.expr
        return orgroup

    @staticmethod
    def andexp(text, a, b, elements):
        """
        Combine `elements[0]` with the `expr` of every repeated clause in
        `elements[1].elements` using `&`.
        """
        # fallthrough if singular
        if elements[1].text == '':
            return elements[0]
        # else, combine full sequence of ANDs into flattened expression,
        # starting with the first Q object
        andgroup = elements[0]
        # Loop through the repeated clauses and AND the subexpressions.
        for clause in elements[1].elements:
            andgroup &= clause.expr
        return andgroup

    @staticmethod
    def parenexp(text, a, b, elements):
        """Return the subexpression at `elements[2]` unchanged."""
        # fallthrough to subexpression
        return elements[2]

    @staticmethod
    def notexp(text, a, b, elements):
        """Return the negation (`~`) of the subexpression at `elements[2]`."""
        # negate subexpression (Q object)
        return ~elements[2]

    @classmethod
    def term(cls, text, a, b, elements):
        """
        Build a `Q` object for a single `field:value` term, or for a bare
        value, which is matched using `SEARCH_DEFAULT_FIELD_LOOKUP`.
        """
        if elements[0].text == '':
            # A search term by itself without a specified field
            field = SEARCH_DEFAULT_FIELD_LOOKUP
        else:
            # A field+colon, and a value [[field,':'],value]
            field = elements[0].elements[0]

        # Process the value with as much type-validation as necessary
        value = cls.process_value(field, elements[1])
        return Q(**{field: value})

    @staticmethod
    def word(text, a, b, elements):
        """Return the matched text verbatim."""
        return text[a:b]

    @staticmethod
    def string(text, a, b, elements):
        """
        Return the matched text without its first and last characters
        (presumably the enclosing quotes -- see grammar.peg).
        """
        return text[a + 1:b - 1]

    @staticmethod
    def name(text, a, b, elements):
        """Return the matched text verbatim."""
        return text[a:b]
def parse(query):
    """
    Parse a query string, returning a Django Q object.

    The string is run through the generated grammar parser, with
    `QueryParseActions` supplying the action for each grammar rule.
    """
    actions = QueryParseActions
    return grammar_parse(query, actions)