Skip to content

Commit

Permalink
support alias, cast and math funtions
Browse files Browse the repository at this point in the history
  • Loading branch information
jychen7 committed Feb 2, 2022
1 parent 120fa78 commit 08a0be0
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 37 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ us-west2#3698#2021-03-05-1204 96021 9.6 63 624
we are able to calculate average pressure of the period by

```
import bigtableql
from bigtableql.client import Client
# config follows offical python bigtable client
client = bigtableql.Client(config)
client = Client(config)
client.register_table(
"weather_balloons",
Expand Down Expand Up @@ -107,9 +107,9 @@ However, as of 2022-01, it
- ✅ LIMIT
- ✅ Aggregate (e.g. avg, sum, count)
- ✅ AND
- [ ] Alias
- [ ] Cast
- [ ] Common Math Functions
- Alias
- Cast
- Common Math Functions
- [ ] Common Date/Time Functions
- [ ] OR ???
- [ ] Join ???
Expand All @@ -119,7 +119,7 @@ However, as of 2022-01, it
- ✅ Partition Pruning
- ✅ Projection pushdown
- [ ] Predicate push down (only [Value range](https://cloud.google.com/bigtable/docs/using-filters#value-range) is possible)
- [ ] Limit Pushdown
- [ ] Limit Pushdown ???

## Limitation

Expand Down
48 changes: 28 additions & 20 deletions bigtableql/select/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,15 @@ def _parse_projection(select_projection: list) -> set:
if SELECT_STAR in select_projection:
return {SELECT_STAR}

projection = [
[
_parse_identifier_key(arg["Unnamed"])
for arg in expr["UnnamedExpr"][FUNCTION_OPERATION]["args"]
]
if FUNCTION_OPERATION in expr["UnnamedExpr"]
else _parse_identifier_key(expr["UnnamedExpr"])
for expr in select_projection
]
return functools.reduce(
lambda sum, x: sum | set(x) if isinstance(x, list) else sum | set([x]),
projection,
set(),
)
projection = []
for expr in select_projection:
if "ExprWithAlias" in expr:
projection.append(_parse_identifier_key(expr["ExprWithAlias"]["expr"]))
continue

projection.append(_parse_identifier_key(expr["UnnamedExpr"]))

return set(projection)


def _parse_selection(select_selection) -> set:
Expand All @@ -79,17 +74,30 @@ def _parse_selection(select_selection) -> set:
return {identifier}


def _parse_identifier_key(left):
return left["Identifier"]["value"]
def _parse_identifier_key(expr):
if FUNCTION_OPERATION in expr:
expr = expr[FUNCTION_OPERATION]["args"][0]["Unnamed"]

if "Cast" in expr:
expr = expr["Cast"]["expr"]

if "Identifier" in expr:
return expr["Identifier"]["value"]

if BINARY_OPERATION in expr:
return _parse_identifier_key(
expr[BINARY_OPERATION]["left"]
) or _parse_identifier_key(expr[BINARY_OPERATION]["right"])
return None


def _parse_identifier_value(identifier, right):
if "Identifier" in right:
def _parse_identifier_value(identifier, expr):
if "Identifier" in expr:
# {'Identifier': {'value': 'a', 'quote_style': '"'}}
return right["Identifier"]["value"]
return expr["Identifier"]["value"]

# {'Value': {'SingleQuotedString': '20220116'}}
value = right.get("Value", {}).get("SingleQuotedString")
value = expr.get("Value", {}).get("SingleQuotedString")
if not value:
raise Exception(f"selection ({identifier}): only support string value")
return value
Expand Down
25 changes: 14 additions & 11 deletions e2e_tests/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,32 @@ def test_read_after_write():
},
)

# write
responses = client.query(
"measurements",
"""
INSERT INTO weather_balloons
("_row_key", "pressure", "temperature")
values
("us-west2#3698#2021-03-05-1200", 94558, "9.6"),
("us-west2#3698#2021-03-05-1201", 94122, "9.7")
("us-west2#3698#2021-03-05-1200", 94558, "76"),
("us-west2#3698#2021-03-05-1201", 94122, "78")
""",
)
assert responses[0][0]
assert responses[1][0]

# read via row_key
record_batchs = client.query(
"measurements",
"""
SELECT avg(pressure) FROM weather_balloons
SELECT avg(pressure) as avg_pressure FROM weather_balloons
WHERE
"_row_key" BETWEEN 'us-west2#3698#2021-03-05-1200' AND 'us-west2#3698#2021-03-05-1204'
""",
)
assert record_batchs[0].to_pydict() == {"AVG(weather_balloons.pressure)": [94340.0]}
assert record_batchs[0].to_pydict() == {"avg_pressure": [94340.0]}

# read via composite key
client.register_table(
table_name,
instance_id=instance_id,
Expand All @@ -84,11 +87,11 @@ def test_read_after_write():
record_batchs = client.query(
"measurements",
"""
SELECT avg(pressure) FROM weather_balloons
WHERE
location = 'us-west2'
AND balloon_id = '3698'
AND event_minute BETWEEN '2021-03-05-1200' AND '2021-03-05-1204'
""",
SELECT avg(cast(temperature as int)) as avg_temperature FROM weather_balloons
WHERE
location = 'us-west2'
AND balloon_id = '3698'
AND event_minute BETWEEN '2021-03-05-1200' AND '2021-03-05-1204'
""",
)
assert record_batchs[0].to_pydict() == {"AVG(weather_balloons.pressure)": [94340.0]}
assert record_batchs[0].to_pydict() == {"avg_temperature": [77]}
65 changes: 65 additions & 0 deletions tests/select/sql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,68 @@
- device_id: !!python/tuple ["3698", "3700"]
event_minute: ["2021-03-05-1200"]
compose_error: "row_key partition: prefix device_id only support '=' and 'IN'"
# functions
- sql: select avg(age) from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select age as year from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select avg(age) as avg_age from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select age + 1 from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select 1 + age from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select age + 1 as age_plus from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select cast(age as int) from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]
- sql: select avg(cast(age as int)) as avg_age from users where "_row_key" between 'abc' and 'def'
parse_success:
- "users"
- ["age"]
- ["_row_key"]
- {"_row_key": !!python/tuple ["abc", "def"]}
compose_success:
- !!python/tuple ["abc", "def"]

0 comments on commit 08a0be0

Please sign in to comment.