From 83d3c5a945b8331eceb8383f7d33495b3476919d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 25 Aug 2024 06:57:06 -0400 Subject: [PATCH] Add query with STDDEV and VAR to Clickbench extended (#12146) --- benchmarks/queries/clickbench/README.md | 15 +++++++++++++++ benchmarks/queries/clickbench/extended.sql | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md index 560b54181d5f..c6bd8fe9d6f5 100644 --- a/benchmarks/queries/clickbench/README.md +++ b/benchmarks/queries/clickbench/README.md @@ -58,6 +58,21 @@ LIMIT 10; ``` +### Q3: What is the income distribution for users in specific regions + +**Question**: "What regions and social networks have the highest variance of parameter price + +**Important Query Properties**: STDDEV and VAR aggregation functions, GROUP BY multiple small ints + +```sql +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") +FROM 'hits.parquet' +GROUP BY "SocialSourceNetworkID", "RegionID" +HAVING s IS NOT NULL +ORDER BY s DESC +LIMIT 10; +``` + ## Data Notes Here are some interesting statistics about the data used in the queries diff --git a/benchmarks/queries/clickbench/extended.sql b/benchmarks/queries/clickbench/extended.sql index 0a2999fceb49..2f814ad8450a 100644 --- a/benchmarks/queries/clickbench/extended.sql +++ b/benchmarks/queries/clickbench/extended.sql @@ -1,3 +1,4 @@ SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; -SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; \ No newline at end of file +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10;