From ea5c53f867d5cffaeea3660d5071183fd49859d2 Mon Sep 17 00:00:00 2001 From: Thomas van der Kleij Date: Wed, 24 Aug 2016 14:08:06 +0200 Subject: [PATCH 1/5] Add Timeseries type aggregations --- README.md | 3 +- src/Druid/Query/AbstractQuery.php | 3 +- .../Aggregation/AbstractAggrationQuery.php | 198 ++++++++++++++++++ src/Druid/Query/Aggregation/GroupBy.php | 164 +-------------- src/Druid/Query/Aggregation/Timerseries.php | 68 ++++++ .../Query/Component/Descending/Descending.php | 61 ++++++ .../Query/Component/DescendingInterface.php | 41 ++++ .../AbstractAggregationQueryBuilder.php | 105 ++++++++++ .../QueryBuilder/GroupByQueryBuilder.php | 70 +------ .../QueryBuilder/TimeseriesQueryBuilder.php | 75 +++++++ .../TimeseriesQueryBuilderTest.php | 67 ++++++ 11 files changed, 620 insertions(+), 235 deletions(-) create mode 100644 src/Druid/Query/Aggregation/AbstractAggrationQuery.php create mode 100644 src/Druid/Query/Aggregation/Timerseries.php create mode 100644 src/Druid/Query/Component/Descending/Descending.php create mode 100644 src/Druid/Query/Component/DescendingInterface.php create mode 100644 src/Druid/QueryBuilder/AbstractAggregationQueryBuilder.php create mode 100644 src/Druid/QueryBuilder/TimeseriesQueryBuilder.php create mode 100644 tests/Druid/Tests/QueryBuilder/TimeseriesQueryBuilderTest.php diff --git a/README.md b/README.md index 079ccb6..dba2bf4 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Put the following into your composer.json ## Current State -Currently this driver supports only **GroupBy** aggregation type which is tested on out production environment. +Currently this driver supports **GroupBy** and **Timeseries** aggregation types. Everybody is welcome to create pull requests to implement some of the missing things. Also, some unit tests are bound to running on our internal Druid instance, there is plan to change it to docker container @@ -93,7 +93,6 @@ PHP Mess Detector [PHPMD](http://phpmd.org/) 1. **Query types** * Aggregation queries * TopN - * Timeseries * Metadata Queries * Time Boundary * Segment Metadata diff --git a/src/Druid/Query/AbstractQuery.php b/src/Druid/Query/AbstractQuery.php index cb4f7bd..2e20f69 100644 --- a/src/Druid/Query/AbstractQuery.php +++ b/src/Druid/Query/AbstractQuery.php @@ -32,9 +32,10 @@ /** * Class AbstractQuery. */ -abstract class AbstractQuery +abstract class AbstractQuery implements QueryInterface { const TYPE_GROUP_BY = 'groupBy'; + const TYPE_TIMESERIES = 'timeseries'; /** * @var string diff --git a/src/Druid/Query/Aggregation/AbstractAggrationQuery.php b/src/Druid/Query/Aggregation/AbstractAggrationQuery.php new file mode 100644 index 0000000..f2de921 --- /dev/null +++ b/src/Druid/Query/Aggregation/AbstractAggrationQuery.php @@ -0,0 +1,198 @@ +") + */ + private $intervals; + + /** + * @return DataSourceInterface + */ + public function getDataSource() + { + return $this->dataSource; + } + + /** + * @param DataSourceInterface $dataSource + * + * @return GroupBy + */ + public function setDataSource(DataSourceInterface $dataSource) + { + $this->dataSource = $dataSource; + + return $this; + } + + /** + * @return GranularityInterface + */ + public function getGranularity() + { + return $this->granularity; + } + + /** + * @param GranularityInterface $granularity + * + * @return GroupBy + */ + public function setGranularity(GranularityInterface $granularity) + { + $this->granularity = $granularity; + + return $this; + } + + /** + * @return array|\Druid\Query\Component\AggregatorInterface[] + */ + public function getAggregations() + { + return $this->aggregations; + } + + /** + * @param array|\Druid\Query\Component\AggregatorInterface[] $aggregations + * + * @return GroupBy + */ + public function setAggregations(array $aggregations) + { + $this->aggregations = $aggregations; + + return $this; + } + + /** + * @return array|\Druid\Query\Component\PostAggregatorInterface[] + */ + public function getPostAggregations() + { + return $this->postAggregations; + } + + /** + * @param array|\Druid\Query\Component\PostAggregatorInterface[] $postAggregations + * + * @return GroupBy + */ + public function setPostAggregations(array $postAggregations) + { + $this->postAggregations = $postAggregations; + + return $this; + } + + /** + * @return array|\Druid\Query\Component\IntervalInterface[] + */ + public function getIntervals() + { + return $this->intervals; + } + + /** + * @param array|\Druid\Query\Component\IntervalInterface[] $intervals + * + * @return GroupBy + */ + public function setIntervals(array $intervals) + { + $this->intervals = $intervals; + + return $this; + } + + /** + * @return FilterInterface + */ + public function getFilter() + { + return $this->filter; + } + + /** + * @param FilterInterface $filter + * + * @return GroupBy + */ + public function setFilter(FilterInterface $filter) + { + $this->filter = $filter; + + return $this; + } +} diff --git a/src/Druid/Query/Aggregation/GroupBy.php b/src/Druid/Query/Aggregation/GroupBy.php index 2f47174..217fc9c 100644 --- a/src/Druid/Query/Aggregation/GroupBy.php +++ b/src/Druid/Query/Aggregation/GroupBy.php @@ -29,31 +29,15 @@ namespace Druid\Query\Aggregation; -use Druid\Query\AbstractQuery; -use Druid\Query\Component\AggregatorInterface; -use Druid\Query\Component\DataSourceInterface; use Druid\Query\Component\DimensionSpecInterface; -use Druid\Query\Component\FilterInterface; -use Druid\Query\Component\GranularityInterface; use Druid\Query\Component\HavingInterface; -use Druid\Query\Component\IntervalInterface; use Druid\Query\Component\LimitSpecInterface; -use Druid\Query\Component\PostAggregatorInterface; -use Druid\Query\QueryInterface; -use JMS\Serializer\Annotation as Serializer; /** * Class GroupBy. - * - * @SuppressWarnings(PHPMD.CouplingBetweenObjects) */ -class GroupBy extends AbstractQuery implements QueryInterface +class GroupBy extends AbstractAggregationQuery { - /** - * @var DataSourceInterface - */ - private $dataSource; - /** * @var array|DimensionSpecInterface[] */ @@ -64,62 +48,16 @@ class GroupBy extends AbstractQuery implements QueryInterface */ private $limitSpec; - /** - * @var GranularityInterface - */ - private $granularity; - - /** - * @var array|AggregatorInterface[] - */ - private $aggregations; - - /** - * @var array|PostAggregatorInterface[] - */ - private $postAggregations; - - /** - * @var FilterInterface - */ - private $filter; - /** * @var HavingInterface */ private $having; - /** - * @var array|IntervalInterface[] - * @Serializer\Type("array") - */ - private $intervals; - public function __construct() { parent::__construct(self::TYPE_GROUP_BY); } - /** - * @return DataSourceInterface - */ - public function getDataSource() - { - return $this->dataSource; - } - - /** - * @param DataSourceInterface $dataSource - * - * @return GroupBy - */ - public function setDataSource(DataSourceInterface $dataSource) - { - $this->dataSource = $dataSource; - - return $this; - } - /** * @return array|\Druid\Query\Component\DimensionSpecInterface[] */ @@ -160,106 +98,6 @@ public function setLimitSpec(LimitSpecInterface $limitSpec) return $this; } - /** - * @return GranularityInterface - */ - public function getGranularity() - { - return $this->granularity; - } - - /** - * @param GranularityInterface $granularity - * - * @return GroupBy - */ - public function setGranularity(GranularityInterface $granularity) - { - $this->granularity = $granularity; - - return $this; - } - - /** - * @return array|\Druid\Query\Component\AggregatorInterface[] - */ - public function getAggregations() - { - return $this->aggregations; - } - - /** - * @param array|\Druid\Query\Component\AggregatorInterface[] $aggregations - * - * @return GroupBy - */ - public function setAggregations(array $aggregations) - { - $this->aggregations = $aggregations; - - return $this; - } - - /** - * @return array|\Druid\Query\Component\PostAggregatorInterface[] - */ - public function getPostAggregations() - { - return $this->postAggregations; - } - - /** - * @param array|\Druid\Query\Component\PostAggregatorInterface[] $postAggregations - * - * @return GroupBy - */ - public function setPostAggregations(array $postAggregations) - { - $this->postAggregations = $postAggregations; - - return $this; - } - - /** - * @return array|\Druid\Query\Component\IntervalInterface[] - */ - public function getIntervals() - { - return $this->intervals; - } - - /** - * @param array|\Druid\Query\Component\IntervalInterface[] $intervals - * - * @return GroupBy - */ - public function setIntervals(array $intervals) - { - $this->intervals = $intervals; - - return $this; - } - - /** - * @return FilterInterface - */ - public function getFilter() - { - return $this->filter; - } - - /** - * @param FilterInterface $filter - * - * @return GroupBy - */ - public function setFilter(FilterInterface $filter) - { - $this->filter = $filter; - - return $this; - } - /** * @return HavingInterface */ diff --git a/src/Druid/Query/Aggregation/Timerseries.php b/src/Druid/Query/Aggregation/Timerseries.php new file mode 100644 index 0000000..ff822d3 --- /dev/null +++ b/src/Druid/Query/Aggregation/Timerseries.php @@ -0,0 +1,68 @@ +descending; + } + + /** + * @param DescendingInterface + * + * @return Timeseries + */ + public function setDescending(DescendingInterface $descending) + { + $this->descending = $descending; + + return $this; + } +} diff --git a/src/Druid/Query/Component/Descending/Descending.php b/src/Druid/Query/Component/Descending/Descending.php new file mode 100644 index 0000000..751e384 --- /dev/null +++ b/src/Druid/Query/Component/Descending/Descending.php @@ -0,0 +1,61 @@ +descending = $descending; + } + + /** + * @return bool + */ + public function getDescending() + { + return $this->descending; + } +} diff --git a/src/Druid/Query/Component/DescendingInterface.php b/src/Druid/Query/Component/DescendingInterface.php new file mode 100644 index 0000000..1ff4a02 --- /dev/null +++ b/src/Druid/Query/Component/DescendingInterface.php @@ -0,0 +1,41 @@ +addComponent('dataSource', new TableDataSource($dataSource)); + } + + /** + * @param GranularityInterface $granularity + * + * @return $this + */ + public function setGranularity(GranularityInterface $granularity) + { + return $this->addComponent('granularity', $granularity); + } + + /** + * @param \DateTime $start + * @param \DateTime $end + * @param bool $useZuluTime + * + * @return $this + */ + public function addInterval(\DateTime $start, \DateTime $end, $useZuluTime = false) + { + return $this->addComponent('intervals', new Interval($start, $end, $useZuluTime)); + } + + /** + * @param FilterInterface $filter + * + * @return $this + */ + public function setFilter(FilterInterface $filter) + { + return $this->addComponent('filter', $filter); + } + + /** + * @param AggregatorInterface $aggregator + * + * @return $this + */ + public function addAggregator(AggregatorInterface $aggregator) + { + return $this->addComponent('aggregations', $aggregator); + } + + /** + * @param PostAggregatorInterface $postAggregator + * + * @return $this + */ + public function addPostAggregator(PostAggregatorInterface $postAggregator) + { + return $this->addComponent('postAggregations', $postAggregator); + } +} diff --git a/src/Druid/QueryBuilder/GroupByQueryBuilder.php b/src/Druid/QueryBuilder/GroupByQueryBuilder.php index bbfe88c..eff009a 100644 --- a/src/Druid/QueryBuilder/GroupByQueryBuilder.php +++ b/src/Druid/QueryBuilder/GroupByQueryBuilder.php @@ -30,19 +30,13 @@ namespace Druid\QueryBuilder; use Druid\Query\Aggregation\GroupBy; -use Druid\Query\Component\AggregatorInterface; -use Druid\Query\Component\DataSource\TableDataSource; use Druid\Query\Component\DimensionSpec\DefaultDimensionSpec; -use Druid\Query\Component\FilterInterface; -use Druid\Query\Component\GranularityInterface; use Druid\Query\Component\HavingInterface; -use Druid\Query\Component\Interval\Interval; -use Druid\Query\Component\PostAggregatorInterface; /** * Class GroupByQueryBuilder. */ -class GroupByQueryBuilder extends AbstractQueryBuilder +class GroupByQueryBuilder extends AbstractAggregationQueryBuilder { protected $components = [ 'dataSource' => null, @@ -56,48 +50,6 @@ class GroupByQueryBuilder extends AbstractQueryBuilder 'intervals' => [], ]; - /** - * @param string $dataSource - * - * @return $this - */ - public function setDataSource($dataSource) - { - return $this->addComponent('dataSource', new TableDataSource($dataSource)); - } - - /** - * @param GranularityInterface $granularity - * - * @return $this - */ - public function setGranularity(GranularityInterface $granularity) - { - return $this->addComponent('granularity', $granularity); - } - - /** - * @param \DateTime $start - * @param \DateTime $end - * @param bool $useZuluTime - * - * @return $this - */ - public function addInterval(\DateTime $start, \DateTime $end, $useZuluTime = false) - { - return $this->addComponent('intervals', new Interval($start, $end, $useZuluTime)); - } - - /** - * @param AggregatorInterface $aggregator - * - * @return $this - */ - public function addAggregator(AggregatorInterface $aggregator) - { - return $this->addComponent('aggregations', $aggregator); - } - /** * @param string $dimension * @param string $outputName @@ -109,26 +61,6 @@ public function addDimension($dimension, $outputName) return $this->addComponent('dimensions', new DefaultDimensionSpec($dimension, $outputName)); } - /** - * @param PostAggregatorInterface $postAggregator - * - * @return $this - */ - public function addPostAggregator(PostAggregatorInterface $postAggregator) - { - return $this->addComponent('postAggregations', $postAggregator); - } - - /** - * @param FilterInterface $filter - * - * @return $this - */ - public function setFilter(FilterInterface $filter) - { - return $this->addComponent('filter', $filter); - } - /** * @param HavingInterface $having * diff --git a/src/Druid/QueryBuilder/TimeseriesQueryBuilder.php b/src/Druid/QueryBuilder/TimeseriesQueryBuilder.php new file mode 100644 index 0000000..a78dfb8 --- /dev/null +++ b/src/Druid/QueryBuilder/TimeseriesQueryBuilder.php @@ -0,0 +1,75 @@ + null, + 'granularity' => null, + 'descending' => null, + 'filter' => null, + 'aggregations' => [], + 'postAggregations' => [], + 'intervals' => [], + ]; + + /** + * @param bool $descending + * + * @return $this + */ + public function setDescending($descending) + { + return $this->addComponent('descending', new Descending($descending)); + } + + /** + * @return Timeseries + */ + public function getQuery() + { + $query = new Timeseries(); + foreach ($this->components as $componentName => $component) { + if (!empty($component)) { + $method = 'set'.ucfirst($componentName); + $query->$method($component); + } + } + + return $query; + } +} diff --git a/tests/Druid/Tests/QueryBuilder/TimeseriesQueryBuilderTest.php b/tests/Druid/Tests/QueryBuilder/TimeseriesQueryBuilderTest.php new file mode 100644 index 0000000..fd653bb --- /dev/null +++ b/tests/Druid/Tests/QueryBuilder/TimeseriesQueryBuilderTest.php @@ -0,0 +1,67 @@ +setDataSource('dataSource') + ->setGranularity(new PeriodGranularity('P1D', 'UTC')) + ->setFilter($builder->filter()->selectorFilter('gender', 'male')) + ->addInterval($now, new \DateTime()) + ->addAggregator($builder->aggregator()->doubleSum('sum', 'sum')) + ->addAggregator($builder->aggregator()->count('count')) + ->addPostAggregator($builder->postAggregator()->arithmeticPostAggregator('average', '/', [ + $builder->postAggregator()->fieldAccessPostAggregator('sum', 'sum'), + $builder->postAggregator()->fieldAccessPostAggregator('count', 'count'), + ])) + ->setDescending(true) + ; + + $query = $builder->getQuery(); + + $this->assertEquals('dataSource', $query->getDataSource()->getName()); + $this->assertEquals('gender', $query->getFilter()->getDimension()); + $this->assertEquals('male', $query->getFilter()->getValue()); + $this->assertEquals($now->format('Y-m-d\TH:i:s+0000'), $query->getIntervals()[0]->getStart()); + $this->assertEquals('sum', $query->getAggregations()[0]->getName()); + $this->assertEquals('count', $query->getAggregations()[1]->getName()); + $this->assertEquals('average', $query->getPostAggregations()[0]->getName()); + $this->assertEquals(true, $query->getDescending()->getDescending()); + } +} From e9481e8925d77204c4fe4a2818e1f9818e3c0481 Mon Sep 17 00:00:00 2001 From: Thomas van der Kleij Date: Wed, 24 Aug 2016 15:31:43 +0200 Subject: [PATCH 2/5] Fix filename typos --- .../{AbstractAggrationQuery.php => AbstractAggregationQuery.php} | 0 src/Druid/Query/Aggregation/{Timerseries.php => Timeseries.php} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/Druid/Query/Aggregation/{AbstractAggrationQuery.php => AbstractAggregationQuery.php} (100%) rename src/Druid/Query/Aggregation/{Timerseries.php => Timeseries.php} (100%) diff --git a/src/Druid/Query/Aggregation/AbstractAggrationQuery.php b/src/Druid/Query/Aggregation/AbstractAggregationQuery.php similarity index 100% rename from src/Druid/Query/Aggregation/AbstractAggrationQuery.php rename to src/Druid/Query/Aggregation/AbstractAggregationQuery.php diff --git a/src/Druid/Query/Aggregation/Timerseries.php b/src/Druid/Query/Aggregation/Timeseries.php similarity index 100% rename from src/Druid/Query/Aggregation/Timerseries.php rename to src/Druid/Query/Aggregation/Timeseries.php From 212f12d4cf68e28a7d1a44cf5d70025da36782f5 Mon Sep 17 00:00:00 2001 From: Thomas van der Kleij Date: Wed, 24 Aug 2016 15:32:42 +0200 Subject: [PATCH 3/5] Allow creation of TimeseriesQueryBuilder --- src/Druid/Druid.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Druid/Druid.php b/src/Druid/Druid.php index 4198c67..a4af1a2 100644 --- a/src/Druid/Druid.php +++ b/src/Druid/Druid.php @@ -36,6 +36,7 @@ use Druid\Query\QueryInterface; use Druid\QueryBuilder\AbstractQueryBuilder; use Druid\QueryBuilder\GroupByQueryBuilder; +use Druid\QueryBuilder\TimeseriesQueryBuilder; /** * Class Connection. @@ -103,6 +104,10 @@ public function createQueryBuilder($queryType) switch ($queryType) { case AbstractQuery::TYPE_GROUP_BY: return new GroupByQueryBuilder(); + break; + case AbstractQuery::TYPE_TIMESERIES: + return new TimeseriesQueryBuilder(); + break; default: throw new \RuntimeException( sprintf('Invalid query type %s', $queryType) From ae323c2af5e65799a6b6a795e3003e7d453614b8 Mon Sep 17 00:00:00 2001 From: Thomas van der Kleij Date: Wed, 24 Aug 2016 16:58:14 +0200 Subject: [PATCH 4/5] Fix return type in docblock --- .../Query/Aggregation/AbstractAggregationQuery.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Druid/Query/Aggregation/AbstractAggregationQuery.php b/src/Druid/Query/Aggregation/AbstractAggregationQuery.php index f2de921..b1e650f 100644 --- a/src/Druid/Query/Aggregation/AbstractAggregationQuery.php +++ b/src/Druid/Query/Aggregation/AbstractAggregationQuery.php @@ -87,7 +87,7 @@ public function getDataSource() /** * @param DataSourceInterface $dataSource * - * @return GroupBy + * @return $this */ public function setDataSource(DataSourceInterface $dataSource) { @@ -107,7 +107,7 @@ public function getGranularity() /** * @param GranularityInterface $granularity * - * @return GroupBy + * @return $this */ public function setGranularity(GranularityInterface $granularity) { @@ -127,7 +127,7 @@ public function getAggregations() /** * @param array|\Druid\Query\Component\AggregatorInterface[] $aggregations * - * @return GroupBy + * @return $this */ public function setAggregations(array $aggregations) { @@ -147,7 +147,7 @@ public function getPostAggregations() /** * @param array|\Druid\Query\Component\PostAggregatorInterface[] $postAggregations * - * @return GroupBy + * @return $this */ public function setPostAggregations(array $postAggregations) { @@ -167,7 +167,7 @@ public function getIntervals() /** * @param array|\Druid\Query\Component\IntervalInterface[] $intervals * - * @return GroupBy + * @return $this */ public function setIntervals(array $intervals) { @@ -187,7 +187,7 @@ public function getFilter() /** * @param FilterInterface $filter * - * @return GroupBy + * @return $this */ public function setFilter(FilterInterface $filter) { From 5b8bd78d39cdaa355f177728c69ad609cf446ae2 Mon Sep 17 00:00:00 2001 From: Thomas van der Kleij Date: Thu, 25 Aug 2016 15:38:45 +0200 Subject: [PATCH 5/5] Update docs for Timeseries queries --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dba2bf4..149a3dc 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ $druid = new Druid( ] ); -$queryBuilder = $druid->createQueryBuilder(AbstractQuery::TYPE_GROUP_BY); +$queryBuilder = $druid->createQueryBuilder(AbstractQuery::TYPE_GROUP_BY); // or AbstractQuery::TYPE_TIMESERIES $queryBuilder->setDataSource('kpi_registrations_v1'); $queryBuilder->addInterval(new \DateTime('2000-01-01'), new \DateTime()); @@ -60,6 +60,7 @@ $queryBuilder->addAggregator($queryBuilder->aggregator()->count('count_rows')); $queryBuilder->addAggregator($queryBuilder->aggregator()->doubleSum('sum_rows', 'event_count_metric')); $queryBuilder->addAggregator($queryBuilder->aggregator()->hyperUnique('registrations', 'registrations')); +// Only include for GroupBy queries $queryBuilder->addDimension('project', 'project'); $queryBuilder->addPostAggregator(