From 94c1f73127aaa9d8db5466480fd4c7828abf1f74 Mon Sep 17 00:00:00 2001 From: Kellen <9484709+goodroot@users.noreply.github.com> Date: Tue, 3 Oct 2023 12:46:44 -0700 Subject: [PATCH] all the things --- .github/workflows/netlify-deploy.yml | 56 + .github/workflows/netlify-preview.yml | 59 + docs/__guidelines/content-hierarchy.md | 53 + docs/__guidelines/influences.md | 14 + docs/__guidelines/lexicon.md | 23 + docs/__guidelines/markdown.md | 116 ++ docs/__guidelines/naming-convention.md | 49 + docs/__guidelines/sql-code-blocks.md | 60 + docs/__guidelines/template/function.md | 132 ++ docs/__guidelines/template/guide.md | 34 + docs/__guidelines/template/sql.md | 53 + docs/concept/deduplication.md | 154 +++ docs/concept/designated-timestamp.md | 56 + docs/concept/geohashes.md | 534 +++++++ docs/concept/indexes.md | 140 ++ docs/concept/jit-compiler.md | 101 ++ docs/concept/partitions.md | 195 +++ docs/concept/root-directory-structure.md | 272 ++++ docs/concept/sql-execution-order.md | 34 + docs/concept/sql-extensions.md | 103 ++ docs/concept/storage-model.md | 103 ++ docs/concept/symbol.md | 83 ++ docs/concept/write-ahead-log.md | 135 ++ docs/deployment/aws-official-ami.md | 153 +++ docs/deployment/capacity-planning.md | 423 ++++++ docs/deployment/digitalocean.md | 151 ++ docs/deployment/google-cloud-platform.md | 141 ++ docs/deployment/kubernetes.md | 76 + docs/develop/connect.md | 99 ++ docs/develop/insert-data.md | 1132 +++++++++++++++ docs/develop/query-data.md | 568 ++++++++ docs/develop/questdb.io.code-workspace | 35 + docs/develop/update-data.md | 370 +++++ docs/develop/web-console.md | 284 ++++ .../_no-jre-prerequisites.partial.mdx | 37 + docs/get-started/_run-windows.partial.mdx | 12 + docs/get-started/_upgrade-linux.partial.mdx | 9 + docs/get-started/_upgrade-windows.partial.mdx | 9 + docs/get-started/binaries.mdx | 150 ++ docs/get-started/docker.md | 349 +++++ docs/get-started/first-database.md | 227 +++ docs/get-started/homebrew.md | 109 ++ docs/get-started/learn-more.md | 23 + docs/guides/importing-data-rest.md | 174 +++ docs/guides/importing-data.md | 619 +++++++++ docs/guides/influxdb-migration.md | 180 +++ docs/guides/modifying-data.md | 109 ++ docs/guides/v6-migration.md | 215 +++ .../working-with-timestamps-timezones.md | 192 +++ docs/introduction.md | 247 ++++ docs/operations/backup.md | 160 +++ docs/operations/data-retention.md | 132 ++ docs/operations/design-for-performance.md | 135 ++ docs/operations/health-monitoring.md | 103 ++ docs/operations/updating-data.md | 76 + docs/reference/_cairo.config.json | 402 ++++++ docs/reference/_csv-import.config.json | 30 + docs/reference/_http-minimal.config.json | 34 + docs/reference/_http.config.json | 210 +++ docs/reference/_parallel-sql.config.json | 26 + docs/reference/_postgres.config.json | 146 ++ docs/reference/_shared-worker.config.json | 14 + docs/reference/_tcp.config.json | 114 ++ docs/reference/_udp.config.json | 50 + docs/reference/_wal.config.json | 34 + docs/reference/api/ilp/authenticate.md | 327 +++++ docs/reference/api/ilp/columnset-types.md | 303 ++++ docs/reference/api/ilp/overview.md | 437 ++++++ docs/reference/api/ilp/tcp-receiver.md | 44 + docs/reference/api/ilp/udp-receiver.md | 50 + docs/reference/api/java-embedded.md | 479 +++++++ docs/reference/api/postgres.md | 93 ++ docs/reference/api/rest.md | 512 +++++++ docs/reference/clients/java_ilp.md | 158 +++ docs/reference/clients/overview.md | 27 + docs/reference/command-line-options.md | 322 +++++ docs/reference/configuration.md | 546 ++++++++ 
docs/reference/function/aggregation.md | 469 +++++++ docs/reference/function/analytic.md | 159 +++ docs/reference/function/binary.md | 36 + docs/reference/function/boolean.md | 58 + docs/reference/function/conditional.md | 96 ++ docs/reference/function/date-time.md | 1222 +++++++++++++++++ docs/reference/function/ipv4.md | 37 + docs/reference/function/meta.md | 263 ++++ docs/reference/function/numeric.md | 305 ++++ .../function/random-value-generator.md | 559 ++++++++ docs/reference/function/row-generator.md | 96 ++ docs/reference/function/spatial.md | 77 ++ docs/reference/function/text.md | 291 ++++ .../reference/function/timestamp-generator.md | 66 + docs/reference/function/timestamp.md | 90 ++ docs/reference/function/touch.md | 80 ++ docs/reference/function/trigonometric.md | 348 +++++ docs/reference/logging.md | 217 +++ docs/reference/operators/bitwise.md | 77 ++ docs/reference/operators/comparison.md | 131 ++ docs/reference/operators/ipv4.md | 280 ++++ docs/reference/operators/pattern-matching.md | 128 ++ docs/reference/operators/spatial.md | 74 + docs/reference/sql/alter-table-add-column.md | 46 + .../sql/alter-table-alter-column-add-index.md | 21 + .../sql/alter-table-alter-column-cache.md | 23 + .../alter-table-alter-column-drop-index.md | 36 + .../sql/alter-table-attach-partition.md | 211 +++ .../sql/alter-table-detach-partition.md | 42 + .../sql/alter-table-disable-deduplication.md | 21 + docs/reference/sql/alter-table-drop-column.md | 35 + .../sql/alter-table-drop-partition.md | 63 + .../sql/alter-table-enable-deduplication.md | 40 + .../sql/alter-table-rename-column.md | 27 + docs/reference/sql/alter-table-resume-wal.md | 66 + docs/reference/sql/alter-table-set-param.md | 99 ++ docs/reference/sql/alter-table-set-type.md | 38 + .../sql/alter-table-squash-partitions.md | 52 + docs/reference/sql/backup.md | 76 + docs/reference/sql/case.md | 65 + docs/reference/sql/cast.md | 109 ++ docs/reference/sql/copy.md | 195 +++ docs/reference/sql/create-table.md | 408 ++++++ docs/reference/sql/datatypes.md | 142 ++ docs/reference/sql/distinct.md | 35 + docs/reference/sql/drop.md | 45 + docs/reference/sql/explain.md | 265 ++++ docs/reference/sql/fill.md | 17 + docs/reference/sql/group-by.md | 71 + docs/reference/sql/insert.md | 109 ++ docs/reference/sql/join.md | 506 +++++++ docs/reference/sql/latest-on.md | 255 ++++ docs/reference/sql/limit.md | 46 + docs/reference/sql/order-by.md | 34 + docs/reference/sql/reindex.md | 40 + docs/reference/sql/rename.md | 17 + docs/reference/sql/sample-by.md | 418 ++++++ docs/reference/sql/select.md | 280 ++++ docs/reference/sql/show.md | 68 + docs/reference/sql/snapshot.md | 99 ++ docs/reference/sql/truncate.md | 29 + docs/reference/sql/union-except-intersect.md | 263 ++++ docs/reference/sql/update.md | 58 + docs/reference/sql/vacuum-table.md | 41 + docs/reference/sql/where.md | 432 ++++++ docs/reference/sql/with.md | 63 + docs/third-party-tools.md | 38 + docs/third-party-tools/cube.md | 91 ++ docs/third-party-tools/flink.md | 195 +++ docs/third-party-tools/grafana.md | 140 ++ docs/third-party-tools/kafka/jdbc.md | 190 +++ docs/third-party-tools/kafka/overview.md | 71 + docs/third-party-tools/kafka/questdb-kafka.md | 469 +++++++ docs/third-party-tools/mindsdb.md | 457 ++++++ docs/third-party-tools/pandas.md | 70 + docs/third-party-tools/prometheus.md | 247 ++++ docs/third-party-tools/qstudio.md | 55 + docs/third-party-tools/redpanda.md | 240 ++++ docs/third-party-tools/spark.md | 220 +++ docs/third-party-tools/sqlalchemy.md | 79 ++ 
docs/third-party-tools/superset.md | 95 ++ docs/third-party-tools/telegraf.md | 174 +++ docs/troubleshooting/faq.md | 124 ++ docs/troubleshooting/log.md | 119 ++ docs/troubleshooting/os-error-codes.md | 322 +++++ sidebars.js | 301 ++++ 163 files changed, 27478 insertions(+) create mode 100644 .github/workflows/netlify-deploy.yml create mode 100644 .github/workflows/netlify-preview.yml create mode 100644 docs/__guidelines/content-hierarchy.md create mode 100644 docs/__guidelines/influences.md create mode 100644 docs/__guidelines/lexicon.md create mode 100644 docs/__guidelines/markdown.md create mode 100644 docs/__guidelines/naming-convention.md create mode 100644 docs/__guidelines/sql-code-blocks.md create mode 100644 docs/__guidelines/template/function.md create mode 100644 docs/__guidelines/template/guide.md create mode 100644 docs/__guidelines/template/sql.md create mode 100644 docs/concept/deduplication.md create mode 100644 docs/concept/designated-timestamp.md create mode 100644 docs/concept/geohashes.md create mode 100644 docs/concept/indexes.md create mode 100755 docs/concept/jit-compiler.md create mode 100644 docs/concept/partitions.md create mode 100644 docs/concept/root-directory-structure.md create mode 100644 docs/concept/sql-execution-order.md create mode 100644 docs/concept/sql-extensions.md create mode 100644 docs/concept/storage-model.md create mode 100755 docs/concept/symbol.md create mode 100644 docs/concept/write-ahead-log.md create mode 100644 docs/deployment/aws-official-ami.md create mode 100644 docs/deployment/capacity-planning.md create mode 100644 docs/deployment/digitalocean.md create mode 100644 docs/deployment/google-cloud-platform.md create mode 100644 docs/deployment/kubernetes.md create mode 100644 docs/develop/connect.md create mode 100644 docs/develop/insert-data.md create mode 100644 docs/develop/query-data.md create mode 100644 docs/develop/questdb.io.code-workspace create mode 100644 docs/develop/update-data.md create mode 100644 docs/develop/web-console.md create mode 100644 docs/get-started/_no-jre-prerequisites.partial.mdx create mode 100644 docs/get-started/_run-windows.partial.mdx create mode 100644 docs/get-started/_upgrade-linux.partial.mdx create mode 100644 docs/get-started/_upgrade-windows.partial.mdx create mode 100644 docs/get-started/binaries.mdx create mode 100644 docs/get-started/docker.md create mode 100644 docs/get-started/first-database.md create mode 100644 docs/get-started/homebrew.md create mode 100644 docs/get-started/learn-more.md create mode 100644 docs/guides/importing-data-rest.md create mode 100644 docs/guides/importing-data.md create mode 100644 docs/guides/influxdb-migration.md create mode 100644 docs/guides/modifying-data.md create mode 100644 docs/guides/v6-migration.md create mode 100644 docs/guides/working-with-timestamps-timezones.md create mode 100644 docs/introduction.md create mode 100644 docs/operations/backup.md create mode 100644 docs/operations/data-retention.md create mode 100644 docs/operations/design-for-performance.md create mode 100644 docs/operations/health-monitoring.md create mode 100644 docs/operations/updating-data.md create mode 100644 docs/reference/_cairo.config.json create mode 100644 docs/reference/_csv-import.config.json create mode 100644 docs/reference/_http-minimal.config.json create mode 100644 docs/reference/_http.config.json create mode 100644 docs/reference/_parallel-sql.config.json create mode 100644 docs/reference/_postgres.config.json create mode 100644 
docs/reference/_shared-worker.config.json create mode 100644 docs/reference/_tcp.config.json create mode 100644 docs/reference/_udp.config.json create mode 100644 docs/reference/_wal.config.json create mode 100644 docs/reference/api/ilp/authenticate.md create mode 100644 docs/reference/api/ilp/columnset-types.md create mode 100644 docs/reference/api/ilp/overview.md create mode 100644 docs/reference/api/ilp/tcp-receiver.md create mode 100644 docs/reference/api/ilp/udp-receiver.md create mode 100644 docs/reference/api/java-embedded.md create mode 100644 docs/reference/api/postgres.md create mode 100644 docs/reference/api/rest.md create mode 100644 docs/reference/clients/java_ilp.md create mode 100644 docs/reference/clients/overview.md create mode 100644 docs/reference/command-line-options.md create mode 100644 docs/reference/configuration.md create mode 100644 docs/reference/function/aggregation.md create mode 100644 docs/reference/function/analytic.md create mode 100644 docs/reference/function/binary.md create mode 100644 docs/reference/function/boolean.md create mode 100644 docs/reference/function/conditional.md create mode 100644 docs/reference/function/date-time.md create mode 100644 docs/reference/function/ipv4.md create mode 100644 docs/reference/function/meta.md create mode 100644 docs/reference/function/numeric.md create mode 100644 docs/reference/function/random-value-generator.md create mode 100644 docs/reference/function/row-generator.md create mode 100644 docs/reference/function/spatial.md create mode 100644 docs/reference/function/text.md create mode 100644 docs/reference/function/timestamp-generator.md create mode 100644 docs/reference/function/timestamp.md create mode 100644 docs/reference/function/touch.md create mode 100644 docs/reference/function/trigonometric.md create mode 100644 docs/reference/logging.md create mode 100644 docs/reference/operators/bitwise.md create mode 100644 docs/reference/operators/comparison.md create mode 100644 docs/reference/operators/ipv4.md create mode 100644 docs/reference/operators/pattern-matching.md create mode 100644 docs/reference/operators/spatial.md create mode 100644 docs/reference/sql/alter-table-add-column.md create mode 100644 docs/reference/sql/alter-table-alter-column-add-index.md create mode 100644 docs/reference/sql/alter-table-alter-column-cache.md create mode 100644 docs/reference/sql/alter-table-alter-column-drop-index.md create mode 100644 docs/reference/sql/alter-table-attach-partition.md create mode 100644 docs/reference/sql/alter-table-detach-partition.md create mode 100644 docs/reference/sql/alter-table-disable-deduplication.md create mode 100644 docs/reference/sql/alter-table-drop-column.md create mode 100644 docs/reference/sql/alter-table-drop-partition.md create mode 100644 docs/reference/sql/alter-table-enable-deduplication.md create mode 100644 docs/reference/sql/alter-table-rename-column.md create mode 100644 docs/reference/sql/alter-table-resume-wal.md create mode 100644 docs/reference/sql/alter-table-set-param.md create mode 100644 docs/reference/sql/alter-table-set-type.md create mode 100644 docs/reference/sql/alter-table-squash-partitions.md create mode 100644 docs/reference/sql/backup.md create mode 100644 docs/reference/sql/case.md create mode 100644 docs/reference/sql/cast.md create mode 100644 docs/reference/sql/copy.md create mode 100644 docs/reference/sql/create-table.md create mode 100644 docs/reference/sql/datatypes.md create mode 100644 docs/reference/sql/distinct.md create mode 100644 
docs/reference/sql/drop.md create mode 100644 docs/reference/sql/explain.md create mode 100644 docs/reference/sql/fill.md create mode 100644 docs/reference/sql/group-by.md create mode 100644 docs/reference/sql/insert.md create mode 100644 docs/reference/sql/join.md create mode 100644 docs/reference/sql/latest-on.md create mode 100644 docs/reference/sql/limit.md create mode 100644 docs/reference/sql/order-by.md create mode 100644 docs/reference/sql/reindex.md create mode 100644 docs/reference/sql/rename.md create mode 100644 docs/reference/sql/sample-by.md create mode 100644 docs/reference/sql/select.md create mode 100644 docs/reference/sql/show.md create mode 100644 docs/reference/sql/snapshot.md create mode 100644 docs/reference/sql/truncate.md create mode 100644 docs/reference/sql/union-except-intersect.md create mode 100644 docs/reference/sql/update.md create mode 100644 docs/reference/sql/vacuum-table.md create mode 100644 docs/reference/sql/where.md create mode 100644 docs/reference/sql/with.md create mode 100644 docs/third-party-tools.md create mode 100644 docs/third-party-tools/cube.md create mode 100644 docs/third-party-tools/flink.md create mode 100644 docs/third-party-tools/grafana.md create mode 100644 docs/third-party-tools/kafka/jdbc.md create mode 100644 docs/third-party-tools/kafka/overview.md create mode 100644 docs/third-party-tools/kafka/questdb-kafka.md create mode 100644 docs/third-party-tools/mindsdb.md create mode 100644 docs/third-party-tools/pandas.md create mode 100644 docs/third-party-tools/prometheus.md create mode 100644 docs/third-party-tools/qstudio.md create mode 100644 docs/third-party-tools/redpanda.md create mode 100644 docs/third-party-tools/spark.md create mode 100644 docs/third-party-tools/sqlalchemy.md create mode 100644 docs/third-party-tools/superset.md create mode 100644 docs/third-party-tools/telegraf.md create mode 100644 docs/troubleshooting/faq.md create mode 100644 docs/troubleshooting/log.md create mode 100644 docs/troubleshooting/os-error-codes.md create mode 100644 sidebars.js diff --git a/.github/workflows/netlify-deploy.yml b/.github/workflows/netlify-deploy.yml new file mode 100644 index 00000000..ea7a7eb6 --- /dev/null +++ b/.github/workflows/netlify-deploy.yml @@ -0,0 +1,56 @@ +name: "Prod deploy" + +on: + push: + branches: + - master + +jobs: + deploy: + name: "Deploy" + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + path: "src" + ref: "main" + persist-credentials: false + + - name: Checkout2 + uses: actions/checkout@v3 + with: + path: "dst" + ref: "main" + repository: "questdb/questdb.io" + token: ${{ secrets.DEPLOY_TOKEN }} + fetch-depth: 0 + persist-credentials: true + + - name: Copy common files to new repo + shell: bash + run: | + cp -rvT src/docs/* dst/docs/. 
+ cp -v src/sidebars.js dst/sidebars.js + + - name: Check For New Files + id: newFiles + shell: bash + working-directory: dst + run: | + git add -A + if [ $(git status --porcelain | wc -l) -gt 0 ]; then + echo "::set-output name=hasNewFiles::true" + else + echo "::set-output name=hasNewFiles::false" + fi + + - name: Push New Files + if: ${{ steps.newFiles.outputs.hasNewFiles == 'true' }} + shell: bash + working-directory: dst + run: | + git config --global user.name "questbot [bot]" + git config --global user.email "${{ secrets.DEPLOY_TOKEN }}" + git commit -m "deploy: Updated docs from ref: ${GITHUB_REF}" + git push diff --git a/.github/workflows/netlify-preview.yml b/.github/workflows/netlify-preview.yml new file mode 100644 index 00000000..84a6a46f --- /dev/null +++ b/.github/workflows/netlify-preview.yml @@ -0,0 +1,59 @@ +name: "Preview deploy" + +on: + pull_request: + types: ["opened", "edited", "synchronize"] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + deploy: + if: ${{ github.event.label.name == 'run-ci' }} + name: "build on Netlify" + runs-on: ubuntu-latest + + env: + ALGOLIA_API_KEY: ${{ vars.ALGOLIA_API_KEY }} + ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }} + GOOGLE_ANALYTICS_ID: ${{ vars.GOOGLE_ANALYTICS_ID }} + + steps: + - name: Checkout main website repo + uses: actions/checkout@v4 + with: + repository: questdb/questdb.io + token: ${{ secrets.GH_PAT }} # `GH_PAT` is a secret that contains your PAT + fetch-depth: 0 + persist-credentials: true + + - name: Checkout local docs + uses: actions/checkout@v3 + with: + path: docs/. + + - name: Move sidebars.js + run: | + mv docs/sidebars.js . + + - uses: jsmrcaga/action-netlify-deploy@v2.0.0 + with: + NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} + + # Comment PR if preview was successful and NETLIFY_PREVIEW_URL is set + - name: "Update PR" + if: ${{ env.NETLIFY_PREVIEW_URL }} + uses: thollander/actions-comment-pull-request@v2 + with: + message: | + :rocket: **Build success!** + + Latest successful preview: ${{ env.NETLIFY_PREVIEW_URL }} + + Commit SHA: ${{ github.event.pull_request.head.sha }} + + > :package: Build generates a preview & updates link on each commit. + + comment_tag: execution diff --git a/docs/__guidelines/content-hierarchy.md b/docs/__guidelines/content-hierarchy.md new file mode 100644 index 00000000..81c5a0fe --- /dev/null +++ b/docs/__guidelines/content-hierarchy.md @@ -0,0 +1,53 @@ +--- +title: Content hierarchy +--- + +Documentation should follow a hierarchy; this is true both for the content and +for how titles are organized. In most cases, you can refer to a template and reuse +the hierarchy exposed there. When you write a page that does not derive from a +template, please follow the guidelines exposed here. + +## Content + +When you need to show a command, please show it as close to the top of your page +as possible. This will ensure that users can easily copy/paste code without +having to scan the whole page. + +It is okay to be very descriptive and thorough; however, not every detail should +have the same weight. If you are documenting a function with many arguments, +please start with the most common ones first, gradually defining the ones that +people are less likely to use. + +## Titles + +Pages need to start with text, not a title.
Titles should always follow the +following hierarchy: + +```shell +h1 (title) > h2 (##) > h3 (###) > h4 (####) +``` + +This will improve readability and SEO. Example: + +```markdown +## The first title should be H2 + +Then there should be some text + +### Then further titles should be H3 + +Then ideally some text. Subsequent titles can then be used + +#### For example H4 + +or even + +##### For example H5 + +etc +``` + +## Bad practices + +- Repetitive subtitles +- Too many Tip/Info/Warning. Please use maximum 1-2 per page. diff --git a/docs/__guidelines/influences.md b/docs/__guidelines/influences.md new file mode 100644 index 00000000..c71eec0c --- /dev/null +++ b/docs/__guidelines/influences.md @@ -0,0 +1,14 @@ +--- +title: Influences +--- + +Writing documentation is hard, some projects tackle that problem in a very +elegant manner. You can find here the list of documentations that we find +greatly architectured, well written and pleasant to use: + +- [Mozilla Developer Network](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) + (a.k.a. MDN) +- [Shopify](https://shopify.dev/concepts/about-apis/authentication) +- [Stripe](https://stripe.com/docs) + +When looking for new ideas, it is recommended to look at those first. diff --git a/docs/__guidelines/lexicon.md b/docs/__guidelines/lexicon.md new file mode 100644 index 00000000..55ab36f8 --- /dev/null +++ b/docs/__guidelines/lexicon.md @@ -0,0 +1,23 @@ +--- +title: Lexicon +--- + +## Capitalization + +Please refer to these terms using the **exact** same casing. + +- InfluxDB line protocol +- Embedded Java API +- PostgreSQL wire protocol +- PGWire +- REST API (not HTTP API) +- Web Console + +## Singular or plural? + +- Data is treated as a singular noun. + +## Spell checking + +Please install and use a spell checker when editing markdown, this will prevent +common typos and make code reviews smoother. diff --git a/docs/__guidelines/markdown.md b/docs/__guidelines/markdown.md new file mode 100644 index 00000000..7c702cf9 --- /dev/null +++ b/docs/__guidelines/markdown.md @@ -0,0 +1,116 @@ +--- +title: Markdown +--- + +This page describes elements and components from +[MDX](https://github.com/mdx-js/mdx) that we use and elements that we avoid. + +## Supported elements / components + +We use the following elements. + +- No more than two blocks per page + +### Note + +:::note + +This is something important the user should be aware of. But no danger. If the content of the note flows well with the main text, consider avoiding using the note block. + +::: + +### Tip + +:::tip + +Can be used to highlight nice tricks on a very occasional basis. + +::: + +### Caution + +:::caution + +Gives a warning about something dangerous. + +::: + +### Table + +| table | table | +| ----- | ----- | +| value | value | +| value | value | + +### "questdb-sql" code block + +Title is optional. + +```questdb-sql title="title" +SELECT * FROM users; +``` + +### Multi language code block + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + + + + + +```questdb-sql +SELECT * FROM users; +``` + + + + +```shell +curl -G "http://localhost:13005/exec" +``` + + + + +```java +final CairoEngine engine = new CairoEngine(); +``` + + + + + +### "Script" code block + +Use the `script` language. The title is optional. 
+ +```shell +some shell command +``` + +## Elements / components to avoid + +### Info + +:::info + +Not used because it is hard to differentiate from a note. + +::: + +### Warning + +:::warning + +Warning + +::: + +### Quote + +> We don't use quotes diff --git a/docs/__guidelines/naming-convention.md b/docs/__guidelines/naming-convention.md new file mode 100644 index 00000000..4ea41426 --- /dev/null +++ b/docs/__guidelines/naming-convention.md @@ -0,0 +1,49 @@ +--- +title: Naming convention +--- + +Our conventions improve consistency and ensure that we have optimal SEO. + +## New page + +### File name + +`file-name.md` (kebab-case, no uppercase; the same rule applies to blog +posts) + +### Title + +`A descriptive title` (No Pascal Case) + +### Markdown attributes + +- `id`: please do NOT set the `id`; Docusaurus will automatically compute it + from the file path and name. +- `sidebar_label`: `Custom name`; do not set it if it is the same as the title, + as Docusaurus will automatically fall back to it. + +## Images + +Images should always be inserted in markdown, not HTML: + +```shell +![Image description](path/to/img.jpg) +``` + +The description is important, as it will populate the `alt` attribute, which +improves SEO. + +## Links + +Links should always be inserted in markdown, not HTML: + +```shell +[link text](https://path/to/resource.html) +``` + +:::caution + +Please use **meaningful** wording for the link text; content such as "here" or +"click here" is forbidden since it will negatively impact SEO. + +::: diff --git a/docs/__guidelines/sql-code-blocks.md b/docs/__guidelines/sql-code-blocks.md new file mode 100644 index 00000000..ae5a9de9 --- /dev/null +++ b/docs/__guidelines/sql-code-blocks.md @@ -0,0 +1,60 @@ +--- +title: SQL code blocks +--- + +### Checklist + +- [ ] Use the `questdb-sql` language +- [ ] Keywords are uppercase +- [ ] Types are uppercase +- [ ] Column names are camelCase +- [ ] Table names are camelCase +- [ ] Function names are lowercase +- [ ] Statements finish with `;` + +### Formatting + +- Always write explicit `SELECT * FROM table` instead of `table` with the + exception of pages describing the shorthand expression. +- `timestamp` is not a valid column name. Neither are any type or function + names. +- Carriage returns are allowed (1) after a `,`, (2) after a SQL keyword, or (3) + after an opening or closing bracket `(`,`)` + +### Examples + +```questdb-sql +SELECT * FROM tableName; +``` + +```questdb-sql +SELECT columnName, min(columnName) FROM tableName; +``` + +```questdb-sql +CREATE TABLE tableName(columnName TYPE, columnName TYPE) timestamp(columnName) PARTITION BY DAY; +``` + +```questdb-sql +SELECT cast(columnName AS INT) FROM tableName; +``` + +```questdb-sql +SELECT columnName, min(columnName) +FROM tableName +WHERE columnName > 3; +``` + +```questdb-sql +SELECT +columnName, +min(columnName), +max(columnName) +FROM tableName WHERE columnName > 3; +``` + +```questdb-sql +CREATE TABLE tableName AS( +... +); +``` diff --git a/docs/__guidelines/template/function.md b/docs/__guidelines/template/function.md new file mode 100644 index 00000000..23e46cbc --- /dev/null +++ b/docs/__guidelines/template/function.md @@ -0,0 +1,132 @@ +--- +title: FUNCTION +sidebar_label: Function +--- + +Function reference pages list one or several (when closely related to each +other) functions. For this reason, the section should start with a short +paragraph describing the class of functions covered on this page and what they +are used for.
+ +## function_name + + +function_name(`arg1`,`arg2`) - does this and that. + + +### Arguments + +`arg1` is [data_type]. +`arg2` is [data_type]. + + +### Return value + +Return value type is [data_type]. + +### Description + +`function_name` requires a longer explanation of the logic because it is a bit +complex. In this case, it is good to describe it with some text. It should start +with a plain-text explanation. Then branches should be expressed as bullet points. +For example, a given `argument` might have various behaviors depending on its value: + +- `true` makes it do this. It means that this happens and that might happen. +- `false` makes the function do that. So `function_name` may have this behavior. + +### Examples + +#### Basic usage + +```questdb-sql title="Example description - Scalar result" +SELECT function_name(arg1,arg2) FROM table; +``` + +| function_name | +| ------------- | +| true | + +#### With optional arguments + +```questdb-sql title="Example description - Table result" +SELECT function_name(arg1,arg2,opt1) FROM table; +``` + +| a | b | function_name | function_name | +| ----- | --- | ------------- | ------------- | +| true | 47 | true | 47 | +| false | 53 | false | 53 | + +#### With null + +```questdb-sql title="Example description - Series result" +SELECT function_name(arg1,arg2) FROM table; +``` + +| a | b | function_name | function_name | +| ----- | --- | ------------- | ------------- | +| true | 47 | true | 47 | +| ... | ... | ... | ... | +| false | 53 | false | 53 | + +## function_name_2 + + +function_name_2(`arg1`,`arg2`) - does this and that. + +### Arguments + +`arg1` is [data_type]. +`arg2` is [data_type]. + +### Return value + +Return value type is [data_type]. + +### Description + +`function_name_2` requires a longer explanation of the logic because it is a bit +complex. In this case, it is good to describe it with some text. It should start +with a plain-text explanation. Then branches should be expressed as bullet points. +For example, a given `argument` might have various behaviors depending on its value: + +- `true` makes it do this. It means that this happens and that might happen. +- `false` makes the function do that. So `function_name_2` may have this behavior. + +### Examples + +At minimum, examples need a descriptive title. An optional description can be +used too. + +#### Basic usage + +```questdb-sql title="Example description - Scalar result" +SELECT function_name_2(arg1,arg2) FROM table; +``` + +| function_name_2 | +| --------------- | +| true | + +#### With optional arguments + +```questdb-sql title="Example description - Table result" +SELECT function_name_2(arg1,arg2) FROM table; +``` + +| a | b | function_name_2 | function_name_2 | +| ----- | --- | --------------- | --------------- | +| true | 47 | true | 47 | +| false | 53 | false | 53 | + +#### With null + +```questdb-sql title="Example description - Series result" +SELECT function_name_2(arg1,arg2) FROM table; +``` + +| a | b | function_name_2 | function_name_2 | +| ----- | --- | --------------- | --------------- | +| true | 47 | true | 47 | +| ... | ... | ... | ... | +| false | 53 | false | 53 | diff --git a/docs/__guidelines/template/guide.md b/docs/__guidelines/template/guide.md new file mode 100644 index 00000000..404a2fbe --- /dev/null +++ b/docs/__guidelines/template/guide.md @@ -0,0 +1,34 @@ +--- +title: How to "xyz" +sidebar_label: Guide +--- + +How To Guides should have a short description of what they intend to cover.
If +your How To has a corresponding section for reference elsewhere in the docs, +make sure to include it here, or in a 'References' section. [References](sql.md) + +## First step + +Beginning steps. Include any code in code blocks. Make sure that the code is +runnable and actually works. Each snippet should be executable as one entity - +do not combine several commands into a single snippet. + +```shell +/bin/bash -f shell-code-here +``` + +## Second step + +Break things up into sections to make it easy to follow. + +:::note + +Use Notes to call out important bits of information. + +::: + +## More links + +Include links to other relevant documentation. + +- with [Docker](/docs/get-started/docker/) diff --git a/docs/__guidelines/template/sql.md b/docs/__guidelines/template/sql.md new file mode 100644 index 00000000..6e00e15e --- /dev/null +++ b/docs/__guidelines/template/sql.md @@ -0,0 +1,53 @@ +--- +title: KEYWORD +sidebar_label: SQL +--- + +`KEYWORD` does something interesting. + +## Syntax + +![Flow chart showing the syntax of CASE](/img/docs/diagrams/case.svg) + +Some arguments may need explanations. + +- `this` does that and is useful +- `this` does that and is also useful + +## Description + +Further explanation for when the first-line introduction is not sufficient. + +## Notes + +Edge cases. + +## Examples + +Examples are introduced by a title and an optional description. Ideally +they are made clear enough so that neither text nor an output example is required. +There should be many examples describing various situations. + +### Basic usage + +```questdb-sql title="This example does that" +SELECT a FROM b; +``` + +### Aggregations + +```questdb-sql title="Do something interesting" +SELECT a FROM b; +``` + +### Joining tables + +```questdb-sql title="Something else, equally interesting" +SELECT a FROM b; +``` + +### With dynamic designated timestamp + +```questdb-sql title="This is only interesting for certain users" +SELECT a FROM b; +``` diff --git a/docs/concept/deduplication.md b/docs/concept/deduplication.md new file mode 100644 index 00000000..8f15d351 --- /dev/null +++ b/docs/concept/deduplication.md @@ -0,0 +1,154 @@ +--- +title: Data Deduplication +sidebar_label: Data Deduplication +description: What is built-in storage deduplication and why it can be useful. +--- + +Starting from QuestDB 7.3, there is an option to enable storage-level data deduplication on a table. Data deduplication works on all the data inserted into the table and replaces matching rows with the new versions. Only new rows that do not match existing data will be inserted. + +:::note +Deduplication can only be enabled for [Write-Ahead Log (WAL)](/docs/concept/write-ahead-log) tables. +::: + +## Practical considerations + +Deduplication in QuestDB makes table inserts [idempotent](https://en.wikipedia.org/wiki/Idempotence). The primary use case is to allow for re-sending data within a given time range without creating duplicates. + +This can be particularly useful in situations where there is an error in sending data, such as when using [InfluxDB Line Protocol](/docs/reference/api/ilp/overview), and there is no clear indication of how much of the data has already been written. With deduplication enabled, it is safe to re-send data from a fixed period in the past to resume the writing process. + +Enabling deduplication on a table has an impact on writing performance, especially when multiple `UPSERT KEYS` are configured.
Generally, if the data has mostly unique timestamps across all the rows, the performance impact of deduplication is low. Conversely, the most demanding data pattern occurs when there are many rows with the same timestamp that need to be deduplicated on additional columns. + +For example, in use cases where 10 million devices send CPU metrics precisely on every second boundary, deduplicating the data based on the device ID can be expensive. However, in cases where CPU metrics are sent at random and typically have unique timestamps, the cost of deduplication is negligible. + +## Configuration + +Create a WAL-enabled table with deduplication using [`CREATE TABLE`](/docs/reference/sql/create-table/#deduplication) syntax. + +Enable or disable deduplication at any time for individual tables using the following statements: + +- [ALTER TABLE DEDUP ENABLE ](/docs/reference/sql/alter-table-enable-deduplication) +- [ALTER TABLE DEDUP DISABLE](/docs/reference/sql/alter-table-disable-deduplication) + +Remember: choosing the correct `UPSERT KEYS` ensures that deduplication functions as expected. + +## Deduplication UPSERT Keys + +_UPSERT_ is a common database concept that combines _UPDATE_ and _INSERT_. It means that the new row _UPDATEs_ the existing row (or multiple rows in the general case) when the matching criteria are met. Otherwise, the new row is _INSERTed_ into the table. In QuestDB deduplication, the _UPSERT_ matching criteria are set by defining a column list in the `UPSERT KEYS` clause in the `CREATE` or `ALTER` table statement. + +`UPSERT KEYS` can be changed at any time and can contain one or more columns. + +However, there are some limitations on the `UPSERT KEYS` list: + +- The [Designated Timestamp](/docs/concept/designated-timestamp) column must be included in the `UPSERT KEYS` list. +- Columns of [STRING and BINARY](/docs/reference/sql/datatypes) types cannot be used in the `UPSERT KEYS` list. + +## Example + +The easiest way to explain the usage of `UPSERT KEYS` is through an example: + +```sql +CREATE TABLE TICKER_PRICE ( + ts TIMESTAMP, + ticker SYMBOL, + price DOUBLE +) TIMESTAMP(ts) PARTITION BY DAY WAL +DEDUP UPSERT KEYS(ts, ticker); +``` + +In this example, the deduplication keys are set to the `ts` column, which is the designated timestamp, and the `ticker` column. The intention is to have no more than one price point per ticker at any given time. Therefore, if the same ticker/timestamp combination is sent twice, only the last price is saved. + +The following inserts demonstrate the deduplication behavior: + +```sql +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 78.56); -- row 1 +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 78.34); -- row 2 + +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'AAPL', 104.40); -- row 3 +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'AAPL', 105.18); -- row 4 +``` + +In this case, deduplication overwrites row 1 with row 2 because both deduplication keys have the same values: `ts='2023-07-14'` and `ticker='QQQ'`. The same behavior applies to the second pair of rows, where row 4 overwrites row 3. + +As a result, the table contains only two rows: + +```sql +SELECT * FROM TICKER_PRICE; +``` + +| ts | ticker | price | +| ---------- | ------ | ------ | +| 2023-07-14 | QQQ | 78.34 | +| 2023-07-14 | AAPL | 105.18 | + +Regardless of whether the inserts are executed in a single transaction/batch or as individual inserts, the outcome remains unchanged as long as the order of the inserts is maintained.
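+As a quick sketch of the idempotency this provides, re-sending rows that were already written (rows 2 and 4 from the example above) leaves the table contents unchanged:
+
+```sql
+-- a sketch: re-sending previously written rows; with deduplication enabled
+-- the table still holds exactly one row per (ts, ticker) pair
+INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 78.34);
+INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'AAPL', 105.18);
+
+SELECT count() FROM TICKER_PRICE; -- still returns 2
+```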
+ +Deduplication can be disabled using the DEDUP DISABLE SQL statement: + +```sql +ALTER TABLE TICKER_PRICE DEDUP DISABLE +``` + +This reverts the table to behave as usual, allowing the following inserts: + +```sql +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 84.59); -- row 1 +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'AAPL', 105.21); -- row 2 +``` + +These inserts add two more rows to the TICKER_PRICE table: + +```sql +SELECT * FROM TICKER_PRICE; +``` + +| ts | ticker | price | +| ---------- | ------ | ------ | +| 2023-07-14 | QQQ | 78.34 | +| 2023-07-14 | QQQ | 84.59 | +| 2023-07-14 | AAPL | 105.18 | +| 2023-07-14 | AAPL | 105.21 | + +Deduplication can be enabled again at any time: + +```sql +ALTER TABLE TICKER_PRICE DEDUP ENABLE UPSERT KEYS(ts, ticker) +``` + +:::note +Enabling deduplication does not have any effect on the existing data and only applies to newly inserted data. This means that a table with deduplication enabled can still contain duplicate data. +::: + +Enabling deduplication does not change the number of rows in the table. After enabling deduplication, the following inserts demonstrate the deduplication behavior: + +```sql +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 98.02); -- row 1 +INSERT INTO TICKER_PRICE VALUES ('2023-07-14', 'QQQ', 91.16); -- row 2 +``` + +After these inserts, all rows with `ts='2023-07-14'` and `ticker='QQQ'` are replaced, first by row 1 and then by row 2, and the price is set to **91.16**: + +```sql +SELECT * FROM TICKER_PRICE; +``` + +| ts | ticker | price | +| ---------- | ------ | ------ | +| 2023-07-14 | QQQ | 91.16 | +| 2023-07-14 | QQQ | 91.16 | +| 2023-07-14 | AAPL | 105.18 | +| 2023-07-14 | AAPL | 105.21 | + + +## Checking Deduplication Configuration + +It is possible to utilize metadata [tables](/docs/reference/function/meta#tables) query to verify whether deduplication is enabled for a specific table: + +```sql +SELECT dedup FROM tables() WHERE name = '' +``` + +The function [table_columns](/docs/reference/function/meta#table_columns) can be used to identify which columns are configured as deduplication UPSERT KEYS: + +```sql +SELECT column, upsertKey from table_columns('') +``` \ No newline at end of file diff --git a/docs/concept/designated-timestamp.md b/docs/concept/designated-timestamp.md new file mode 100644 index 00000000..50338696 --- /dev/null +++ b/docs/concept/designated-timestamp.md @@ -0,0 +1,56 @@ +--- +title: Designated timestamp +sidebar_label: Designated timestamp +description: + How designated timestamps are implemented and why it is an important + functionality for time series. +--- + +QuestDB offers the option to elect a column as a _designated timestamp_. This +allows you to specify which column the tables will be indexed by in order to +leverage time-oriented language features and high-performance functionalities. + +A designated timestamp is elected by using the +[`timestamp(columnName)`](/docs/reference/function/timestamp/) function: + +- during a [CREATE TABLE](/docs/reference/sql/create-table/#timestamp) operation +- during a [SELECT](/docs/reference/sql/select/#timestamp) operation + (`dynamic timestamp`) +- when ingesting data via InfluxDB Line Protocol, for tables that do not already exist in QuestDB, + partitions are applied automatically by day by default with a `timestamp` + column + +:::note + +- The native timestamp format used by QuestDB is a Unix timestamp in microsecond + resolution. 
See + [Timestamps in QuestDB](/docs/guides/working-with-timestamps-timezones/#timestamps-in-questdb) + for more details. + +::: + +## Properties + +- Only a column of type `timestamp` can be elected as a designated timestamp. +- Only one column can be elected for a given table. + +## Checking the designated timestamp settings + +The [meta functions](/docs/reference/function/meta/), `tables()` and +`table_columns()`, are designed to show the designated timestamp settings of the +selected table. + +## Advantages of electing a designated timestamp + +Electing a designated timestamp allows you to: + +- Partition tables by time range. For more information, see the + [partitions reference](/docs/concept/partitions/). +- Use time series joins such as `ASOF JOIN`. For more information, see the + [JOIN reference](/docs/reference/sql/join/). + +## Out-of-order policy + +As of version 6.0.0, QuestDB supports the ingestion of records that are +out-of-order (O3) by time. QuestDB detects and adjusts data ingestion for O3 +data automatically and no manual configuration is required. diff --git a/docs/concept/geohashes.md b/docs/concept/geohashes.md new file mode 100644 index 00000000..86cbb223 --- /dev/null +++ b/docs/concept/geohashes.md @@ -0,0 +1,534 @@ +--- +title: Geospatial data +description: + This document describes how to work with geohashes as geospatial types in + QuestDB, including hints on converting back and forth from latitude and + longitude, inserting via SQL, InfluxDB line protocol, CSV, and more. +--- + +QuestDB adds support for working with geospatial data through a `geohash` type. +This page describes how to use geohashes, with an overview of the syntax, +including hints on converting from latitude and longitude, inserting via SQL, +InfluxDB line protocol, and via Java embedded API. + +To facilitate working with this data type, +[spatial functions](/docs/reference/function/spatial/) and +[operators](/docs/reference/operators/spatial/) have been added to help with +filtering and generating data. + +## Geohash description + +A geohash is a convenient way of expressing a location using a short +alphanumeric string, with greater precision obtained with longer strings. The +basic idea is that the Earth is divided into grids of defined size, and each +area is assigned a unique id called its Geohash. For a given location on Earth, +we can convert latitude and longitude as +[the approximate center point](https://en.wikipedia.org/wiki/Geohash#Limitations_when_used_for_deciding_proximity) +of a grid represented by a geohash string. This string is the Geohash and will +determine which of the predefined regions the point belongs to. + +In order to be compact, [base32](https://en.wikipedia.org/wiki/Base32#Geohash) +is used as a representation of Geohashes, and is therefore comprised of: + +- all decimal digits (0-9) and +- almost all of the alphabet (case-insensitive) **except "a", "i", "l", "o"**. + +The following figure illustrates how increasing the length of a geohash result +in a higher-precision grid size: + +import Screenshot from "@theme/Screenshot" + + + +## QuestDB geohash type + +Geohash column types are represented in QuestDB as `geohash()`. +Precision is specified in the format `n{units}` where `n` is a numeric +multiplier and `units` may be either `c` for char or `b` for bits (`c` being +shorthand for 5 x `b`). 
+ +The following example shows basic usage of geohashes by creating a column of 5 +`char` precision, 29 bits of precision, and inserting geohash values into these +columns: + +```questdb-sql +CREATE TABLE geo_data (g5c geohash(5c), g29b geohash(29b)); +INSERT INTO geo_data VALUES(#u33d8, ##10101111100101111111101101101) +-- Querying by geohash +SELECT * FROM geo_data WHERE g5c = #u33d8; +``` + +It's not possible to store variable size geohashes within a column, therefore +the size and precision of a geohash must be known beforehand. Shorter-precision +geohashes **cannot be inserted** into longer-precision columns as all bits are +significant. Details on the size of geohashes is described in the +[geohash precision](#specifying-geohash-precision) section below. Additionally, +`NULL` is supported as a separate value for geohash columns of all precision. + +### Geohash literals + +Geohashes have a literal syntax which starts with the hash `#` symbol followed +by up to 12 chars, i.e.: + +```questdb-sql +INSERT INTO my_geo_data VALUES(#u33, #u33d8b12) +``` + +Geohash literals with a single hash (`#`) may include a suffix in the format +`/{bits}` where `bits` is the number of bits from 1-60 to allow for further +granularity of the geohash size. This is useful if a specific precision is +desired on the column size, but the values being inserted are using a char +notation: + +```questdb-sql +-- insert a 5-bit geohash into a 4 bit column +INSERT INTO my_geo_data VALUES(#a/4) +-- insert a 20-bit geohash into an 18 bit column +INSERT INTO my_geo_data VALUES(#u33d/18) +``` + +The binary equivalent of geohashes may be expressed with two hash symbols (`##`) +followed by up to 60 bits: + +```questdb-sql +INSERT INTO my_geo_data VALUES(##0111001001001001000111000110) +``` + +Implicit casts from strings to literal geohashes is possible, but less efficient +as string conversion to geohash must be performed: + +```questdb-sql +INSERT INTO my_geo_data VALUES(#u33, #u33d8b12) +-- equivalent to +INSERT INTO my_geo_data VALUES('u33', 'u33d8b12') +``` + +`NULL` values reserve 1 bit which means 8-bit geohashes are stored in 9-bits as +`short`s internally. 
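+As a small sketch of that last point, `NULL` can be written to the geohash columns of the `geo_data` table created above, regardless of their precision:
+
+```questdb-sql
+-- NULL is accepted for geohash columns of any precision
+-- (geo_data was created above with g5c geohash(5c) and g29b geohash(29b))
+INSERT INTO geo_data VALUES(null, null);
+```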
+ +### Specifying geohash precision + +The size of the `geohash` type may be: + +- 1 to 12 chars or +- 1 to 60 bits + +The following table shows all options for geohash precision using `char`s and +the calculated area of the grid the geohash refers to: + +| Type | Example | Area | +| -------------- | --------------- | ----------------- | +| `geohash(1c)` | `#u` | 5,000km × 5,000km | +| `geohash(2c)` | `#u3` | 1,250km × 625km | +| `geohash(3c)` | `#u33` | 156km × 156km | +| `geohash(4c)` | `#u33d` | 39.1km × 19.5km | +| `geohash(5c)` | `#u33d8` | 4.89km × 4.89km | +| `geohash(6c)` | `#u33d8b` | 1.22km × 0.61km | +| `geohash(7c)` | `#u33d8b1` | 153m × 153m | +| `geohash(8c)` | `#u33d8b12` | 38.2m × 19.1m | +| `geohash(9c)` | `#u33d8b121` | 4.77m × 4.77m | +| `geohash(10c)` | `#u33d8b1212` | 1.19m × 0.596m | +| `geohash(11c)` | `#u33d8b12123` | 149mm × 149mm | +| `geohash(12c)` | `#u33d8b121234` | 37.2mm × 18.6mm | + +For geohashes with size determined by `b` for bits, the following table compares +the precision of some geohashes with units expressed in bits compared to chars: + +| Type (char) | Equivalent to | +| -------------- | -------------- | +| `geohash(1c)` | `geohash(5b)` | +| `geohash(6c)` | `geohash(30b)` | +| `geohash(12c)` | `geohash(60b)` | + +### Casting geohashes + +Explicit casts are not necessary, but given certain constraints, it may be +required to cast from strings to geohashes. Empty strings are cast as `null` for +geohash values which are stored in the column with all bits set: + +```questdb-sql +INSERT INTO my_geo_data VALUES(cast({my_string} as geohash(8c)) +``` + +It may be desirable to cast as geohashes in the circumstance where a table with +a desired schema should be created such as the following query. Note that the +use of `WHERE 1 != 1` means that no rows are inserted, only the table schema is +prepared: + +```questdb-sql +CREATE TABLE new_table AS +(SELECT cast(null AS geohash(4c)) gh4c) +FROM source_table WHERE 1 != 1 +``` + +Geohash types can be cast from higher to lower precision, but not from lower to +higher precision: + +```questdb-sql +-- The following cast is valid: +CAST(#123 as geohash(1c)) +-- Invalid (low-to-high precision): +CAST(#123 as geohash(4c)) +``` + +### make_geohash() function + +Need to create a geohash? Use `make_geohash()` + +Returns a geohash equivalent of latitude and longitude, with precision specified +in bits. + +```sql title=make_geohash(lat,long,bits) +SELECT make_geohash(142.89124148, -12.90604153, 40) +``` + +Returns: + +``` +rjtwedd0 +``` + +Use this function via: + +- SQL over HTTP +- PostgreSQL wire protocol +- [Java embedded usage](/docs/concept/geohashes/#java-embedded-usage) scenario + +For more information on the `make_geohash()` function, see the +[Spatial functions](/docs/reference/function/spatial/#make_geohash) page. 
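+As a further illustration, `make_geohash()` can also be used directly in an `INSERT` to populate a geohash column. The sketch below assumes a hypothetical `rides` table and reuses the illustrative coordinates from above:
+
+```questdb-sql
+-- a sketch: computing a 40-bit geohash from latitude/longitude at insert time
+CREATE TABLE IF NOT EXISTS rides (ts TIMESTAMP, g8c GEOHASH(8c)) TIMESTAMP(ts);
+INSERT INTO rides VALUES (now(), make_geohash(142.89124148, -12.90604153, 40));
+```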
+ +## SQL examples + +The following queries create a table with two `geohash` type columns of varying +precision and insert geohashes as string values: + +```questdb-sql +CREATE TABLE my_geo_data (g1c geohash(1c), g8c geohash(8c)); +INSERT INTO my_geo_data values(#u, #u33d8b12); +``` + +Larger-precision geohashes are truncated when inserted into smaller-precision +columns, and inserting smaller-precision geohases into larger-precision columns +produces an error, i.e.: + +```questdb-sql +-- SQL will execute successfully with 'u33d8b12' truncated to 'u' +INSERT INTO my_geo_data values(#u33d8b12, #eet531sq); +-- ERROR as '#e' is too short to insert into 8c_geohash column +INSERT INTO my_geo_data values(#u, #e); +``` + +Performing geospatial queries is done by checking if geohash values are equal to +or within other geohashes. Consider the following table: + +```questdb-sql +CREATE TABLE geo_data + (ts timestamp, + device_id symbol, + g1c geohash(1c), + g8c geohash(8c)), +index(device_id) timestamp(ts); +``` + +This creates a table with a `symbol` type column as an identifier and we can +insert values as follows: + +```questdb-sql +INSERT INTO geo_data values(now(), 'device_1', #u, #u33d8b12); +INSERT INTO geo_data values(now(), 'device_1', #u, #u33d8b18); +INSERT INTO geo_data values(now(), 'device_2', #e, #ezzn5kxb); +INSERT INTO geo_data values(now(), 'device_1', #u, #u33d8b1b); +INSERT INTO geo_data values(now(), 'device_2', #e, #ezzn5kxc); +INSERT INTO geo_data values(now(), 'device_3', #e, #u33dr01d); +``` + +This table contains the following values: + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:04.669312Z | device_1 | u | u33d8b12 | +| 2021-09-02T14:20:06.553721Z | device_1 | u | u33d8b12 | +| 2021-09-02T14:20:07.095639Z | device_1 | u | u33d8b18 | +| 2021-09-02T14:20:07.721444Z | device_2 | e | ezzn5kxb | +| 2021-09-02T14:20:08.241489Z | device_1 | u | u33d8b1b | +| 2021-09-02T14:20:08.807707Z | device_2 | e | ezzn5kxc | +| 2021-09-02T14:20:09.280980Z | device_3 | e | u33dr01d | + +We can check if the last-known location of a device is a specific geohash with +the following query which will return an exact match based on geohash: + +```questdb-sql +SELECT * FROM geo_data WHERE g8c = #u33dr01d LATEST ON ts PARTITION BY device_id +``` + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:09.280980Z | device_3 | e | u33dr01d | + +### First and last functions + +The use of `first()` and `last()` functions within geospatial queries has been +significantly optimized so that common types of queries relating to location are +improved. This means that queries such as "last-known location" by indexed +column for a given time range or sample bucket is specifically optimized for +query speed over large datasets: + +```questdb-sql +SELECT ts, last(g8c) FROM geo_data WHERE device_id = 'device_3'; +-- first and last locations sample by 1 hour: +SELECT ts, last(g8c), first(g8c) FROM geo_data +WHERE device_id = 'device_3' sample by 1h; +``` + +### Within operator + +The `within` operator can be used as a prefix match to evaluate if a geohash is +equal to or is within a larger grid. + +It can only be applied in `LATEST ON` queries and all symbol columns within the +query **must be indexed**. 
+ +The following query will return the most recent entries by device ID if the +`g8c` column contains a geohash within `u33d`: + +```questdb-sql title="LATEST BY usage" +SELECT * FROM geo_data +WHERE g8c within(#u33d) +LATEST ON ts PARTITION BY device_id; +``` + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:08.241489Z | device_1 | u | u33d8b1b | +| 2021-09-02T14:20:09.280980Z | device_3 | e | u33dr01d | + +For more information on the use of this operator, see the +[spatial operators](/docs/reference/operators/spatial/) reference. + +## Java embedded usage + +Geohashes are inserted into tables via Java (embedded) QuestDB instance through +the selected `Writer`'s `putGeoHash` method. The `putGeoHash` method accepts +`LONG` values natively with the destination precision. Additionally, +`GeoHashes.fromString` may be used for string conversion, but comes with some +performance overhead as opposed to `long` values directly. + +Depending on whether the table is a [WAL](/docs/concept/write-ahead-log/) table +or not, the following components may be used: + +- `TableWriter` is used to write data directly into a table. +- `WalWriter` is used to write data into a WAL-enabled table via WAL. +- `TableWriterAPI` is used for both WAL and non-WAL tables, as it requests the + suitable `Writer` based on the table metadata. + +```java title="TableWriter" +// Insert data into a non-WAL table: +final TableToken tableToken = engine.getTableToken("geohash_table"); +try (TableWriter writer = engine.getTableWriter(ctx.getCairoSecurityContext(), tableToken, "test")) { + for (int i = 0; i < 10; i++) { + TableWriter.Row row = writer.newRow(); + row.putSym(0, "my_device"); + // putGeoStr(columnIndex, hash) + row.putGeoStr(1, "u33d8b1b"); + // putGeoHashDeg(columnIndex, latitude, longitude) + row.putGeoHashDeg(2, 48.669, -4.329) + row.append(); + } + writer.commit(); +} + +``` + +```java title="WalWriter" +// Insert data into a WAL table: +final TableToken tableToken = engine.getTableToken("geohash_table"); +try (WalWriter writer = engine.getWalWriter(ctx.getCairoSecurityContext(), tableToken)) { + for (int i = 0; i < 10; i++) { + TableWriter.Row row = writer.newRow(); + row.putSym(0, "my_device"); + // putGeoStr(columnIndex, hash) + row.putGeoStr(1, "u33d8b1b"); + // putGeoHashDeg(columnIndex, latitude, longitude) + row.putGeoHashDeg(2, 48.669, -4.329) + row.append(); + } + writer.commit(); + + // Apply WAL to the table + try (ApplyWal2TableJob walApplyJob = new ApplyWal2TableJob(engine, 1, 1)) { + while (walApplyJob.run(0)); + } +} + +``` + +```java title="TableWriterAPI" +// Insert table into either a WAL or a non-WAL table: +final TableToken tableToken = engine.getTableToken("geohash_table"); +try (TableWriterAPI writer = engine.getTableWriterAPI(ctx.getCairoSecurityContext(), tableToken, "test")) { + for (int i = 0; i < 10; i++) { + TableWriter.Row row = writer.newRow(); + row.putSym(0, "my_device"); + // putGeoStr(columnIndex, hash) + row.putGeoStr(1, "u33d8b1b"); + // putGeoHashDeg(columnIndex, latitude, longitude) + row.putGeoHashDeg(2, 48.669, -4.329) + row.append(); + } + writer.commit(); + + // Apply WAL to the table + try (ApplyWal2TableJob walApplyJob = new ApplyWal2TableJob(engine, 1, 1)) { + while (walApplyJob.run(0)); + } +} +``` + +Reading geohashes via Java is done by means of the following methods: + +- `Record.getGeoByte(columnIndex)` +- `Record.getGeoShort(columnIndex)` +- `Record.getGeoInt(columnIndex)` +- 
`Record.getGeoLong(columnIndex)` + +Therefore it's necessary to know the type of the column beforehand through +column metadata by index: + +```java +ColumnType.tagOf(TableWriter.getMetadata().getColumnType(columnIndex)); +``` + +Invoking the method above will return one of the following: + +- `ColumnType.GEOBYTE` +- `ColumnType.GEOSHORT` +- `ColumnType.GEOINT` +- `ColumnType.GEOLONG` + +For more information and detailed examples of using table readers and writers, +see the [Java API documentation](/docs/reference/api/java-embedded/). + +## InfluxDB Line Protocol + +Geohashes may also be inserted via InfluxDB Line Protocol by the following +steps: + +1. Create a table with columns of geohash type beforehand: + +```questdb-sql +CREATE TABLE tracking (ts timestamp, geohash geohash(8c)); +``` + +2. Insert via InfluxDB Line Protocol using the `geohash` field: + +```bash +tracking geohash="46swgj10" +``` + +:::note + +The InfluxDB Line Protocol parser does not support geohash literals, only +strings. This means that table columns of type `geohash` type with the desired +precision must exist before inserting rows with this protocol. + +If a value cannot be converted or is omitted it will be set as `NULL` + +::: + +Inserting geohashes with larger precision than the column it is being inserted +into will result in the value being truncated, for instance, given a column with +`8c` precision: + +```bash +# Inserting the following line +geo_data geohash="46swgj10r88k" +# Equivalent to truncating to this value: +geo_data geohash="46swgj10" +``` + +## CSV import + +Geohashes may also be inserted via +[REST API](/docs/guides/importing-data-rest/). In order to perform inserts in +this way; + +1. Create a table with columns of geohash type beforehand: + +```questdb-sql +CREATE TABLE tracking (ts timestamp, geohash geohash(8c)); +``` + +Note that you may skip this step, if you specify column types in the `schema` +JSON object. + +2. Import the CSV file via REST API using the `geohash` field: + +```bash +curl -F data=@tracking.csv 'http://localhost:9000/imp?name=tracking' +``` + +The `tracking.csv` file's contents may look like the following: + +```csv +ts,geohash +17/01/2022 01:02:21,46swgj10 +``` + +Just like InfluxDB Line Protocol, CSV import supports geohash strings only, so +the same restrictions apply. + +## The PostgreSQL wire protocol + +Geohashes may also be used over Postgres wire protocol as other data types. When +querying geohash values over Postgres wire protocol, QuestDB always returns +geohashes in text mode (i.e. as strings) as opposed to binary + +The Python example below demonstrates how to connect to QuestDB over postgres +wire, insert and query geohashes. It uses the +[psychopg3](https://www.psycopg.org/psycopg3/docs/) adapter. 
+ +To install `psychopg3`, use `pip`: + +```shell +python3 -m pip install "psycopg[binary]" +``` + +```python +import psycopg as pg +import time +# Connect to an existing QuestDB instance +conn_str = 'user=admin password=quest host=127.0.0.1 port=8812 dbname=qdb' +with pg.connect(conn_str, autocommit=True) as connection: + # Open a cursor to perform database operations + with connection.cursor() as cur: + cur.execute(''' + CREATE TABLE IF NOT EXISTS geo_data ( + ts timestamp, + device_id symbol index, + g1c geohash(1c), + g8c geohash(8c) + ) + timestamp(ts); + ''') + print('Table created.') + + cur.execute("INSERT INTO geo_data values(now(), 'device_1', 'u', 'u33d8b12');") + cur.execute("INSERT INTO geo_data values(now(), 'device_1', 'u', 'u33d8b18');") + cur.execute("INSERT INTO geo_data values(now(), 'device_2', 'e', 'ezzn5kxb');") + cur.execute("INSERT INTO geo_data values(now(), 'device_3', 'e', 'u33dr01d');") + print('Data in geo_data table:') + cur.execute('SELECT * FROM geo_data;') + records = cur.fetchall() + for row in records: + print(row) + print('Records within the "u33d" geohash:') + cur.execute('SELECT * FROM geo_data WHERE g8c within(#u33d) LATEST ON ts PARTITION BY device_id;') + records = cur.fetchall() + for row in records: + print(row) +``` diff --git a/docs/concept/indexes.md b/docs/concept/indexes.md new file mode 100644 index 00000000..d850242d --- /dev/null +++ b/docs/concept/indexes.md @@ -0,0 +1,140 @@ +--- +title: Indexes +sidebar_label: Indexes +description: + Explanation on how indexes work as well as the pros and cons that you need to + be aware of when using them. +--- + +An index stores the row locations for each value of the target column in order +to provide faster read access. It allows you to bypass full table scans by +directly accessing the relevant rows during queries with `WHERE` conditions. + +Indexing is available for [symbol](/docs/concept/symbol/) columns. Index support +for other types will be added over time. + +## Index creation and deletion + +The following are ways to index a `symbol` column: + +- At table creation time using + [CREATE TABLE](/docs/reference/sql/create-table/#index) +- Using + [ALTER TABLE ALTER COLUMN ADD INDEX](/docs/reference/sql/alter-table-alter-column-add-index/) + to index an existing `symbol` column + +To delete an index: + +- [ALTER TABLE ALTER COLUMN DROP INDEX](/docs/reference/sql/alter-table-alter-column-drop-index/) + +## How indexes work + +Index creates a table of row locations for each distinct value for the target +[symbol](/docs/concept/symbol/). Once the index is created, inserting data into +the table will update the index. Lookups on indexed values will be performed in +the index table directly which will provide the memory locations of the items, +thus avoiding unnecessary table scans. + +Here is an example of a table and its index table. + +```shell +Table Index +|Row ID | Symbol | Value | | Symbol | Row IDs | +| 1 | A | 1 | | A | 1, 2, 4 | +| 2 | A | 0 | | B | 3 | +| 3 | B | 1 | | C | 5 | +| 4 | A | 1 | +| 5 | C | 0 | +``` + +`INSERT INTO Table values(B, 1);` would trigger two updates: one for the Table, +and one for the Index. 
+ +```shell +Table Index +|Row ID | Symbol | Value | | Symbol | Row IDs | +| 1 | A | 1 | | A | 1, 2, 4 | +| 2 | A | 0 | | B | 3, 6 | +| 3 | B | 1 | | C | 5 | +| 4 | A | 1 | +| 5 | C | 0 | +| 6 | B | 1 | +``` + +### Index capacity + +When a symbol column is indexed, an additional **index capacity** can be defined +to specify how many row IDs to store in a single storage block on disk: + +- Server-wide setting: `cairo.index.value.block.size` with a default of `256` +- Column-wide setting: The + [`index` option](/docs/reference/sql/create-table/#column-indexes) for + `CREATE TABLE` +- Column-wide setting: + [ALTER TABLE COLUMN ADD INDEX](/docs/reference/sql/alter-table-alter-column-add-index/) + +Fewer blocks used to store row IDs achieves better performance. At the same time +over-sizing the setting will result in higher than necessary disk space usage. + +:::note + +- The **index capacity** and + [**symbol capacity**](/docs/concept/symbol/#usage-of-symbols) are different + settings. +- The index capacity value should not be changed, unless an user is aware of all + the implications. + +::: + +## Advantages + +Index allows you to greatly reduce the complexity of queries that span a subset +of an indexed column, typically when using `WHERE` clauses. + +Consider the following query applied to the above table +`SELECT sum(Value) FROM Table WHERE Symbol='A';` + +- **Without Index**, the query engine would scan the whole table in order to + perform the query. It will need to perform 6 operations (read each of the 6 + rows once). +- **With Index**, the query engine will first scan the index table, which is + considerably smaller. In our example, it will find A in the first row. Then, + the query engine would check the values at the specific locations 1, 2, 4 in + the table to read the corresponding values. As a result, it would only scan + the relevant rows in the table and leave irrelevant rows untouched. + +## Trade-offs + +- **Storage space**: The index will maintain a table with each distinct symbol + value and the locations where these symbols can be found. As a result, there + is a small cost of storage associated with indexing a symbol field. + +- **Ingestion performance**: Each new entry in the table will trigger an entry + in the Index table. This means that any write will now require two write + operations, and therefore take twice as long. + +## Examples + +### Table with index + +An example of `CREATE TABLE` command creating a table with an index capacity of +128: + +```questdb-sql +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP), + INDEX (symb CAPACITY 128) timestamp(ts); +-- equivalent to +CREATE TABLE my_table(symb SYMBOL INDEX CAPACITY 128, price DOUBLE, ts TIMESTAMP), + timestamp(ts); +``` + +### Index capacity + +Consider an example table with 200 unique stock symbols and 1,000,000,000 +records over time. The index will have to store 1,000,000,000 / 200 row IDs for +each symbol, i.e. 5,000,000 per symbol. + +- If the index capacity is set to 1,048,576 in this case, QuestDB will use 5 + blocks to store the row IDs. +- If the index capacity is set to 1,024 in this case, the block count will be + 4,883. diff --git a/docs/concept/jit-compiler.md b/docs/concept/jit-compiler.md new file mode 100755 index 00000000..b4c738e6 --- /dev/null +++ b/docs/concept/jit-compiler.md @@ -0,0 +1,101 @@ +--- +title: JIT compiler +sidebar_label: JIT compiler +description: + Documentation for usage of the just-in-time (JIT) SQL compiler in QuestDB. 
JIT + compilation enhances the performance of the system to run SQL queries which + contain filters with arithmetical expressions. +--- + +QuestDB includes a JIT compiler which is run on queries (and sub-queries) that +perform a full scan over a table or table partitions. The main goal behind this +feature is to improve performance for filters with arithmetical expressions. To +do so, the JIT compiler emits machine code with a single function that may also +use SIMD (vector) instructions. + +For details on the implementation, motivation, and internals of this feature, +see our [article about SQL JIT compilation](/blog/2022/01/12/jit-sql-compiler). +This post describes our storage model, how we built a JIT compiler for SQL and +our plans for improving it in future. + +## Queries eligible for JIT compilation + +The types of queries that are eligible for performance improvements via JIT +compilation are those which contain `WHERE` clauses. Here are some examples +which are supported based on the `cpu-only` data set from the +[Time Series Benchmark Suite](https://github.com/timescale/tsbs/blob/master/docs/questdb.md) +use case: + +```questdb-sql +-- basic filtering in WHERE clauses +SELECT count(), max(usage_user) FROM cpu WHERE usage_user > 75; + +-- sub-queries +SELECT * FROM cpu +WHERE usage_user > 75 +AND (region = 'us-west-1' OR region = 'us-east-1'); +``` + +## JIT compiler usage + +The JIT compiler is enabled by default for QuestDB 6.3 onwards. If you wish to +disable it, change the `cairo.sql.jit.mode` setting in the +[server configuration](/docs/reference/configuration/) file from `on` to `off`: + +```ini title="path/to/server.conf" +cairo.sql.jit.mode=off +``` + +Embedded API users are able to enable or disable the compiler globally by +providing their `CairoConfiguration` implementation. Alternatively, JIT +compilation can be changed for a single query by using the +`SqlExecutionContext#setJitMode` method. The latter may look like the following: + +```java +final CairoConfiguration configuration = new DefaultCairoConfiguration(temp.getRoot().getAbsolutePath()); +try (CairoEngine engine = new CairoEngine(configuration)) { + final SqlExecutionContextImpl ctx = new SqlExecutionContextImpl(engine, 1); + // Enable SQL JIT compiler + ctx.setJitMode(SqlJitMode.JIT_MODE_ENABLED); + // Subsequent query execution (called as usual) with have JIT enabled + try (SqlCompiler compiler = new SqlCompiler(engine)) { + try (RecordCursorFactory factory = compiler.compile("abc", ctx).getRecordCursorFactory()) { + try (RecordCursor cursor = factory.getCursor(ctx)) { + // ... + } + } + } +} +``` + +Server logs should contain references to `SQL JIT compiler mode`: + +```log +2021-12-16T09:25:34.472450Z A server-main SQL JIT compiler mode: on +``` + +Due to certain limitations noted below, JIT compilation won't take place for all +queries. To understand whether JIT compilation took place for a query, one will +see something similar in the server logs: + +```log +2021-12-16T09:35:01.738910Z I i.q.g.SqlCompiler plan [q=`select-group-by count() count from (select [usage_user] from cpu timestamp (timestamp) where usage_user > 75)`, fd=73] +2021-12-16T09:35:01.742777Z I i.q.g.SqlCodeGenerator JIT enabled for (sub)query [tableName=cpu, fd=73] +``` + +## Known limitations + +The current implementation of the JIT SQL compiler has a number of limitations: + +- Only x86-64 CPUs are currently supported. +- Vectorized filter execution requires AVX2 instruction set. 
+- Filters with any SQL function, such as `now()`, or `abs()`, or `round()`, are + not supported. +- Filters with any pseudo-function or operator, such as `in()` on symbol column, + or `between` on non-designated timestamp column, or `within` on geohash + column, are not supported. +- Only the following arithmetic operations are allowed to be present in the + filter: `+`, `-`, `*`, `/`. +- Only filters with fixed-size columns are supported: BOOLEAN, BYTE, GEOBYTE, + SHORT, GEOSHORT, CHAR, INT, GEOINT, SYMBOL, FLOAT, LONG, GEOLONG, DATE, + TIMESTAMP, DOUBLE, UUID. diff --git a/docs/concept/partitions.md b/docs/concept/partitions.md new file mode 100644 index 00000000..46b95abd --- /dev/null +++ b/docs/concept/partitions.md @@ -0,0 +1,195 @@ +--- +title: Time Partitions +sidebar_label: Partitions +description: + Overview of QuestDB's partition system for time-series. This is an important + feature that will help you craft more efficient queries. +--- + +[Database partitioning](/glossary/database-partitioning/) is the technique that +splits data in a large database into smaller chunks in order to improve the +performance and scalability of the database system. + +QuestDB offers the option to partition tables by intervals of time. Data for +each interval is stored in separate sets of files. + +import Screenshot from "@theme/Screenshot" + + + +## Properties + +- Partitioning is only possible on tables with a + [designated timestamp](/docs/concept/designated-timestamp/). +- Available partition intervals are `NONE`, `YEAR`, `MONTH`, `WEEK`, `DAY`, and + `HOUR`. +- Default behavior is`PARTITION BY NONE` when using + [CREATE TABLE](/docs/reference/sql/create-table/). +- Default behavior is `PARTITION BY DAY` via + [InfluxDB Line Protocol ingestion](/docs/reference/api/ilp/overview/). This is + set by `line.default.partition.by`. +- Partitions are defined at table creation. For more information, refer to the + [CREATE TABLE section](/docs/reference/sql/create-table/). +- The naming convention for partition directories is as follows: + +| Table Partition | Partition format | +| --------------- | ---------------- | +| `HOUR` | `YYYY-MM-DD-HH` | +| `DAY` | `YYYY-MM-DD` | +| `WEEK` | `YYYY-Www` | +| `MONTH` | `YYYY-MM` | +| `YEAR` | `YYYY` | + +## Advantages of adding time partitions + +We recommend partitioning tables to benefit from the following advantages: + +- Reducing disk IO for timestamp interval searches. This is because our SQL + optimizer leverages partitioning. +- Significantly improving calculations and seek times. This is achieved by + leveraging the chronology and relative immutability of data for previous + partitions. +- Separating data files physically. This makes it easy to implement file + retention policies or extract certain intervals. +- Enables out-of-order indexing. From QuestDB 7.2, heavily out-of-order commits + can [split the partitions](#splitting-and-squashing-time-partitions) into + parts to reduce + [write amplification](/docs/deployment/capacity-planning/#write-amplification). + +## Checking time partition information + +The following SQL keyword and function are implemented to present the partition +information of a table: + +- The SQL keyword [SHOW PARTITIONS](/docs/reference/sql/show/#show-partitions) + returns general partition information for the selected table. 
- The function [table_partitions('tableName')](/docs/reference/function/meta/)
  returns the same information as `SHOW PARTITIONS` and can be used in a
  `SELECT` statement to support more complicated queries involving `WHERE`,
  `JOIN`, and `UNION`.

## Splitting and squashing time partitions

From QuestDB 7.2, heavily out-of-order commits can split partitions into parts
to reduce write amplification. When data is merged into an existing partition
as a result of an out-of-order insert, the partition is split into two parts:
the prefix sub-partition and the suffix sub-partition.

A partition split happens when both of the following are true:

- The prefix size is bigger than the combined size of the suffix and the rows
  to be merged.
- The estimated prefix size on disk is higher than
  `cairo.o3.partition.split.min.size` (50MB by default).

Partition splitting is iterative, so a partition can be split into more than
two parts over several commits. To keep the number of parts under control,
QuestDB squashes them back together according to the following principles:

- For the last (yearly, ..., hourly) partition, its parts are squashed together
  when the number of parts exceeds `cairo.o3.last.partition.max.splits` (20 by
  default).
- For all partitions except the last one, the QuestDB engine squashes them
  aggressively to maintain only one physical partition at the end of every
  commit.

All partition operations (ALTER TABLE
[ATTACH](/docs/reference/sql/alter-table-attach-partition/)/
[DETACH](/docs/reference/sql/alter-table-detach-partition/)/
[DROP](/docs/reference/sql/alter-table-drop-partition/) PARTITION) do not
treat partition splits as individual partitions and instead work on the table
partitioning unit (year, week, ..., hour).

For example, when a daily partition consisting of several parts is dropped, all
the parts belonging to the given date are dropped. Similarly, when a multipart
daily partition is detached, it is squashed into a single piece first and then
detached.
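Both thresholds mentioned above can be adjusted in the server configuration. A
minimal sketch is shown below; the values are illustrative placeholders rather
than recommendations, and the defaults are usually a sensible starting point:

```ini title="server.conf"
# illustrative values only -- the defaults are 50MB and 20 respectively
# minimum estimated prefix size on disk before an O3 merge may split a partition
cairo.o3.partition.split.min.size=100M
# number of parts the last partition may accumulate before it is squashed
cairo.o3.last.partition.max.splits=10
```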
+ +### Examples + +For example, Let's consider the following table `x`: + +```questdb-sql +CREATE TABLE x AS ( + SELECT + cast(x as int) i, + - x j, + rnd_str(5, 16, 2) as str, + timestamp_sequence('2023-02-04T00', 60 * 1000L) ts + FROM + long_sequence(60 * 23 * 2 * 1000) +) timestamp (ts) PARTITION BY DAY WAL; +``` + +```questdb-sql +SHOW PARTITIONS FROM x; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | ---------- | --------------------------- | --------------------------- | ------- | --------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | DAY | 2023-02-04 | 2023-02-04T00:00:00.000000Z | 2023-02-04T23:59:59.940000Z | 1440000 | 71281136 | 68.0 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 1 | DAY | 2023-02-05 | 2023-02-05T00:00:00.000000Z | 2023-02-05T21:59:59.940000Z | 1320000 | 100663296 | 96.0 MiB | FALSE | TRUE | TRUE | FALSE | FALSE | + +Inserting an out-of-order row: + +```questdb-sql +INSERT INTO + x(ts) +VALUES + ('2023-02-05T21'); + + +SHOW PARTITIONS FROM x; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | ------------------------ | --------------------------- | --------------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | DAY | 2023-02-04 | 2023-02-04T00:00:00.000000Z | 2023-02-04T23:59:59.940000Z | 1440000 | 71281136 | 68.0 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 1 | DAY | 2023-02-05 | 2023-02-05T00:00:00.000000Z | 2023-02-05T20:59:59.880000Z | 1259999 | 65388544 | 62.4 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 2 | DAY | 2023-02-05T205959-880001 | 2023-02-05T20:59:59.940000Z | 2023-02-05T21:59:59.940000Z | 60002 | 83886080 | 80.0 MiB | FALSE | TRUE | TRUE | FALSE | FALSE | + +To merge the new partition part back to the main partition for downgrading: + +```questdb-sql +ALTER TABLE x SQUASH PARTITIONS; + +SHOW PARTITIONS FROM x; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | ---------- | --------------------------- | --------------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | DAY | 2023-02-04 | 2023-02-04T00:00:00.000000Z | 2023-02-04T23:59:59.940000Z | 1440000 | 71281136 | 68.0 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 1 | DAY | 2023-02-05 | 2023-02-05T00:00:00.000000Z | 2023-02-05T21:59:59.940000Z | 1320001 | 65388544 | 62.4 MiB | FALSE | TRUE | TRUE | FALSE | FALSE | + +## Storage example + +Each partition effectively is a directory on the host machine corresponding to +the partitioning interval. In the example below, we assume a table `trips` that +has been partitioned using `PARTITION BY MONTH`. + +``` +[quest-user trips]$ dir +2017-03 2017-10 2018-05 2019-02 +2017-04 2017-11 2018-06 2019-03 +2017-05 2017-12 2018-07 2019-04 +2017-06 2018-01 2018-08 2019-05 +2017-07 2018-02 2018-09 2019-06 +2017-08 2018-03 2018-10 +2017-09 2018-04 2018-11 +``` + +Each partition on the disk contains the column data files of the corresponding +timestamp interval. 
+ +``` +[quest-user 2019-06]$ dir +_archive cab_type.v dropoff_latitude.d ehail_fee.d +cab_type.d congestion_surcharge.d dropoff_location_id.d extra.d +cab_type.k dropoff_datetime.d dropoff_longitude.d fare_amount.d +``` diff --git a/docs/concept/root-directory-structure.md b/docs/concept/root-directory-structure.md new file mode 100644 index 00000000..02df30ac --- /dev/null +++ b/docs/concept/root-directory-structure.md @@ -0,0 +1,272 @@ +--- +title: Root directory structure +sidebar_label: Root directory +description: Contents of the folder explained. +--- + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + +QuestDB creates the following file structure in its `root_directory`: + +```filestructure +questdb +├── conf +├── db +├── log +├── public +└── snapshot (optional) +``` + +By default, QuestDB's root directory will be the following: + + + + + + + + + + +```shell +$HOME/.questdb +``` + + + + + + +Path on Macs with Apple Silicon (M1 or M2) chip: + +```shell +/opt/homebrew/var/questdb +``` + +Path on Macs with Intel chip: + +```shell +/usr/local/var/questdb +``` + + + + + + + +```shell +C:\Windows\System32\qdbroot +``` + + + + + + + +## `conf` directory + +Contains configuration files for QuestDB: + +```filestructure +├── conf +│   ├── date.formats +│   ├── mime.types +│   └── server.conf +``` + +| file | description | +| -------------- |-----------------------------------------------------------------------------------------------------------------| +| `date.formats` | A list of date formats in plain text. | +| `mime.types` | Mapping file used by the HTTP server to map file extension to response type when an user downloads a file. | +| `server.conf` | Server configuration file. Find out more in the [server configuration](/docs/reference/configuration/) section. | + +## `db` directory + +This directory contains all the files related to database tables. It is +organised as follows: + +- Each table has its own `table_directory` under `root_directory/db/table_name` +- Within a `table_directory`, each [partition](/docs/concept/partitions/) has its + own `partition_directory`. +- Within each `partition directory`, each column has its own `column_file`, for + example `mycolumn.d` +- If a given column has an [index](/docs/concept/indexes/), then there will also + be an `index_file`, for example `mycolumn.k` + +The table also stores metadata in `_meta` files: + +```filestructure +├── db +│   ├── Table +│   │   │   +│   │   ├── Partition 1 +│   │   │   ├── _archive +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   ├── column2.k +│   │   │   └── ... +│   │   ├── Partition 2 +│   │   │   ├── _archive +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   ├── column2.k +│   │   │   └── ... +│   │   │   +│   │   ├── _meta +│   │   ├── _txn +│   │   └── _cv +│   └── table_1.lock + +``` + +If the table is not partitioned, data is stored in a directory called `default`: + +```filestructure +├── db +│   ├── Table +│   │   │   +│   │   ├── default +│   │   │   ├── _archive +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   ├── column2.k +│   │   │   └── ... 
+│   │   ├── _meta +│   │   └── _txn +│   └── table_1.lock +``` + +For a [WAL table](/docs/concept/write-ahead-log/), the table structure contains one or more `wal` folders and a +`seq` folder representing the Sequencer: + +``` wal table filestructure +├── db +│   ├── Table +│   │   │   +│   │   ├── Partition 1 +│   │   │   ├── _archive +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   ├── column2.k +│   │   │   └── ... +│   │   ├── Partition 2 +│   │   │   ├── _archive +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   ├── column2.k +│   │   │   └── ... +│   │   ├── txn_seq +│   │   │   ├── _meta +│   │   │   ├── _txnlog +│   │   │   └── _wal_index.d +│   │   ├── wal1 +│   │   │   └── 0 +│   │   │   ├── _meta +│   │   │   ├── _event +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │ └── ... +| | |  +│   │   ├── wal2 +│   │   │   └── 0 +│   │   │ │  ├── _meta +│   │   │ │  ├── _event +│   │   │ │  ├── column1.d +│   │   │ │  ├── column2.d +│   │   │ │  └── ... +│   │   │   └── 1 +│   │   │      ├── _meta +│   │   │   ├── _event +│   │   │   ├── column1.d +│   │   │   ├── column2.d +│   │   │   └── ... +│   │   │  +│   │   ├── _meta +│   │   ├── _txn +│   │   └── _cv +│   | +``` + +:::caution + +As tempting as it may be to delete partitions by manually removing the +directories from the file system, we really discourage this. The partitions are +organised with metadata and deleting them directly could corrupt the table. We +recommend you use +[ALTER TABLE DROP PARTITION](/docs/reference/sql/alter-table-drop-partition/) for +this effect. + +::: + +## `log` directory + +Contains the [log files](/docs/troubleshooting/log/) for QuestDB: + +```filestructure +├── log +│   ├── stdout-2020-04-15T11-59-59.txt +│   └── stdout-2020-04-12T13-31-22.txt +``` + +Log files look like this: + +```shell +2020-04-15T16:42:32.879970Z I i.q.c.TableReader new transaction [txn=2, transientRowCount=1, fixedRowCount=1, maxTimestamp=1585755801000000, attempts=0] +2020-04-15T16:42:32.880051Z I i.q.g.FunctionParser call to_timestamp('2020-05-01:15:43:21','yyyy-MM-dd:HH:mm:ss') -> to_timestamp(Ss) +2020-04-15T16:42:32.880657Z I i.q.c.p.WriterPool >> [table=`table_1`, thread=12] +2020-04-15T16:42:32.881330Z I i.q.c.AppendMemory truncated and closed [fd=32] +2020-04-15T16:42:32.881448Z I i.q.c.AppendMemory open /usr/local/var/questdb/db/table_1/2020-05/timestamp.d [fd=32, pageSize=16777216] +2020-04-15T16:42:32.881708Z I i.q.c.AppendMemory truncated and closed [fd=33] +2020-04-15T16:42:32.881830Z I i.q.c.AppendMemory open /usr/local/var/questdb/db/table_1/2020-05/temperature.d [fd=33, pageSize=16777216] +2020-04-15T16:42:32.882092Z I i.q.c.AppendMemory truncated and closed [fd=34] +2020-04-15T16:42:32.882210Z I i.q.c.AppendMemory open /usr/local/var/questdb/db/table_1/2020-05/humidity.d [fd=34, pageSize=16777216] +2020-04-15T16:42:32.882248Z I i.q.c.TableWriter switched partition to '/usr/local/var/questdb/db/table_1/2020-05' +2020-04-15T16:42:32.882571Z I i.q.c.p.WriterPool << [table=`table_1`, thread=12] +2020-04-15T16:44:33.245144Z I i.q.c.AppendMemory truncated and closed [fd=32] +2020-04-15T16:44:33.245418Z I i.q.c.AppendMemory truncated and closed [fd=33] +2020-04-15T16:44:33.245712Z I i.q.c.AppendMemory truncated and closed [fd=34] +2020-04-15T16:44:33.246096Z I i.q.c.ReadWriteMemory truncated and closed [fd=30] +2020-04-15T16:44:33.246217Z I i.q.c.ReadOnlyMemory closed [fd=31] +2020-04-15T16:44:33.246461Z I i.q.c.TableWriter closed 'table_1' 
+2020-04-15T16:44:33.246492Z I i.q.c.p.WriterPool closed [table=`table_1`, reason=IDLE, by=12] +2020-04-15T16:44:33.247184Z I i.q.c.OnePageMemory closed [fd=28] +2020-04-15T16:44:33.247239Z I i.q.c.ReadOnlyMemory closed [fd=27] +2020-04-15T16:44:33.247267Z I i.q.c.TableReader closed 'table_1' +2020-04-15T16:44:33.247287Z I i.q.c.p.ReaderPool closed 'table_1' [at=0:0, reason=IDLE] +2020-04-15T16:44:39.763406Z I http-server disconnected [ip=127.0.0.1, fd=24] +2020-04-15T16:44:39.763729Z I i.q.c.h.HttpServer pushed +``` + +## `public` directory + +Contains the web files for the Web Console: + +```filestructure +└── public + ├── assets + │   ├── console-configuration.json + │   └── favicon.png + ├── index.html + ├── qdb.js + ├── qdb.css + └── ... +``` + +## `snapshot` directory + +Created when a filesystem (disk) [snapshot](/docs/reference/sql/snapshot/) is +collected. Contains table metadata file copies. + +## `tmp` directory + +Created when a [`COPY`](/docs/reference/sql/copy/) SQL command is run for a +partitioned table and no value is set for the `cairo.sql.copy.work.root` +configuration setting. Contains temporary import files like indexes or temporary +partitions. diff --git a/docs/concept/sql-execution-order.md b/docs/concept/sql-execution-order.md new file mode 100644 index 00000000..052aad89 --- /dev/null +++ b/docs/concept/sql-execution-order.md @@ -0,0 +1,34 @@ +--- +title: SQL execution order +sidebar_label: SQL execution order +description: + Execution order for SQL clauses in QuestDB. This covers the SQL keywords you + may already be familiar with as well as extensions to the language that are + unique to QuestDB. +--- + +QuestDB attempts to implement standard ANSI SQL. We also try to be compatible +with PostgreSQL, although parts of this are a work in progress. QuestDB +implements these clauses which have the following execution order: + +1. [FROM](/docs/reference/sql/select/) +2. [ON](/docs/reference/sql/join/) +3. [JOIN](/docs/reference/sql/join/) +4. [WHERE](/docs/reference/sql/where/) +5. [LATEST ON](/docs/reference/sql/latest-on/) +6. [GROUP BY](/docs/reference/sql/group-by/) (optional) +7. [WITH](/docs/reference/sql/with/) +8. [HAVING](/docs/concept/sql-extensions/#implicit-having) (implicit) +9. [SELECT](/docs/reference/sql/select/) +10. [DISTINCT](/docs/reference/sql/distinct/) +11. [ORDER BY](/docs/reference/sql/order-by/) +12. [LIMIT](/docs/reference/sql/limit/) + +We have also implemented sub-queries that users may execute at any part of a +query that mentions a table name. The sub-query implementation adds almost zero +execution cost to SQL. We encourage the use of sub-queries as they add flavors +of functional language features to traditional SQL. + +For more information on the SQL extensions in QuestDB which deviate from ANSI +SQL and PostgreSQL, see the +[SQL extensions documentation](/docs/concept/sql-extensions/). diff --git a/docs/concept/sql-extensions.md b/docs/concept/sql-extensions.md new file mode 100644 index 00000000..cccb60c6 --- /dev/null +++ b/docs/concept/sql-extensions.md @@ -0,0 +1,103 @@ +--- +title: SQL extensions +description: + QuestDB attempts to implement standard ANSI SQL with time-based extensions for + convenience. This document describes SQL extensions in QuestDB and how users + can benefit from them. +--- + +QuestDB attempts to implement standard ANSI SQL. We also try to be compatible +with PostgreSQL, although parts of this are a work in progress. 
This page +presents the main extensions we bring to SQL and the main differences that one +might find in SQL but not in QuestDB's dialect. + +## SQL extensions + +We have extended SQL to support our data storage model and simplify semantics of +time series analytics. + +### LATEST ON + +[LATEST ON](/docs/reference/sql/latest-on/) is a clause introduced to help find +the latest entry by timestamp for a given key or combination of keys as part of +a `SELECT` statement. + +```questdb-sql title="LATEST ON customer ID and currency" +SELECT * FROM balances +WHERE balance > 800 +LATEST ON ts PARTITION BY customer_id, currency; +``` + +### SAMPLE BY + +[SAMPLE BY](/docs/reference/sql/select/#sample-by) is used for time-based +[aggregations](/docs/reference/function/aggregation/) with an efficient syntax. +The short query below will return the simple average balance from a list of +accounts by one month buckets. + +```questdb-sql title="SAMPLE BY one month buckets" +SELECT avg(balance) FROM accounts SAMPLE BY 1M +``` + +### Timestamp search + +Timestamp search can be performed with regular operators, e.g `>`, `<=` etc. +However, QuestDB provides a +[native notation](/docs/reference/sql/where/#timestamp-and-date) which is faster +and less verbose. + +```questdb-sql title="Results in a given year" +SELECT * FROM scores WHERE ts IN '2018'; +``` + +## Differences from standard SQL + +### SELECT \* FROM is optional + +In QuestDB, using `SELECT * FROM` is optional, so `SELECT * FROM my_table;` will +return the same result as `my_table;`. While adding `SELECT * FROM` makes SQL +look more complete, there are examples where omitting these keywords makes +queries a lot easier to read. + +```questdb-sql title="Optional use of SELECT * FROM" +my_table; +-- equivalent to: +SELECT * FROM my_table; +``` + +### GROUP BY is optional + +The `GROUP BY` clause is optional and can be omitted as the QuestDB optimizer +derives group-by implementation from the `SELECT` clause. In standard SQL, users +might write a query like the following: + +```questdb-sql +SELECT a, b, c, d, sum(e) FROM tab GROUP BY a, b, c, d; +``` + +However, enumerating a subset of `SELECT` columns in the `GROUP BY` clause is +redundant and therefore unnecessary. The same SQL in QuestDB SQL-dialect can be +written as: + +```questdb-sql +SELECT a, b, c, d, sum(e) FROM tab; +``` + +### Implicit HAVING + +Let's look at another more complex example using `HAVING` in standard SQL: + +```questdb-sql +SELECT a, b, c, d, sum(e) +FROM tab +GROUP BY a, b, c, d +HAVING sum(e) > 100; +``` + +In QuestDB's dialect, featherweight sub-queries come to the rescue to create a +smaller, more readable query, without unnecessary repetitive aggregations. +`HAVING` functionality can be obtained implicitly as follows: + +```questdb-sql +(SELECT a, b, c, d, sum(e) s FROM tab) WHERE s > 100; +``` diff --git a/docs/concept/storage-model.md b/docs/concept/storage-model.md new file mode 100644 index 00000000..a6176a44 --- /dev/null +++ b/docs/concept/storage-model.md @@ -0,0 +1,103 @@ +--- +title: Storage model +sidebar_label: Storage model +description: + Overview of QuestDB's column-based storage model. It ensures table level + atomicity and durability while keeping low overhead for maximum performance. +--- + +QuestDB uses a [**columnar**](/glossary/columnar-database/) storage model. Data +is stored in tables with each column stored in its own file and its own native +format. 
New data is appended to the bottom of each column to allow data to be +organically retrieved in the same order that it was ingested. + +## Append model + +QuestDB appends one column at a time and each one is updated using the same +method. The tail of column file is mapped into the memory page in RAM and the +column append is effectively a memory write at an address. Once the memory page +is exhausted it is unmapped and a new page is mapped. + +**This method ensures minimum resource churn and consistent append latency.** + +import Screenshot from "@theme/Screenshot" + + + +## Read model + +Table columns are randomly accessible. Columns with fixed size data types are +read by translating the record number into a file offset by a simple bit shift. +The offset in the column file is then translated into an offset in a lazily +mapped memory page, where the required value is read from. + + + +## Consistency and durability + +QuestDB ensures table level **isolation** and **consistency** by applying table +updates **atomically**. Updates to a table are applied in the context of a table +transaction which is either committed or rolled back in an atomic operation. +Queries that are concurrent with table updates are consistent in the sense that +they will return data either as it was before or after the table transaction was +committed — no intermediate uncommitted data will be shown in a query result. + +To guarantee **atomicity**, each table maintains a `last_committed_record_count` +in a separate file. By convention, any table reader will never read more records +than the transaction count. This enables the **isolation** property: where +uncommitted data cannot be read. Since uncommitted data is appended directly to +the table, the transaction size is only limited by the available disk space. + +Once all data is appended, QuestDB `commit()` ensures that the transaction count +is updated atomically both in multi-threaded and multi-process environments. It +does so lock-free to ensure minimal impact on concurrent reads. + +The **consistency** assurance of the data stored is limited to QuestDB +auto-repairing abnormally terminated transactions. We do not yet support +user-defined constraints, checks, and triggers. + +By default, QuestDB relies on OS-level data **durability** for data files +leaving the OS to write dirty pages to disk. Data durability can also be +configured to invoke `msync()`/`fsync()` for column files on each commit at the +cost of reduced ingestion throughput. Consider enabling the `sync` commit mode +to improve data durability in the face of OS errors or power loss: + +```ini title="server.conf" +cairo.commit.mode=sync +``` + + + +It is important to note that, as a result of the increase of `msync()` and `fsync()` calls, CPU usage will increase especially with the frequent commit pattern. + +## Summary + +The QuestDB storage model uses memory-mapped files and cross-process atomic +transaction updates as a low-overhead method of inter-process communication. +Data committed by one process can be instantaneously read by another process, +either randomly (via queries) or incrementally (as a data queue). QuestDB +provides a variety of reader implementations. + + + + diff --git a/docs/concept/symbol.md b/docs/concept/symbol.md new file mode 100755 index 00000000..5a646a47 --- /dev/null +++ b/docs/concept/symbol.md @@ -0,0 +1,83 @@ +--- +title: Symbol +sidebar_label: Symbol +description: + Documentation for usage of the symbol data type in QuestDB. 
This type is used
  to store repetitive strings in order to enable optimizations on storage and
  search.
---

QuestDB introduces a data type called `symbol`: a data structure used to store
repetitive strings. Internally, `symbol` types are stored as a table of
integers and their corresponding string values.

This page presents the concept, the optional settings, and their implications
for `symbol` types.

## Advantages of `symbol` types

- Greatly improved query performance as string operations compare and write
  `int` types instead of `string`.
- Greatly improved storage efficiency, as each distinct `string` value is
  stored once and referenced by an `int`.
- Unobtrusive to the user because SQL execution has the same result as handling
  string values.
- Reduced complexity of database schemas by removing the need for explicit
  additional tables or joins.

## Properties

- Symbol tables are stored separately from column data.
- Fast conversion from `string` to `int` and vice versa when reading or writing
  data.
- Columns defined as `symbol` types support indexing.
- By default, QuestDB caches `symbol` types in memory for improved query speed
  and InfluxDB Line Protocol ingestion speed. The setting is configurable.

## Usage of `symbols`

### `Symbol` columns

Columns can be specified as `SYMBOL` using
[CREATE TABLE](/docs/reference/sql/create-table/), similar to other types:

```questdb-sql title="Create table with a SYMBOL type"
CREATE TABLE my_table
    (symb SYMBOL CAPACITY 128 NOCACHE, price DOUBLE, ts TIMESTAMP)
timestamp(ts);
```

The following additional symbol settings are defined, either globally as part of
the [server configuration](/docs/reference/configuration/) or locally when a
table is created:

- **Symbol capacity**: Optional setting used to indicate how many distinct
  values this column is expected to have. Based on the value used, the data
  structures will resize themselves when necessary, to allow QuestDB to function
  correctly. Underestimating the symbol value count may result in a drop in
  performance, whereas overestimating it may result in higher disk space and
  memory consumption. Symbol capacity is also used to set the initial symbol
  cache size when the cache is enabled.

  - Server-wide setting: `cairo.default.symbol.capacity` with a default of `256`
  - Column-wide setting: The
    [`CAPACITY` option](/docs/reference/sql/create-table/#symbol-capacity) for
    `CREATE TABLE`

- **Cache**: Optional setting specifying whether a symbol should be cached. When
  a `symbol` column is cached, QuestDB will use a Java heap-based hash table to
  resolve symbol values and keys. When a column has a large number of distinct
  symbol values (over 100,000, for example), the heap impact might be
  significant and may cause OutOfMemory errors, depending on the heap size. Not
  caching leverages a memory-mapped structure which can deal with larger value
  counts but is slower.

  - Server-wide setting: `cairo.default.symbol.cache.flag` with a default of
    `true`
  - Column-wide setting when a table is created: The
    [`CACHE | NOCACHE` keyword](/docs/reference/sql/create-table/#symbol-caching)
    for `CREATE TABLE`

### Symbols for column indexing

`Symbols` may also be indexed for faster query execution. See
[Index](/docs/concept/indexes/) for more information.
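As a brief illustration (the `sensors` table and `device_id` column below are
hypothetical), an index can be declared when the table is created or added to
an existing `symbol` column afterwards:

```questdb-sql
-- declare the index when the table is created
CREATE TABLE sensors (device_id SYMBOL INDEX, reading DOUBLE, ts TIMESTAMP)
  timestamp(ts);

-- or add an index to an existing symbol column later
ALTER TABLE sensors ALTER COLUMN device_id ADD INDEX;
```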
diff --git a/docs/concept/write-ahead-log.md b/docs/concept/write-ahead-log.md new file mode 100644 index 00000000..233f19cc --- /dev/null +++ b/docs/concept/write-ahead-log.md @@ -0,0 +1,135 @@ +--- +title: Write-Ahead Log (WAL) +sidebar_label: Write-Ahead Log +description: + Documentation for properties of a WAL table and comparison with its non-WAL + counterpart. +--- + +import Screenshot from "@theme/Screenshot" + +In QuestDB 7.0, we added a new approach to ingest data using a write-ahead log +(WAL). This page introduces the properties of a WAL-enabled table (WAL table) +and compares it to a non-WAL table. It also contains a summary of key +components, relevant functions, as well as related SQL keywords. + +## Properties + +A WAL table must be [partitioned](/docs/concept/partitions/). It permits the +following concurrent transactions: + +- Data ingestion through different interfaces +- Data modifications +- Table schema changes + +### Enabling WAL and configurations + +The following keywords enable WAL tables: + +- WAL table creation via [`CREATE TABLE`](/docs/reference/sql/create-table/) + +- Converting an existing table to a WAL table or vice versa via + [`SET TYPE`](/docs/reference/sql/alter-table-set-type/) following a database + restart. + +- Server-wide configuration via `cairo.wal.enabled.default` + - When `cairo.wal.enabled.default` is set to `true` (default), the + [`CREATE TABLE`](/docs/reference/sql/create-table/) SQL keyword generates + WAL tables without `WAL`. The `BYPASS WAL` keyword still works as expected. + +Parallel threads to apply WAL data to the table storage can be configured, see +[WAL table configuration](/docs/reference/configuration/#wal-table-configurations) +for more details. + +### Comparison + +The following table highlights the main difference between a WAL and a non-WAL +table: + +| WAL table | Non-WAL table | +| ------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- | +| Concurrent data ingestion via multiple interfaces | InfluxDB Line Protocol locks the table for ingestion; concurrent data ingestion via other interfaces is not allowed - `Table Busy`error | +| Unconstrained concurrent DDLs and DMLs | Concurrent DDLs and DMLs for InfluxDB Line Protocol interface only | +| Asynchronous operations - in rare situations there may be slight delays in data visibility | Synchronous operations - no-wait commits | +| Improved data freshness for `DROP` and `RENAME` of the table with a system-wide lock | No change | +| Some [impacts](#limitations) on existing operations | No change | + +### Limitations + +:::note + +We are working hard to reduce the below limitations. + +::: + +For a WAL table, the following existing operations may have different behaviors +from a non-WAL table: + +- [`UPDATE`](/docs/reference/sql/update/) + + - No row count returned + - No support for `JOIN` + +- `ALTER TABLE` + + - [`ADD COLUMN`](/docs/reference/sql/alter-table-add-column/) can only add 1 + column per statement + - Non-structural operations may fail silently. 
These are partition-level and + configuration operations: + + - [`ATTACH PARTITION`](/docs/reference/sql/alter-table-attach-partition/) + - [`DETACH PARTITION`](/docs/reference/sql/alter-table-detach-partition/) + - [`DROP PARTITION`](/docs/reference/sql/alter-table-drop-partition/) + - [`SET PARAM`](/docs/reference/sql/alter-table-set-param/) + +## Key components + +A WAL table uses the following components to manage concurrent commit requests: + +- **WAL**: acts as a dedicated API for each ingestion interface. When data is + ingested via multiple interfaces, dedicated `WALs` ensure that the table is + not locked by one interface only. + +- **Sequencer**: centrally manages transactions, providing a single source of + truth. The sequencer generates unique `txn` numbers as transaction identifiers + and keeps a log that tracks their allocation, preventing duplicates. This log + is called `TransactionLog` and is stored in a meta file called `_txnlog`. See + [root directory](/docs/concept/root-directory-structure/#db-directory) for + more information. + +- **WAL apply job**: collects the commit requests based on the unique `txn` + numbers and sends them to the `TableWriter` to be committed. + +- **TableWriter**: updates the database and resolves any out-of-order data + writes. + + + + + +## Checking WAL configurations + +The following table metadata functions are useful for checking WAL table +settings: + +- [`tables()`](/docs/reference/function/meta/#tables) returns general table + metadata, including whether a table is a WAL table or not. +- [`wal_tables()`](/docs/reference/function/meta/#wal_tables) returns WAL-table + status. +- [ALTER TABLE RESUME WAL](/docs/reference/sql/alter-table-resume-wal/) restarts + suspended transactions. + + + diff --git a/docs/deployment/aws-official-ami.md b/docs/deployment/aws-official-ami.md new file mode 100644 index 00000000..8e0c6d47 --- /dev/null +++ b/docs/deployment/aws-official-ami.md @@ -0,0 +1,153 @@ +--- +title: Launch the official QuestDB AMI via the AWS Marketplace +sidebar_label: AWS Marketplace AMI +description: + This document describes how to launch the official AWS Marketplace AMI with + QuestDB installed and how to access and secure the instance on Amazon Web + Services +--- +import InterpolateReleaseData from "../../src/components/InterpolateReleaseData" +import CodeBlock from "@theme/CodeBlock" + + +AWS Marketplace is a digital catalog with software listings from independent +software vendors that runs on AWS. This guide describes how to launch QuestDB +via the AWS Marketplace using the official listing. This document also describes +usage instructions after you have launched the instance, including hints for +authentication, the available interfaces, and tips for accessing the REST API +and web console. + +## Prerequisites + +- An [Amazon Web Services](https://console.aws.amazon.com) account + +## Launching QuestDB on the AWS Marketplace + +The QuestDB listing can be found in the AWS Marketplace under the databases +category. To launch a QuestDB instance: + +1. Navigate to the + [QuestDB listing](https://aws.amazon.com/marketplace/search/results?searchTerms=questdb) +2. Click **Continue to Subscribe** and subscribe to the offering +3. **Configure** a version, an AWS region and click **Continue to** **Launch** +4. Choose an instance type and network configuration and click **Launch** + +An information panel displays the ID of the QuestDB instance with launch +configuration details and hints for locating the instance in the EC2 console. 
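Before moving on to configuration, you may want to confirm that the instance is
reachable. One quick check — assuming the instance's security group allows
inbound traffic on port 9000 and substituting the public IP address shown in
the EC2 console for the placeholder — is to query the REST API from your
machine:

```bash
# Replace <instance-public-ip> with the address of your EC2 instance.
# Assumes port 9000 (web console & REST API) is open in the security group.
curl -G "http://<instance-public-ip>:9000/exec" \
  --data-urlencode "query=SELECT 1;"
```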
+ +## QuestDB configuration + +The server configuration file is at the following location on the AMI: + +```bash +/var/lib/questdb/conf/server.conf +``` + +For details on the server properties and using this file, see the +[server configuration documentation](/docs/reference/configuration/). + +The default ports used by QuestDB interfaces are as follows: + +- Web console & REST API is available on port `9000` +- PostgreSQL wire protocol available on `8812` +- InfluxDB line protocol `9009` (TCP and UDP) +- Health monitoring & Prometheus `/metrics` `9003` + +### Postgres credentials + +Generated credentials can be found in the server configuration file: + +```bash +/var/lib/questdb/conf/server.conf +``` + +The default Postgres username is `admin` and a password is randomly generated +during startup: + +```ini +pg.user=admin +pg.password=... +``` + +### InfluxDB line protocol credentials + +The credentials for InfluxDB line protocol can be found at + +```bash +/var/lib/questdb/conf/full_auth.json +``` + +For details on authentication using this protocol, see the +[InfluxDB line protocol authentication guide](/docs/reference/api/ilp/authenticate/). + +### Disabling authentication + +If you would like to disable authentication for Postgres wire protocol or +InfluxDB line protocol, comment out the following lines in the server +configuration file: + +```ini title="/var/lib/questdb/conf/server.conf" +# pg.password=... + +# line.tcp.auth.db.path=conf/auth.txt +``` + +### Disabling interfaces + +Interfaces may be **disabled completely** with the following configuration: + +```ini title="/var/lib/questdb/conf/server.conf" +# disable postgres +pg.enabled=false + +# disable InfluxDB line protocol over TCP and UDP +line.tcp.enabled=false +line.udp.enabled=false + +# disable HTTP (web console and REST API) +http.enabled=false +``` + +The HTTP interface may alternatively be set to **readonly**: + +```ini title="/var/lib/questdb/conf/server.conf" +# set HTTP interface to readonly +http.security.readonly=true +``` + +## Upgrading QuestDB + +:::note + +- Check the [release notes](https://github.com/questdb/questdb/releases) and + ensure that necessary [backup](/docs/operations/backup/) is completed. + +::: + +You can perform the following steps to upgrade your QuestDB version on an official AWS QuestDB AMI: + +- Stop the service: + +```shell +systemctl stop questdb.service +``` + +- Download and copy over the new binary + + ( + + {`wget https://github.com/questdb/questdb/releases/download/${release.name}/questdb-${release.name}-no-jre-bin.tar.gz \\ +tar xzvf questdb-${release.name}-no-jre-bin.tar.gz +cp questdb-${release.name}-no-jre-bin/questdb.jar /usr/local/bin/questdb.jar +cp questdb-${release.name}-no-jre-bin/questdb.jar /usr/local/bin/questdb-${release.name}.jar`} + + )} +/> + +- Restart the service again: + +```shell +systemctl restart questdb.service +systemctl status questdb.service +``` \ No newline at end of file diff --git a/docs/deployment/capacity-planning.md b/docs/deployment/capacity-planning.md new file mode 100644 index 00000000..fefbe9f8 --- /dev/null +++ b/docs/deployment/capacity-planning.md @@ -0,0 +1,423 @@ +--- +title: Capacity planning +description: + How to plan and configure system resources, database configuration, and client + application code available to QuestDB to ensure that server operation + continues uninterrupted. 
+--- + +Capacity planning should be considered as part of the requirements of deploying +QuestDB to forecast CPU, memory, network capacity, and a combination of these +elements, depending on the expected demands of the system. This page describes +configuring these system resources with example scenarios that align with both +edge cases and common setup configurations. + +Most of the configuration settings referred to below except for OS settings are +configured in QuestDB by either a `server.conf` configuration file or as +environment variables. For more details on applying configuration settings in +QuestDB, refer to the [configuration](/docs/reference/configuration/) page. + +To monitor various metrics of the QuestDB instances, refer to the +[Prometheus monitoring page](/docs/third-party-tools/prometheus/) or the +[Health monitoring page](/docs/operations/health-monitoring/). + +## Storage and filesystem + +The following sections describe aspects to consider regarding the storage of +data and file systems. + +### Supported filesystem + +QuestDB officially supports the following filesystems: + +- APFS +- EXT4 +- NTFS +- OVERLAYFS (used by Docker) +- XFS (`ftype=1` only) + +Other file systems supporting +[mmap](https://man7.org/linux/man-pages/man2/mmap.2.html) feature may work with +QuestDB but they should not be used in production, as QuestDB does not run tests +on them. + +When an unsupported file system is used, QuestDB logs show the following +warning: + +``` +-> UNSUPPORTED (SYSTEM COULD BE UNSTABLE)" +``` + +:::caution + +Users **can't use NFS or similar distributed filesystems** directly with a +QuestDB database. + +::: + +### Write amplification + +In QuestDB the write amplification is calculated by the +[metrics](/docs/third-party-tools/prometheus/#scraping-prometheus-metrics-from-questdb): +`questdb_physically_written_rows_total` / `questdb_committed_rows_total`. + +When ingesting out-of-order data, a high disk write rate combined with high +write amplification may slow down the performance. + +For data ingestion over PGWire, or as a further step for InfluxDB Line Protocol +ingestion, smaller table [partitions](/docs/concept/partitions/) maybe reduce +the write amplification. This applies to tables with partition directories +exceeding a few hundred MBs on disk. For example, partition by day can be +reduced to by hour, partition by month to by day, and so on. + +### Partitioning + +Database partitioning splits database data into fractional pieces called +partitions. Partitions are stored and accessed separately from one another to +improve overall performance and scalability. + +Partitioning is only possible on tables with a designated timestamp. Available +partition intervals are `NONE`, `YEAR`, `MONTH`, `WEEK`, `DAY`, and `HOUR`. For +more information, see the +[full partitioning documentation](/docs/concept/partitions/). + +Beyond the base performance benefits of a database partition, QuestDB requires a +database partition to provide out-of-order (O3) indexing. + +#### Partition split + +From QuestDB 7.2, heavily out-of-order commits can split the partitions into +parts to reduce write amplification. When data is merged into an existing +partition as a result of an out-of-order insert, the partition will be split +into two parts: the prefix sub-partition and the suffix sub-partition. + +Considering an example of the following partition details: + +- A partition `2023-01-01.1` with 1,000 rows every hour, and therefore 24,000 + rows in total. 
+- Inserting one row with the timestamp `2023-01-01T23:00` + +When the out-of-order row `2023-01-01T23:00` is inserted, the partition is split +into 2 parts: + +- Prefix: `2023-01-01.1` with 23,000 rows +- Suffix (including the merged row):`2023-01-01T75959-999999.2` with 1,001 rows + +See +[Splitting and squashing time partitions](/docs/concept/partitions/#splitting-and-squashing-time-partitions) +for more information. + +## CPU and RAM configuration + +This section describes configuration strategies based on the forecast behavior +of the database. + +### RAM size + +We recommend having at least 8GB of RAM for basic workloads and 32GB for more +advanced ones. + +For relatively small datasets, typically a few to a few dozen GB, if the need +for reads is high, performance can benefit from maximizing the use of the OS +page cache. Users may consider increasing available RAM to improve the speed of +read operations. + +### Memory page size configuration + +For frequent out-of-order (O3) writes over a high number of columns/tables, the +performance may be impacted by the size of the memory page being too big as this +increases the demand for RAM. The memory page, `cairo.o3.column.memory.size`, is +set to 8M by default. This means that the table writer uses 16MB (2x8MB) RAM per +column when it receives O3 writes. Decreasing the value in the interval of +[128K, 8M] based on the number of columns used may improve O3 write performance. + +### CPU cores + +By default, QuestDB attempts to use all available CPU cores. +[The guide on shared worker configuration](#shared-workers) details how to +change the default setting. Assuming that the disk does not have bottleneck for +operations, the throughput of read-only queries scales proportionally with the +number of available cores. As a result, a machine with more cores will provide +better query performance. + +### Shared workers + +In QuestDB, there are worker pools which can help separate CPU-load between +sub-systems. + +:::caution + +In case if you are configuring thread pool sizes manually, the total number of +threads to be used by QuestDB should not exceed the number of available CPU +cores. + +::: + +The number of worker threads shared across the application can be configured as +well as affinity to pin processes to specific CPUs by ID. Shared worker threads +service SQL execution subsystems and, in the default configuration, every other +subsystem. More information on these settings can be found on the +[shared worker](/docs/reference/configuration/#shared-worker) configuration +page. + +QuestDB will allocate CPU resources differently depending on how many CPU cores +are available. This default can be overridden via configuration. We recommend at +least 4 cores for basic workloads and 16 for advanced ones. + +#### 8 CPU cores or less + +QuestDB will configure a shared worker pool to handle everything except the +InfluxDB line protocol writer which gets a dedicated CPU core. The worker count +is calculated as follows: + +`(cpuAvailable) - (line.tcp.writer.worker.count)` + +The minimal size of the shared worker pool is 2, even on a single-core machine. + +#### 16 CPU cores or less + +InfluxDB Line Protocol I/O Worker pool is configured to use 2 CPU cores to speed +up ingestion and the InfluxDB Line Protocol Writer is using 1 core. 
The shared +worker pool is handling everything else and is configured using this formula: + +`(cpuAvailable) - 1 - (line.tcp.writer.worker.count) - (line.tcp.io.worker.count)` + +For example, with 16 cores, the shared pool will have 12 threads: + +`(16) - 1 - (2) - (1)` + +`= 12` + +#### 17 CPU cores and more + +The InfluxDB Line Protocol I/O Worker pool is configured to use 6 CPU cores to +speed up ingestion and the InfluxDB Line Protocol Writer is using 1 core. The +shared worker pool is handling everything else and is configured using this +formula: + +`(cpuAvailable) - 2 - (line.tcp.writer.worker.count) - (line.tcp.io.worker.count)` + +For example, with 32 cores, the shared pool will have 23 threads: + +` (32) - 2 - (6) - (1)` + +`= 23` + +### Writer page size + +The default page size for writers is 16MB. In cases where there are a large +number of small tables, using 16MB to write a maximum of 1MB of data, for +example, is a waste of OS resources. To change the default value, set the +`cairo.writer.data.append.page.size` value in `server.conf`: + +```ini title="server.conf" +cairo.writer.data.append.page.size=1M +``` + +### InfluxDB over TCP + +We have +[a documentation page](/docs/reference/api/ilp/tcp-receiver/#capacity-planning) +dedicated to capacity planning for InfluxDB Line Protocol ingestion. + +### InfluxDB over UDP + +:::note + +The UDP receiver is deprecated since QuestDB version 6.5.2. We recommend the +[TCP receiver](/docs/reference/api/ilp/tcp-receiver/) instead. + +::: + +Given a single client sending data to QuestDB via InfluxDB line protocol over +UDP, the following configuration can be applied which dedicates a thread for a +UDP writer and specifies a CPU core by ID: + +```ini title="server.conf" +line.udp.own.thread=true +line.udp.own.thread.affinity=1 +``` + +### Postgres + +Given clients sending data to QuestDB via Postgres interface, the following +configuration can be applied which sets a dedicated worker and pins it with +`affinity` to a CPU by core ID: + +```ini title="server.conf" +pg.worker.count=4 +pg.worker.affinity=1,2,3,4 +``` + +## Network Configuration + +For InfluxDB line, PGWire and HTTP protocols, there are a set of configuration +settings relating to the number of clients that may connect, the internal I/O +capacity and connection timeout settings. These settings are configured in the +`server.conf` file in the format: + +```ini +.net.connection. +``` + +Where `` is one of: + +- `http` - HTTP connections +- `pg` - PGWire protocol +- `line.tcp` - InfluxDB line protocol over TCP + +And `` is one of the following settings: + +| key | description | +| :-------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `limit` | The number of simultaneous connections to the server. This value is intended to control server memory consumption. | +| `timeout` | Connection idle timeout in milliseconds. Connections are closed by the server when this timeout lapses. | +| `hint` | Applicable only for Windows, where TCP backlog limit is hit. For example Windows 10 allows max of 200 connection. Even if limit is set higher, without hint=true it won't be possible to connect more than 200 connection. | +| `sndbuf` | Maximum send buffer size on each TCP socket. If value is -1 socket send buffer remains unchanged from OS default. | +| `rcvbuf` | Maximum receive buffer size on each TCP socket. 
If the value is -1, the socket receive buffer remains unchanged from the OS default. |
+
+For example, this is the configuration for Linux with a relatively low number of
+concurrent connections:
+
+```ini title="server.conf InfluxDB line protocol network example configuration for moderate number of concurrent connections"
+# bind to all IP addresses on port 9009
+line.tcp.net.bind.to=0.0.0.0:9009
+# maximum of 30 concurrent connections allowed
+line.tcp.net.connection.limit=30
+# nothing to do here, connection limit is quite low
+line.tcp.net.connection.hint=false
+# connections will time out after 60s of no activity
+line.tcp.net.connection.timeout=60000
+# receive buffer is 4MB to accommodate large messages
+line.tcp.net.rcvbuf=4m
+```
+
+Let's assume you would like to configure InfluxDB line protocol for a large
+number of concurrent connections on Windows:
+
+```ini title="server.conf InfluxDB line protocol network example configuration for large number of concurrent connections on Windows"
+# bind to specific NIC on port 9009, NIC is identified by IP address
+line.tcp.net.bind.to=10.75.26.3:9009
+# large number of concurrent connections
+line.tcp.net.connection.limit=400
+# Windows will not allow 400 clients to connect unless we use the "hint"
+line.tcp.net.connection.hint=true
+# connections will time out after 30s of no activity
+line.tcp.net.connection.timeout=30000
+# receive buffer is 1MB because messages are small, a smaller buffer will
+# reduce memory usage, 400 connections times 1MB = 400MB of RAM is required to handle input
+line.tcp.net.rcvbuf=1m
+```
+
+For reference on the defaults of the `http` and `pg` protocols, refer to the
+[server configuration page](/docs/reference/configuration/).
+
+### Pooled connection
+
+Connection pooling should be used for any production-ready use of PGWire or
+InfluxDB Line Protocol over TCP.
+
+The maximum number of pooled connections is configurable
+(`pg.connection.pool.capacity` for PGWire and
+`line.tcp.connection.pool.capacity` for InfluxDB Line Protocol over TCP). The
+default number of connections for both interfaces is 64. Users should avoid
+using too many connections.
+
+## OS configuration
+
+This section describes approaches for changing system settings on the host
+QuestDB is running on when system limits are reached due to maximum open files
+or virtual memory areas. QuestDB passes operating system errors to its logs
+unchanged and as such, changing the following system settings should only be
+done in response to such OS errors.
+
+### Maximum open files
+
+QuestDB uses a [columnar](/glossary/columnar-database/) storage model and
+therefore most data structures relate closely to the file system, with columnar
+data being stored in its own `.d` file per partition. In edge cases with
+extremely large tables, frequent out-of-order ingestion, or a high number of
+table partitions, the number of open files may hit a user or system-wide maximum
+limit and can cause unpredictable behavior.
+
+In a Linux/macOS environment, the following commands allow for checking the
+current user limits for the maximum number of open files:
+
+```bash
+# Soft limit
+ulimit -Sn
+# Hard limit
+ulimit -Hn
+```
+
+#### Setting the open file limit for the current user:
+
+On a Linux environment, it is enough to increase the hard limit, while on macOS,
+both hard and soft limits should be set. See
+[Max Open Files Limit on MacOS for the JVM](/blog/max-open-file-limit-macos-jvm/)
+for more details.
+ +Modify user limits using `ulimit`: + +```bash +# Hard limit +ulimit -H -n 49152 +# Soft limit +ulimit -S -n 49152 +``` + +If the user limit is set above the system-wide limit, the system-wide limit +should be increased to the same value, too. + +#### Setting the system-wide open file limit on Linux: + +To increase this setting and have the configuration persistent, the limit on the +number of concurrently open files can be changed in `/etc/sysctl.conf`: + +```ini title="/etc/sysctl.conf" +fs.file-max=262144 +``` + +To confirm that this value has been correctly configured, reload `sysctl` and +check the current value: + +```bash +# reload configuration +sysctl -p +# query current settings +sysctl fs.file-max +``` + +#### Setting system-wide open file limit on macOS: + +On macOS, the system-wide limit can be modified by using `launchctl`: + +```shell +sudo launchctl limit maxfiles 98304 2147483647 +``` + +To confirm the change, view the current settings using `sysctl`: + +```shell +sysctl -a | grep kern.maxf +``` + +### Max virtual memory areas limit + +If the host machine has insufficient limits of map areas, this may result in +out- of-memory exceptions. To increase this value and have the configuration +persistent, mapped memory area limits can be changed in `/etc/sysctl.conf`: + +```ini title="/etc/sysctl.conf" +vm.max_map_count=262144 +``` + +Each mapped area needs kernel memory, and it's recommended to have around 128 +bytes available per 1 map count. + +```bash +# reload configuration +sysctl -p +# query current settings +cat /proc/sys/vm/max_map_count +``` diff --git a/docs/deployment/digitalocean.md b/docs/deployment/digitalocean.md new file mode 100644 index 00000000..83bf83b5 --- /dev/null +++ b/docs/deployment/digitalocean.md @@ -0,0 +1,151 @@ +--- +title: Launch QuestDB on DigitalOcean +sidebar_label: DigitalOcean Droplet +description: + This document describes how to launch DigitalOcean droplet with QuestDB +--- + +DigitalOcean is a platform with software listings from independent vendors that +run on cloud resources. This guide describes how to launch QuestDB via the +DigitalOcean marketplace using the official listing. This document also +describes usage instructions after you have launched the instance, including +hints for authentication, the available interfaces, and tips for accessing the +REST API and web console. + +## Prerequisites + +The prerequisites for deploying QuestDB on DigitalOcean are as follows: + +- A DigitalOcean account (sign up using + [the QuestDB referral link](https://m.do.co/c/50d6b551562b) for 100 USD free + credit) +- Basic `shell` knowledge for executing commands on the DigitalOcean droplet + +## Create a QuestDB Droplet + +DigitalOcean has a marketplace which offers **1-Click Apps** reviewed by their +staff. QuestDB is available on the marketplace recently, so setup using this +method is preferred: + +1. Navigate to the + [QuestDB listing](https://marketplace.digitalocean.com/apps/questdb?refcode=50d6b551562b) + on DigitalOcean +2. Click **Create QuestDB Droplet** +3. Select the basic plan for your Droplet (4GB RAM is recommended) + +import Screenshot from "@theme/Screenshot" + + + +4. Choose a region closest to you +5. At the **Authentication** section, enter your SSH public key, or set a + password +6. Set a hostname for the droplet such as `questdb-demo` +7. Leave all other settings with their defaults, and click **Create Droplet** at + the bottom of the page + + + +After 30 seconds, QuestDB should be ready to use. 
To validate that we set
+everything up successfully, copy the Droplet's IP address by clicking on it and
+navigate to `http://<IP address>:9000/`, where `<IP address>` is the IP address
+you just copied. The interactive console should load and we can start querying
+the database and inserting data.
+
+## QuestDB droplet configuration
+
+The server configuration file is at the following location on the droplet:
+
+```bash
+/home/questdb/server.conf
+```
+
+For details on the server properties and using this file, see the
+[server configuration documentation](/docs/reference/configuration/).
+
+The default ports used by QuestDB interfaces are as follows:
+
+- Web console & REST API available on port `9000`
+- PostgreSQL wire protocol available on port `8812`
+- InfluxDB line protocol available on port `9009` (TCP and UDP)
+- Health monitoring & Prometheus `/metrics` available on port `9003`
+
+### QuestDB Credentials
+
+Credentials may be configured in the server configuration file:
+
+```bash
+/home/questdb/server.conf
+```
+
+The default Postgres credentials should be changed:
+
+```ini
+pg.user=...
+pg.password=...
+```
+
+For details on authentication using InfluxDB line protocol, see the
+[InfluxDB line protocol authentication guide](/docs/reference/api/ilp/authenticate/).
+
+### Disabling authentication
+
+If you would like to disable authentication for Postgres wire protocol or
+InfluxDB line protocol, comment out the following lines in the server
+configuration file:
+
+```ini title="/home/questdb/server.conf"
+# pg.password=...
+
+# line.tcp.auth.db.path=conf/auth.txt
+```
+
+### Disabling interfaces
+
+Interfaces may be **disabled completely** with the following configuration:
+
+```ini title="/home/questdb/server.conf"
+# disable postgres
+pg.enabled=false
+
+# disable InfluxDB line protocol over TCP and UDP
+line.tcp.enabled=false
+line.udp.enabled=false
+
+# disable HTTP (web console and REST API)
+http.enabled=false
+```
+
+The HTTP interface may alternatively be set to **readonly**:
+
+```ini title="/home/questdb/server.conf"
+# set HTTP interface to readonly
+http.security.readonly=true
+```
+
+## API creation
+
+In addition to creating a Droplet from the QuestDB 1-Click App via the control
+panel, you can also
+[use the DigitalOcean API](https://digitalocean.com/docs/api/).
+
+As an example, to create a 4GB QuestDB Droplet in the SFO2 region, you can use
+the following curl command. You’ll need to either save your API access token to
+an environment variable or substitute it into the command below.
+
+```bash
+curl -X POST -H 'Content-Type: application/json' \
+    -H 'Authorization: Bearer '$TOKEN'' -d \
+    '{"name":"choose_a_name","region":"sfo2","size":"s-2vcpu-4gb","image":"questdb-20-04"}' \
+    "https://api.digitalocean.com/v2/droplets"
+```
diff --git a/docs/deployment/google-cloud-platform.md b/docs/deployment/google-cloud-platform.md
new file mode 100644
index 00000000..7cce1c7d
--- /dev/null
+++ b/docs/deployment/google-cloud-platform.md
@@ -0,0 +1,141 @@
+---
+title: Google Cloud Platform
+description:
+  This document describes how to deploy QuestDB on Google Cloud platform using a
+  Compute Engine VM with additional details on configuring networking rules
+---
+
+This guide describes how to run QuestDB on a Compute Engine instance on Google
+Cloud platform with details on how to enable networking on various interfaces by
+means of firewall rules.
+
+This guide uses the official QuestDB Docker image during VM instance creation to
+simplify setup steps for a quick, robust deployment.
The networking rules below +show how to make ports for PostgreSQL wire protocol and REST API publicly +accessible or by whitelisted IP. + +## Prerequisites + +- A [Google Cloud Platform](https://console.cloud.google.com/getting-started) + (GCP) account and a GCP Project +- The + [Compute Engine API](https://console.cloud.google.com/apis/api/compute.googleapis.com) + must be enabled for the corresponding Google Cloud Platform project + +## Create a Compute Engine VM + +1. In the Google Cloud Console, navigate to + [Compute Engine](https://console.cloud.google.com/compute/instances) and + click **Create Instance** + +import Screenshot from "@theme/Screenshot" + + + +2. Give the instance a name, this example uses `questdb-europe-west3` +3. Choose a **Region** and **Zone**, this example uses + `europe-west3 (Frankfurt)` and the default zone +4. Choose a machine configuration, a general-purpose instance is `e2-medium` + with 4GB memory +5. Enable the checkbox under **Container** and provide the latest QuestDB Docker + image: + + ```text + questdb/questdb:latest + ``` + +Given the steps so far, the VM Instance configuration page should look like the +following: + + + +Before creating the instance, assign a **Network tag** so that a firewall rule +for networking can be easily applied to instances of the same type. + +1. Expand the menu item **Management, security, disks, networking, sole + tenancy** towards the bottom of the page +2. In the **Networking** panel add a **Network tag** to identify the instance, + this example uses `questdb` +3. Launch the instance by clicking **Create** + + + +## Create a firewall rule + +1. Navigate to the + [Firewalls configuration](https://console.cloud.google.com/networking/firewalls) + page under **VPC network** -> **Firewalls** +2. Add the target tag `questdb` +3. Choose an IP range that this rule applies to, this example uses `0.0.0.0/0` + (i.e. any IP) +4. In the **Protocols and ports** section, enable `8812` and `9000` for TCP. +5. Click **create** + + + +All VM instances on Compute Engine within this account which have the **Network +tag** `questdb` will have this firewall rule applied. + +The ports we have opened are: + +- `9000` for the REST API and Web Console +- `8812` for the PostgreSQL wire protocol + +The configuration above allows networking from any IP address for the selected +ports. A more secure approach would be to only allow incoming connections from +whitelisted IPs. + +## Verify the deployment + +To verify the instance state, navigate to **Compute Engine** -> +[VM Instances](https://console.cloud.google.com/compute/instances). A status +indicator should show the instance as **running**: + + + +To verify that the QuestDB deployment is operating as expected: + +1. Copy the **External IP** of the instance +2. 
Navigate to `:9000` in a browser + +The Web Console should be visible: + + + +Alternatively, a request may be sent against the REST API exposed on port 9000: + +```bash +curl -G \ + --data-urlencode "query=SELECT * FROM telemetry_config" \ + :9000/exec +``` diff --git a/docs/deployment/kubernetes.md b/docs/deployment/kubernetes.md new file mode 100644 index 00000000..84c73754 --- /dev/null +++ b/docs/deployment/kubernetes.md @@ -0,0 +1,76 @@ +--- +title: Run QuestDB on Kubernetes +sidebar_label: Kubernetes +description: + This document describes how to deploy QuestDB using a Kubernetes cluster by + means of official Helm charts maintained by the QuestDB project +--- + +You can deploy QuestDB in a [Kubernetes](https://kubernetes.io) cluster using a +[StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) +and a +[persistent volume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/). +We distribute QuestDB via [Helm](https://helm.sh) on +[ArtifactHub](https://artifacthub.io/packages/helm/questdb/questdb). + +## Prerequisites + +- [Helm](https://helm.sh/docs/intro/install/) +- [Kubernetes CLI](https://kubernetes.io/docs/tasks/tools/install-kubectl/) +- [minikube](https://minikube.sigs.k8s.io/docs/start/) + +## Get the QuestDB Helm chart + +Using the Helm client, add the official Helm chart repository: + +```shell +helm repo add questdb https://helm.questdb.io/ +``` + +Update the Helm index: + +```shell +helm repo update +``` + +## Run QuestDB + +Start a local cluster using `minikube`: + +```shell +minikube start +``` + +Then install the chart: + +```shell +helm install my-questdb questdb/questdb +``` + +Finally, use the Kubernetes CLI to get the pod name: + +```shell +kubectl get pods +``` + +Result: + +| NAME | READY | STATUS | RESTARTS | AGE | +| ------------ | ----- | ------- | -------- | ----- | +| my-questdb-0 | 1/1 | Running | 1 | 9m59s | + +## Querying QuestDB locally + +In order to run queries against your local instance of QuestDB, you can use port +forwarding: + +```shell +kubectl port-forward my-questdb-0 9000 +``` + +The following ports may also be used: + +- 9000: [REST API](/docs/reference/api/rest/) and + [Web Console](/docs/develop/web-console/) +- 8812: [Postgres](/docs/reference/api/postgres/) +- 9009: [InfluxDB line protocol](/docs/reference/api/ilp/overview/) diff --git a/docs/develop/connect.md b/docs/develop/connect.md new file mode 100644 index 00000000..eca6cea5 --- /dev/null +++ b/docs/develop/connect.md @@ -0,0 +1,99 @@ +# Connect + +You can interact with a QuestDB database by connecting to one of its various +network endpoints. + +|Network Endpoint|Port|Inserting & modifying data*|Querying data| +|:---------------|:---|:-------------------------------------|:------------| +|[Web Console](#web-console)|9000|SQL `INSERT`, `UPDATE`, CSV|SQL `SELECT`, charting| +|[InfluxDB Line Protocol](#influxdb-line-protocol)|9009|High performance bulk insert|-| +|[PostgreSQL wire protocol](#postgresql-wire-protocol)|8812|SQL `INSERT`, `UPDATE`|SQL `SELECT`| +|[HTTP REST API](#http-rest-api)|9000|SQL `INSERT`, `UPDATE`, CSV|SQL `SELECT`, CSV| + +`*` `UPDATE` is available from [QuestDB 6.4](/blog/2022/05/31/questdb-release-6-4/). + + +:::note + +All network ports may be [configured](/docs/reference/configuration/). + +::: + +## Web console + +The [web console](/docs/develop/web-console/) is a general admin and query +interface. +It's great for quickly trying things out. You can also chart your query results. 
+
+Connect your web browser to http://[server-address]:9000/. When running locally,
+this will be `http://localhost:9000/`.
+
+import Screenshot from "@theme/Screenshot"
+
+
+
+
+## InfluxDB Line Protocol (ILP)
+
+The fastest way to insert data into QuestDB is using the InfluxDB Line
+Protocol.
+
+It is an insert-only protocol that bypasses SQL `INSERT` statements, achieving
+higher throughput.
+
+```shell
+readings,city=London temperature=23.2 1465839830100400000\n
+readings,city=London temperature=23.6 1465839830100700000\n
+readings,make=Honeywell temperature=23.2,humidity=0.443 1465839830100800000\n
+```
+
+Our [InfluxDB Line Protocol tutorial](/docs/develop/insert-data/#influxdb-line-protocol) covers
+ingesting data with various client libraries.
+
+For a more in-depth understanding, see our
+[protocol documentation](/docs/reference/api/ilp/overview/).
+
+## PostgreSQL wire protocol
+
+For SQL, we support the same wire protocol as PostgreSQL, allowing you to
+connect and query the database with various third-party pre-existing client
+libraries and tools.
+
+```python
+import psycopg as pg
+
+# Connect to an existing QuestDB instance using the with statement
+conn_str = 'user=admin password=quest host=127.0.0.1 port=8812 dbname=qdb'
+with pg.connect(conn_str, autocommit=True) as connection:
+    # Open a cursor and run a simple query to confirm the connection works
+    with connection.cursor() as cur:
+        cur.execute('SELECT x FROM long_sequence(5);')
+        print(cur.fetchall())
+```
+
+See how you can connect through the PostgreSQL wire protocol from
+different programming languages to:
+
+* [Insert data](/docs/develop/insert-data/#postgresql-wire-protocol)
+* [Query data](/docs/develop/query-data/#postgresql-wire-protocol)
+
+
+## HTTP REST API
+
+The HTTP interface that hosts the web console also provides a REST API for
+importing data, exporting data and querying.
+
+```shell
+curl -F data=@data.csv http://localhost:9000/imp
+```
+
+Find out how to:
+
+* [Insert data](/docs/develop/insert-data/#http-rest-api)
+* [Query data](/docs/develop/query-data/#http-rest-api)
diff --git a/docs/develop/insert-data.md b/docs/develop/insert-data.md
new file mode 100644
index 00000000..25cbfc7f
--- /dev/null
+++ b/docs/develop/insert-data.md
@@ -0,0 +1,1132 @@
+# Insert data
+
+import Tabs from "@theme/Tabs"
+import TabItem from "@theme/TabItem"
+import { RemoteRepoExample } from "@theme/RemoteRepoExample"
+import Screenshot from "@theme/Screenshot"
+
+This page shows how to insert data into QuestDB using different programming
+languages and tools.
+
+## Overview
+
+QuestDB supports the following data ingestion methods:
+
+- [InfluxDB Line Protocol](#influxdb-line-protocol-ilp): the recommended primary
+  ingestion method in QuestDB for high-performance applications.
+  - Dedicated InfluxDB Line Protocol
+    [client libraries](/docs/reference/clients/overview/) available
+- [PostgreSQL wire protocol](#postgresql-wire-protocol): interoperability with
+  the PostgreSQL ecosystem.
+
+  - SQL `INSERT` and `COPY` statements, including parameterized queries.
+  - `psql` on the command line
+  - Support for most PostgreSQL keywords and functions
+
+- [HTTP REST API](#http-rest-api): compatibility with a wide range of libraries
+  and tools.
+  - SQL `INSERT` for ad-hoc SQL queries
+  - `curl` command and CSV file upload on the command line
+  - Accessing QuestDB via the [Web Console](#web-console):
+    - Code editor for SQL `INSERT` queries
+    - SQL `COPY` for one-off [database migration](/docs/guides/importing-data/)
+    - [CSV file upload](#uploading-csv-file) for uploading batches of CSV files
+
+### Recommended insert method
+
+The table below outlines the general recommendation for data ingestion based on
+the shape of the data and different scenarios:
+
+#### One-off data import
+
+| | [CSV Upload](#uploading-csv-file) | SQL `COPY`
([Web Console](#web-console)) | [InfluxDB Line Protocol](#influxdb-line-protocol-ilp) | [PostgreSQL](#postgresql-wire-protocol) | +| :------------------- | :-------------------------------: | :------------------------------------------: | :---------------------------------------------------: | :-------------------------------------: | +| Sorted | ✓ | ✓ | | | +| Lightly out of order | ✓ | | | | +| Heavily out of order | | ✓ | | | + +#### Periodic batch ingest + +| | [CSV Upload](#uploading-csv-file) | SQL `COPY`
([Web Console](#web-console)) | [InfluxDB Line Protocol](#influxdb-line-protocol-ilp) | [PostgreSQL](#postgresql-wire-protocol) | +| :------------------- | :-------------------------------: | :------------------------------------------: | :---------------------------------------------------: | :-------------------------------------: | +| Sorted | ✓ | | ✓ | ✓ | +| Lightly out of order | ✓ | | ✓ | ✓ | +| Heavily out of order | ✓ | | ✓ | ✓ | + +#### Real-time ingest + +| | [CSV Upload](#uploading-csv-file) | SQL `COPY`
([Web Console](#web-console)) | [InfluxDB Line Protocol](#influxdb-line-protocol-ilp) | [PostgreSQL](#postgresql-wire-protocol) | +| :------------------- | :-------------------------------: | :------------------------------------------: | :---------------------------------------------------: | :-------------------------------------: | +| Sorted | | | ✓ | | +| Lightly out of order | | | ✓ | | +| Heavily out of order | | | ✓ | | + +**Lightly out of order data** refers to data with the following traits: + +- The expected lag is usually within a few minutes. +- The data is mostly sorted. Timestamps are growing in time with occasional + exceptions that are within the lag. + +**Heavily out of order data** refers to data with the following traits: + +- The data is mostly unsorted. +- The data belongs to different parts of different partitions in an arbitrary + manner. + +## InfluxDB Line Protocol (ILP) + +The InfluxDB Line Protocol is a text protocol over TCP on port `9009`. + +It is a one-way protocol to insert data, focusing on simplicity and performance. + +This interface is the preferred ingestion method as it provides the following +benefits: + +- High-throughput ingestion +- Robust ingestion from multiple sources into tables with dedicated systems for + reducing congestion +- Supports on-the-fly, concurrent schema changes + +On the [InfluxDB line protocol](/docs/reference/api/ilp/overview/) page, you may +find additional details on the message format, ports and authentication. + +### Client libraries + +The [Client Libraries](/docs/reference/clients/overview/) provide user-friendly +InfluxDB Line Protocol clients for a growing number of languages. + +### Authentication + +By default, Open Source InfluxDB Line Protocol Server is unauthenticated. To +configure authentication on the server, follow our +[server configuration guide](/docs/reference/api/ilp/authenticate/#server-configuration). +To configure authentication on the client, follow the relevant documentation +section in the [Client Libraries overview](/docs/reference/clients/overview/). + +QuestDB Cloud servers are configured for authentication already. Snippets for +all the supported languages can be found at https://cloud.questdb.com under the +instance "Connect" tab. + +### Examples + +These examples send a few rows of input. These use client libraries as well as +raw TCP socket connections, when a client library is not available. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +```ruby +require 'socket' +HOST = 'localhost' +PORT = 9009 +# Returns the current time in nanoseconds +def time_in_nsec + now = Time.now + return now.to_i * (10 ** 9) + now.nsec +end +begin + s = TCPSocket.new HOST, PORT + # Single record insert + s.puts "trades,name=client_timestamp value=12.4 #{time_in_nsec}\n" + # Omitting the timestamp allows the server to assign one + s.puts "trades,name=client_timestamp value=12.4\n" + # Streams of readings must be newline-delimited + s.puts "trades,name=client_timestamp value=12.4\n" + + "trades,name=client_timestamp value=11.4\n" +rescue SocketError => ex + puts ex.inspect +ensure + s.close() if s +end +``` + + + + + + + +```php + +``` + + + + + + + +### Telegraf + +The [Telegraf guide](/docs/third-party-tools/telegraf/) helps you configure a +Telegraf agent to collect and send metrics to QuestDB via InfluxDB Line +Protocol. 
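+
+As a minimal sketch, a Telegraf output section pointing at QuestDB might look
+like the following. It assumes QuestDB's InfluxDB Line Protocol TCP listener is
+running on the default port `9009` on the same host; adjust the address for
+your deployment:
+
+```toml title="telegraf.conf"
+# Send collected metrics to QuestDB over InfluxDB Line Protocol (TCP)
+[[outputs.socket_writer]]
+  address = "tcp://localhost:9009"
+```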
+ +## PostgreSQL wire protocol + +QuestDB also supports the same wire protocol as PostgreSQL, allowing you to +connect and query the database with various third-party pre-existing client +libraries and tools. + +You can connect to TCP port `8812` and use both `INSERT` and `SELECT` SQL +queries. + +PostgreSQL wire protocol is better suited for applications inserting via SQL +programmatically as it provides parameterized queries, which avoid SQL injection +issues. + +:::tip + +[InfluxDB Line Protocol](#influxdb-line-protocol) is the recommended primary +ingestion method in QuestDB. SQL `INSERT` statements over the PostgreSQL offer +feedback and error reporting, but have worse overall performance. + +::: + +Here are a few examples demonstrating SQL `INSERT` queries: + + + + + + + +:::note + +If you using the QuestDB Cloud, your database requires TLS to connect. You can +find host, port, and password configuration at https://cloud.questdb.com, on +your instance "Connect" tab. To enable SSL from psql in the commands below, +please follow this pattern: + +psql -h {hostname} -p {port} -U admin "dbname=qdb sslmode=require" -c +'{SQL_STATEMENT}' + +::: + +Create the table: + +```shell +psql -h localhost -p 8812 -U admin -d qdb \ + -c "CREATE TABLE IF NOT EXISTS t1 (name STRING, value INT);" +``` + +Insert row: + +```shell +psql -h localhost -p 8812 -U admin -d qdb -c "INSERT INTO t1 VALUES('a', 42)" +``` + +Query back: + +```shell +psql -h localhost -p 8812 -U admin -d qdb -c "SELECT * FROM t1" +``` + +Note that you can also run `psql` from Docker without installing the client +locally: + +``` +docker run -it --rm --network=host -e PGPASSWORD=quest \ + postgres psql .... +``` + + + + + + + +This example uses the [psychopg3](https://www.psycopg.org/psycopg3/docs/) +adapter. + +To [install](https://www.psycopg.org/psycopg3/docs/basic/install.html) the +client library, use `pip`: + +```shell +python3 -m pip install "psycopg[binary]" +``` + +```python +import psycopg as pg +import time + +# Connect to an existing QuestDB instance + +conn_str = 'user=admin password=quest host=127.0.0.1 port=8812 dbname=qdb' +with pg.connect(conn_str, autocommit=True) as connection: + + # Open a cursor to perform database operations + + with connection.cursor() as cur: + + # Execute a command: this creates a new table + + cur.execute(''' + CREATE TABLE IF NOT EXISTS test_pg ( + ts TIMESTAMP, + name STRING, + value INT + ) timestamp(ts); + ''') + + print('Table created.') + + # Insert data into the table. + + for x in range(10): + + # Converting datetime into millisecond for QuestDB + + timestamp = time.time_ns() // 1000 + + cur.execute(''' + INSERT INTO test_pg + VALUES (%s, %s, %s); + ''', + (timestamp, 'python example', x)) + + print('Rows inserted.') + + #Query the database and obtain data as Python objects. 
+ + cur.execute('SELECT * FROM test_pg;') + records = cur.fetchall() + for row in records: + print(row) + +# the connection is now closed + +``` + + + + + + + +```java +package com.myco; + +import java.sql.*; +import java.util.Properties; + +class App { + public static void main(String[] args) throws SQLException { + Properties properties = new Properties(); + properties.setProperty("user", "admin"); + properties.setProperty("password", "quest"); + properties.setProperty("sslmode", "disable"); + + final Connection connection = DriverManager.getConnection( + "jdbc:postgresql://localhost:8812/qdb", properties); + connection.setAutoCommit(false); + + final PreparedStatement statement = connection.prepareStatement( + "CREATE TABLE IF NOT EXISTS trades (" + + " ts TIMESTAMP, date DATE, name STRING, value INT" + + ") timestamp(ts);"); + statement.execute(); + + try (PreparedStatement preparedStatement = connection.prepareStatement( + "INSERT INTO TRADES VALUES (?, ?, ?, ?)")) { + preparedStatement.setTimestamp( + 1, + new Timestamp(io.questdb.std.Os.currentTimeMicros())); + preparedStatement.setDate(2, new Date(System.currentTimeMillis())); + preparedStatement.setString(3, "abc"); + preparedStatement.setInt(4, 123); + preparedStatement.execute(); + } + System.out.println("Done"); + connection.close(); + } +} +``` + + + + + + + +This example uses the [`pg` package](https://www.npmjs.com/package/pg) which +allows for quickly building queries using Postgres wire protocol. Details on the +use of this package can be found on the +[node-postgres documentation](https://node-postgres.com/). + +This example uses naive `Date.now() * 1000` inserts for Timestamp types in +microsecond resolution. For accurate microsecond timestamps, the +[process.hrtime.bigint()](https://nodejs.org/api/process.html#processhrtimebigint) +call can be used. + +```javascript +"use strict" + +const { Client } = require("pg") + +const start = async () => { + const client = new Client({ + database: "qdb", + host: "127.0.0.1", + password: "quest", + port: 8812, + user: "admin", + }) + await client.connect() + + const createTable = await client.query( + "CREATE TABLE IF NOT EXISTS trades (" + + " ts TIMESTAMP, date DATE, name STRING, value INT" + + ") timestamp(ts);", + ) + console.log(createTable) + + let now = new Date().toISOString() + const insertData = await client.query( + "INSERT INTO trades VALUES($1, $2, $3, $4);", + [now, now, "node pg example", 123], + ) + await client.query("COMMIT") + + console.log(insertData) + + for (let rows = 0; rows < 10; rows++) { + // Providing a 'name' field allows for prepared statements / bind variables + now = new Date().toISOString() + const query = { + name: "insert-values", + text: "INSERT INTO trades VALUES($1, $2, $3, $4);", + values: [now, now, "node pg prep statement", rows], + } + await client.query(query) + } + await client.query("COMMIT") + + const readAll = await client.query("SELECT * FROM trades") + console.log(readAll.rows) + + await client.end() +} + +start() + .then(() => console.log("Done")) + .catch(console.error) +``` + + + + + + + +This example uses the [pgx](https://github.com/jackc/pgx) driver and toolkit for +PostgreSQL in Go. More details on the use of this toolkit can be found on the +[GitHub repository for pgx](https://github.com/jackc/pgx/wiki/Getting-started-with-pgx). 
+ +```go +package main + +import ( + "context" + "fmt" + "log" + "time" + + "github.com/jackc/pgx/v4" +) + +var conn *pgx.Conn +var err error + +func main() { + ctx := context.Background() + conn, _ = pgx.Connect(ctx, "postgresql://admin:quest@localhost:8812/qdb") + defer conn.Close(ctx) + + // text-based query + _, err := conn.Exec(ctx, + ("CREATE TABLE IF NOT EXISTS trades (" + + " ts TIMESTAMP, date DATE, name STRING, value INT" + + ") timestamp(ts);")) + if err != nil { + log.Fatalln(err) + } + + // Prepared statement given the name 'ps1' + _, err = conn.Prepare(ctx, "ps1", "INSERT INTO trades VALUES($1,$2,$3,$4)") + if err != nil { + log.Fatalln(err) + } + + // Insert all rows in a single commit + tx, err := conn.Begin(ctx) + if err != nil { + log.Fatalln(err) + } + + for i := 0; i < 10; i++ { + // Execute 'ps1' statement with a string and the loop iterator value + _, err = conn.Exec( + ctx, + "ps1", + time.Now(), + time.Now().Round(time.Millisecond), + "go prepared statement", + i + 1) + if err != nil { + log.Fatalln(err) + } + } + + // Commit the transaction + err = tx.Commit(ctx) + if err != nil { + log.Fatalln(err) + } + + // Read all rows from table + rows, err := conn.Query(ctx, "SELECT * FROM trades") + fmt.Println("Reading from trades table:") + for rows.Next() { + var name string + var value int64 + var ts time.Time + var date time.Time + err = rows.Scan(&ts, &date, &name, &value) + fmt.Println(ts, date, name, value) + } + + err = conn.Close(ctx) +} +``` + + + + + + + +The following example shows how to use parameterized queries and prepared +statements using the [rust-postgres](https://docs.rs/postgres/0.19.0/postgres/) +client. + +```rust +use postgres::{Client, NoTls, Error}; +use chrono::{Utc}; +use std::time::SystemTime; + +fn main() -> Result<(), Error> { + let mut client = Client::connect("postgresql://admin:quest@localhost:8812/qdb", NoTls)?; + + // Basic query + client.batch_execute( + "CREATE TABLE IF NOT EXISTS trades ( \ + ts TIMESTAMP, date DATE, name STRING, value INT \ + ) timestamp(ts);")?; + + // Parameterized query + let name: &str = "rust example"; + let val: i32 = 123; + let utc = Utc::now(); + let sys_time = SystemTime::now(); + client.execute( + "INSERT INTO trades VALUES($1,$2,$3,$4)", + &[&utc.naive_local(), &sys_time, &name, &val], + )?; + + // Prepared statement + let mut txn = client.transaction()?; + let statement = txn.prepare("INSERT INTO trades VALUES ($1,$2,$3,$4)")?; + for value in 0..10 { + let utc = Utc::now(); + let sys_time = SystemTime::now(); + txn.execute(&statement, &[&utc.naive_local(), &sys_time, &name, &value])?; + } + txn.commit()?; + + println!("import finished"); + Ok(()) +} +``` + + + + + + + +## Web Console + +QuestDB ships with an embedded [Web Console](/docs/develop/web-console/) running +by default on port `9000`. + +### Inserting data via SQL + +SQL statements can be written in the code editor and executed by clicking the +**Run** button. Note that the Web Console runs a single statement at a time. + +There are two SQL keywords to insert data: + +- [INSERT](/docs/reference/sql/insert/): + + ```questdb-sql title='Creating a table and inserting some data' + + CREATE TABLE takeaway_order (ts TIMESTAMP, id SYMBOL, status SYMBOL) + TIMESTAMP(ts); + + INSERT INTO takeaway_order VALUES (now(), 'order1', 'placed'); + INSERT INTO takeaway_order VALUES (now(), 'order2', 'placed'); + ``` + +- [COPY](/docs/reference/sql/copy/): For inserting bulk data or migrating data + from other databases. 
See [large CSV import](/docs/guides/importing-data/). + +### Uploading CSV file + +It is also possible to upload CSV files using the +[Import tab](/docs/develop/web-console/#import) in the Web Console: + + + +## HTTP REST API + +QuestDB exposes a REST API for compatibility with a wide range of libraries and +tools. The REST API is accessible on port `9000` and has the following +insert-capable entrypoints: + +| Entrypoint | HTTP Method | Description | API Docs | +| :----------------------------------------- | :---------- | :-------------------------------------- | :------------------------------------------------------------ | +| [`/imp`](#imp-uploading-tabular-data) | POST | Import CSV data | [Reference](/docs/reference/api/rest/#imp---import-data) | +| [`/exec?query=..`](#exec-sql-insert-query) | GET | Run SQL Query returning JSON result set | [Reference](/docs/reference/api/rest/#exec---execute-queries) | + +For details such as content type, query parameters and more, refer to the +[REST API](/docs/reference/api/rest/) docs. + +### `/imp`: Uploading Tabular Data + +:::tip + +[InfluxDB Line Protocol](#influxdb-line-protocol) is the recommended primary +ingestion method in QuestDB. CSV uploading offers insertion feedback and error +reporting, but has worse overall performance. + +See `/imp`'s [`atomicity`](/docs/reference/api/rest/#url-parameters) query +parameter to customize behavior on error. + +::: + +Let's assume you want to upload the following data via the `/imp` entrypoint: + + + + + + + +```csv title=data.csv +col1,col2,col3 +a,10.5,True +b,100,False +c,,True +``` + + + + + + + +| col1 | col2 | col3 | +| :--- | :----- | :------ | +| a | 10.5 | _true_ | +| b | 100 | _false_ | +| c | _NULL_ | _true_ | + + + + + + + +You can do so via the command line using `cURL` or programmatically via HTTP +APIs in your scripts and applications. + +By default, the response is designed to be human-readable. Use the `fmt=json` +query argument to obtain a response in JSON. You can also specify the schema +explicitly. See the second example in Python for these features. + + + + + + + +This example imports a CSV file with automatic schema detection. + +```shell title="Basic import with table name" +curl -F data=@data.csv http://localhost:9000/imp?name=table_name +``` + +This example overwrites an existing table and specifies a timestamp format and a +designated timestamp column. For more information on the optional parameters to +specify timestamp formats, partitioning and renaming tables, see the +[REST API documentation](/docs/reference/api/rest/#examples). + +```bash title="Providing a user-defined schema" +curl \ +-F schema='[{"name":"ts", "type": "TIMESTAMP", "pattern": "yyyy-MM-dd - HH:mm:ss"}]' \ +-F data=@weather.csv 'http://localhost:9000/imp?overwrite=true×tamp=ts' +``` + + + + + + + +This first example shows uploading the `data.csv` file with automatic schema +detection. + +```python +import sys +import requests + +csv = {'data': ('my_table', open('./data.csv', 'r'))} +host = 'http://localhost:9000' + +try: + response = requests.post(host + '/imp', files=csv) + print(response.text) +except requests.exceptions.RequestException as e: + print(f'Error: {e}', file=sys.stderr) +``` + +The second example creates a CSV buffer from Python objects and uploads them +with a custom schema. Note UTF-8 encoding. + +The `fmt=json` parameter allows us to obtain a parsable response, rather than a +tabular response designed for human consumption. 
+ +```python +import io +import csv +import requests +import pprint +import json + + +def to_csv_str(table): + output = io.StringIO() + csv.writer(output, dialect='excel').writerows(table) + return output.getvalue().encode('utf-8') + + +def main(): + table_name = 'example_table2' + table = [ + ['col1', 'col2', 'col3'], + ['a', 10.5, True], + ['b', 100, False], + ['c', None, True]] + + table_csv = to_csv_str(table) + print(table_csv) + schema = json.dumps([ + {'name': 'col1', 'type': 'SYMBOL'}, + {'name': 'col2', 'type': 'DOUBLE'}, + {'name': 'col3', 'type': 'BOOLEAN'}]) + response = requests.post( + 'http://localhost:9000/imp', + params={'fmt': 'json'}, + files={ + 'schema': schema, + 'data': (table_name, table_csv)}).json() + + # You can parse the `status` field and `error` fields + # of individual columns. See Reference/API/REST docs for details. + pprint.pprint(response) + + +if __name__ == '__main__': + main() +``` + + + + + + + +```javascript +const fetch = require("node-fetch") +const FormData = require("form-data") +const fs = require("fs") + +const HOST = "http://localhost:9000" + +async function run() { + const form = new FormData() + + form.append("data", fs.readFileSync(__dirname + "/data.csv"), { + filename: fileMetadata.name, + contentType: "application/octet-stream", + }) + + try { + const r = await fetch(`${HOST}/imp`, { + method: "POST", + body: form, + headers: form.getHeaders(), + }) + + console.log(r) + } catch (e) { + console.error(e) + } +} + +run() +``` + + + + + + + +```go +package main + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "log" + "mime/multipart" + "net/http" + "net/url" + "os" +) + +func main() { + u, err := url.Parse("http://localhost:9000") + checkErr(err) + u.Path += "imp" + url := fmt.Sprintf("%v", u) + fileName := "/path/to/data.csv" + file, err := os.Open(fileName) + checkErr(err) + + defer file.Close() + + buf := new(bytes.Buffer) + writer := multipart.NewWriter(buf) + uploadFile, _ := writer.CreateFormFile("data", "data.csv") + _, err = io.Copy(uploadFile, file) + checkErr(err) + writer.Close() + + req, err := http.NewRequest(http.MethodPut, url, buf) + checkErr(err) + req.Header.Add("Content-Type", writer.FormDataContentType()) + + client := &http.Client{} + res, err := client.Do(req) + checkErr(err) + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + checkErr(err) + + log.Println(string(body)) +} + +func checkErr(err error) { + if err != nil { + panic(err) + } +} +``` + + + + + + + +### `/exec`: SQL `INSERT` Query + +The `/exec` entrypoint takes a SQL query and returns results as JSON. + +We can use this for quick SQL inserts too, but note that there's no support for +parameterized queries that are necessary to avoid SQL injection issues. + +:::tip + +Prefer the [PostgreSQL interface](#postgresql-wire-protocol) if you are +generating sql programmatically. + +Prefer [InfluxDB Line Protocol](#influxdb-line-protocol) if you need +high-performance inserts. 
+ +::: + + + + + + + +```shell +# Create Table +curl -G \ + --data-urlencode "query=CREATE TABLE IF NOT EXISTS trades(name STRING, value INT)" \ + http://localhost:9000/exec + +# Insert a row +curl -G \ + --data-urlencode "query=INSERT INTO trades VALUES('abc', 123456)" \ + http://localhost:9000/exec +``` + + + + + + + +```python +import sys +import requests +import json + +host = 'http://localhost:9000' + +def run_query(sql_query): + query_params = {'query': sql_query, 'fmt' : 'json'} + try: + response = requests.get(host + '/exec', params=query_params) + json_response = json.loads(response.text) + print(json_response) + except requests.exceptions.RequestException as e: + print(f'Error: {e}', file=sys.stderr) + +# create table +run_query("CREATE TABLE IF NOT EXISTS trades (name STRING, value INT)") +# insert row +run_query("INSERT INTO trades VALUES('abc', 123456)") +``` + + + + + + + +The `node-fetch` package can be installed using `npm i node-fetch`. + +```javascript +const fetch = require("node-fetch") + +const HOST = "http://localhost:9000" + +async function createTable() { + try { + const query = "CREATE TABLE IF NOT EXISTS trades (name STRING, value INT)" + + const response = await fetch( + `${HOST}/exec?query=${encodeURIComponent(query)}`, + ) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +async function insertData() { + try { + const query = "INSERT INTO trades VALUES('abc', 123456)" + + const response = await fetch( + `${HOST}/exec?query=${encodeURIComponent(query)}`, + ) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +createTable().then(insertData) +``` + + + + + + + +```go +package main + +import ( + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" +) + +func main() { + u, err := url.Parse("http://localhost:9000") + checkErr(err) + + u.Path += "exec" + params := url.Values{} + params.Add("query", ` + CREATE TABLE IF NOT EXISTS + trades (name STRING, value INT); + INSERT INTO + trades + VALUES( + "abc", + 123456 + ); + `) + u.RawQuery = params.Encode() + url := fmt.Sprintf("%v", u) + + res, err := http.Get(url) + checkErr(err) + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + checkErr(err) + + log.Println(string(body)) +} + +func checkErr(err error) { + if err != nil { + panic(err) + } +} +``` + + + + + + diff --git a/docs/develop/query-data.md b/docs/develop/query-data.md new file mode 100644 index 00000000..a4ccf0e5 --- /dev/null +++ b/docs/develop/query-data.md @@ -0,0 +1,568 @@ +# Query data + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + +This page describes how to query data from QuestDB using different programming +languages and tools. + +## Overview + +For ad-hoc SQL queries, including CSV download and charting use the web console. +Applications can choose between the HTTP REST API which returns JSON or use +the PostgreSQL wire protocol. + +QuestDB supports the following data querying methods: + +- [HTTP REST API](#http-rest-api): compatibility with a wide range of libraries + and tools. + - Accessing QuestDB via the [Web Console](#web-console): + - SQL `SELECT` statements. + - Download query results as CSV. + - Chart query results. + - SQL `SELECT` statements as JSON or CSV + - Result paging +- [PostgreSQL wire protocol](#postgresql-wire-protocol): interoperability with + the PostgreSQL ecosystem. + - SQL `SELECT` statements. + - Use `psql` on the command line. 
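+
+For example, a quick ad-hoc query over the PostgreSQL wire protocol with `psql`
+(assuming the default `admin`/`quest` credentials and port `8812`) looks like
+this:
+
+```shell
+psql -h localhost -p 8812 -U admin -d qdb \
+  -c "SELECT x FROM long_sequence(5);"
+```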
+ +## Web Console + +QuestDB ships with an embedded Web Console running by default on port `9000`. + +import Screenshot from "@theme/Screenshot" + + + + + +To query data from the web console, SQL statements can be written in the code +editor and executed by clicking the **Run** button. + +```questdb-sql title='Listing tables and querying a table' +SHOW TABLES; +SELECT * FROM my_table; + +--Note that `SELECT * FROM` is optional +my_table; +``` + +Aside from the Code Editor, the Web Console includes a data visualization panel +for viewing query results as tables or graphs and an Import tab for uploading +datasets as CSV files. For more details on these components and general use of +the console, see the [Web Console](/docs/develop/web-console/) page. + +## PostgreSQL wire protocol + +You can query data using the Postgres endpoint +that QuestDB exposes which is accessible by default via port `8812`. Examples in +multiple languages are shown below. To learn more, check out our docs about +[Postgres compatibility and tools](/docs/reference/api/postgres/). + + + + + + +```python +import psycopg as pg +import time + +# Connect to an existing QuestDB instance + +conn_str = 'user=admin password=quest host=127.0.0.1 port=8812 dbname=qdb' +with pg.connect(conn_str, autocommit=True) as connection: + + # Open a cursor to perform database operations + + with connection.cursor() as cur: + + #Query the database and obtain data as Python objects. + + cur.execute('SELECT * FROM trades_pg;') + records = cur.fetchall() + for row in records: + print(row) + +# the connection is now closed + +``` + + + + + + + +```java +package com.myco; + +import java.sql.*; +import java.util.Properties; + +public class App { + public static void main(String[] args) throws SQLException { + Properties properties = new Properties(); + properties.setProperty("user", "admin"); + properties.setProperty("password", "quest"); + //set sslmode value to 'require' if connecting to a QuestDB Cloud instance + properties.setProperty("sslmode", "disable"); + + final Connection connection = DriverManager.getConnection( + "jdbc:postgresql://localhost:8812/qdb", properties); + try (PreparedStatement preparedStatement = connection.prepareStatement( + "SELECT x FROM long_sequence(5);")) { + try (ResultSet rs = preparedStatement.executeQuery()) { + while (rs.next()) { + System.out.println(rs.getLong(1)); + } + } + } + connection.close(); + } +} + +``` + + + + + + + +```javascript +"use strict" + +const { Client } = require("pg") + +const start = async () => { + const client = new Client({ + database: "qdb", + host: "127.0.0.1", + password: "quest", + port: 8812, + user: "admin", + }) + await client.connect() + + const res = await client.query("SELECT x FROM long_sequence(5);") + + console.log(res.rows) + + await client.end() +} + +start().catch(console.error) +``` + + + + + + + +```go +package main + +import ( + "database/sql" + "fmt" + + _ "github.com/lib/pq" +) + +const ( + host = "localhost" + port = 8812 + user = "admin" + password = "quest" + dbname = "qdb" +) + +func main() { + connStr := fmt.Sprintf( + "host=%s port=%d user=%s password=%s dbname=%s sslmode=disable", + host, port, user, password, dbname) + db, err := sql.Open("postgres", connStr) + checkErr(err) + defer db.Close() + + // Currently, we do not support queries with bind parameters in Go + rows, err := db.Query("SELECT x FROM long_sequence(5);") + checkErr(err) + defer rows.Close() + + for rows.Next() { + var num string + err = rows.Scan(&num) + checkErr(err) + fmt.Println(num) + 
} + + err = rows.Err() + checkErr(err) +} + +func checkErr(err error) { + if err != nil { + panic(err) + } +} + +``` + + + + + + + +```c +// compile with +// g++ libpq_example.c -o libpq_example.exe -I pgsql\include -L dev\pgsql\lib +// -std=c++17 -lpthread -lpq +#include +#include +#include + +void do_exit(PGconn *conn) { + PQfinish(conn); + exit(1); +} + +int main() { + PGconn *conn = PQconnectdb( + "host=localhost user=admin password=quest port=8812 dbname=testdb"); + if (PQstatus(conn) == CONNECTION_BAD) { + fprintf(stderr, "Connection to database failed: %s\n", + PQerrorMessage(conn)); + do_exit(conn); + } + PGresult *res = PQexec(conn, "SELECT x FROM long_sequence(5);"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { + printf("No data retrieved\n"); + PQclear(res); + do_exit(conn); + } + int rows = PQntuples(res); + for (int i = 0; i < rows; i++) { + printf("%s\n", PQgetvalue(res, i, 0)); + } + PQclear(res); + PQfinish(conn); + return 0; +} +``` + + + + + + + +```csharp +using Npgsql; +string username = "admin"; +string password = "quest"; +string database = "qdb"; +int port = 8812; +var connectionString = $@"host=localhost;port={port};username={username};password={password}; +database={database};ServerCompatibilityMode=NoTypeLoading;"; +await using NpgsqlConnection connection = new NpgsqlConnection(connectionString); +await connection.OpenAsync(); + +var sql = "SELECT x FROM long_sequence(5);"; + +await using NpgsqlCommand command = new NpgsqlCommand(sql, connection); +await using (var reader = await command.ExecuteReaderAsync()) { + while (await reader.ReadAsync()) + { + var x = reader.GetInt64(0); + } +} +``` + + + + + +```ruby +require 'pg' +begin + conn =PG.connect( host: "127.0.0.1", port: 8812, dbname: 'qdb', + user: 'admin', password: 'quest' ) + rows = conn.exec 'SELECT x FROM long_sequence(5);' + rows.each do |row| + puts row + end +rescue PG::Error => e + puts e.message +ensure + conn.close if conn +end +``` + + + + + +```php +getMessage(), "\n"; +} finally { + if (!is_null($db_conn)) { + pg_close($db_conn); + } +} + +?> +``` + + + + + +## HTTP REST API + +QuestDB exposes a REST API for compatibility with a wide range of libraries and +tools. The REST API is accessible on port `9000` and has the following +query-capable entrypoints: + +|Entrypoint |HTTP Method|Description | API Docs | +|:------------------------------------------|:----------|:--------------------------------------|:--------------------------------------------------------------| +|[`/exp?query=..`](#exp-sql-query-to-csv) |GET |Export SQL Query as CSV | [Reference](/docs/reference/api/rest/#exp---export-data) | +|[`/exec?query=..`](#exec-sql-query-to-json)|GET |Run SQL Query returning JSON result set| [Reference](/docs/reference/api/rest/#exec---execute-queries) | + +For details such as content type, query parameters and more, refer to the +[REST API](/docs/reference/api/rest/) docs. + +### `/exp`: SQL Query to CSV + +The `/exp` entrypoint allows querying the database with a SQL select query and +obtaining the results as CSV. + +For obtaining results in JSON, use `/exec` instead, documented next. 
+ + + + + +```bash +curl -G --data-urlencode \ + "query=SELECT * FROM example_table2 LIMIT 3" \ + http://localhost:9000/exp +``` + +```csv +"col1","col2","col3" +"a",10.5,true +"b",100.0,false +"c",,true +``` + + + + + +```python +import requests + +resp = requests.get( + 'http://localhost:9000/exp', + { + 'query': 'SELECT * FROM example_table2', + 'limit': '3,6' # Rows 3, 4, 5 + }) +print(resp.text) +``` + +```csv +"col1","col2","col3" +"d",20.5,true +"e",200.0,false +"f",,true +``` + + + + + +### `/exec`: SQL Query to JSON + +The `/exec` entrypoint takes a SQL query and returns results as JSON. + +This is similar to the `/exec` entry point which returns results as CSV. + +### Querying Data + + + + + + + + +```shell +curl -G \ + --data-urlencode "query=SELECT x FROM long_sequence(5);" \ + http://localhost:9000/exec +``` + +The JSON response contains the original query, a `"columns"` key with the schema +of the results, a `"count"` number of rows and a `"dataset"` with the results. + +```json +{ + "query": "SELECT x FROM long_sequence(5);", + "columns": [ + {"name": "x", "type": "LONG"}], + "dataset": [ + [1], + [2], + [3], + [4], + [5]], + "count": 5 +} +``` + + + + + + + +```python +import sys +import requests + +host = 'http://localhost:9000' + +sql_query = "select * from long_sequence(10)" + +try: + response = requests.get( + host + '/exec', + params={'query': sql_query}).json() + for row in response['dataset']: + print(row[0]) +except requests.exceptions.RequestException as e: + print(f'Error: {e}', file=sys.stderr) +``` + + + + + + + +```javascript +const fetch = require("node-fetch") + +const HOST = "http://localhost:9000" + +async function run() { + try { + const query = "SELECT x FROM long_sequence(5);" + + const response = await fetch(`${HOST}/exec?query=${encodeURIComponent(query)}`) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +run() +``` + + + + + + + +```go +package main + +import ( + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" +) + +func main() { + u, err := url.Parse("http://localhost:9000") + checkErr(err) + + u.Path += "exec" + params := url.Values{} + params.Add("query", "SELECT x FROM long_sequence(5);") + u.RawQuery = params.Encode() + url := fmt.Sprintf("%v", u) + + res, err := http.Get(url) + checkErr(err) + + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + checkErr(err) + + log.Println(string(body)) +} + +func checkErr(err error) { + if err != nil { + panic(err) + } +} +``` + + + + + diff --git a/docs/develop/questdb.io.code-workspace b/docs/develop/questdb.io.code-workspace new file mode 100644 index 00000000..66dd5d45 --- /dev/null +++ b/docs/develop/questdb.io.code-workspace @@ -0,0 +1,35 @@ +{ + "folders": [ + { + "path": "../.." 
+ } + ], + "settings": { + "grammarly.selectors": [ + { + "language": "markdown", + "scheme": "file" + }, + { + "language": "Log", + "scheme": "output" + }, + { + "language": "plaintext", + "scheme": "file", + "pattern": "blog/2022-11-30-the-case-for-mechanical-sympathy.mdx" + }, + { + "language": "mdx", + "scheme": "file", + "pattern": "blog/2022-11-30-the-case-for-mechanical-sympathy.mdx" + }, + { + "language": "mdx", + "scheme": "file", + "pattern": "blog/2022-11-30-full-table-scan-are-fast.mdx" + } + ], + "grammarly.startTextCheckInPausedState": true + } +} \ No newline at end of file diff --git a/docs/develop/update-data.md b/docs/develop/update-data.md new file mode 100644 index 00000000..77b7527a --- /dev/null +++ b/docs/develop/update-data.md @@ -0,0 +1,370 @@ +--- +title: Update data +description: + This page demonstrates how to update time-series data in QuestDB from + NodeJS, Java, Python and cURL. The examples show how to use the REST and Postgres APIs. +--- + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + +This page shows how to update existing data in QuestDB using different programming +languages and tools. There are two main methods for updating data: + +- [Postgres wire](#postgres-compatibility) protocol for compatibility with a + range of clients +- [REST API](#rest-api) provides access to QuestDB via HTTP + +## Prerequisites + +This page assumes that QuestDB is running and accessible. QuestDB can be run +using either [Docker](/docs/get-started/docker/), the +[Binaries](/docs/get-started/binaries/) or +[Homebrew](/docs/get-started/homebrew/) for macOS users. + +## Postgres compatibility + +You can query data using the [Postgres](/docs/reference/api/postgres/) endpoint +that QuestDB exposes. This is accessible via port `8812` by default. More +information on the Postgres wire protocol implementation with details on +supported features can be found on the +[Postgres API reference](/docs/reference/api/postgres/) page. + + + + + + + + + +This example uses the [`pg` package](https://www.npmjs.com/package/pg) which +allows for quickly building queries using Postgres wire protocol. Details on the +use of this package can be found on the +[node-postgres documentation](https://node-postgres.com/). + +This example uses naive `Date.now() * 1000` inserts for Timestamp types in +microsecond resolution. For accurate microsecond timestamps, the +[process.hrtime.bigint()](https://nodejs.org/api/process.html#processhrtimebigint) call can be used. 
+ +```javascript +"use strict" + +const { Client } = require("pg") + +const start = async () => { + const client = new Client({ + database: "qdb", + host: "127.0.0.1", + password: "quest", + port: 8812, + user: "admin", + options: "-c statement_timeout=300000" + }) + await client.connect() + + const createTable = await client.query( + "CREATE TABLE IF NOT EXISTS trades (ts TIMESTAMP, date DATE, name STRING, value INT) timestamp(ts);" + ) + console.log(createTable) + + for (let rows = 0; rows < 10; rows++) { + // Providing a 'name' field allows for prepared statements / bind variables + let now = new Date().toISOString() + const query = { + name: "insert-values", + text: "INSERT INTO trades VALUES($1, $2, $3, $4);", + values: [now, now, "node pg prep statement", rows], + } + await client.query(query) + } + + const updateData = await client.query( + "UPDATE trades SET name = 'update example', value = 123 WHERE value > 7;" + ) + console.log(updateData) + + await client.query("COMMIT") + + const readAll = await client.query("SELECT * FROM trades") + console.log(readAll.rows) + + await client.end() +} + +start() + .then(() => console.log("Done")) + .catch(console.error) +``` + + + + + + + +```java +package com.myco; + +import java.sql.*; +import java.util.Properties; + +class App { + public static void main(String[] args) throws SQLException { + Properties properties = new Properties(); + properties.setProperty("user", "admin"); + properties.setProperty("password", "quest"); + //set sslmode value to 'require' if connecting to a QuestDB Cloud instance + properties.setProperty("sslmode", "disable"); + properties.setProperty("options", "-c statement_timeout=300000"); + + final Connection connection = DriverManager.getConnection("jdbc:postgresql://localhost:8812/qdb", properties); + connection.setAutoCommit(false); + + final PreparedStatement statement = connection.prepareStatement("CREATE TABLE IF NOT EXISTS trades (ts TIMESTAMP, date DATE, name STRING, value INT) timestamp(ts);"); + statement.execute(); + + try (PreparedStatement preparedStatement = connection.prepareStatement("INSERT INTO trades VALUES (?, ?, ?, ?)")) { + preparedStatement.setTimestamp(1, new Timestamp(io.questdb.std.Os.currentTimeMicros())); + preparedStatement.setDate(2, new Date(System.currentTimeMillis())); + preparedStatement.setString(3, "abc"); + preparedStatement.setInt(4, 123); + preparedStatement.execute(); + } + + final PreparedStatement statement = connection.prepareStatement("UPDATE trades SET value = value + 1000;"); + statement.execute(); + + System.out.println("Done"); + connection.close(); + } +} +``` + + + + + + + +This example uses the +[psychopg3](https://www.psycopg.org/psycopg3/docs/) adapter. + +To [install](https://www.psycopg.org/psycopg3/docs/basic/install.html) the client library, use `pip`: + +```shell +python3 -m pip install "psycopg[binary]" +``` + +```python +import psycopg as pg +import time + +# Connect to an existing QuestDB instance + +conn_str = 'user=admin password=quest host=127.0.0.1 port=8812 dbname=qdb' +with pg.connect(conn_str, autocommit=True) as connection: + + # Open a cursor to perform database operations + with connection.cursor() as cur: + + # Execute a command: this creates a new table + cur.execute(''' + CREATE TABLE IF NOT EXISTS test_pg ( + ts TIMESTAMP, + name STRING, + value INT + ) timestamp(ts); + ''') + + print('Table created.') + + # Insert data into the table. 
+        for x in range(10):
+
+            # Current epoch time in microseconds, the resolution QuestDB
+            # timestamps expect
+            timestamp = time.time_ns() // 1000
+
+            cur.execute('''
+                INSERT INTO test_pg
+                    VALUES (%s, %s, %s);
+                ''',
+                (timestamp, 'python example', x))
+
+        print('Rows inserted:')
+
+        # Query the database and obtain data as Python objects.
+        cur.execute('SELECT * FROM test_pg;')
+        records = cur.fetchall()
+        for row in records:
+            print(row)
+
+        # Update records
+        cur.execute('UPDATE test_pg SET value = value + 100;')
+
+        # Query the database to show the updated values.
+        print("Updated data:")
+        cur.execute('SELECT * FROM test_pg;')
+        records = cur.fetchall()
+        for row in records:
+            print(row)
+```
+
+
+
+## REST API
+
+QuestDB exposes a REST API for compatibility with a wide range of libraries and
+tools. The REST API is accessible on port `9000` and has the following
+entrypoints:
+
+- `/imp` - import data
+- `/exec` - execute an SQL statement
+
+More details on the use of these entrypoints can be found on the
+[REST API reference](/docs/reference/api/rest/) page.
+
+### `/imp` endpoint
+
+The `/imp` endpoint does not allow for updating data.
+
+### `/exec` endpoint
+
+The `/exec` endpoint, however, can be used to create a table, insert rows, and
+run `UPDATE` statements:
+
+
+
+```shell
+# Create Table
+curl -G \
+  --data-urlencode "query=CREATE TABLE IF NOT EXISTS trades(name STRING, value INT)" \
+  http://localhost:9000/exec
+
+# Insert a row
+curl -G \
+  --data-urlencode "query=INSERT INTO trades VALUES('abc', 123456)" \
+  http://localhost:9000/exec
+
+# Update a row
+curl -G \
+  --data-urlencode "query=UPDATE trades SET value = 9876 WHERE name = 'abc'" \
+  http://localhost:9000/exec
+```
+
+
+
+The `node-fetch` package can be installed using `npm i node-fetch`.
+ +```javascript +const fetch = require("node-fetch") + +const HOST = "http://localhost:9000" + +async function createTable() { + try { + const query = "CREATE TABLE IF NOT EXISTS trades (name STRING, value INT)"; + + const response = await fetch(`${HOST}/exec?query=${encodeURIComponent(query)}`) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +async function insertData() { + try { + const query = "INSERT INTO trades VALUES('abc', 123456)" + + const response = await fetch(`${HOST}/exec?query=${encodeURIComponent(query)}`) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +async function updateData() { + try { + const query = "UPDATE trades SET value = 9876 WHERE name = 'abc'" + + const response = await fetch(`${HOST}/exec?query=${encodeURIComponent(query)}`) + const json = await response.json() + + console.log(json) + } catch (error) { + console.log(error) + } +} + +createTable().then(insertData).then(updateData) +``` + + + + + + + +```python +import requests +import json + +host = 'http://localhost:9000' + +def run_query(sql_query): + query_params = {'query': sql_query, 'fmt' : 'json'} + try: + response = requests.get(host + '/exec', params=query_params) + json_response = json.loads(response.text) + print(json_response) + except requests.exceptions.RequestException as e: + print("Error: %s" % (e)) + +# create table +run_query("CREATE TABLE IF NOT EXISTS trades (name STRING, value INT)") +# insert row +run_query("INSERT INTO trades VALUES('abc', 123456)") +# update row +run_query("UPDATE trades SET value = 9876 WHERE name = 'abc'") +``` + + + + + + +## Web Console + +By default, QuestDB has an embedded Web Console running at +http://[server-address]:9000. When running locally, this is accessible at +`http://localhost:9000`. The Web Console can be used to +explore table schemas, visualizing query results as tables or graphs, and +importing datasets from CSV files. For details on these components, refer to the +[Web Console reference](/docs/develop/web-console/) page. diff --git a/docs/develop/web-console.md b/docs/develop/web-console.md new file mode 100644 index 00000000..4f965863 --- /dev/null +++ b/docs/develop/web-console.md @@ -0,0 +1,284 @@ +--- +title: Web Console +description: + How to use the Web Console in QuestDB for importing, querying, and visualizing + data. +--- + +The Web Console is a client that allows you to interact with QuestDB. It +provides UI tools to query data and visualize the results in a table or plot. + +import Screenshot from "@theme/Screenshot" + + + +## Accessing the Web Console + +The Web Console will be available at `http://[server-address]:9000`. When +running locally, this will be `http://localhost:9000`. + +## Layout + + + +### System tables in Schema explorer + +It is possible to hide QuestDB system tables (`telemetry` and +`telemetry_config`) in Schema explorer by setting up the following configuration +option in a [server.conf](/docs/concept/root-directory-structure/#serverconf) +file: + +```bash title="/var/lib/questdb/conf/server.conf" +telemetry.hide.tables=true +``` + +## Code editor + +The default panel shown in the web console is the code editor which allows you +to write and run SQL queries. 
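+
+For example, the following query uses the built-in `long_sequence()` row
+generator and can be run without creating any tables first (a minimal sketch to
+try the editor with):
+
+```questdb-sql
+SELECT x FROM long_sequence(5);
+```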
+ +### Shortcuts + +| Command | Action | +| :------------ | :-------------------------------------------------------------------------- | +| Run query | `f9` or `ctrl/cmd + enter` | +| Locate cursor | `f2`, use this to focus the SQL editor on your cursor in order to locate it | + +### Behavior + +As you can write multiple SQL commands separated by a semicolon, the Web Console +uses the following logic to decide which queries to execute: + +- Check if a query or part of a query is highlighted. If yes, it will be + executed, otherwise: +- Check if the cursor is within a SQL statement. If yes, the statement will be + executed, otherwise: +- Check if the cursor is on the same line as a SQL statement and after the + semicolon. If yes, this statement will be executed, finally: +- If the cursor is on a line that does not contain a SQL statement, the next + encountered statement will be executed. If there is no statement after the + cursor, the previous statement will be used. + +### Visualizing results + +You can run a query and click on the `Chart` button. This will display the chart +editor. You can then choose chart type, for example, `line` and then press +`Draw`. + +### Toggle the grid table + +The following options are available to toggle the grid layout: + + + +- Freeze left column: + + To freeze more columns, drag the vertical solid line to the desired column + + + +- Move the selected column to the front +- Reset grid layout +- Refresh + +### Downloading results + +You can download the query result by clicking the `CSV` button. This file will +be useful to test the import functionality below. + +## Notification panel + +The panel at the bottom of the web console shows the status of the most-recent +query. This panel can be toggled by clicking the up-arrow icon on the right of +the panel and shows the last 20 messages and notifications after query +execution. + + + +## Import + +The Web Console offers an interface to import small batches of CSV files as new +tables or to existing tables. + +The import tab can be accessed by clicking this icon on the left-side navigation +menu: + + + +The import UI: + + + +### Import configurations + +Once a file is added to the Upload queue, the following configurations will be +displayed: + + + +- `File`: The file name, size, and the import status +- `Table name`: The name for the table to be imported. By default, this is the + name of the imported file. +- `Schema`: The Colum data name and data type. The schema is automatically + detected but it is possible to set it manually. See + [Table schema](#table-schema) for more information. +- `Write mode`: + - `Append`: Uploaded data will be appended to the end of the table. + - `Overwrite`: Uploaded data will override existing data in the table +- `Actions`: + - `Settings`: Additional configuration for the import. See + [Import settings](#import-settings) for more information. + - `Upload`: Start the upload + - `X`: Delete the file from the Upload queue + +#### Table schema + +To update the schema of an existing table, select `Overwrite` write mode to +replace the rows and the partition unit with the CSV file. + +For an existing table, changing the table name allows importing it as a new +separate table. 
+
+The following settings are available for configuration for both existing and
+new table imports:
+
+| Setting              | Description                                                                                                                                       |
+| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Partition            | Change the partition setting of the table.                                                                                                        |
+| Designated timestamp | Elect a designated timestamp. This is mandatory if the partition unit is not `NONE`.                                                              |
+| Data type            | Define the data type. For timestamps, the timestamp format is mandatory and there is the option to elect the column as the designated timestamp.  |
+
+To update the schema of a new table, in addition to the above, the following
+settings are also available for configuration:
+
+| Setting       | Description                                                                                 |
+| ------------- | ------------------------------------------------------------------------------------------- |
+| Delete column | Click `x` to delete the column from the table.                                              |
+| Add column    | At the end of the column list, select “Add column” to insert a new column into the table.   |
+
+The following table schema details are imported based on the CSV file:
+
+- The column order
+- The column name
+
+#### Import settings
+
+The Settings panel displays the following configurations:
+
+| Setting                            | Description                                                                                                                                                                                                                                                        | Default value |
+| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------- |
+| Maximum number of uncommitted rows | The size of the commit batch. A commit will be issued when this number is reached in the buffer. This setting is the same as `cairo.max.uncommitted.rows`. To avoid running out of memory during an import, set this value based on the RAM size of the machine. | 500000        |
+| Delimiter                          | The delimiter character to parse the CSV file.                                                                                                                                                                                                                     | Automatic     |
+| Atomicity                          | Error behavior. Rejected rows or columns will be reported in the [Details](#import-details) panel after the import is completed.                                                                                                                                  | Skip column   |
+| Force header                       | Whether to interpret the first line as the header. The result will be reported in the [Details](#import-details) panel after the import is completed.                                                                                                             | FALSE         |
+| Skip line extra values             | Whether the parser should ignore extra values by ignoring the entire line. An extra value is something in addition to what is defined by the header.                                                                                                              | FALSE         |
+
+### Import details
+
+The import status is displayed in the file column. Once the action is
+completed, the number of rows inserted is displayed alongside the `Details`
+tab:
+
+
+
+The `Details` panel lists rejected rows and errors in importing each column:
+
+
+
+The details such as header forced, table name, and rejected rows are related to
+the defined import settings. For example, setting Atomicity in Settings to Skip
+row will result in skipped rows being reported in Rejected rows after the
+import.
+
+## Create table
+
+The "Create" tab on the top of the page allows table creation using an
+interactive UI:
+
+
+
+Use the create table panel to define the table partition, the WAL setting, and
+to add columns to a new table:
+
+
+
+## Providing an asset path
+
+It's possible to provide an asset path if QuestDB is being run from somewhere
+that is not the server root.
In this case, create a `.env` file in the UI +directory of QuestDB and provide the path to web console assets as follows: + +```bash +ASSET_PATH=/path/to/questdb/ui +``` + +An +[example dotenv](https://github.com/questdb/ui/blob/main/packages/web-console/.env.example) +file is provided which can be renamed to `.env` and placed in QuestDB's UI +directory. diff --git a/docs/get-started/_no-jre-prerequisites.partial.mdx b/docs/get-started/_no-jre-prerequisites.partial.mdx new file mode 100644 index 00000000..16fdc1d2 --- /dev/null +++ b/docs/get-started/_no-jre-prerequisites.partial.mdx @@ -0,0 +1,37 @@ +import Button from "@theme/Button" + +

+ {props.label} +

+ +:::note + +This package differs from others by not embedding Java. + +You should use it only if there is no package specific to your platform. +In practice, this often means ARM Linux. + +::: + +#### Java 11 + +You need to have Java 11 installed locally. To check your installed version: + +```shell +java -version +``` + +If you do not have Java installed, install one of the following supported +packages for your operating system: + +- AdoptOpenJDK +- Amazon Corretto +- OpenJDK +- Oracle Java + +Other Java distributions might work but are not tested. + +#### `JAVA_HOME` environment variable + +The environment variable `JAVA_HOME` needs to point to your Java 11 installation +folder. diff --git a/docs/get-started/_run-windows.partial.mdx b/docs/get-started/_run-windows.partial.mdx new file mode 100644 index 00000000..f1026e13 --- /dev/null +++ b/docs/get-started/_run-windows.partial.mdx @@ -0,0 +1,12 @@ +To run the instance as a windows service (preferably as a privileged account): + +```shell +questdb.exe install +questdb.exe start +``` + +To run a one-off instance in the current work directory: + +```shell +questdb.exe +``` diff --git a/docs/get-started/_upgrade-linux.partial.mdx b/docs/get-started/_upgrade-linux.partial.mdx new file mode 100644 index 00000000..32e15473 --- /dev/null +++ b/docs/get-started/_upgrade-linux.partial.mdx @@ -0,0 +1,9 @@ +```shell +./questdb.sh stop +``` + +Overwrite `bin` and `lib` folders with the new files + +```shell +./questdb.sh start +``` diff --git a/docs/get-started/_upgrade-windows.partial.mdx b/docs/get-started/_upgrade-windows.partial.mdx new file mode 100644 index 00000000..f6995735 --- /dev/null +++ b/docs/get-started/_upgrade-windows.partial.mdx @@ -0,0 +1,9 @@ +```shell +./questdb.exe stop +``` + +Overwrite `bin` and `lib` folders with the new files + +```shell +./questdb.exe start +``` diff --git a/docs/get-started/binaries.mdx b/docs/get-started/binaries.mdx new file mode 100644 index 00000000..c1ba18e7 --- /dev/null +++ b/docs/get-started/binaries.mdx @@ -0,0 +1,150 @@ +--- +title: Get started with QuestDB from the binaries +sidebar_label: Binaries +description: + How to install and launch QuestDB from the binaries which are available on the + Get QuestDB page. +--- + +import CodeBlock from "@theme/CodeBlock" +import Button from "@theme/Button" +import { getAssets } from "../../src/utils/get-assets" +import { TabsPlatforms } from "../../src/modules/TabsPlatforms" +import NoJrePrerequisite from "./_no-jre-prerequisites.partial.mdx" +import RunWindows from "./_run-windows.partial.mdx" +import UpgradeLinux from "./_upgrade-linux.partial.mdx" +import UpgradeWindows from "./_upgrade-windows.partial.mdx" + +export const platforms = [ + { label: "Linux", value: "linux" }, + { label: "Windows", value: "windows" }, + { label: "FreeBSD", value: "bsd" }, + { label: "Any (no JVM)", value: "noJre" }, +] + +This page describes how to download and run QuestDB via binaries. + +- Linux or FreeBSD versions include `questdb.sh` script, +- Windows version has `questdb.exe` executable, +- For macOS, check out [Homebrew](/docs/get-started/homebrew/). + +## Download the binaries + +Use the following links to download a tarball for your operating system: + + + + + platform === "noJre" ? ( + + ) : ( + + ) + } +/> + + + +## Extract the tarballs + + + + ( + + {`tar -xvf ${href.split("/").reverse()[0]}`} + + )} +/> + + + +## Run QuestDB + +This section lists commands to run QuestDB for different OSes. They can be used +to run No JVM binaries. 
+
+
+
+ {
+    if (platform === "linux" || platform === "bsd") {
+      return (
+        ./questdb.sh start
+      );
+    }
+    if (platform === "windows") {
+      return ();
+    }
+  }}
+/>
+
+
+
+The file structure is as follows:
+
+```bash
+
+/questdb
+├── conf
+├── db
+├── log
+└── public
+```
+
+For information about the file structure, see
+[root directory](/docs/concept/root-directory-structure/).
+
+## Upgrade QuestDB version
+
+This section lists commands to upgrade QuestDB for different OSes. They also
+apply to the no-JVM binaries.
+
+:::note
+
+Check the [release notes](https://github.com/questdb/questdb/releases) and
+ensure that necessary [backup](/docs/operations/backup/) is completed.
+
+:::
+
+Steps to upgrade the QuestDB version:
+
+- Stop the instance
+- Overwrite the `bin` and `lib` folders with the new files
+- Restart the instance
+
+
+
+ {
+    if (platform === "linux" || platform === "bsd") {
+      return ();
+    }
+    if (platform === "windows") {
+      return ();
+    }
+  }}
+/>
+
+
+
+## Next steps
+
+Once you have extracted the tarball, you are ready to use QuestDB. Navigate to
+our [command-line options](/docs/reference/command-line-options/) page to learn
+more about its usage.
diff --git a/docs/get-started/docker.md b/docs/get-started/docker.md
new file mode 100644
index 00000000..390caf6d
--- /dev/null
+++ b/docs/get-started/docker.md
@@ -0,0 +1,349 @@
+---
+title: Get started with QuestDB via Docker
+sidebar_label: Docker
+description:
+  Guide showing how to use QuestDB with Docker. This also covers how to import
+  and persist QuestDB data in a Docker container.
+---
+
+import InterpolateReleaseData from "../../src/components/InterpolateReleaseData"
+import CodeBlock from "@theme/CodeBlock"
+import Tabs from "@theme/Tabs"
+import TabItem from "@theme/TabItem"
+
+QuestDB has images for both Linux/macOS and Windows on
+[Docker Hub]({@dockerUrl@}).
+
+## Install Docker
+
+To begin, install Docker. You can find guides for your platform on the
+[official documentation](https://docs.docker.com/get-docker/).
+
+## Run QuestDB image
+
+Once Docker is installed, you will need to pull QuestDB's image from
+[Docker Hub]({@dockerUrl@}) and create a container.
+
+This can be done with a single command using:
+
+ (
+    
+      {`docker run \\
+  -p 9000:9000 -p 9009:9009 -p 8812:8812 -p 9003:9003 \\
+  questdb/questdb:${release.name}`}
+    
+  )}
+/>
+
+This command starts a Docker container from the `questdb/questdb` image. In
+addition, it exposes some ports, allowing you to explore QuestDB.
+
+In order to configure QuestDB, it is recommended to mount a
+[volume](#v-parameter-to-mount-storage) to allow data persistence. This can be
+done by adding a `-v` flag to the above command:
+
+```
+-v "/host/volume/location:/var/lib/questdb"
+```
+
+Each parameter is described in detail below.
+
+### `-p` parameter to expose ports
+
+This parameter will expose a port to the host. You can specify:
+
+- `-p 9000:9000` - [REST API](/docs/reference/api/rest/) and
+  [Web Console](/docs/develop/web-console/)
+- `-p 9009:9009` - [InfluxDB line protocol](/docs/reference/api/ilp/overview/)
+- `-p 8812:8812` - [Postgres wire protocol](/docs/reference/api/postgres/)
+- `-p 9003:9003` -
+  [Min health server](/docs/reference/configuration#minimal-http-server/)
+
+All ports are optional, you can pick only the ones you need. For example, it is
+enough to expose `9009` if you only plan to use
+[InfluxDB line protocol](/docs/reference/api/ilp/overview/).
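+
+For instance, a minimal command exposing only the InfluxDB line protocol port
+could look like this (a sketch; add further `-p` flags as required):
+
+```shell
+docker run -p 9009:9009 questdb/questdb
+```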
+ +### `-v` parameter to mount storage + +This parameter will make a local directory available to QuestDB Docker +container. It will have all data ingested to QuestDB, server logs and +configuration. + +The QuestDB [root_directory](/docs/concept/root-directory-structure/) is located +at the `/var/lib/questdb` path in the container. + +### Docker image version + +By default, `questdb/questdb` points to the latest QuestDB version available on +Docker. However, it is recommended to define the version used. + + ( + + {`questdb/questdb:${release.name}`} + + )} +/> + +## Container status + +You can check the status of your container with `docker ps`. + +It also lists the exposed ports, container name, uptime and more: + +```shell title="Finding container status with docker ps" +docker ps +``` + +```shell title="Result of docker ps" +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +dd363939f261 questdb/questdb "/app/bin/java -m io…" 3 seconds ago Up 2 seconds 8812/tcp, 9000/tcp frosty_gauss +``` + +This container: + +* has an id of `dd363939f261` +* uses ports `8812` & `9000`, for Postgres wire protocol and HTTP respectively +* is using a `questdb/questdb` image +* ran java to start the binary +* is 3 seconds old +* has been up for 2 seconds +* has the unfortunate name of `frosty_gauss` + +For full container status information, see the [`docker ps` manual](https://docs.docker.com/engine/reference/commandline/ps/). + +### Debugging container logs + +Docker may generate a runtime error. + +The error may not be accurate, as the true culprit is often indicated higher up in the logs. + +To see the full log, retrieve the UUID - also known as the `CONTAINER ID` - using `docker ps`: + +```shell title="Finding the CONTAINER ID" +CONTAINER ID IMAGE ... +dd363939f261 questdb/questdb ... +``` + +Now pass the `CONTAINER ID` - or `dd363939f261` - to the `docker logs` command: + + +```shell title="Generating a docker log from a CONTAINER ID" +$ docker logs dd363939f261 +No arguments found, start with default arguments +Running as questdb user +Log configuration loaded from: /var/lib/questdb/conf/log.conf +... +... +``` + +Note that the log will pull from `/var/lib/questdb/conf/log.conf` by default. + +Sharing this log when seeking support for Docker deployments will help us find the root cause. + +## Importing data and sending queries + +When QuestDB is running, you can start interacting with it: + +- Port `9000` is for REST. More info is available on the + [REST documentation page](/docs/reference/api/rest/). +- Port `8812` is used for Postgres. Check our + [Postgres reference page](/docs/reference/api/postgres/). +- Port `9009` is dedicated to InfluxDB Line Protocol. Consult our + [InfluxDB protocol page](/docs/reference/api/ilp/overview/). + +## Data persistence + +### Mounting a volume + +Volumes can be mounted to the QuestDB Docker container so that data may be +persisted or server configuration settings may be passed to an instance. 
The +following example demonstrated how to mount the current directory to a QuestDB +container using the `-v` flag in a Docker `run` command: + + ( + + {`docker run -p 9000:9000 \\ + -p 9009:9009 \\ + -p 8812:8812 \\ + -p 9003:9003 \\ + -v "$(pwd):/var/lib/questdb" \\ + questdb/questdb:${release.name}`} + + )} +/> + +The current directory will then have data persisted to disk for convenient +migration or backups: + +```bash title="Current directory contents" +├── conf +│ └── server.conf +├── db +├── log +├── public +└── snapshot (optional) +``` + +For details on passing QuestDB server settings to a Docker container, see the +[Docker section](/docs/reference/configuration/#docker) of the server +configuration documentation. + +For information about the file structure, see +[root directory](/docs/concept/root-directory-structure/). + +### Upgrade QuestDB version + +It is possible to upgrade your QuestDB instance on Docker when a volume is +mounted to maintain data persistence. + +:::note + +- Check the [release notes](https://github.com/questdb/questdb/releases) and + ensure that necessary [backup](/docs/operations/backup/) is completed. +- Upgrading an instance is possible only when the original instance has a volume + mounted. Without mounting a volume for the original instance, the following + steps create a new instance and data in the old instance cannot be retrieved. + +::: + +1. Run `docker ps` to copy the container name or ID: + +```shell title="Container status" + +# The existing QuestDB version is 6.5.2: + +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +dd363939f261 questdb/questdb:6.5.2 "/app/bin/java -m io…" 3 seconds ago Up 2 seconds 8812/tcp, 9000/tcp frosty_gauss +``` + +2. Stop the instance and then remove the container: + +```shell +docker stop dd363939f261 +docker rm dd363939f261 +``` + +3. Download the latest QuestDB image: + + ( + + {`docker pull questdb/questdb:${release.name}`} + + )} +/> + +4. Start a new container with the new version and the same volume mounted: + + ( + + {`docker run -p 8812:8812 -p 9000:9000 -v "$(pwd):/var/lib/questdb" questdb/questdb:${release.name}`} + + )} +/> + +### Writing logs to disk + +When mounting a volume to a Docker container, a logging configuration file may +be provided in the container located at `/conf/log.conf`: + +```bash title="Current directory contents" +└── conf + ├── log.conf + └── server.conf +``` + +For example, a file with the following contents can be created: + +```shell title="./conf/log.conf" +# list of configured writers +writers=file,stdout,http.min + +# file writer +w.file.class=io.questdb.log.LogFileWriter +w.file.location=questdb-docker.log +w.file.level=INFO,ERROR,DEBUG + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# min http server, used monitoring +w.http.min.class=io.questdb.log.LogConsoleWriter +w.http.min.level=ERROR +w.http.min.scope=http-min-server +``` + +The current directory can be mounted: + +```shell title="Mounting the current directory to a QuestDB container" +docker run -p 9000:9000 \ + -p 9009:9009 \ + -p 8812:8812 \ + -p 9003:9003 \ + -v "$(pwd):/root/.questdb/" questdb/questdb +``` + +The container logs will be written to disk using the logging level and file name +provided in the `conf/log.conf` file, in this case in `./questdb-docker.log`: + +```shell title="Current directory tree" +├── conf +│ ├── log.conf +│ └── server.conf +├── db +│ ├── table1 +│ └── table2 +├── public +│ ├── ui / assets +│ ├── ... 
+│ └── version.txt +└── questdb-docker.log +``` + +For more information on logging, see the +[configuration reference documentation](/docs/reference/configuration/#logging). + +### Restart an existing container + +Running the following command will create a new container for the QuestDB image: + +```shell +docker run -p 9000:9000 \ + -p 9009:9009 \ + -p 8812:8812 \ + -p 9003:9003 \ + questdb/questdb +``` + +By giving the container a name with `--name container_name`, we have an easy way +to refer to the container created by run later on: + +```shell +docker run -p 9000:9000 \ + -p 9009:9009 \ + -p 8812:8812 \ + -p 9003:9003 \ + --name docker_questdb \ + questdb/questdb +``` + +If we want to re-use this container and its data after it has been stopped, we +can use the following commands: + +```shell +# bring the container up +docker start docker_questdb +# shut the container down +docker stop docker_questdb +``` + +Alternatively, restart it using the `CONTAINER ID`: + +```shell title="Starting a container by CONTAINER ID" +docker start dd363939f261 +``` \ No newline at end of file diff --git a/docs/get-started/first-database.md b/docs/get-started/first-database.md new file mode 100644 index 00000000..8474f56e --- /dev/null +++ b/docs/get-started/first-database.md @@ -0,0 +1,227 @@ +--- +title: Create my first dataset +description: + This document shows how to work with QuestDB as a time-series database by + generating dummy time-series data, insert the data into a table, then querying + and cleaning up the example data set. +--- + +The goal of this guide is to explore QuestDB's features and to interact with +time-series data and assumes you have an instance running. You can find guides +to setup QuestDB on the [introduction page](/docs/). In this tutorial, you will +learn how to: + +1. [Create tables](#creating-a-table) +2. [Populate tables with sample data](#inserting-data) +3. [Run simple and advanced queries](#running-queries) +4. [Delete tables](#deleting-tables) + +As an example, we will look at hypothetical temperature readings from a variety +of sensors. + +:::note + +All commands are run through the [Web Console](/docs/develop/web-console/) +accessible at `http://localhost:9000`. + +You can also run the same SQL via the +[Postgres endpoint](/docs/reference/api/postgres/) or the +[REST API](/docs/reference/api/rest/). + +::: + +## Creating a table + +The first step is to create tables. One table will contain the metadata of our +sensors, and the other will contain the actual readings (payload data) from +these sensors. + +Let's start by creating the `sensors` table: + +```questdb-sql +CREATE TABLE sensors (ID LONG, make STRING, city STRING); +``` + +For more information about this statement, please refer to the +[CREATE TABLE](/docs/reference/sql/create-table/) reference documentation. + +## Inserting data + +Let's populate our `sensors` table with procedurally-generated data: + +```questdb-sql title="Insert as SELECT" +INSERT INTO sensors + SELECT + x ID, --increasing integer + rnd_str('Eberle', 'Honeywell', 'Omron', 'United Automation', 'RS Pro') make, + rnd_str('New York', 'Miami', 'Boston', 'Chicago', 'San Francisco') city + FROM long_sequence(10000) x; +``` + +For more information about insert statements, refer to the +[INSERT](/docs/reference/sql/insert/) reference documentation. To learn more +about the functions used here, see the +[random generator](/docs/reference/function/random-value-generator/) and +[row generator](/docs/reference/function/row-generator/) pages. 
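+
+As a quick, optional illustration of those functions, the following standalone
+query previews the kind of values `rnd_str()` and `long_sequence()` produce:
+
+```questdb-sql
+SELECT x, rnd_str('Eberle', 'Honeywell', 'Omron') make
+FROM long_sequence(5);
+```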
+ +Our `sensors` table now contains 10,000 randomly-generated sensor values of +different makes and in various cities. Use this command to view the table: + +```questdb-sql +'sensors'; +``` + +It should look like the table below: + +| ID | make | city | +| :-- | :---------------- | :------ | +| 1 | Honeywell | Chicago | +| 2 | United Automation | Miami | +| 3 | Honeywell | Chicago | +| 4 | Omron | Miami | +| ... | ... | ... | + +Let's now create some sensor readings. In this case, we will create the table +and generate the data at the same time: + +```questdb-sql title="Create table as" +CREATE TABLE readings +AS( + SELECT + x ID, + timestamp_sequence(to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss'), rnd_long(1,10,0) * 100000L) ts, + rnd_double(0)*8 + 15 temp, + rnd_long(0, 10000, 0) sensorId + FROM long_sequence(10000000) x) +TIMESTAMP(ts) +PARTITION BY MONTH; +``` + +The query above demonstrates how to use the following features: + +- `TIMESTAMP(ts)` elects the `ts` column as a + [designated timestamp](/docs/concept/designated-timestamp/). This enables + partitioning tables by time. +- `PARTITION BY MONTH` creates a monthly partitioning strategy where the stored + data is effectively sharded by month. + +The generated data will look like the following: + +| ID | ts | temp | sensorId | +| :-- | :-------------------------- | :---------- | :------- | +| 1 | 2019-10-17T00:00:00.000000Z | 19.37373911 | 9160 | +| 2 | 2019-10-17T00:00:00.600000Z | 21.91184617 | 9671 | +| 3 | 2019-10-17T00:00:01.400000Z | 16.58367834 | 8731 | +| 4 | 2019-10-17T00:00:01.500000Z | 16.69308815 | 3447 | +| 5 | 2019-10-17T00:00:01.600000Z | 19.67991569 | 7985 | +| ... | ... | ... | ... | + +## Running queries + +Let's select all records from the `readings` table (note that `SELECT * FROM` is +optional in QuestDB): + +```questdb-sql +readings; +``` + +Let's also select the `count` of records from `readings`: + +```questdb-sql +SELECT count() FROM readings; +``` + +| count | +| :--------- | +| 10,000,000 | + +and the average reading: + +```questdb-sql +SELECT avg(temp) FROM readings; +``` + +| average | +| :-------------- | +| 18.999217780895 | + +We can now use the `sensors` table alongside the `readings` table to get more +interesting results using a `JOIN`: + +```questdb-sql +SELECT * +FROM readings +JOIN( + SELECT ID sensId, make, city + FROM sensors) +ON readings.sensorId = sensId; +``` + +The results should look like the table below: + +| ID | ts | temp | sensorId | sensId | make | city | +| :-- | :-------------------------- | :-------------- | :------- | :----- | :-------- | :------------ | +| 1 | 2019-10-17T00:00:00.000000Z | 16.472200460982 | 3211 | 3211 | Omron | New York | +| 2 | 2019-10-17T00:00:00.100000Z | 16.598432033599 | 2319 | 2319 | Honeywell | San Francisco | +| 3 | 2019-10-17T00:00:00.100000Z | 20.293681747009 | 8723 | 8723 | Honeywell | New York | +| 4 | 2019-10-17T00:00:00.100000Z | 20.939263119843 | 885 | 885 | RS Pro | San Francisco | +| 5 | 2019-10-17T00:00:00.200000Z | 19.336660059029 | 3200 | 3200 | Honeywell | San Francisco | +| 6 | 2019-10-17T00:00:01.100000Z | 20.946643576954 | 4053 | 4053 | Honeywell | Miami | + +```questdb-sql title="Aggregation keyed by city" +SELECT city, max(temp) +FROM readings +JOIN( + SELECT ID sensId, city + FROM sensors) a +ON readings.sensorId = a.sensId; +``` + +The results should look like the table below: + +| city | max | +| :------------ | :-------------- | +| New York | 22.999998786398 | +| San Francisco | 22.999998138348 | +| Miami | 
22.99999994818 | +| Chicago | 22.999991705861 | +| Boston | 22.999999233377 | + +```questdb-sql title="Aggregation by hourly time buckets" +SELECT ts, city, make, avg(temp) +FROM readings timestamp(ts) +JOIN + (SELECT ID sensId, city, make + FROM sensors + WHERE city='Miami' AND make='Omron') a +ON readings.sensorId = a.sensId +WHERE ts IN '2019-10-21;1d' -- this is an interval between 2019/10/21 and the next day +SAMPLE BY 1h -- aggregation by hourly time buckets +ALIGN TO CALENDAR; -- align the ts with the start of the hour (hh:00:00) +``` + +The results should look like the table below: + +| ts | city | make | average | +| :-------------------------- | :---- | :---- | :-------------- | +| 2019-10-21T00:00:00.000000Z | Miami | Omron | 20.004285872098 | +| 2019-10-21T00:01:00.000000Z | Miami | Omron | 16.68436714013 | +| 2019-10-21T00:02:00.000000Z | Miami | Omron | 15.243684089291 | +| 2019-10-21T00:03:00.000000Z | Miami | Omron | 17.193984104315 | +| 2019-10-21T00:04:00.000000Z | Miami | Omron | 20.778686822666 | +| ... | ... | ... | ... | + +For more information about these statements, please refer to the +[SELECT](/docs/reference/sql/select/), [JOIN](/docs/reference/sql/join/) and +[SAMPLE BY](/docs/reference/sql/sample-by/) pages. + +## Deleting tables + +We can now clean up the demo data by using +[`DROP TABLE`](/docs/reference/sql/drop/) SQL. Be careful using this statement +as QuestDB cannot recover data that is deleted in this way: + +```questdb-sql +DROP TABLE readings; +DROP TABLE sensors; +``` diff --git a/docs/get-started/homebrew.md b/docs/get-started/homebrew.md new file mode 100644 index 00000000..af173e81 --- /dev/null +++ b/docs/get-started/homebrew.md @@ -0,0 +1,109 @@ +--- +title: Get started with QuestDB via Homebrew (macOS) +sidebar_label: Homebrew +description: + A short guide for getting started with installing and running QuestDB via + Homebrew on macOS. +--- + +Each software release of QuestDB is distributed via the +[Homebrew](https://brew.sh/) package manager. + +## Install Homebrew + +Users who already have Homebrew installed may skip this section and proceed to +[Install QuestDB](#install-questdb). Otherwise, Homebrew can be installed by +running the official +[installation script](https://github.com/Homebrew/install/blob/master/install.sh) +via bash: + +```shell +/bin/bash -c \ +"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)" +``` + +## Install QuestDB + +To install QuestDB via Homebrew, run the following command: + +```shell +brew install questdb +``` + +On macOS, the location of the root directory of QuestDB and +[server configuration](/docs/reference/configuration/) files depending on the +chip: + +- Path on Macs with Apple Silicon (M1 or M2) chip: + + ```shell + /opt/homebrew/var/questdb + ``` + +- Path on Macs with Intel chip: + + ```shell + /usr/local/var/questdb + ``` + +The file structure is as the following: + +```bash + +/questdb +├── conf +├── db +├── log +└── public +``` + +For information about the file structure, see +[root directory](/docs/concept/root-directory-structure/). + +## Uninstall QuestDB + +To remove QuestDB, use Homebrew's `uninstall` command: + +```shell +brew uninstall questdb +``` + +## Troubleshooting Homebrew issues + +It's recommended to first run `update` before trying to install packages or +diagnose errors: + +```shell +brew update +``` + +Homebrew comes with a basic diagnostic command which can help find +inconsistencies with system settings and permissions. 
This command will exit +with a non-zero status if any potential problems are found: + +```shell +brew doctor +``` + +## Upgrade QuestDB version + +:::note + +Check the [release note](https://github.com/questdb/questdb/releases) and ensure +that necessary [backup](/docs/operations/backup/) is completed. + +::: + +Once the latest QuestDB version is published on +[Homebrew](https://github.com/Homebrew/homebrew-core/blob/master/Formula/q/questdb.rb), +the command to upgrade QuestDB version is: + +```shell +brew upgrade questdb +``` + +## Next steps + +Once you installed the QuestDB with Homebrew, you can navigate to our +[command-line options](/docs/reference/command-line-options/) page to learn more +about its usage. diff --git a/docs/get-started/learn-more.md b/docs/get-started/learn-more.md new file mode 100644 index 00000000..c29180fd --- /dev/null +++ b/docs/get-started/learn-more.md @@ -0,0 +1,23 @@ +--- +title: Learn more +sidebar_label: Learn more +description: + This document collects key concepts and guides for users starting testing data + with QuestDB. +--- + +To learn more about QuestDB and to create a proof of concept, please refer to +[Design for performance](/docs/operations/design-for-performance/). In addition, +our [Capacity planning](/docs/deployment/capacity-planning/) provides a more +advanced optimization guide for using QuestDB in production environment. + +The below are a few concepts and SQL keywords that QuestDB leverages for optimal +time series performance: + +- [Symbol](/docs/concept/symbol/): A data type designed for frequently used + string values +- [SQL extensions](/docs/concept/sql-extensions/): A guide to special QuestDB + SQL keywords and syntax designed for time-series data +- [Designated timestamp](/docs/concept/designated-timestamp/) and + [Partitions](/docs/concept/partitions/): Concepts and settings relevant to + partitioning tables based on time diff --git a/docs/guides/importing-data-rest.md b/docs/guides/importing-data-rest.md new file mode 100644 index 00000000..b985673c --- /dev/null +++ b/docs/guides/importing-data-rest.md @@ -0,0 +1,174 @@ +--- +title: CSV import via REST API +sidebar_label: Small CSV import +description: + This document describes how to load CSV data and specify text loader + configuration for timestamp and date parsing +--- + +The REST API provides an `/imp` endpoint exposed on port `9000` by default. This +endpoint allows streaming tabular text data directly into a table, supporting +CSV, TAB and pipe (`|`) delimited inputs with optional headers. Data types and +structures are detected automatically, but additional configurations can be +provided to improve automatic detection. + +:::note + +The REST API is better suited when the following conditions are true: + +- Regular uploads of small batches of data into the same table. +- The file batches do not contain overlapping periods (they contain distinct + days/weeks/months). Otherwise, the import performance will be impacted. + +For database migrations, or uploading one large CSV file into QuestDB, users may +consider using the `COPY` SQL command. See +[COPY command documentation](/docs/reference/sql/copy/) and +[Guide on CSV import](/docs/guides/importing-data/) for more details. + +::: + +### Importing compressed files + +It is possible to upload compressed files directly without decompression: + +```bash +gzip -cd compressed_data.tsv.gz | curl -v -F data=@- 'http://localhost:9000/imp' +``` + +The `data=@-` value instructs `curl` to read the file contents from `stdin`. 
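+
+If the target table should have a different name than the file, the table name
+can be passed to `/imp` as well. The example below assumes the `name` query
+parameter described in the REST API reference:
+
+```bash
+gzip -cd compressed_data.tsv.gz | curl -F data=@- 'http://localhost:9000/imp?name=my_table'
+```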
+ +### Specifying a schema during CSV import + +A `schema` JSON object can be provided with POST requests to `/imp` while +creating tables via CSV import. This allows for more control over user-defined +patterns for timestamps, or for explicitly setting types during column-creation. +The following example demonstrates basic usage, in this case, that the +`ticker_name` column should be parsed as `SYMBOL` type instead of `STRING`: + +```bash +curl \ + -F schema='[{"name":"ticker_name", "type": "SYMBOL"}]' \ + -F data=@trades.csv 'http://localhost:9000/imp' +``` + +If a timestamp column (`ts`) in this CSV file has a custom or non-standard +timestamp format, this may be included with the call as follows: + +```bash +curl \ + -F schema='[ \ + {"name":"ts", "type": "TIMESTAMP", "pattern": "yyyy-MM-dd - HH:mm:ss"}, \ + {"name":"ticker_name", "type": "SYMBOL"} \ + ]' \ + -F data=@trades.csv 'http://localhost:9000/imp' +``` + +For **nanosecond-precision** timestamps such as +`2021-06-22T12:08:41.077338934Z`, a pattern can be provided in the following +way: + +```bash +curl \ + -F schema='[ \ + {"name":"ts", "type": "TIMESTAMP", "pattern": "yyyy-MM-ddTHH:mm:ss.SSSUUUNNNZ"} \ + ]' \ + -F data=@my_file.csv \ + http://localhost:9000/imp +``` + +More information on the patterns for timestamps can be found on the +[date and time functions](/docs/reference/function/date-time/#date-and-timestamp-format) +page. + +:::note + +The `schema` object must precede the `data` object in calls to this REST +endpoint. For example: + +```bash +# correct order +curl -F schema='{my_schema_obj}' -F data=@my_file.csv http://localhost:9000/imp +# incorrect order +curl -F data=@my_file.csv -F schema='{my_schema_obj}' http://localhost:9000/imp +``` + +::: + +### Text loader configuration + +QuestDB uses a `text_loader.json` configuration file which can be placed in the +server's `conf` directory. This file does not exist by default, but has the +following implicit settings: + +```json title="conf/text_loader.json" +{ + "date": [ + { + "format": "dd/MM/y" + }, + { + "format": "yyyy-MM-dd HH:mm:ss" + }, + { + "format": "yyyy-MM-ddTHH:mm:ss.SSSz", + "locale": "en-US", + "utf8": false + }, + { + "format": "MM/dd/y" + } + ], + "timestamp": [ + { + "format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz", + "utf8": false + } + ] +} +``` + +#### Example + +Given a CSV file which contains timestamps in the format +`yyyy-MM-dd - HH:mm:ss.SSSUUU`, the following text loader configuration will +provide the correct timestamp parsing: + +```json title="conf/text_loader.json" +{ + "date": [ + { + "format": "dd/MM/y" + }, + { + "format": "yyyy-MM-dd HH:mm:ss" + }, + { + "format": "yyyy-MM-ddTHH:mm:ss.SSSz", + "locale": "en-US", + "utf8": false + }, + { + "format": "MM/dd/y" + } + ], + "timestamp": [ + { + "format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz", + "utf8": false + }, + { + "format": "yyyy-MM-dd - HH:mm:ss.SSSUUU", + "utf8": false + } + ] +} +``` + +The CSV data can then be loaded via POST request, for example, using cURL: + +```curl +curl -F data=@weather.csv 'http://localhost:9000/imp' +``` + +For more information on the `/imp` entry point, refer to the +[REST API documentation](/docs/reference/api/rest/#imp---import-data). 
diff --git a/docs/guides/importing-data.md b/docs/guides/importing-data.md
new file mode 100644
index 00000000..87c1f2d2
--- /dev/null
+++ b/docs/guides/importing-data.md
@@ -0,0 +1,619 @@
+---
+title: CSV import via COPY SQL
+sidebar_label: Large CSV import
+description:
+  This document describes how to load large CSV data using the COPY SQL keyword.
+---
+
+The [COPY](/docs/reference/sql/copy/) SQL command is the preferred way to import
+large CSV files into partitioned tables. It should be used to migrate data from
+another database into QuestDB. This guide describes the method of migrating data
+to QuestDB via CSV files. For the time being, this is the only way to migrate
+data from other databases into QuestDB.
+
+This guide is applicable to QuestDB version 6.5 and higher.
+
+:::caution
+
+For partitioned tables, the best `COPY` performance can be achieved only on a
+machine with a local, physically attached SSD. It is possible to use a network
+block storage, such as an AWS EBS volume, to perform the operation, with the
+following impact:
+
+- Users need to configure the maximum IOPS and throughput setting values for the
+  volume.
+- The required import time is likely to be 5-10x longer.
+
+:::
+
+## Prepare the import
+
+Preparation is key. Import is a multi-step process, which consists of:
+
+- Export the existing database as CSV files
+- Enable and configure the `COPY` command to be optimal for the system
+- Prepare the target schema in QuestDB
+
+### Export the existing database
+
+Export data using one CSV file per table. Make sure to export a column that can
+be used as the timestamp. Data in CSV is not expected to be in any particular
+order. If it is not possible to export the table as one CSV, export multiple
+files and concatenate these files before importing into QuestDB.
+
+#### Concatenate multiple CSV files
+
+The way to concatenate files depends on whether the CSV files have headers.
+
+For CSV files without headers, concatenation is straightforward:
+
+
+
+import Tabs from "@theme/Tabs"
+import TabItem from "@theme/TabItem"
+
+
+
+```shell
+ls *.csv | xargs cat > singleFile.csv
+```
+
+
+
+```shell
+ls *.csv | xargs cat > singleFile.csv
+```
+
+
+
+```shell
+$TextFiles = Get-Item C:\Users\path\to\csv\*.csv
+# The file contents are appended into singleFile.csv in the same folder.
+$TextFiles | ForEach-Object { Add-Content -Value $(Get-Content $_) -Path C:\Users\path\to\csv\singleFile.csv }
+```
+
+
+
+For CSV files with headers, concatenation can be tricky. You could manually
+remove the first line of the files before concatenating, or use some smart
+command line to concatenate and remove the headers. A good alternative is using
+the open source tool
+[csvstack](https://csvkit.readthedocs.io/en/latest/scripts/csvstack.html).
+
+This is how you can concatenate multiple CSV files using _csvstack_:
+
+```shell
+csvstack *.csv > singleFile.csv
+```
+
+### Things to know about `COPY`
+
+- `COPY` is disabled by default, as a security precaution. Configuration is
+  required.
+
+- `COPY` is more efficient when source and target disks are different.
+
+- `COPY` is parallel when the target table is partitioned.
+
+- `COPY` is _serial_ when the target table is non-partitioned; out-of-order
+  timestamps will be rejected.
+
+- `COPY` cannot import data into a non-empty table.
+
+- `COPY` indexes the CSV file; reading the indexed CSV file benefits hugely from
+  disk IOPS. We recommend using NVMe.
+
+- `COPY` imports one file at a time; there is no internal queuing system yet.
+ +- [COPY reference](/docs/reference/sql/copy/) + +### Configure `COPY` + +- Enable `COPY` and [configure](/docs/reference/configuration/#csv-import) the + `COPY` directories to suit your server. +- `cairo.sql.copy.root` must be set for `COPY` to work. + +## Create the target table schema + +If you know the target table schema already, you can +[skip this section](/docs/guides/importing-data/#import-csv). + +QuestDB could analyze the input file and "guess" the schema. This logic is +activated when target table does not exist. + +To have QuestDB help with determining file schema, it is best to work with a +sub-set of CSV. A smaller file allows us to iterate faster if iteration is +required. + +Let's assume we have the following CSV: + +```csv "weather.csv" +"locationId","timestamp","windDir","windSpeed","windGust","cloudCeiling","skyCover","visMiles","tempF","dewpF","rain1H","rain6H","rain24H","snowDepth" +1,"2010-07-05T00:23:58.981263Z",3050,442,512,,"OBS",11.774906006761,-5,-31,58.228032196984,70.471606345673,77.938252342637,58 +2,"2017-10-10T10:13:55.246046Z",900,63,428,5487,"BKN",4.958601701089,-19,-7,4.328016420894,36.020659549374,97.821114441800,41 +3,"2010-03-12T11:17:13.727137Z",2880,299,889,371,"BKN",10.342717709226,46,81,9.149518425127,20.229637391479,20.074738007931,80 +4,"2018-08-21T15:42:23.107543Z",930,457,695,4540,"OBS",13.359184086767,90,-47,33.346163208862,37.501996055160,58.316836760009,13 +... +``` + +1. Extract the first 1000 line to `test_file.csv` (assuming both files are in + the `cairo.sql.copy.root` directory): + +```shell +head -1000 weather.csv > test_file.csv +``` + +2. Use a simple `COPY` command to import `test_file.csv` and define the table + name: + + ```questdb-sql + COPY weather from 'test_file.csv' WITH HEADER true; + ``` + +Table `weather` is created and it quickly returns an id of asynchronous import +process running in the background: + +| id | +| ---------------- | +| 5179978a6d7a1772 | + +3. In the Web Console right click table and select `Copy Schema to Clipboard` - + this copies the schema generated by the input file analysis. + +4. Paste the table schema to the code editor: + + ```questdb-sql + CREATE TABLE 'weather' ( + timestamp TIMESTAMP, + windDir INT, + windSpeed INT, + windGust INT, + cloudCeiling INT, + skyCover STRING, + visMiles DOUBLE, + tempF INT, + dewpF INT, + rain1H DOUBLE, + rain6H DOUBLE, + rain24H DOUBLE, + snowDepth INT + ); + ``` + +5. Identify the correct schema: + + 5.1. The generated schema may not be completely correct. Check the log table + and log file to resolve common errors using the id (see also + [Track import progress](/docs/guides/importing-data/#track-import-progress) + and [FAQ](/docs/guides/importing-data/#faq)): + + ```questdb-sql + SELECT * FROM sys.text_import_log WHERE id = '5179978a6d7a1772' ORDER BY ts DESC; + ``` + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| --------------------------- | ---------------- | ------- | -------------------------- | ----- | -------- | ------- | ------------ | ------------- | ------ | +| 2022-08-08T16:38:06.262706Z | 5179978a6d7a1772 | weather | test_file.csvtest_file.csv | | finished | | 999 | 999 | 0 | +| 2022-08-08T16:38:06.226162Z | 5179978a6d7a1772 | weather | test_file.csvtest_file.csv | | started | | | | 0 | + +Check `rows_handled`, `rows_imported`, and `message` for any errors and amend +the schema as required. + +5.2. Drop the table and re-import `test_file.csv` using the updated schema. + +6. 
Repeat the steps to narrow down to a correct schema. + + The process may require either truncating: + + ```questdb-sql + TRUNCATE TABLE table_name; + ``` + + or dropping the target table: + + ```questdb-sql + DROP TABLE table_name; + ``` + +7. Clean up: Once all the errors are resolved, copy the final schema, drop the + small table. +8. Make sure table is correctly partitioned. The final schema in our example + should look like this: + + ```questdb-sql + CREATE TABLE 'weather' ( + timestamp TIMESTAMP, + windDir INT, + windSpeed INT, + windGust INT, + cloudCeiling INT, + skyCover STRING, + visMiles DOUBLE, + tempF INT, + dewpF INT, + rain1H DOUBLE, + rain6H DOUBLE, + rain24H DOUBLE, + snowDepth INT + ) TIMESTAMP (timestamp) partition by DAY; + ``` + +9. Ready for import: Create an empty table using the final schema. + +## Import CSV + +Once an empty table is created in QuestDB using the correct schema, import can +be initiated with: + +```questdb-sql +COPY weather FROM 'weather.csv' WITH HEADER true TIMESTAMP 'timestamp' FORMAT 'yyyy-MM-ddTHH:mm:ss.SSSUUUZ'; +``` + +It quickly returns id of asynchronous import process running in the background: + +| id | +| :--------------- | +| 55020329020b446a | + +## Track import progress + +`COPY` returns an id for querying the log table (`sys.text_import_log`), to +monitor the progress of ongoing import: + +```questdb-sql +SELECT * FROM sys.text_import_log WHERE id = '55020329020b446a'; +``` + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| :-------------------------- | ---------------- | ------- | ----------- | ---------------------- | -------- | ------- | ------------ | ------------- | ------ | +| 2022-08-03T14:00:40.907224Z | 55020329020b446a | weather | weather.csv | null | started | null | null | null | 0 | +| 2022-08-03T14:00:40.910709Z | 55020329020b446a | weather | weather.csv | analyze_file_structure | started | null | null | null | 0 | +| 2022-08-03T14:00:42.370563Z | 55020329020b446a | weather | weather.csv | analyze_file_structure | finished | null | null | null | 0 | +| 2022-08-03T14:00:42.370793Z | 55020329020b446a | weather | weather.csv | boundary_check | started | null | null | null | 0 | + +Looking at the log from the newest to the oldest might be more convenient: + +```questdb-sql +SELECT * FROM sys.text_import_log WHERE id = '55020329020b446a' ORDER BY ts DESC; +``` + +Once import successfully ends the log table should contain a row with a 'null' +phase and 'finished' status : + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| :-------------------------- | ---------------- | ------- | ----------- | ----- | -------- | ------- | ------------ | ------------- | ------ | +| 2022-08-03T14:10:59.198672Z | 55020329020b446a | weather | weather.csv | null | finished | | 300000000 | 300000000 | 0 | + +Import into non-partitioned tables uses single-threaded implementation (serial +import) that reports only start and finish records in the status table. 
Given an +ordered CSV file `weather1mil.csv`, when importing, the log table shows: + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| :-------------------------- | ---------------- | ------- | --------------- | ----- | -------- | ------- | ------------ | ------------- | ------ | +| 2022-08-03T15:00:40.907224Z | 42d31603842f771a | weather | weather1mil.csv | null | started | null | null | null | 0 | +| 2022-08-03T15:01:20.000709Z | 42d31603842f771a | weather | weather1mil.csv | null | finished | null | 999999 | 999999 | 0 | + +The log table contains only coarse-grained, top-level data. Import phase run +times vary a lot (e.g. `partition_import` often takes 80% of the whole import +execution time), and therefore +[the server log](/docs/reference/configuration/#logging) provides an alternative +to follow more details of import: + +```log title="import log" +2022-08-03T14:00:40.907224Z I i.q.c.t.ParallelCsvFileImporter started [importId=5502031634e923b2, phase=analyze_file_structure, file=`C:\dev\tmp\weather.csv`, workerCount=10] +2022-08-03T14:00:40.917224Z I i.q.c.p.WriterPool >> [table=`weather`, thread=43] +2022-08-03T14:00:41.440049Z I i.q.c.t.ParallelCsvFileImporter finished [importId=5502031634e923b2, phase=analyze_file_structure, file=`C:\dev\tmp\weather.csv`, duration=0s, errors=0] +2022-08-03T14:00:41.440196Z I i.q.c.t.ParallelCsvFileImporter started [importId=5502031634e923b2, phase=boundary_check, file=`C:\dev\tmp\weather.csv`, workerCount=10] +2022-08-03T14:01:18.853212Z I i.q.c.t.ParallelCsvFileImporter finished [importId=5502031634e923b2, phase=boundary_check, file=`C:\dev\tmp\weather.csv`, duration=6s, errors=0] +2022-08-03T14:01:18.853303Z I i.q.c.t.ParallelCsvFileImporter started [importId=5502031634e923b2, phase=indexing, file=`C:\dev\tmp\weather.csv`, workerCount=10] +2022-08-03T14:01:18.853516Z I i.q.c.t.ParallelCsvFileImporter temporary import directory [path='E:\dev\tmp\weather\] +2022-08-03T14:01:42.612302Z I i.q.c.t.CsvFileIndexer finished chunk [chunkLo=23099021813, chunkHi=26948858785, lines=29999792, errors=0] +2022-08-03T14:01:42.791789Z I i.q.c.t.CsvFileIndexer finished chunk [chunkLo=11549510915, chunkHi=15399347885, lines=30000011, errors=0] +``` + +If the [`ON ERROR` option](/docs/reference/sql/copy/#options) is set to `ABORT`, +import stops on the first error and the error is logged. Otherwise, all errors +are listed in the log. + +The reference to the error varies depending on the phase of an import: + +- In the indexing phase, if an error occurs, the absolute input file line is + referenced: + +```log +2022-08-08T11:50:24.319675Z E i.q.c.t.CsvFileIndexer could not parse timestamp [line=999986, column=1] +``` + +- In the data import phase, if an error occurs, the log references the offset as + related to the start of the file. + +```log +2022-08-08T12:19:56.828792Z E i.q.c.t.TextImportTask type syntax [type=INT, offset=5823, column=0, value='CMP2'] +``` + +The errored rows can then be extracted for further investigation. + + + +## FAQ + +
+ COPY on a table with symbol columns is very slow. How can I speed it up? +

+ +QuestDB uses `256` as the default symbol capacity. If the number of distinct +symbol values exceeds this default significantly, the `COPY` performance will +suffer. Make sure that you specify symbol capacities when creating the table +before running the `COPY` command. + +Here is an example: + +```questdb-sql +CREATE TABLE table_name ( + ts TIMESTAMP, + sym SYMBOL CAPACITY 100000 +) TIMESTAMP(ts) PARTITION BY DAY; +``` + +Refer to the [symbol type documentation](/docs/concept/symbol/#symbol-columns) +for more information on configuring the symbol capacity. + +

+
+ +
+ What happens in a database crash or OS reboot? +

+
+If a reboot or power loss happens while partitions are being attached, the
+table might be left with incomplete data. Please truncate the table before
+re-importing with:
+
+```questdb-sql
+TRUNCATE TABLE table_name;
+```
+
+If the reboot or power loss happens before any partitions have been attached,
+the import should not be affected.
+

+
+ +
+ I'm getting "COPY is disabled ['cairo.sql.copy.root' is not set?]" error message +

+
+Please set the `cairo.sql.copy.root` setting, restart the instance, and try
+again.
+
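+For example, in `server.conf` (the directory below is only a placeholder and
+must exist on your instance):
+
+```ini title="server.conf"
+# hypothetical import root directory
+cairo.sql.copy.root=/var/lib/questdb/import
+```
+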

+
+ +
+ I'm getting "could not create temporary import work directory [path='somepath', errno=-1]" error message +

+ +Please make sure that the `cairo.sql.copy.root` and `cairo.sql.copy.work.root` +are valid paths pointing to existing directories. + +
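+For example, both settings can be defined in `server.conf`; the paths below are
+placeholders and the directories must already exist:
+
+```ini title="server.conf"
+# hypothetical paths; adjust to your deployment
+cairo.sql.copy.root=/var/lib/questdb/import
+cairo.sql.copy.work.root=/var/lib/questdb/import_work
+```
+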

+
+ +
+ I'm getting "[2] could not open read-only [file=somepath]" error message +

+
+Please check that the import file path is valid and accessible to the user
+running the QuestDB instance.
+
+If you are running QuestDB using Docker, please check whether the directory
+mounted for storing source CSV files is identical to the one that the
+`cairo.sql.copy.root` property or the `QDB_CAIRO_SQL_COPY_ROOT` environment
+variable points to.
+
+For example, the following command can start a QuestDB instance:
+
+```shell
+docker run -p 9000:9000 \
+-v "/tmp/questdb:/var/lib/questdb" \
+-v "/tmp/questdb/my_input_root:/tmp/questdb_import" \
+-e QDB_CAIRO_SQL_COPY_ROOT=/tmp/questdb_wrong \
+questdb/questdb
+```
+
+However, running:
+
+```questdb-sql
+COPY weather FROM 'weather_example.csv' WITH HEADER true;
+```
+
+results in the "[2] could not open read-only
+[file=/tmp/questdb_wrong/weather_example.csv]" error message.
+
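+In this example the import root actually mounted inside the container is
+`/tmp/questdb_import`, so a corrected command, keeping the same volume mounts,
+would point the environment variable there:
+
+```shell
+docker run -p 9000:9000 \
+-v "/tmp/questdb:/var/lib/questdb" \
+-v "/tmp/questdb/my_input_root:/tmp/questdb_import" \
+-e QDB_CAIRO_SQL_COPY_ROOT=/tmp/questdb_import \
+questdb/questdb
+```
+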

+
+ +
+ I'm getting "column count mismatch [textColumnCount=4, tableColumnCount=3, table=someTable]" error message +

+
+There are more columns in the input file than in the existing target table.
+Please remove the extra column(s) from the input file or add them to the target
+table schema.
+
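+If you prefer to keep the extra data, the missing column can be added to the
+target table with
+[`ALTER TABLE ADD COLUMN`](/docs/reference/sql/alter-table-add-column/). A
+minimal sketch with hypothetical table and column names:
+
+```questdb-sql
+-- add the extra CSV column to the target table before re-running COPY
+ALTER TABLE someTable ADD COLUMN extra_col DOUBLE;
+```
+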

+
+ +
+ I'm getting "timestamp column 'ts2' not found in file header" error message +

+
+Either the input file is missing a header or the timestamp column name given in
+the `COPY` command is invalid. Please add a file header or fix the timestamp
+option.
+
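+For example, a sketch of naming the timestamp column explicitly, assuming the
+file has a header and the column is called `ts` (see the
+[`COPY` options](/docs/reference/sql/copy/#options) for the exact syntax):
+
+```questdb-sql
+-- hypothetical file and column names
+COPY weather FROM 'weather_example.csv' WITH HEADER true TIMESTAMP 'ts';
+```
+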

+
+ +
+ I'm getting "column is not a timestamp [no=0, name='ts']" error message +

+
+The timestamp column given by the user, or (if the header is missing) assumed
+based on the target table schema, is of a different type.
+Please check the timestamp column name in the input file header or make sure
+the input file column order matches that of the target table.
+

+
+ +
+ I'm getting "target table must be empty [table=t]" error message +

+
+`COPY` doesn't yet support importing into a partitioned table with existing
+data.
+
+Please truncate the table before re-importing with:
+
+```questdb-sql
+TRUNCATE TABLE table_name;
+```
+
+or import into another empty table and then use `INSERT INTO SELECT`:
+
+```questdb-sql
+INSERT INTO table_name batch 100000
+SELECT * FROM other_table;
+```
+
+to copy the data into the original target table.
+

+
+ +
+ I'm getting "io_uring error" error message +

+
+It's possible that you've hit an io_uring-related kernel error.
+Please set the `cairo.iouring.enabled` setting to `false`, restart the QuestDB
+instance, and try again.
+
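+For example, in `server.conf`:
+
+```ini title="server.conf"
+cairo.iouring.enabled=false
+```
+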

+
+ +
+ I'm getting "name is reserved" error message +

+
+The table you're trying to import into is in a bad state (its metadata is
+incomplete).
+
+Please either drop the table with:
+
+```questdb-sql
+DROP TABLE table_name;
+```
+
+and recreate it, or change the table name in the `COPY` command.
+
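+For instance, importing into a differently named table (a hypothetical name is
+used below) avoids the conflict:
+
+```questdb-sql
+COPY weather_v2 FROM 'weather_example.csv' WITH HEADER true;
+```
+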

+
+ +
+ I'm getting "Unable to process the import request. Another import request may be in progress." error message +

+
+Only one import can be running at a time.
+
+Either cancel the running import with:
+
+```questdb-sql
+COPY 'paste_import_id_here' CANCEL;
+```
+
+or wait until the current import is finished.
+
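+If you don't have the import id at hand, it appears in the `id` column of the
+import log table:
+
+```questdb-sql
+SELECT * FROM sys.text_import_log LIMIT -10;
+```
+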

+
+ +
+ Import finished but table is (almost) empty +

+
+Please check the latest entries in the log table:
+
+```questdb-sql
+SELECT * FROM sys.text_import_log LIMIT -10;
+```
+
+If the `errors` column value is close to the number of records in the input
+file, it may mean that:
+
+- The `FORMAT` option of the `COPY` command or the auto-detected format doesn't
+  match the timestamp column data in the file
+- Other column(s) can't be parsed and the `ON ERROR SKIP_ROW` option was used
+- The input file is unordered and the target table has a designated timestamp
+  but is not partitioned
+
+If none of the above is the cause, please check the log file for messages like:
+
+```log
+2022-08-08T11:50:24.319675Z E i.q.c.t.CsvFileIndexer could not parse timestamp [line=999986, column=1]
+```
+
+or
+
+```log
+2022-08-08T12:19:56.828792Z E i.q.c.t.TextImportTask type syntax [type=INT, offset=5823, column=0, value='CMP2']
+```
+
+that should explain why rows were rejected. Note that in these examples, the
+former log message mentions the absolute input file line while the latter
+references the offset relative to the start of the file.
+

+
+ +
+ Import finished but table column names are `f0`, `f1`, ... +

+
+The input file is missing a header and the target table does not exist, so the
+columns received synthetic names. You can rename them with the `ALTER TABLE`
+command:
+
+```questdb-sql
+ALTER TABLE table_name RENAME COLUMN f0 TO ts;
+```
+

+
diff --git a/docs/guides/influxdb-migration.md b/docs/guides/influxdb-migration.md new file mode 100644 index 00000000..a46a8d21 --- /dev/null +++ b/docs/guides/influxdb-migration.md @@ -0,0 +1,180 @@ +--- +title: Migrating from InfluxDB +description: + This document describes details about steps to migrate your database from + InfluxDB to QuestDB. +--- + +This page describes the steps to importing data from InfluxDB OSS or InfluxDB +Cloud to QuestDB. + +## Overview + +Data stored in InfluxDB needs to be exported in order to import it into QuestDB. +There are two ways of exporting data from InfluxDB: + +- Run a SQL query using InfluxDB API and get results in JSON +- Run the `inspect` command and get results in InfluxDB Line Protocol + +The first approach is simpler and might suffice if you are migrating a small to +moderate dataset. For larger datasets it is advised to use the second option. + +## SQL query for importing small tables + +:::note + +This approach is recommended only for small to moderate datasets. + +::: + +When using InfluxDB API to run a SQL query, results will be in JSON. Since we +cannot import JSON files directly into QuestDB, we will need to convert the +results into CSV. There are many ways to do so, and one of those is using the +[jq JSON processor](https://stedolan.github.io/jq/). + +To run the SQL query you will need to have an +[API token](https://docs.influxdata.com/influxdb/cloud/security/tokens/create-token/). + +The below is an example to query a table using the SQL API endpoint and convert +the results to CSV: + +```shell +curl --get http://localhost:8086/query --header "Authorization: Token zuotzwwBxbXIor4_D-t-BMEpJj1nAj2DGqNSshTUyHUcX0DMjI6fiBv_pgeW-xxpnAwgEVG0uJAucEaJKtvpJA==" --data-urlencode "db=bench" --data-urlencode "q=SELECT * from readings LIMIT 1000;" | jq '.results[].series[].values[] | @csv' +``` + +The resulting CSV can be then +[imported into QuestDB](/docs/guides/importing-data-rest/). + +## The inspect command and InfluxDB Line Protocol for importing datasets at scale + +To move data from InfluxDB into QuestDB at scale, it is best to use the +`influxd inspect` command to export the data, as the +[`export-lp`](https://docs.influxdata.com/influxdb/v2.6/reference/cli/influxd/inspect/export-lp/) +subcommand allows exporting all time-structured merge tree (TSM) data in a +bucket as InfluxDB Line Protocol messages in a big text file. + +The text file can then be inserted into QuestDB. This assumes you are migrating +from self-managed InfluxDB and have access to execute the `inspect` command. + +For InfluxDB Cloud users, the first step should be +[exporting the data from cloud to InfluxDB OSS](https://docs.influxdata.com/influxdb/cloud/migrate-data/migrate-cloud-to-oss/) +before following the instructions. + +### Instructions + +#### Generate admin token + +Make sure you have an _admin token_ generated and set the env variable +export `INFLUX_TOKEN`. + +For example: + +```shell +export INFLUX_TOKEN=xyoczzmBxbXIor4_D-t-BMEpJj1nAj2DGqNSshTUyHUcX0DMjI6fiBv_pgeW-xxpnAwgEVG0uJAucEaJKtvpJA== +``` + +#### Find out your org_id and bucket_id + +You need to know your org_id and bucket_id you will export. If you don’t know +them you can +issue `influx org list` and `influx bucket list --org-id YOUR_ID` to find those +values. 
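+For example, assuming the `influx` CLI is already authenticated with the token
+above:
+
+```shell
+influx org list
+influx bucket list --org-id YOUR_ID
+```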
+ +#### Export the bucket contents + +Now you can just export the bucket contents by using `inspect export-lp` command +and by defining a destination folder: + +```shell +influxd inspect export-lp --bucket-id YOUR_BUCKET_ID --output-path /var/tmp/influx/outputfolder +``` + +Please note the step above can take a while. As an example, it took almost an +hour for a 160G bucket on a mid-AWS EC2 instance. + +#### Connect to QuestDB + +Connect to your QuestDB instance and issue a +[CREATE TABLE](/docs/reference/sql/create-table/) statement. This is not +technically needed as once you start streaming data, your table will be +automatically created. However, this step is recommended because this allows +fine tuning some parameters such as column types or partition. + +Since the data is already in InfluxDB Line Protocol format, there is no need to +use the official QuestDB client libraries for ingestion. + +You only need to connect via a socket to your instance and stream row by row. + +The below is an example Python code streaming the instance: + +```python +import socket +import sys + +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +def send_utf8(msg): + print(msg) + sock.sendall(msg.encode()) + +if __name__ == '__main__': + try: + sock.connect(('localhost', 9009)) + with open("ilp_influx_export.ilp") as infile: + for line in infile: + print(line) + send_utf8(line) + except socket.error as e: + sys.stderr.write(f'Got error: {e}') + sock.close() +``` + +#### Transform data in QuestDB + +Since InfluxDB exports only one metric for each line, this means that if you are +storing more than one metric for the same series, one row will create multiple +InfluxDB Line Protocol lines with one valid metric value and the other metrics +shown as `NULL`. Therefore, we recommend transforming your data in QuestDB. + +For example, if you query a table with several metrics: + +```questdb-sql +SELECT * FROM diagnostics WHERE timestamp = '2016-01-01T00:00:00.000000Z' AND driver='Andy' AND name='truck_150') +``` + +Your result may be something like this: + +![Screenshot of the query result where each metric forms one line with NULL for other metrics](/img/docs/guide/one-metric.png) + +A way to solve this is to execute a SQL query grouping data by all the +dimensions and selecting the maximum values for all the metrics: + +```questdb-sql +SELECT + timestamp, + device_version, + driver, + fleet, + model, + name, + max(current_load) AS current_load, + max(fuel_capacity) AS fuel_capacity, + max(fuel_state) AS fuel_state, + max(load_capacity) AS load_capacity, + max(nominal_fuel_consumption) AS nominal_fuel_consumption, + max(status) AS status +FROM + diagnostics; +``` + +This produces aggregated rows containing all the metrics for each dimension +group: + +![Screenshot of the query result showing aggregated rows based on the SQL query](/img/docs/guide/adjusted-metric.png) + +You can use the [INSERT](/docs/reference/sql/insert/) keyword to output the +processed result into a new table. + +## See also + +- [Comparing InfluxDB and QuestDB databases](/blog/2021/11/29/questdb-versus-influxdb/) diff --git a/docs/guides/modifying-data.md b/docs/guides/modifying-data.md new file mode 100644 index 00000000..bd9d74e7 --- /dev/null +++ b/docs/guides/modifying-data.md @@ -0,0 +1,109 @@ +# Modify data + +QuestDB is a [time-series database](/glossary/time-series-database/) optimized +to append data. + +For best performance, design your application to avoid having to frequently edit +existing records. 
+ +The [`UPDATE`](/docs/reference/sql/update/) statement is available in QuestDB +since version 6.4, `DELETE` is also planned to be included in upcoming releases. +However, they are intended for correcting data that was inserted incorrectly or +should have never been inserted in the first place (for example as part of data +administration tasks). + +These are three alternatives to `UPDATE` and `DELETE` you may consider: + +- [Append newest state](#append-newest-state): Insert a newer state to replace + an older one: This has the added advantage that you can query back in time to + a previous state. It is also the basis of organizing data for + [bi-temporality](https://martinfowler.com/articles/bitemporal-history.html). + +- [Replace a table](#replace-table): Create a new table with the new data you + need, drop the old one and rename. + +- [Delete by dropping partitions](#delete-by-dropping-partitions): Create your + time-series tables with partitions, then delete the ones you no longer need. + +## Append newest state + +### Using the timestamp field + +Here's a worked example using the timestamp column: + +```questdb-sql +CREATE TABLE takeaway_order ( + ts TIMESTAMP, + id SYMBOL, + status SYMBOL) + timestamp(ts); + +INSERT INTO takeaway_order VALUES (now(), 'order1', 'placed'); +INSERT INTO takeaway_order VALUES (now(), 'order2', 'placed'); +INSERT INTO takeaway_order VALUES (now(), 'order1', 'cooking'); +INSERT INTO takeaway_order VALUES (now(), 'order1', 'in-transit'); +INSERT INTO takeaway_order VALUES (now(), 'order1', 'arrived'); +INSERT INTO takeaway_order VALUES (now(), 'order3', 'placed'); +INSERT INTO takeaway_order VALUES (now(), 'order3', 'cooking'); +INSERT INTO takeaway_order VALUES (now(), 'order3', 'in-transit'); +``` + +We join the latest timestamp of an order id against the rest of the data to +obtain full details. + +```questdb-sql +WITH + ts_takeaway_order AS ( + SELECT + max(ts) AS ts, + id + FROM + takeaway_order GROUP BY id) +SELECT + o.* +FROM + ts_takeaway_order ts_o + INNER JOIN 'takeaway_order' o + ON ts_o.ts = o.ts +``` + +This results in the latest state for each order: + +| _timestamp_ ts | id _symbol_ | status _symbol_ | +| :-------------------------- | :---------- | :-------------- | +| 2022-04-07T15:33:43.944922Z | order1 | arrived | +| 2022-04-07T15:33:37.370694Z | order2 | placed | +| 2022-04-07T15:33:50.829323Z | order3 | in-transit | + +### Using dedicated fields + +If timestamps don't work for you here, you can also use an extra integer column +called `version`, an extra boolean `deleted` column or similar. + +## Replace Table + +Another alternative is to: + +- Select only the data you want from an existing table into a new temporary one. +- Drop the original table. +- Rename the temporary table to the original table's name. + +```questdb-sql +CREATE TABLE mytable_copy AS ( + SELECT * FROM mytable WHERE column_value != 42 +) TIMESTAMP(ts) PARTITION BY DAY; + +DROP TABLE mytable; +RENAME table mytable_copy TO mytable; +``` + +## Delete by Dropping Partitions + +When you create tables with a timestamp, you may organise them into +[partitions](/docs/concept/partitions/) using the +[`CREATE TABLE .. PARTITION BY`](/docs/reference/sql/create-table/#partitioning) +SQL statement. + +You may then use the +[`ALTER TABLE DROP PARTITION`](/docs/reference/sql/alter-table-drop-partition/) +SQL statement to drop partitions you no longer need. 
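+For example, assuming a table named `my_table` partitioned by `DAY`, a single
+partition can be removed as follows:
+
+```questdb-sql
+-- drop a day's partition that is no longer needed
+ALTER TABLE my_table DROP PARTITION LIST '2021-01-01';
+```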
diff --git a/docs/guides/v6-migration.md b/docs/guides/v6-migration.md new file mode 100644 index 00000000..48d905d8 --- /dev/null +++ b/docs/guides/v6-migration.md @@ -0,0 +1,215 @@ +--- +title: Version 6.0 migration +description: + This document describes details about automatic upgrades with QuestDB version + 6.0 and instructions for manually reverting tables for compatibility with + earlier QuestDB versions. +--- + +Release 6.0 introduces breaking changes in table transaction files. An automated +conversion process has been included in the release which will migrate table +transaction files to use the new format. The following sections describe the +automated upgrade process with notes for manually downgrading tables for +compatibility with older versions. + +## Upgrading QuestDB + +When QuestDB v6.0 starts up, and tables from older QuestDB versions are +detected, a migration to the new transaction file format will run automatically. +The migration scans for the existence of tables within the QuestDB storage +directory and upgrades transaction (`_txn`) files for each table. All other +table data is untouched by the upgrade. + +If the migration fails for a table, an error message will be printed in the +QuestDB logs on startup. QuestDB will not terminate, but tables which have not +been successfully upgraded cannot be used for querying or writing. + +Starting QuestDB again will trigger another attempt to migrate tables using an +older transaction file format. + +## Reverting transaction files + +During the upgrade process, `_txn` files are backed up and renamed using the +format `_txn.v417`. Users who wish to revert the table migration can downgrade +tables by following these steps: + +1. delete the folder `/path/to/questdb/db/_upgrade.d` +2. for each table, rename `_txn.v417` to `_txn` + +### Table downgrade example + +This section illustrates how to revert transaction files to a format used by +QuestDB versions earlier than 6.0. Given storage directories for two table +`example_table` and `sensors`: + +```bash title="path/to/qdb" +├── conf +├── db +│   ├── _tab_index.d +│   ├── _upgrade.d +│   ├── example_table +│   │   ├── 2021 +│   │   │   ├── tempF.d +│   │   │   ├── ... +│   │   │   └── visMiles.d +│   │   ├── _meta +│   │   ├── _txn +│   │   └── _txn.v417 +│   └── sensors +│   ├── 2021 +│   │   ├── device_id.d +│   │   ├── ... +│   │   └── temperature.d +│   ├── _meta +│   ├── _txn +│   └── _txn.v417 +└── public +``` + +The tables may be downgraded in the following manner: + +```bash +rm db/_upgrade.d +mv db/example_table/_txn.v417 db/example_table/_txn +mv db/sensors/_txn.v417 db/sensors/_txn +``` + +After these steps have been completed, QuestDB v5.x may be started and the table +data will be loaded as usual. + +## Breaking SQL changes + +Release 6.0.1 contains breaking changes relating to SQL syntax to simplify +working with `TIMESTAMP` types and for improved compatibility with ANSI SQL +expectations. + +:::note + +For more information on these changes, see the 6.0.1 software version +[release notes on GitHub](https://github.com/questdb/questdb/releases/tag/6.0.1). + +::: + +To illustrate how timestamps are handled, a table `my_table` containing 48 +records with timestamps every hour beginning at `00:00:00` on `2020-01-01` will +be used in the following examples: + +|timestamp | +|:--------------------------| +|2020-01-01T00:00:00.000000Z| +|2020-01-01T01:00:00.000000Z| +|2020-01-01T02:00:00.000000Z| +|... 
| +|2020-01-01T23:00:00.000000Z| +|2020-01-02T00:00:00.000000Z| +|2020-01-02T01:00:00.000000Z| +|... | +|2020-01-02T23:00:00.000000Z| + +### Timestamp string equality + +The following example SQL uses a `WHERE` clause to evaluate if records match +using string equality. + +```questdb-sql title="Timestamp string equality" +SELECT * FROM my_table +WHERE timestamp = '2020-01-01' +``` + +The result will be 1 record with exact match of `2020-01-01T00:00:00.000000Z`. +In other words, the string `2020-01-01` does not represent an interval, but a +single `TIMESTAMP` data point of `2020-01-01T00:00:00.000000Z` + +|timestamp | +|:--------------------------| +|2020-01-01T00:00:00.000000Z| + +Before software version `6.0.1`, this would result in 24 records of all hours +during date '2020-01-01' + +|timestamp | +|:--------------------------| +|2020-01-01T00:00:00.000000Z| +|2020-01-01T01:00:00.000000Z| +|2020-01-01T02:00:00.000000Z| +|... | +|2020-01-01T23:00:00.000000Z| + +In order to use the old semantics, the query must use the `IN` keyword instead +of `=`: + +```questdb-sql title="Timestamp string equality using IN" +SELECT * FROM my_table +WHERE timestamp IN '2020-01-01' +``` + +### Timestamp string comparison + +Timestamps may also be compared using `>` greater-than and `<` less-than +operators. The following example SQL uses a `>` greater-than operator to +evaluate if records occur later than a timestamp provided as a string: + +```questdb-sql title="Timestamp string equality" +SELECT * FROM my_table +WHERE timestamp > '2020-01-01' +``` + +The results are 47 records which have timestamps strictly greater than +`2020-01-01T00:00:00.000000Z`. The string `2020-01-01` does not represent an +interval, but a single `TIMESTAMP` data point of `2020-01-01T00:00:00.000000Z`: + +|timestamp | +|:--------------------------| +|2020-01-01T01:00:00.000000Z| +|... | +|2020-01-02T23:00:00.000000Z| + +Before software version `6.0.1`, this would result in 24 records, one for each +hour during the date `2020-01-02`: + +|timestamp | +|:--------------------------| +|2020-01-02T00:00:00.000000Z| +|... | +|2020-01-02T23:00:00.000000Z| + +In order to use the old semantics, the query must use `>=` instead of `>`, and +`<=` instead of `<`: + +```questdb-sql title="Greater than or equal to a string timestamp" +SELECT * FROM my_table +WHERE timestamp >= '2020-01-02' +``` + +### Timestamp IN list + +The `IN` keyword is used to check equality with a list of 2 elements: + +```questdb-sql title="Timestamp IN string list" +SELECT * FROM my_table +WHERE timestamp IN ('2020-01-01T00:00:00.000000Z', '2020-01-02T00:00:00.000000Z') +``` + +The result is two records matching exactly `2020-01-01T00:00:00.000000Z` and +`2020-01-02T00:00:00.000000Z` + +|timestamp | +|:--------------------------| +|2020-01-02T00:00:00.000000Z| +|2020-01-02T00:00:00.000000Z| + +Before software version `6.0.1`, this would result in 25 records, one for each +hour during the date `2020-01-01` and the `00:00:00` data point on `2020-01-02`: + +|timestamp | +|:--------------------------| +|2020-01-02T00:00:00.000000Z| +|... 
| +|2020-01-02T00:00:00.000000Z| + +In order to use the old semantics, the `BETWEEN` keyword should be used: + +```questdb-sql title="Timestamp string equality using BETWEEN" +SELECT * FROM my_table +WHERE timestamp BETWEEN '2020-01-01T00:00:00.000000Z' AND '2020-01-02T00:00:00.000000Z' +``` diff --git a/docs/guides/working-with-timestamps-timezones.md b/docs/guides/working-with-timestamps-timezones.md new file mode 100644 index 00000000..f24660c4 --- /dev/null +++ b/docs/guides/working-with-timestamps-timezones.md @@ -0,0 +1,192 @@ +--- +title: Timestamps and time zones +description: + This document describes how to work with time zones in QuestDB, including + hints how to convert timestamps to UTC, to a specific time zone or by a UTC + offset value. +--- + +When working with timestamped data, it may be necessary to convert timestamp +values to or from UTC, or to offset timestamp values by a fixed duration. The +following sections describe how QuestDB handles timestamps natively, how to use +built-in functions for working with time zone conversions, and general hints for +working with time zones in QuestDB. + +## Timestamps in QuestDB + +The native timestamp format used by QuestDB is a Unix timestamp in microsecond +resolution. Although timestamps in nanoseconds will be parsed, the output will +be truncated to microseconds. QuestDB does not store time zone information +alongside timestamp values and therefore it should be assumed that all +timestamps are in UTC. + +The following example shows how a Unix timestamp in microseconds may be passed +into a timestamp column directly: + +```questdb-sql +CREATE TABLE my_table (ts timestamp, col1 int) timestamp(ts); +INSERT INTO my_table VALUES(1623167145123456, 12); +my_table; +``` + +| ts | col1 | +| :-------------------------- | :--- | +| 2021-06-08T15:45:45.123456Z | 12 | + +Timestamps may also be inserted as strings in the following way: + +```questdb-sql +INSERT INTO my_table VALUES('2021-06-08T16:45:45.123456Z', 13); +my_table; +``` + +| ts | col1 | +| :-------------------------- | :--- | +| 2021-06-08T15:45:45.123456Z | 12 | +| 2021-06-08T16:45:45.123456Z | 13 | + +When inserting timestamps into a table, it is also possible to use +[timestamp units](/docs/reference/function/date-time/#date-and-timestamp-format) +to define the timestamp format, in order to process trailing zeros in exported +data sources such as PostgreSQL: + +```questdb-sql +INSERT INTO my_table VALUES(to_timestamp('2021-06-09T16:45:46.123456789', 'yyyy-MM-ddTHH:mm:ss.N+'), 14); +-- Passing 9-digit nanosecond into QuestDB, this is equal to: + +INSERT INTO my_table VALUES(to_timestamp('2021-06-10T16:45:46.123456789', 'yyyy-MM-ddTHH:mm:ss.SSSUUUN'), 14); + +my_table; +``` + +The output maintains microsecond resolution: + +| ts | col1 | +| :-------------------------- | :--- | +| 2021-06-08T15:45:45.123456Z | 12 | +| 2021-06-08T16:45:45.123456Z | 13 | +| 2021-06-09T16:45:46.123456Z | 14 | + +## QuestDB's internal time zone database + +In order to simplify working with time zones, QuestDB uses +[the tz time zone database](https://en.wikipedia.org/wiki/Tz_database) which is +standard in the Java ecosystem. This time zone database is used internally in +time zone lookup and in operations relating to timestamp value conversion to and +from time zones. 
+ +For this reason, a time zone may be referenced by abbreviated name, by full time +zone name or by UTC offset: + +| Abbreviation | Time zone name | UTC offset | +| :----------- | :--------------- | :--------- | +| EST | America/New_York | -05:00 | + +### Referring to time zones + +It's strongly advised **not to use the three-letter ID** or abbreviation for +time zones for the following reason: + +> The same abbreviation is often used for multiple time zones (for example, +> "CST" could be U.S. "Central Standard Time" and "China Standard Time"), and +> the Java platform can then only recognize one of them + +Therefore, choosing a geographic region which observes a time zone +(`"America/New_York"`, `"Europe/Prague"`) or a UTC offset value (`"+02:00"`) is +more reliable when referring to time zones. Instructions for converting to and +from time zones are described in the +[Converting timestamps to and from time zones](#converting-timestamps-to-and-from-time-zones) +section below. + +The current QuestDB time zone database uses the **English locale** but support +for additional locales may be added in future. Referring to time zones which are +outdated or not recognized results in a `invalid timezone name` error. The +following resources may be used for hints how to refer to time zones by ID or +offset: + +- The [official list maintained by IANA](https://www.iana.org/time-zones) +- Java's + [getAvailableZoneIds](https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#getAvailableZoneIds--) + method +- [Wiki entry on tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + (this is a convenient reference, but may not be 100% accurate) + +:::note + +Users should be aware that the time zone database contains both **current and +historic transitions** for various time zones. Therefore time zone conversions +must take the historic time zone transitions into account based on the timestamp +values. + +::: + +### Updates to the time zone database + +The upstream project updates past time zones as new information becomes +available. These changes are typically related to daylight saving time (DST) +start and end date transitions and, on rare occasions, time zone name changes. + +The tz database version used by QuestDB is determined by the JDK version used at +build time and therefore updates to the time zone database are directly +influenced by this JDK version. To find the JDK version used by a QuestDB build, +run the following SQL: + +```questdb-sql +SELECT build() +``` + +| build | +| -------------------------------------------------------------------------------------------------- | +| Build Information: QuestDB 6.0.3, JDK 11.0.7, Commit Hash a6afbadb9b9419d47cca1bf86fa13fdadf08bda4 | + +## Converting timestamps to and from time zones + +For convenience, QuestDB includes two functions for time zone conversions on +timestamp values. 
+ +- [to_timezone()](/docs/reference/function/date-time/#to_timezone) +- [to_utc()](/docs/reference/function/date-time/#to_utc) + +These functions are used to convert a Unix timestamp, or a string equivalent +cast to timestamp as follows: + +```questdb-sql +SELECT to_timezone(1623167145000000, 'Europe/Berlin') +``` + +| to_timezone | +| :-------------------------- | +| 2021-06-08T17:45:45.000000Z | + +```questdb-sql +SELECT to_utc(1623167145000000, 'Europe/Berlin') +``` + +| to_utc | +| :-------------------------- | +| 2021-06-08T13:45:45.000000Z | + +### Using UTC offset for conversions + +The [to_timezone()](/docs/reference/function/date-time/#to_timezone) and +[to_utc()](/docs/reference/function/date-time/#to_utc) functions may use UTC +offset for converting timestamp values. In some cases, this can be more reliable +than string or time zone ID conversion given historic changes to time zone names +or transitions. The following example takes a Unix timestamp in microseconds and +converts it to a time zone `+2` hours offset from UTC: + +```questdb-sql +SELECT to_timezone(1213086329000000, '+02:00') +``` + +| to_timezone | +| :-------------------------- | +| 2008-06-10T10:25:29.000000Z | + +```questdb-sql +SELECT to_utc('2008-06-10T10:25:29.000000Z', '+02:00') +``` + +| to_timezone | +| :-------------------------- | +| 2008-06-10T08:25:29.000000Z | diff --git a/docs/introduction.md b/docs/introduction.md new file mode 100644 index 00000000..85c427ae --- /dev/null +++ b/docs/introduction.md @@ -0,0 +1,247 @@ +--- +title: Introduction +slug: / +description: + QuestDB is a relational column-oriented database designed for real-time + analytics on time-series data. +--- + +QuestDB is a [time-series database](/glossary/time-series-database/) for high +throughput ingestion and fast SQL queries with operational simplicity. It +supports schema-agnostic ingestion using the InfluxDB line protocol, PostgreSQL +wire protocol, and a REST API for bulk imports and exports. + +These pages cover core concepts of QuestDB, including setup steps, usage guides, +and reference documentation for syntax, APIs, and configuration. + +## Get Started + +This section explains how to install and run QuestDB using one of the following +methods: + +- [Docker](/docs/get-started/docker/) for repeatable, portable and scalable + installations +- [Binaries](/docs/get-started/binaries/) for direct downloads to run on Linux, + macOS or Windows +- [Homebrew](/docs/get-started/homebrew/) for running QuestDB on macOS +- [Create your first database](/docs/get-started/first-database/) provides a + guide once QuestDB is running +- [Learn more](/docs/get-started/learn-more/) is a selection of useful concepts + and guides for new QuestDB users + +## Develop + +This section describes how to connect to QuestDB using a variety of tools and +programming languages through our various network endpoints. + +- [Connect](/docs/develop/connect/) to the database through our various + endpoints. 
Learn which protocol is best for different use cases +- [Insert data](/docs/develop/insert-data/) using the InfluxDB Line Protocol, + PostgreSQL wire protocol or our HTTP REST API +- [Query data](/docs/develop/query-data/) with SQL queries via the PostgreSQL + Wire Protocol or exported to JSON or CSV via our HTTP REST API + +- [Web Console](/docs/develop/web-console/) for quick SQL queries, charting and + CSV upload/export functionality + +## Guides + +- [Migrating from InfluxDB](/docs/guides/influxdb-migration/) +- [Large CSV import (COPY SQL)](/docs/guides/importing-data/) +- [Small CSV import (REST API)](/docs/guides/importing-data-rest/) +- [Modifying Data](/docs/guides/modifying-data/) +- [Timestamps and time zones](/docs/guides/working-with-timestamps-timezones/) + +## Deployment + +- [Capacity planning](/docs/deployment/capacity-planning/) for configuring + server settings and system resources +- [AWS Marketplace AMI](/docs/deployment/aws-official-ami/) +- [Kubernetes](/docs/deployment/kubernetes/) +- [Google Cloud Platform](/docs/deployment/google-cloud-platform/) +- [DigitalOcean Droplet](/docs/deployment/digitalocean/) + +## Operations + +This section contains resources for managing QuestDB instances and has dedicated +pages for the following topics: + +- [Design for performance](/docs/operations/design-for-performance/) for + configuring table settings to improve performance +- [Data retention](/docs/operations/data-retention/) strategy to delete old data + and save disk space +- [Health monitoring](/docs/operations/health-monitoring/) endpoint for + determining the status of the instance +- [Backup and restore](/docs/operations/backup/) using filesystem and + point-in-time backup functionality. Notes for cloud providers. + +## Third-party tools + +This section describes how to integrate QuestDB with third-party tools and +utilities for collecting metrics and visualizing data: + +- [Flink](/docs/third-party-tools/flink/) guide for ingesting data from Flink + into QuestDB +- [Grafana](/docs/third-party-tools/grafana/) instructions for connecting + QuestDB as a datasource for building visualizations and dashboards +- [MindsDB](/docs/third-party-tools/mindsdb/) tutorial for connecting QuestDB to + MindsDB for running models with Machine Learning capabilities. +- [Kafka](/docs/third-party-tools/kafka/overview/) guide for ingesting data from + Kafka topics into QuestDB +- [Pandas](/docs/third-party-tools/pandas/) for ingesting Pandas dataframes via + InfluxDB Line Protocol +- [Prometheus](/docs/third-party-tools/prometheus/) monitoring and alerting +- [Redpanda](/docs/third-party-tools/redpanda/) instructions for a Kafka + compatible data streaming tool +- [Spark](/docs/third-party-tools/spark/) for large-scale data engineering +- [SQLAlchemy](/docs/third-party-tools/sqlalchemy/) guide for using the SQL + toolkit with QuestDB +- [Superset](/docs/third-party-tools/superset/) instructions for using Apache + Superset data visualization +- [Telegraf](/docs/third-party-tools/telegraf/) guide for collecting system + metrics, specifying QuestDB as an output and visualizing the results + +## Concepts + +This section describes the architecture of QuestDB, how it stores and queries +data, and introduces features and capabilities unique to the system. 
+ +- [Storage model](/docs/concept/storage-model/) describes how QuestDB stores + records and partitions within tables +- [Designated timestamp](/docs/concept/designated-timestamp/) is a core feature + that enables time-oriented language capabilities and partitioning +- [SQL extensions](/docs/concept/sql-extensions/) allow performant time-series + analysis with a concise syntax +- [JIT compiler](/docs/concept/jit-compiler/) to speed up `WHERE` clauses +- [Partitions](/docs/concept/partitions/) can be used to perform time-based + queries more efficiently +- [Symbol](/docs/concept/symbol/) type makes storing and retrieving repetitive + strings efficient +- [Indexes](/docs/concept/indexes/) can be used for faster read access on + specific columns +- [Geospatial data](/docs/concept/geohashes/) with geohashes +- [Root directory](/docs/concept/root-directory-structure/) describes the + directory contents of QuestDB for storage and configuration + +## Reference + +This section contains the reference documentation for the following categories: + +### APIs + +- [REST](/docs/reference/api/rest/) +- [Postgres](/docs/reference/api/postgres/) +- [InfluxDB](/docs/reference/api/ilp/overview/) +- [Java (embedded)](/docs/reference/api/java-embedded/) + +### Command-line options + +The following resource provides info on options that may be passed to QuestDB +when starting services: + +- [Command-line options](/docs/reference/command-line-options/) for starting and + running QuestDB from an executable + +### Configuration + +The [configuration](/docs/reference/configuration/) page shows all the +properties that can be used to customize QuestDB. + +### InfluxDB Line Protocol Client Libraries + +- [Client Libraries overview](/docs/reference/clients/overview/) +- [Java InfluxDB Line Protocol client](/docs/reference/clients/java_ilp/) + +### Data Types + +The [data types](/docs/reference/sql/datatypes/) page lists the datatypes that +can be used in QuestDB. 
+ +### Functions + +- [Aggregate](/docs/reference/function/aggregation/) +- [Analytic](/docs/reference/function/analytic/) +- [Binary](/docs/reference/function/binary/) +- [Boolean](/docs/reference/function/boolean/) +- [Conditional](/docs/reference/function/conditional/) +- [Date and time](/docs/reference/function/date-time/) +- [Meta](/docs/reference/function/meta/) +- [Numeric](/docs/reference/function/numeric/) +- [Random value generator](/docs/reference/function/random-value-generator/) +- [Row generator](/docs/reference/function/row-generator/) +- [Spatial](/docs/reference/function/spatial/) +- [Text](/docs/reference/function/text/) +- [Timestamp generator](/docs/reference/function/timestamp-generator/) +- [Timestamp](/docs/reference/function/timestamp/) +- [Trigonometric](/docs/reference/function/trigonometric/) + +### Operators + +- [Bitwise](/docs/reference/operators/bitwise/) +- [Comparison](/docs/reference/operators/comparison/) +- [Pattern matching](/docs/reference/operators/pattern-matching/) +- [Spatial](/docs/reference/operators/spatial/) + +### SQL + +- [SQL Execution order](/docs/reference/sql/datatypes/) +- [Data types](/docs/reference/sql/datatypes/) +- [ALTER TABLE ADD COLUMN](/docs/reference/sql/alter-table-add-column/) +- [ALTER TABLE ATTACH PARTITION](/docs/reference/sql/alter-table-attach-partition/) +- [ALTER TABLE DETACH PARTITION](/docs/reference/sql/alter-table-detach-partition/) +- [ALTER TABLE DROP COLUMN](/docs/reference/sql/alter-table-drop-column/) +- [ALTER TABLE DROP PARTITION](/docs/reference/sql/alter-table-drop-partition/) +- [ALTER TABLE RENAME COLUMN](/docs/reference/sql/alter-table-rename-column/) +- [ALTER TABLE RESUME WAL](/docs/reference/sql/alter-table-resume-wal/) +- [ALTER TABLE SET PARAM](/docs/reference/sql/alter-table-set-param/) +- [ALTER TABLE SET TYPE](/docs/reference/sql/alter-table-set-type/) +- [ALTER TABLE SQUASH PARTITIONS](/docs/reference/sql/alter-table-squash-partitions/) +- [ALTER TABLE ALTER COLUMN ADD INDEX](/docs/reference/sql/alter-table-alter-column-add-index/) +- [ALTER TABLE ALTER COLUMN CACHE | NOCACHE](/docs/reference/sql/alter-table-alter-column-cache/) +- [ALTER TABLE ALTER COLUMN DROP INDEX](/docs/reference/sql/alter-table-alter-column-drop-index/) +- [BACKUP](/docs/reference/sql/backup/) +- [CASE](/docs/reference/sql/case/) +- [CAST](/docs/reference/sql/cast/) +- [COPY](/docs/reference/sql/copy/) +- [CREATE TABLE](/docs/reference/sql/create-table/) +- [DISTINCT](/docs/reference/sql/distinct/) +- [EXPLAIN](/docs/reference/sql/explain/) +- [FILL](/docs/reference/sql/fill/) +- [DROP TABLE](/docs/reference/sql/drop/) +- [GROUP BY](/docs/reference/sql/group-by/) +- [INSERT](/docs/reference/sql/insert/) +- [JOIN](/docs/reference/sql/join/) +- [LATEST ON](/docs/reference/sql/latest-on/) +- [LIMIT](/docs/reference/sql/limit/) +- [ORDER BY](/docs/reference/sql/order-by/) +- [REINDEX](/docs/reference/sql/reindex/) +- [RENAME TABLE](/docs/reference/sql/rename/) +- [SAMPLE BY](/docs/reference/sql/sample-by/) +- [SELECT](/docs/reference/sql/select/) +- [SHOW](/docs/reference/sql/show/) +- [SNAPSHOT](/docs/reference/sql/snapshot/) +- [TRUNCATE TABLE](/docs/reference/sql/truncate/) +- [UNION EXCEPT INTERSECT](/docs/reference/sql/union-except-intersect/) +- [VACUUM TABLE](/docs/reference/sql/vacuum-table/) +- [WHERE](/docs/reference/sql/where/) +- [WITH](/docs/reference/sql/with/) + +## Support + +For hints on diagnosing common configuration issues, see the following +resources: + +- [Troubleshooting 
FAQ](/docs/troubleshooting/faq/) guide with solutions for + various HW & SW configuration issues +- [Log](/docs/troubleshooting/log/) guide for log location and details +- [List of OS error codes](/docs/troubleshooting/os-error-codes/) page with the + list of Operating System error codes + +We are happy to help with any question you may have, particularly to help you +optimize the performance of your application. Feel free to reach out using the +following channels: + +- [Raise an issue on GitHub]({@githubUrl@}/issues) +- [Join the Community Slack]({@slackUrl@}) +- [QuestDB on Stack Overflow]({@stackoverflowUrl@}) +- or email us at [hello@questdb.io](mailto:hello@questdb.io) diff --git a/docs/operations/backup.md b/docs/operations/backup.md new file mode 100644 index 00000000..3711545f --- /dev/null +++ b/docs/operations/backup.md @@ -0,0 +1,160 @@ +--- +title: Backup and restore +sidebar_label: Backup and restore +description: + Details and resources which describe how to perform database backup and + restore operations for a QuestDB instance using point-in-time backups and + filesystem images. +--- + +This document provides practical details of using the point-in-time backup +functionality in QuestDB along with filesystem backup as means to prevent data +loss. Alongside backup details, this document describes how to restore from +backups and hints for performing filesystem backups on common cloud providers. + +QuestDB provides two strategies for creating backups: + +- **Point-in-time** (PIT) backup +- **Filesystem** backup + +For a backup to be successful, the database backup must contain database +metadata files and directories (`db`, `config` etc.) See the +[root directory](/docs/concept/root-directory-structure) documentation for +details about these directories. + +## Limitations + +QuestDB officially supports the following filesystems: + +- APFS +- EXT4 +- NTFS +- OVERLAYFS (used by Docker) +- XFS + +Other file systems supporting +[mmap](https://man7.org/linux/man-pages/man2/mmap.2.html) feature may work with +QuestDB but they should not be used in production, as QuestDB does not run tests +on them. + +:::caution + +- A backup includes the contents of the database up to the point of executing a + backup. Any data inserted while a backup is underway is not stored as part of + the backup. + +- Users can't use NFS or a similar distributed filesystem directly with QuestDB, + but users may copy a backup to such a filesystem after a backup has been made. + +::: + +## Creating a point-in-time backup + +When creating a point-in-time (PIT) backup in QuestDB, you can specify that the +whole database or specific tables should be backed up. This process will create +a backup in a directory specified by the user in the `cairo.sql.backup.root` +[configuration key](/docs/reference/configuration/). For more details on passing +configuration in this manner, see the +[server configuration](/docs/concept/root-directory-structure/#serverconf) +documentation. + +```ini title="/path/to/server.conf" +cairo.sql.backup.root=/path/to/backup/dir +``` + +A backup can then be triggered via [SQL command](/docs/reference/sql/backup/) and +the backup is complete as soon as the SQL query has finished executing: + +```questdb-sql +-- backup whole database +BACKUP database; +-- backup a specific table +BACKUP table my_table; +``` + +Note that calling `BACKUP TABLE ` will only copy table data and +metadata to the destination folder. 
This form of backup will not copy entire +database configuration files required to perform a complete database restore. + +Alternatively, the [REST API](/docs/reference/api/rest/#exec---execute-queries) +can be used to execute the SQL for a database backup: + +```bash title="Backing up a database via curl" +curl -G --data-urlencode "query=BACKUP database;" \ + http://localhost:9000/exec +``` + +## Creating a filesystem backup (disk snapshot) + +:::caution + +To run a reliable filesystem backup without database downtime, you should use +`SNAPSHOT PREPARE`/`SNAPSHOT COMPLETE` +[SQL statements](/docs/reference/sql/snapshot/). + +::: + +The most common ways to perform cloud-native filesystem snapshots are described +in the following resources, which rely on similar steps but have minor +differences in terminology and services: + +- [AWS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-creating-snapshot.html) - + creating EBS snapshots +- [Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/snapshot-copy-managed-disk?tabs=portal) - + creating snapshots of a virtual hard disk +- [GCP](https://cloud.google.com/compute/docs/disks/create-snapshots/) - working + with persistent disk snapshots + +## Restoring from a backup + +In order to restore a backup, the QuestDB executable must be provided with the +directory location of an existing backup as the **root directory**. This can +done via the `-d` flag as `-d /path/to/backup` when starting up QuestDB. + +```bash +java -p /path/to/questdb-.jar \ + -m io.questdb/io.questdb.ServerMain \ + -d /path/to/backup_directory +``` + +Users who are starting QuestDB via `systemd` or the official AWS AMI may refer +to the +[systemd file](https://github.com/questdb/questdb/blob/master/pkg/ami/marketplace/assets/systemd.service#L21) +for reference. To verify that database information has been successfully +imported, check logs via `journalctl -u questdb` which will contain a list +existing tables. + +Docker instances may have a backup directory mounted to the root directory as +follows: + +```bash +docker run \ + -p 9000:9000 -p 9009:9009 \ + -p 8812:8812 -p 9003:9003 \ + -v "/path/to/backup_directory:/root/.questdb/" questdb/questdb +``` + +## Examples + +The following example sets up a cronjob which triggers a daily backup via REST +API: + +```bash +# this will add crontab record that will run trigger at backup every-day at 01:00 AM +# copy paste this into server terminal +crontab -l | { cat; echo "0 1 * * * /usr/bin/curl --silent -G --data-urlencode 'query=BACKUP database;' http://localhost:9000/exec &>/dev/null"; } | crontab - +``` + +This example shows how to compress a backup using the `tar` utility. An archive +file `questdb_backup.tar.gz` will be created in the directory that the command +is run: + +```bash +tar -zcvf questdb_backup.tar.gz /path/to/backup +``` + +The backup file can be expanded using the same utility: + +```bash +tar -xf questdb_backup.tar.gz +``` diff --git a/docs/operations/data-retention.md b/docs/operations/data-retention.md new file mode 100644 index 00000000..70840960 --- /dev/null +++ b/docs/operations/data-retention.md @@ -0,0 +1,132 @@ +--- +title: Data retention +description: + How to employ a data retention strategy to delete old data and save disk space +--- + +## Background + +The nature of time-series data is that the relevance of information diminishes +over time. If stale data is no longer required, users can delete old data from +QuestDB to either save disk space or adhere to a data retention policy. 
This is +achieved in QuestDB by removing data partitions from a table. + +This page provides a high-level overview of partitioning with examples to drop +data by date. For more details on partitioning, see the +[partitioning](/docs/concept/partitions/) page. + +## Strategy for data retention + +A simple approach to removing stale data is to drop data that has been +partitioned by time. A table must have a +[designated timestamp](/docs/concept/designated-timestamp/) assigned and a +partitioning strategy specified during a `CREATE TABLE` operation to achieve +this. + +:::note + +Users cannot alter the partitioning strategy after a table is created. + +::: + +Tables can be partitioned by one of the following: + +- `YEAR` +- `MONTH` +- `WEEK` +- `DAY` +- `HOUR` + +```questdb-sql title="Creating a table and partitioning by DAY" +CREATE TABLE my_table(ts TIMESTAMP, symb SYMBOL, price DOUBLE) timestamp(ts) +PARTITION BY DAY; +``` + +### Dropping partitions + +:::caution + +Use `DROP PARTITION` with care, as QuestDB **cannot recover data from dropped +partitions**. + +::: + +To drop partitions, users can use the +[ALTER TABLE DROP PARTITION](/docs/reference/sql/alter-table-drop-partition/) +syntax. Partitions may be dropped by: + +- `DROP PARTITION LIST` - specifying a comma-separated list of partitions to + drop + + ```questdb-sql + --Delete a partition + ALTER TABLE my_table DROP PARTITION LIST '2021-01-01'; + + --Delete a list of two partitions + ALTER TABLE my_table DROP PARTITION LIST '2021-01-01', '2021-01-02'; + ``` + +- `WHERE timestamp =` - exact date matching by timestamp + + ```questdb-sql + ALTER TABLE my_table DROP PARTITION + WHERE timestamp = to_timestamp('2021-01-01', 'yyyy-MM-dd'); + ``` + +- `WHERE timestamp <` - using comparison operators (`<` / `>`) to delete by time + range relative to a timestamp. Note that the `now()` function may be used to + automate dropping of partitions relative to the current time, i.e.: + + ```questdb-sql + --Drop partitions older than 30 days + WHERE timestamp < dateadd('d', -30, now()) + ``` + +**Usage notes:** + +- The most chronologically recent partition cannot be deleted +- Arbitrary partitions may be dropped, which means they may not be the oldest + chronologically. Depending on the types of queries users are performing on a + dataset, it may not be desirable to have gaps caused by dropped partitions. + +### Example + +The following example demonstrates how to create a table with partitioning and +to drop partitions based on time. This example produces 5 days' worth of data +with one incrementing `LONG` value inserted per hour. 
+ +```questdb-sql title="Create a partitioned table and generate data" +CREATE TABLE my_table (timestamp TIMESTAMP, x LONG) timestamp(timestamp) +PARTITION BY DAY; + +INSERT INTO my_table +SELECT timestamp_sequence( + to_timestamp('2021-01-01T00:00:00', 'yyyy-MM-ddTHH:mm:ss'),100000L * 36000), x +FROM long_sequence(120); +``` + +For reference, the following functions are used to generate the example data: + +- [timestamp sequence](/docs/reference/function/timestamp-generator/#timestamp_sequence) + with 1 hour stepping +- [row generator](/docs/reference/function/row-generator/#long_sequence) with + `long_sequence()` function which creates a `x:long` column + +The result of partitioning is visible when listing as directories on disk: + +```bash title="path/to//db" +my_table +├── 2021-01-01 +├── 2021-01-02 +├── 2021-01-03 +├── 2021-01-04 +└── 2021-01-05 +``` + +Partitions can be dropped using the following query: + +``` +--Delete days before 2021-01-03 +ALTER TABLE my_table DROP PARTITION +WHERE timestamp < to_timestamp('2021-01-03', 'yyyy-MM-dd'); +``` diff --git a/docs/operations/design-for-performance.md b/docs/operations/design-for-performance.md new file mode 100644 index 00000000..78d67af4 --- /dev/null +++ b/docs/operations/design-for-performance.md @@ -0,0 +1,135 @@ +--- +title: Design for performance +description: How to plan and configure database to optimize performance. +--- + +To optimize the performance of a QuestDB instance, it is important to adjust +system and table configuration according to the nature of the data. This page +lists out common configurations that users should take into account when testing +data using QuestDB. + +To monitor various metrics of the QuestDB instances, refer to the +[Prometheus monitoring page](/docs/third-party-tools/prometheus/) or the +[Health monitoring page](/docs/operations/health-monitoring/). + +Refer to [Capacity planning](/docs/deployment/capacity-planning/) for deployment +considerations. + +## Optimizing queries + +The following section describes the underlying aspects to consider when +formulating queries. + +### Row serialization + +Row serialization and deserialization has a cost on both client and server. The +QuestDB Web Console limits fetching to 10,000 dataset. When fetching a large +(10K+) dataset via a single query using other methods, consider using +pagination, hence multiple queries instead. + +## Choosing a schema + +This section provides some hints for choosing the right schema for a dataset +based on the storage space that types occupy in QuestDB. + +### Partitioning + +When creating tables, a partitioning strategy is recommended in order to be able +to enforce a data retention policy to save disk space, and for optimizations on +the number of concurrent file reads performed by the system. For more +information on this topic, see the following resources: + +- [partitions](/docs/concept/partitions/) page which provides a general overview + of this concept +- [data retention](/docs/operations/data-retention/) guide provides further + details on partitioning tables with examples on how to drop partitions by time + range + +#### Records per partition + +The number of records per partition should factor into the partitioning strategy +(`YEAR`, `MONTH`, `WEEK`, `DAY`, `HOUR`). Having too many records per partition or +having too few records per partition and having query operations across too many +partitions has the result of slower query times. 
A general guideline is that +roughly between 1 million and 100 million records is optimal per partition. + +### Symbols + +[Symbols](/docs/concept/symbol/) are a data type that is recommended to be used +for strings that are repeated often in a dataset. The benefit of using this data +type is lower storage requirements than regular strings and faster performance +on queries as symbols are internally stored as `int` values. + +Only symbols can be [indexed](/docs/concept/indexes/) in QuestDB. Although +multiple indexes can be specified for a table, there would be a performance +impact on the rate of ingestion. + +The following example shows the creation of a table with a `symbol` type that +has multiple options passed for performance optimization. + +```questdb-sql +CREATE TABLE my_table( + symb SYMBOL CAPACITY 1048576 NOCACHE INDEX CAPACITY 512, + s STRING, + ts TIMESTAMP +) timestamp(ts) PARTITION BY DAY; +``` + +This example adds a `symbol` type with: + +- **capacity** specified to estimate how many unique symbol values to expect +- **caching** disabled which allows dealing with larger value counts +- **index** for the symbol column with a storage block value + +A full description of the options used above for `symbol` types can be found in +the [CREATE TABLE](/docs/reference/sql/create-table/#symbol) page. + +#### Symbol caching + +[Symbol cache](/docs/concept/symbol/#usage-of-symbols) enables the use of on-heap +cache for reads and can enhance performance. However, the cache size grows as +the number of distinct value increases, and the size of the cached symbol may +hinder query performance. + +We recommend that users check the JVM and GC metrics via +[Prometheus monitoring](/docs/third-party-tools/prometheus/) before taking one +of the following steps: + +- Disabling the symbol cache. See + [Usage of `symbols`](/docs/concept/symbol/#usage-of-symbols) for server-wide + and table-wide configuration options. +- Increasing the JVM heap size using the `-Xmx` argument. + +#### Symbol capacity + +[Symbol capacity](/docs/concept/symbol/#usage-of-symbols) should be the same or +slightly larger than the count of distinct symbol values. + +Undersized symbol columns slow down query performance. Similarly, there is a +performance impact when symbol is not used for its designed way, most commonly +assigning `symbol` to columns with a unique value per row. It is crucial to +choose a suitable [data type](/docs/reference/sql/datatypes/) based on the +nature of the dataset. + +#### Index + +Appropriate us of [indexes](/docs/concept/indexes/) provides faster read access +to a table. However, indexes have a noticeable cost in terms of disk space and +ingestion rate - we recommend starting with no indexes and adding them later, +only if they appear to improve query performance. Refer to +[Index trade-offs](/docs/concept/indexes/#trade-offs) for more information. + +### Numbers + +The storage space that numbers occupy can be optimized by choosing `byte`, +`short`, and `int` data types appropriately. When values are not expected to +exceed the limit for that particular type, savings on disk space can be made. +See also [Data types](/docs/reference/sql/datatypes/) for more details. 
+ +| type | storage per value | numeric range | +| :------ | :---------------- | :--------------------------------------- | +| byte | 8 bits | -128 to 127 | +| short | 16 bits | -32768 to 32767 | +| int | 32 bits | -2147483648 to 2147483647 | +| float | 32 bits | Single precision IEEE 754 floating point | +| double | 64 bits | Double precision IEEE 754 floating point | diff --git a/docs/operations/health-monitoring.md b/docs/operations/health-monitoring.md new file mode 100644 index 00000000..932fd97d --- /dev/null +++ b/docs/operations/health-monitoring.md @@ -0,0 +1,103 @@ +--- +title: Health monitoring +description: + How to configure health monitoring for querying the status of a QuestDB + instance using an embedded server, Prometheus metrics and Alertmanager. +--- + +This document describes the options available for monitoring the health of a +QuestDB instance. There are options for minimal health checks via a `min` server +which provides a basic 'up/down' check, or detailed metrics in Prometheus format +exposed via an HTTP endpoint. + +## Prometheus metrics endpoint + +Prometheus is an open-source systems monitoring and alerting toolkit. Prometheus +collects and stores metrics as time-series data, i.e. metrics information is +stored with the timestamp at which it was recorded, alongside optional key-value +pairs called labels. + +QuestDB exposes a `/metrics` endpoint which provides internal system metrics in +Prometheus format. To use this functionality and get started with example +configuration, refer to the +[Prometheus documentation](/docs/third-party-tools/prometheus/). + +## Min health server + +REST APIs will often be situated behind a load balancer that uses a monitor URL +for its configuration. Having a load balancer query the QuestDB REST endpoints +(on port `9000` by default) will cause internal logs to become excessively +noisy. Additionally, configuring per-URL logging would increase server latency. + +To provide a dedicated health check feature that would have no performance knock +on other system components, we opted to decouple health checks from the REST +endpoints used for querying and ingesting data. For this purpose, a `min` HTTP +server runs embedded in a QuestDB instance and has a separate log and thread +pool configuration. + +The configuration section for the `min` HTTP server is available in the +[minimal HTTP server reference](/docs/reference/configuration/#minimal-http-server). + +The `min` server is enabled by default and will reply to any `HTTP GET` request +to port `9003`: + +```shell title="GET health status of local instance" +curl -v http://127.0.0.1:9003 +``` + +The server will respond with an HTTP status code of `200`, indicating that the +system is operational: + +```shell title="200 'OK' response" +* Trying 127.0.0.1... 
+* TCP_NODELAY set
+* Connected to 127.0.0.1 (127.0.0.1) port 9003 (#0)
+> GET / HTTP/1.1
+> Host: 127.0.0.1:9003
+> User-Agent: curl/7.64.1
+> Accept: */*
+>
+< HTTP/1.1 200 OK
+< Server: questDB/1.0
+< Date: Tue, 26 Jan 2021 12:31:03 GMT
+< Transfer-Encoding: chunked
+< Content-Type: text/plain
+<
+* Connection #0 to host 127.0.0.1 left intact
+```
+
+Path segments are ignored, which means that an arbitrary path may be appended to
+the URL and the server will respond with identical results, e.g.:
+
+```shell title="GET health status with arbitrary path"
+curl -v http://127.0.0.1:9003/status
+```
+
+## Unhandled error detection
+
+When the metrics subsystem is
+[enabled](/docs/third-party-tools/prometheus/#scraping-prometheus-metrics-from-questdb)
+on the database, the health endpoint may be configured to check for occurrences
+of unhandled errors since the database started. If any errors are detected, it
+returns the HTTP 500 status code. The check is based on the
+`questdb_unhandled_errors_total` metric.
+
+To enable this check, set the following:
+
+```ini title="server.conf to enable critical error checks in the health check endpoint"
+metrics.enabled=true
+http.pessimistic.health.check.enabled=true
+```
+
+When the metrics subsystem is disabled, the health check endpoint always returns
+the HTTP 200 status code.
+
+## Avoiding CPU starvation
+
+On systems with
+[8 cores or fewer](/docs/deployment/capacity-planning/#cpu-cores), thread
+contention might increase the latency of health check responses. If a load
+balancer reports the QuestDB service as dead with nothing apparent in the
+QuestDB logs, you may need to configure a dedicated thread pool for the health
+check service. For more reference, see the
+[minimal HTTP server configuration](/docs/reference/configuration/#minimal-http-server).
diff --git a/docs/operations/updating-data.md b/docs/operations/updating-data.md
new file mode 100644
index 00000000..ee0a5db7
--- /dev/null
+++ b/docs/operations/updating-data.md
@@ -0,0 +1,76 @@
+---
+title: Updating data
+sidebar_label: Updating data
+description:
+  How the UPDATE statement is implemented in QuestDB.
+---
+
+This document describes how the UPDATE statement works in QuestDB and what
+happens under the hood when an update is executed.
+
+## Storage model
+
+To understand how table rows are updated in QuestDB, we first need an idea of
+how the data is stored. The documentation contains detailed descriptions of the
+[storage model](/docs/concept/storage-model/) and the
+[directory layout](/docs/concept/root-directory-structure/#db-directory), but in
+summary:
+
+- Each table has its own folder in the db root; the directory is named after
+  the table
+- Partitions are manifested as subdirectories under the folder which represents
+  the table
+- The actual data is stored in column files inside these subdirectories
+- Column files store data **ordered by the designated timestamp** and they are
+  **append-only**. This fits time-series data naturally: think of market data,
+  where the prices of financial instruments are tracked throughout the trading
+  day
+
+## Column versions
+
+Since the data is stored in order and the files are append-only, updating it is
+not straightforward. We took the optimistic approach and assumed that past data
+will never have to change. This is great for read performance.
+However, sometimes you may need to **amend data** which has been recorded
+incorrectly because of a bug or for any other reason.
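+
+For illustration, a correction of this kind might look like the following
+(a minimal sketch; the `trades` table and its column names are hypothetical):
+
+```questdb-sql
+-- Fix a price that was ingested with the wrong value
+UPDATE trades
+SET price = 30.5
+WHERE ticker = 'BTCUSD' AND timestamp = '2023-01-01T00:00:00.000000Z';
+```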
+
+We could break our append-only model and start accessing different parts of the
+column files to fix incorrect data. The problem we would face is inconsistent
+reads: readers running queries on the table could see some parts of the data
+updated but not others.
+
+The solution is to make the update **transactional** and **copy-on-write**: a
+new column file is created when processing the UPDATE statement. All readers
+look at a previous consistent view of the data from an older column file while
+the UPDATE is in progress. Readers can find the latest committed version of
+column files based on a record stored in a metadata file. When the update is
+completed and a new column version is available for the readers, this metadata
+record gets updated as part of the commit. After the metadata has changed,
+newly submitted SELECT queries will see the updated data.
+
+The copy-on-write approach gives us data consistency and good performance at a
+price: disk usage will increase. When sizing disk space, we should account for
+extra storage to make sure UPDATE statements have enough headroom. Only column
+files whose data actually changes get a new version. For example, if only a
+single column is updated in a single partition of a table, then only a single
+column file will be rewritten.
+
+Please also check the guide on [modifying data](/docs/guides/modifying-data/)
+in QuestDB for additional information.
+
+## Vacuum updated columns
+
+When a column is updated, the new version of the column is written to disk and a
+background task starts to vacuum redundant column files. The term Vacuum
+originates from Postgres; it means collecting garbage and releasing disk space.
+The Vacuum task periodically checks whether older column versions are still used
+by readers and deletes unused files.
+Vacuum runs automatically, and there is also a
+[`VACUUM TABLE`](/docs/reference/sql/vacuum-table/) SQL command to trigger it
+manually.
+
+## Limitations
+
+The current implementation of the UPDATE operation rewrites the column files by
+copying records in their existing order from the previous version, replacing
+values that need changing. As a result, the **designated timestamp cannot be
+updated.**
+
+Modifying the designated timestamp would mean rewriting the history of the time
+series. Records would need to be reordered, which could even mean moving rows
+between different partitions. We may remove this limitation in the future if
+there is enough demand from users.
diff --git a/docs/reference/_cairo.config.json b/docs/reference/_cairo.config.json
new file mode 100644
index 00000000..71388712
--- /dev/null
+++ b/docs/reference/_cairo.config.json
@@ -0,0 +1,402 @@
+{
+  "query.timeout.sec": {
+    "default": "60",
+    "description": "A global timeout (in seconds) for long-running queries. Timeout for each query can override the default by setting HTTP header [`Statement-Timeout`](/docs/reference/api/rest/#headers) or Postgres [`options`](/docs/reference/api/postgres/#list-of-supported-connection-properties)."
+  },
+  "cairo.max.uncommitted.rows": {
+    "default": "500000",
+    "description": "Maximum number of uncommitted rows per table. When the number of pending rows reaches this parameter on a table, a commit will be issued."
+  },
+  "cairo.o3.max.lag (QuestDB 6.6 and later)": {
+    "default": "10 minutes",
+    "description": "The maximum size of in-memory buffer in milliseconds.
The buffer is allocated dynamically through analysing the shape of the incoming data, and `o3MaxLag` is the upper limit." + }, + "cairo.o3.min.lag (QuestDB 6.6 and later)": { + "default": "1 second", + "description": "The minimum size of in-memory buffer in milliseconds. The buffer is allocated dynamically through analysing the shape of the incoming data, and `o3MinLag` is the lower limit." + }, + "cairo.sql.backup.root": { + "default": "null", + "description": "Output root directory for backups." + }, + "cairo.sql.backup.dir.datetime.format": { + "default": "null", + "description": "Date format for backup directory." + }, + "cairo.sql.backup.dir.tmp.name": { + "default": "tmp", + "description": "Name of tmp directory used during backup." + }, + "cairo.sql.backup.mkdir.mode": { + "default": "509", + "description": "Permission used when creating backup directories." + }, + "cairo.snapshot.instance.id": { + "default": "empty string", + "description": "Instance id to be included into disk snapshots." + }, + "cairo.snapshot.recovery.enabled": { + "default": "true", + "description": "When `false`, disables snapshot recovery on database start." + }, + "cairo.root": { + "default": "db", + "description": "Directory for storing db tables and metadata. This directory is inside the server root directory provided at startup." + }, + "cairo.commit.mode": { + "default": "nosync", + "description": "How changes to table are flushed to disk upon commit. Choices: `nosync`, `async` (flush call schedules update, returns immediately), `sync` (waits for flush on the appended column files to complete)." + }, + "cairo.rnd.memory.max.pages": { + "default": "128", + "description": "Sets the max number of pages for memory used by `rnd_` functions. Supports `rnd_str()` and `rnd_symbol()`." + }, + "cairo.rnd.memory.page.size": { + "default": "8K", + "description": "Sets the memory page size used by `rnd_` functions. Supports `rnd_str()` and `rnd_symbol()`." + }, + "cairo.create.as.select.retry.count": { + "default": "5", + "description": "Number of types table creation or insertion will be attempted." + }, + "cairo.default.map.type": { + "default": "fast", + "description": "Type of map used. Options: `fast` (speed at the expense of storage), `compact`." + }, + "cairo.default.symbol.cache.flag": { + "default": "true", + "description": "When `true`, symbol values will be cached on Java heap instead of being looked up in the database files." + }, + "cairo.default.symbol.capacity": { + "default": "256", + "description": "Specifies approximate capacity for `SYMBOL` columns. It should be equal to number of unique symbol values stored in the table and getting this value badly wrong will cause performance degradation. Must be power of 2." + }, + "cairo.file.operation.retry.count": { + "default": "30", + "description": "Number of attempts to open files." + }, + "cairo.idle.check.interval": { + "default": "300000", + "description": "Frequency of writer maintenance job in milliseconds." + }, + "cairo.inactive.reader.ttl": { + "default": "120000", + "description": "TTL (Time-To-Live) to close inactive readers in milliseconds." + }, + "cairo.inactive.writer.ttl": { + "default": "600000", + "description": "TTL (Time-To-Live) to close inactive writers in milliseconds." + }, + "cairo.index.value.block.size": { + "default": "256", + "description": "Approximation of number of rows for a single index key, must be power of 2." 
+ }, + "cairo.max.swap.file.count": { + "default": "30", + "description": "Number of attempts to open swap files." + }, + "cairo.mkdir.mode": { + "default": "509", + "description": "File permission mode for new directories." + }, + "cairo.parallel.index.threshold": { + "default": "100000", + "description": "Minimum number of rows before allowing use of parallel indexation." + }, + "cairo.reader.pool.max.segments": { + "default": "5", + "description": "Number of attempts to get TableReader." + }, + "cairo.spin.lock.timeout": { + "default": "1000", + "description": "Timeout when attempting to get BitmapIndexReaders in millisecond." + }, + "cairo.character.store.capacity": { + "default": "1024", + "description": "Size of the CharacterStore." + }, + "cairo.character.store.sequence.pool.capacity": { + "default": "64", + "description": "Size of the CharacterSequence pool." + }, + "cairo.column.pool.capacity": { + "default": "4096", + "description": "Size of the Column pool in the SqlCompiler." + }, + "cairo.compact.map.load.factor": { + "default": "0.7", + "description": "Load factor for CompactMaps." + }, + "cairo.expression.pool.capacity": { + "default": "8192", + "description": "Size of the ExpressionNode pool in SqlCompiler." + }, + "cairo.fast.map.load.factor": { + "default": "0.5", + "description": "Load factor for all FastMaps." + }, + "cairo.sql.join.context.pool.capacity": { + "default": "64", + "description": "Size of the JoinContext pool in SqlCompiler." + }, + "cairo.lexer.pool.capacity": { + "default": "2048", + "description": "Size of FloatingSequence pool in GenericLexer." + }, + "cairo.sql.map.key.capacity": { + "default": "2M", + "description": "Key capacity in FastMap and CompactMap." + }, + "cairo.sql.map.max.resizes": { + "default": "2^31", + "description": "Number of map resizes in FastMap and CompactMap before a resource limit exception is thrown, each resize doubles the previous size." + }, + "cairo.sql.map.page.size": { + "default": "4m", + "description": "Memory page size for FastMap and CompactMap." + }, + "cairo.sql.map.max.pages": { + "default": "2^31", + "description": "Memory max pages for CompactMap." + }, + "cairo.model.pool.capacity": { + "default": "1024", + "description": "Size of the QueryModel pool in the SqlCompiler." + }, + "cairo.sql.sort.key.page.size": { + "default": "4M", + "description": "Memory page size for storing keys in LongTreeChain." + }, + "cairo.sql.sort.key.max.pages": { + "default": "2^31", + "description": "Max number of pages for storing keys in LongTreeChain before a resource limit exception is thrown." + }, + "cairo.sql.sort.light.value.page.size": { + "default": "1048576", + "description": "Memory page size for storing values in LongTreeChain." + }, + "cairo.sql.sort.light.value.max.pages": { + "default": "2^31", + "description": "Max pages for storing values in LongTreeChain." + }, + "cairo.sql.hash.join.value.page.size": { + "default": "16777216", + "description": "Memory page size of the slave chain in full hash joins." + }, + "cairo.sql.hash.join.value.max.pages": { + "default": "2^31", + "description": "Max pages of the slave chain in full hash joins." + }, + "cairo.sql.latest.by.row.count": { + "default": "1000", + "description": "Number of rows for LATEST BY." + }, + "cairo.sql.hash.join.light.value.page.size": { + "default": "1048576", + "description": "Memory page size of the slave chain in light hash joins." 
+ }, + "cairo.sql.hash.join.light.value.max.pages": { + "default": "2^31", + "description": "Max pages of the slave chain in light hash joins." + }, + "cairo.sql.sort.value.page.size": { + "default": "16777216", + "description": "Memory page size of file storing values in SortedRecordCursorFactory." + }, + "cairo.sql.sort.value.max.pages": { + "default": "2^31", + "description": "Max pages of file storing values in SortedRecordCursorFactory." + }, + "cairo.work.steal.timeout.nanos": { + "default": "10000", + "description": "Latch await timeout in nanos for stealing indexing work from other threads." + }, + "cairo.parallel.indexing.enabled": { + "default": "true", + "description": "Allows parallel indexation. Works in conjunction with cairo.parallel.index.threshold." + }, + "cairo.sql.join.metadata.page.size": { + "default": "16384", + "description": "Memory page size for JoinMetadata file." + }, + "cairo.sql.join.metadata.max.resizes": { + "default": "2^31", + "description": "Number of map resizes in JoinMetadata before a resource limit exception is thrown, each resize doubles the previous size." + }, + "cairo.sql.analytic.column.pool.capacity": { + "default": "64", + "description": "Size of AnalyticColumn pool in SqlParser." + }, + "cairo.sql.create.table.model.pool.capacity": { + "default": "16", + "description": "Size of CreateTableModel pool in SqlParser." + }, + "cairo.sql.column.cast.model.pool.capacity": { + "default": "16", + "description": "Size of CreateTableModel pool in SqlParser." + }, + "cairo.sql.rename.table.model.pool.capacity": { + "default": "16", + "description": "Size of RenameTableModel pool in SqlParser." + }, + "cairo.sql.with.clause.model.pool.capacity": { + "default": "128", + "description": "Size of WithClauseModel pool in SqlParser." + }, + "cairo.sql.insert.model.pool.capacity": { + "default": "64", + "description": "Size of InsertModel pool in SqlParser." + }, + "cairo.sql.copy.model.pool.capacity": { + "default": "32", + "description": "Size of CopyModel pool in SqlParser." + }, + "cairo.sql.copy.buffer.size": { + "default": "2M", + "description": "Size of buffer used when copying tables." + }, + "cairo.sql.double.cast.scale": { + "default": "12", + "description": "Maximum number of decimal places that types cast as doubles have." + }, + "cairo.sql.float.cast.scale": { + "default": "4", + "description": "Maximum number of decimal places that types cast as floats have." + }, + "cairo.sql.copy.formats.file": { + "default": "/text_loader.json", + "description": "Name of file with user's set of date and timestamp formats." + }, + "cairo.sql.jit.mode": { + "default": "on", + "description": "JIT compilation for SQL queries. May be disabled by setting this value to `off`." + }, + "cairo.sql.jit.debug.enabled": { + "default": "false", + "description": "Sets debug flag for JIT compilation. When enabled, assembly will be printed into `stdout`." + }, + "cairo.sql.jit.bind.vars.memory.page.size": { + "default": "4K", + "description": "Sets the memory page size for storing bind variable values for JIT compiled filter." + }, + "cairo.sql.jit.bind.vars.memory.max.pages": { + "default": "8", + "description": "Sets the max memory pages for storing bind variable values for JIT compiled filter." + }, + "cairo.sql.jit.rows.threshold": { + "default": "1M", + "description": "Sets minimum number of rows to shrink filtered rows memory after query execution." 
+ }, + "cairo.sql.jit.page.address.cache.threshold": { + "default": "1M", + "description": "Sets minimum cache size to shrink page address cache after query execution." + }, + "cairo.sql.jit.ir.memory.page.size": { + "default": "8K", + "description": "Sets the memory page size for storing IR for JIT compilation." + }, + "cairo.sql.jit.ir.max.pages": { + "default": "8", + "description": "Sets max memory pages for storing IR for JIT compilation." + }, + "cairo.sql.page.frame.min.rows": { + "default": "1000", + "description": "Sets the minimum number of rows in page frames used in SQL queries." + }, + "cairo.sql.page.frame.max.rows": { + "default": "1000000", + "description": "Sets the maximum number of rows in page frames used in SQL. queries" + }, + "cairo.sql.sampleby.page.size": { + "default": "0", + "description": "SampleBy index query page size. Max values returned in single scan. 0 is default, and it means to use symbol block capacity." + }, + "cairo.date.locale": { + "default": "en", + "description": "The locale to handle date types." + }, + "cairo.timestamp.locale": { + "default": "en", + "description": "The locale to handle timestamp types." + }, + "cairo.o3.column.memory.size": { + "default": "8M", + "description": "Memory page size per column for O3 operations. Please be aware O3 will use 2x of the set value per column (therefore a default of 2x8M)." + }, + "cairo.writer.data.append.page.size": { + "default": "16M", + "description": "mmap sliding page size that table writer uses to append data for each column." + }, + "cairo.writer.data.index.key.append.page.size": { + "default": "512K", + "description": "mmap page size for appending index key data; key data is number of distinct symbol values times 4 bytes." + }, + "cairo.writer.data.index.value.append.page.size": { + "default": "16M", + "description": "mmap page size for appending value data." + }, + "cairo.writer.misc.append.page.size": { + "default": "4K", + "description": "mmap page size for mapping small files, default value is OS page size (4k Linux, 64K windows, 16k OSX M1). Overriding this rounds to the nearest (greater) multiple of the OS page size." + }, + "cairo.writer.command.queue.capacity": { + "default": "32", + "description": "Maximum writer ALTER TABLE and replication command capacity. Shared between all the tables." + }, + "cairo.writer.tick.rows.count": { + "default": "1024", + "description": "Row count to check writer command queue after on busy writing, e.g. tick after X rows written." + }, + "cairo.writer.alter.busy.wait.timeout": { + "default": "500", + "description": "Maximum wait timeout in milliseconds for `ALTER TABLE` SQL statement run via REST and PG Wire interfaces when statement execution is `ASYNCHRONOUS`." + }, + "cairo.sql.column.purge.queue.capacity": { + "default": "128", + "description": "Purge column version job queue. Increase the size if column version not automatically cleanup after execution of UPDATE SQL statement. Reduce to decrease initial memory footprint." + }, + "cairo.sql.column.purge.task.pool.capacity": { + "default": "256", + "description": "Column version task object pool capacity. Increase to reduce GC, reduce to decrease memory footprint." + }, + "cairo.sql.column.purge.retry.delay": { + "default": "10000", + "description": "Initial delay (μs) before re-trying purge of stale column files." + }, + "cairo.sql.column.purge.retry.delay.multiplier": { + "default": "10.0", + "description": "Multiplier used to increases retry delay with each iteration." 
+ }, + "cairo.sql.column.purge.retry.delay.limit": { + "default": "60000000", + "description": "Delay limit (μs), upon reaching which, the re-try delay remains constant." + }, + "cairo.sql.column.purge.retry.limit.days": { + "default": "31", + "description": "Number of days purge system will continue to re-try deleting stale column files before giving up." + }, + "cairo.volumes": { + "default": "-", + "description": "A comma separated list of _alias -> root-path_ pairs defining allowed volumes to be used in [CREATE TABLE IN VOLUME](/docs/reference/sql/create-table/#table-target-volume) statements." + }, + "cairo.system.table.prefix": { + "default": "sys.", + "description": "Prefix of the tables used for QuestDB internal data storage. These tables are hidden from QuestDB web console." + }, + "cairo.wal.enabled.default": { + "default": "false", + "description": "Setting defining whether WAL table is the default when using `CREATE TABLE`." + }, + "cairo.o3.partition.split.min.size": { + "default": "50MB", + "description": "The estimated partition size on disk. This setting is one of the conditions to trigger [auto-partitioning](/docs/deployment/capacity-planning/#auto-partitioning)" + }, + "cairo.o3.last.partition.max.splits": { + "default": "20", + "description": "The number of partition pieces allowed before the last partition piece is merged back to the physical partition." + }, + "cairo.o3.partition.purge.list.initial.capacity": { + "default": "1", + "description": "Number of partition expected on average. Initial value for purge allocation job, extended in runtime automatically." + } +} diff --git a/docs/reference/_csv-import.config.json b/docs/reference/_csv-import.config.json new file mode 100644 index 00000000..74647c37 --- /dev/null +++ b/docs/reference/_csv-import.config.json @@ -0,0 +1,30 @@ +{ + "cairo.sql.copy.root": { + "default": "null", + "description": "Input root directory for CSV imports via `COPY` SQL. This path should not overlap with other directory (e.g. db, conf) of running instance, otherwise import may delete or overwrite existing files." + }, + "cairo.sql.copy.work.root": { + "default": "null", + "description": "Temporary import file directory. Defaults to `root_directory/tmp` if not set explicitly." + }, + "cairo.iouring.enabled": { + "default": "true", + "description": "Enable or disable io_uring implementation. Applicable to newer Linux kernels only. Can be used to switch io_uring interface usage off if there's a kernel bug affecting it." + }, + "cairo.sql.copy.buffer.size": { + "default": "2 MiB", + "description": "Size of read buffers used in import." + }, + "cairo.sql.copy.log.retention.days": { + "default": "3", + "description": "Number of days to keep import messages in `sys.text_import_log`." + }, + "cairo.sql.copy.max.index.chunk.size": { + "default": "100M", + "description": "Maximum size of index chunk file used to limit total memory requirements of import. Indexing phase should use roughly `thread_count * cairo.sql.copy.max.index.chunk.size` of memory." + }, + "cairo.sql.copy.queue.capacity": { + "default": "32", + "description": "Size of copy task queue. Should be increased if there's more than 32 import workers." + } +} diff --git a/docs/reference/_http-minimal.config.json b/docs/reference/_http-minimal.config.json new file mode 100644 index 00000000..7ab7d787 --- /dev/null +++ b/docs/reference/_http-minimal.config.json @@ -0,0 +1,34 @@ +{ + "http.min.enabled": { + "default": "true", + "description": "Enable or disable Minimal HTTP server." 
+ }, + "http.min.bind.to": { + "default": "0.0.0.0:9003", + "description": "IPv4 address and port of the server. `0` means it will bind to all network interfaces, otherwise the IP address must be one of the existing network adapters." + }, + "http.min.net.connection.limit": { + "default": "4", + "description": "Active connection limit." + }, + "http.min.net.connection.timeout": { + "default": "300000", + "description": "Idle connection timeout in milliseconds." + }, + "http.min.net.connection.hint": { + "default": "false", + "description": "Windows specific flag to overcome OS limitations on TCP backlog size." + }, + "http.min.worker.count": { + "default": "", + "description": "By default, minimal HTTP server uses shared thread pool for CPU core count 16 and below. It will use dedicated thread for core count above 16. When `0`, the server will use the shared pool. Do not set pool size to more than `1`." + }, + "http.min.worker.affinity": { + "default": "", + "description": "Core number to pin thread to." + }, + "http.min.worker.haltOnError": { + "default": "false", + "description": "Flag that indicates if the worker thread must stop when an unexpected error occurs." + } +} diff --git a/docs/reference/_http.config.json b/docs/reference/_http.config.json new file mode 100644 index 00000000..0006f8f1 --- /dev/null +++ b/docs/reference/_http.config.json @@ -0,0 +1,210 @@ +{ + "http.enabled": { + "default": "true", + "description": "Enable or disable HTTP server." + }, + "http.bind.to": { + "default": "0.0.0.0:9000", + "description": "IP address and port of HTTP server. A value of `0` means that the HTTP server will bind to all network interfaces. You can specify IP address of any individual network interface on your system." + }, + "http.net.connection.limit": { + "default": "64", + "description": "The maximum number permitted for simultaneous TCP connection to the HTTP server. The rationale of the value is to control server memory consumption." + }, + "http.net.connection.timeout": { + "default": "300000", + "description": "TCP connection idle timeout in milliseconds. Connection is closed by HTTP server when this timeout lapses." + }, + "http.net.connection.sndbuf": { + "default": "2M", + "description": "Maximum send buffer size on each TCP socket. If this value is `-1`, the socket send buffer size remains unchanged from the OS defaults." + }, + "http.net.connection.rcvbuf": { + "default": "2M", + "description": "Maximum receive buffer size on each TCP socket. If this value is `-1`, the socket receive buffer size remains unchanged from the OS defaults." + }, + "http.net.connection.hint": { + "default": "false", + "description": "Windows specific flag to overcome OS limitations on TCP backlog size" + }, + "http.net.connection.queue.timeout": { + "default": "5000", + "description": "Amount of time in milliseconds a connection can wait in the listen backlog queue before it is refused. Connections will be aggressively removed from the backlog until the active connection limit is breached." + }, + "http.net.bind.to": { + "default": "0.0.0.0:9000", + "description": "IP address and port of HTTP server." + }, + "http.connection.pool.initial.capacity": { + "default": "4", + "description": "Initial size of pool of reusable objects that hold connection state. The pool should be configured to maximum realistic load so that it does not resize at runtime." 
+ }, + "http.connection.string.pool.capacity": { + "default": "128", + "description": "Initial size of the string pool shared by the HTTP header and multipart content parsers." + }, + "http.multipart.header.buffer.size": { + "default": "512", + "description": "Buffer size in bytes used by the HTTP multipart content parser." + }, + "http.multipart.idle.spin.count": { + "default": "10000", + "description": "How long the code accumulates incoming data chunks for column and delimiter analysis." + }, + "http.receive.buffer.size": { + "default": "1M", + "description": "Size of receive buffer." + }, + "http.request.header.buffer.size": { + "default": "64K", + "description": "Size of internal buffer allocated for HTTP request headers. The value is rounded up to the nearest power of 2. When HTTP requests contain headers that exceed the buffer size server will disconnect the client with HTTP error in server log." + }, + "http.response.header.buffer.size": { + "default": "32K", + "description": "Size of the internal response buffer. The value will be rounded up to the nearest power of 2. The buffer size should be large enough to accommodate max size of server response headers." + }, + "http.worker.count": { + "default": "0", + "description": "Number of threads in private worker pool. When `0`, HTTP server will be using shared worker pool of the server. Values above `0` switch on private pool." + }, + "http.worker.affinity": { + "default": "", + "description": "Comma separated list of CPU core indexes. The number of items in this list must be equal to the worker count." + }, + "http.worker.haltOnError": { + "default": "false", + "description": "**Changing the default value is strongly discouraged**. Flag that indicates if the worker thread must stop when an unexpected error occurs." + }, + "http.send.buffer.size": { + "default": "2M", + "description": "Size of the internal send buffer. Larger buffer sizes result in fewer I/O interruptions the server is making at the expense of memory usage per connection. There is a limit of send buffer size after which increasing it stops being useful in terms of performance. 2MB seems to be optimal value." + }, + "http.static.index.file.name": { + "default": "index.html", + "description": "Name of index file for the Web Console." + }, + "http.frozen.clock": { + "default": "false", + "description": "Sets the clock to always return zero. This configuration parameter is used for internal testing." + }, + "http.allow.deflate.before.send": { + "default": "false", + "description": "Flag that indicates if Gzip compression of outgoing data is allowed." + }, + "http.keep-alive.timeout": { + "default": "5", + "description": "Used together with `http.keep-alive.max` to set the value of HTTP `Keep-Alive` response header. This instructs browser to keep TCP connection open. Has to be `0` when `http.version` is set to `HTTP/1.0`." + }, + "http.keep-alive.max": { + "default": "10000", + "description": "See `http.keep-alive.timeout`. Has to be `0` when `http.version` is set to `HTTP/1.0`." + }, + "http.static.public.directory": { + "default": "public", + "description": "The name of directory for public web site." + }, + "http.text.date.adapter.pool.capacity": { + "default": "16", + "description": "Size of date adapter pool. This should be set to the anticipated maximum number of `DATE` fields a text input can have. The pool is assigned to connection state and is reused alongside of connection state object." 
+  },
+  "http.text.json.cache.limit": {
+    "default": "16384",
+    "description": "JSON parser cache limit. Cache is used to compose JSON elements that have been broken up by TCP protocol. This value limits the maximum length of individual tag or tag value."
+  },
+  "http.text.json.cache.size": {
+    "default": "8192",
+    "description": "Initial size of JSON parser cache. The value must not exceed `http.text.json.cache.limit` and should be set to avoid cache resizes at runtime."
+  },
+  "http.text.max.required.delimiter.stddev": {
+    "default": "0.1222d",
+    "description": "The maximum standard deviation value for the algorithm that calculates text file delimiter. Usually, when the text parser cannot recognise the delimiter, it will log the calculated and maximum standard deviation for the delimiter candidate."
+  },
+  "http.text.max.required.line.length.stddev": {
+    "default": "0.8",
+    "description": "Maximum standard deviation value for the algorithm that classifies input as text or binary. For values above the configured stddev, the input will be considered binary."
+  },
+  "http.text.metadata.string.pool.capacity": {
+    "default": "128",
+    "description": "The initial size of the pool for objects that wrap individual elements of metadata JSON, such as column names, date pattern strings and locale values."
+  },
+  "http.text.roll.buffer.limit": {
+    "default": "4M",
+    "description": "The limit of the text roll buffer. See `http.text.roll.buffer.size` for a description."
+  },
+  "http.text.roll.buffer.size": {
+    "default": "1024",
+    "description": "The roll buffer is a structure in the text parser that holds a copy of a line that has been broken up by TCP. The size should be set to the maximum length of a text line in text input."
+  },
+  "http.text.analysis.max.lines": {
+    "default": "1000",
+    "description": "Number of lines to read on CSV import for heuristics which determine column names & types. Lower line numbers may detect CSV schemas quicker, but possibly with less accuracy. 1000 lines is the maximum for this value."
+  },
+  "http.text.lexer.string.pool.capacity": {
+    "default": "64",
+    "description": "The initial capacity of the string pool, which wraps `STRING` column types in text input. The value should correspond to the maximum anticipated number of STRING columns in text input."
+  },
+  "http.text.timestamp.adapter.pool.capacity": {
+    "default": "64",
+    "description": "Size of the timestamp adapter pool. This should be set to the anticipated maximum number of `TIMESTAMP` fields a text input can have. The pool is assigned to connection state and is reused alongside the connection state object."
+  },
+  "http.text.utf8.sink.size": {
+    "default": "4096",
+    "description": "Initial size of the UTF-8 adapter sink. The value should correspond to the maximum individual field value length in text input."
+  },
+  "http.json.query.connection.check.frequency": {
+    "default": "1000000",
+    "description": "**Changing the default value is strongly discouraged**. The value used to throttle the check for whether the client socket has been disconnected."
+  },
+  "http.json.query.float.scale": {
+    "default": "4",
+    "description": "The scale value of the string representation of `FLOAT` values."
+  },
+  "http.json.query.double.scale": {
+    "default": "12",
+    "description": "The scale value of the string representation of `DOUBLE` values."
+  },
+  "http.query.cache.enabled": {
+    "default": "true",
+    "description": "Enable or disable the query cache. Cache capacity is `number_of_blocks * number_of_rows`."
+ }, + "http.query.cache.block.count": { + "default": "4", + "description": "Number of blocks for the query cache." + }, + "http.query.cache.row.count": { + "default": "16", + "description": "Number of rows for the query cache." + }, + "http.security.readonly": { + "default": "false", + "description": "Forces HTTP read only mode when `true`, disabling commands which modify the data or data structure, e.g. INSERT, UPDATE, or CREATE TABLE." + }, + "http.security.max.response.rows": { + "default": "2^63-1", + "description": "Limit the number of response rows over HTTP." + }, + "http.security.interrupt.on.closed.connection": { + "default": "true", + "description": "Switch to enable termination of SQL processing if the HTTP connection is closed. The mechanism affects performance so the connection is only checked after `circuit.breaker.throttle` calls are made to the check method. The mechanism also reads from the input stream and discards it since some HTTP clients send this as a keep alive in between requests, `circuit.breaker.buffer.size` denotes the size of the buffer for this." + }, + "http.pessimistic.health.check.enabled": { + "default": "false", + "description": "When enabled, the health check returns HTTP 500 for any unhandled errors since the server started." + }, + "circuit.breaker.throttle": { + "default": "2000000", + "description": "Number of internal iterations such as loops over data before checking if the HTTP connection is still open" + }, + "circuit.breaker.buffer.size": { + "default": "32", + "description": "Size of buffer to read from HTTP connection. If this buffer returns zero and the HTTP client is no longer sending data, SQL processing will be terminated." + }, + "http.server.keep.alive": { + "default": "true", + "description": "If set to `false`, the server will disconnect the client after completion of each request." + }, + "http.version": { + "default": "HTTP/1.1", + "description": "Protocol version, other supported value is `HTTP/1.0`." + } +} diff --git a/docs/reference/_parallel-sql.config.json b/docs/reference/_parallel-sql.config.json new file mode 100644 index 00000000..39a5d6c1 --- /dev/null +++ b/docs/reference/_parallel-sql.config.json @@ -0,0 +1,26 @@ +{ + "cairo.sql.parallel.filter.enabled": { + "default": "true", + "description": "Enable or disable parallel SQL filter execution. JIT compilation takes place only when this setting is enabled." + }, + "cairo.sql.parallel.filter.pretouch.enabled": { + "default": "true", + "description": "Enable column pre-touch as part of the parallel SQL filter execution, to improve query performance for large tables." + }, + "cairo.page.frame.shard.count": { + "default": "4", + "description": "Number of shards for both dispatch and reduce queues. Shards reduce queue contention between SQL statements that are executed concurrently." + }, + "cairo.page.frame.reduce.queue.capacity": { + "default": "64", + "description": "Reduce queue is used for data processing and should be large enough to supply tasks for worker threads (shared worked pool)." + }, + "cairo.page.frame.rowid.list.capacity": { + "default": "256", + "description": "Row ID list initial capacity for each slot of the reduce queue. Larger values reduce memory allocation rate, but increase minimal RSS size." + }, + "cairo.page.frame.column.list.capacity": { + "default": "16", + "description": "Column list capacity for each slot of the reduce queue. Used by JIT-compiled filter functions. Larger values reduce memory allocation rate, but increase minimal RSS size." 
+  }
+}
diff --git a/docs/reference/_postgres.config.json b/docs/reference/_postgres.config.json
new file mode 100644
index 00000000..a44a101c
--- /dev/null
+++ b/docs/reference/_postgres.config.json
@@ -0,0 +1,146 @@
+{
+  "pg.enabled": {
+    "default": "true",
+    "description": "Configuration for enabling or disabling the Postgres interface."
+  },
+  "pg.net.bind.to": {
+    "default": "0.0.0.0:8812",
+    "description": "IP address and port of Postgres wire protocol server. 0 means that the server will bind to all network interfaces. You can specify IP address of any individual network interface on your system."
+  },
+  "pg.net.connection.limit": {
+    "default": "64",
+    "description": "The maximum number permitted for simultaneous Postgres connections to the server. This value is intended to control server memory consumption."
+  },
+  "pg.net.connection.timeout": {
+    "default": "300000",
+    "description": "Connection idle timeout in milliseconds. Connections are closed by the server when this timeout lapses."
+  },
+  "pg.net.connection.rcvbuf": {
+    "default": "-1",
+    "description": "Maximum receive buffer size on each TCP socket. If value is -1, the socket receive buffer remains unchanged from OS default."
+  },
+  "pg.net.connection.sndbuf": {
+    "default": "-1",
+    "description": "Maximum send buffer size on each TCP socket. If value is -1, the socket send buffer remains unchanged from OS default."
+  },
+  "pg.net.connection.hint": {
+    "default": "false",
+    "description": "Windows specific flag to overcome OS limitations on TCP backlog size"
+  },
+  "pg.net.connection.queue.timeout": {
+    "default": "300000",
+    "description": "Amount of time in milliseconds a connection can wait in the listen backlog queue before it is refused. Connections will be aggressively removed from the backlog until the active connection limit is breached."
+  },
+  "pg.security.readonly": {
+    "default": "false",
+    "description": "Forces PGWire read only mode when `true`, disabling commands which modify the data or data structure, e.g. INSERT, UPDATE, or CREATE TABLE."
+  },
+  "pg.character.store.capacity": {
+    "default": "4096",
+    "description": "Size of the CharacterStore."
+  },
+  "pg.character.store.pool.capacity": {
+    "default": "64",
+    "description": "Size of the CharacterStore pool capacity."
+  },
+  "pg.connection.pool.capacity": {
+    "default": "64",
+    "description": "The maximum amount of pooled connections this interface may have."
+  },
+  "pg.password": {
+    "default": "quest",
+    "description": "Postgres database password."
+  },
+  "pg.user": {
+    "default": "admin",
+    "description": "Postgres database username."
+  },
+  "pg.readonly.user.enabled": {
+    "default": "false",
+    "description": "Enable or disable Postgres database read-only user account. When enabled, this additional user can be used to open read-only connections to the database."
+  },
+  "pg.readonly.password": {
+    "default": "quest",
+    "description": "Postgres database read-only user password."
+  },
+  "pg.readonly.user": {
+    "default": "user",
+    "description": "Postgres database read-only user username."
+  },
+  "pg.select.cache.enabled": {
+    "default": "true",
+    "description": "Enable or disable the SELECT query cache. Cache capacity is `number_of_blocks * number_of_rows`."
+  },
+  "pg.select.cache.block.count": {
+    "default": "16",
+    "description": "Number of blocks to cache SELECT query execution plan against text to speed up execution."
+ }, + "pg.select.cache.row.count": { + "default": "16", + "description": "Number of rows to cache for SELECT query execution plan against text to speed up execution." + }, + "pg.insert.cache.enabled": { + "default": "true", + "description": "Enable or disable the INSERT query cache. Cache capacity is `number_of_blocks * number_of_rows`." + }, + "pg.insert.cache.block.count": { + "default": "8", + "description": "Number of blocks to cache INSERT query execution plan against text to speed up execution." + }, + "pg.insert.cache.row.count": { + "default": "8", + "description": "Number of rows to cache for INSERT query execution plan against text to speed up execution." + }, + "pg.update.cache.enabled": { + "default": "true", + "description": "Enable or disable the UPDATE query cache. Cache capacity is `number_of_blocks * number_of_rows`." + }, + "pg.update.cache.block.count": { + "default": "8", + "description": "Number of blocks to cache UPDATE query execution plan against text to speed up execution." + }, + "pg.update.cache.row.count": { + "default": "8", + "description": "Number of rows to cache for UPDATE query execution plan against text to speed up execution." + }, + "pg.max.blob.size.on.query": { + "default": "512k", + "description": "For binary values, clients will receive an error when requesting blob sizes above this value." + }, + "pg.recv.buffer.size": { + "default": "1M", + "description": "Size of the buffer for receiving data." + }, + "pg.send.buffer.size": { + "default": "1M", + "description": "Size of the buffer for sending data." + }, + "pg.date.locale": { + "default": "en", + "description": "The locale to handle date types." + }, + "pg.timestamp.locale": { + "default": "en", + "description": "The locale to handle timestamp types." + }, + "pg.worker.count": { + "default": "0", + "description": "Number of dedicated worker threads assigned to handle PGWire queries. When `0`, the jobs will use the shared pool." + }, + "pg.worker.affinity": { + "default": "", + "description": "Comma-separated list of thread numbers which should be pinned for Postgres ingestion. Example `pg.worker.affinity=1,2,3`." + }, + "pg.halt.on.error": { + "default": "false", + "description": "Whether ingestion should stop upon internal error." + }, + "pg.daemon.pool": { + "default": "true", + "description": "Defines whether to run all PGWire worker threads in daemon mode (`true`) or not (`false`)." + }, + "pg.binary.param.count.capacity": { + "default": "2", + "description": "Size of the initial capacity for the pool used for binary bind variables." + } +} diff --git a/docs/reference/_shared-worker.config.json b/docs/reference/_shared-worker.config.json new file mode 100644 index 00000000..7d87a180 --- /dev/null +++ b/docs/reference/_shared-worker.config.json @@ -0,0 +1,14 @@ +{ + "shared.worker.count": { + "default": "", + "description": "Number of worker threads shared across the application. Increasing this number will increase parallelism in the application at the expense of CPU resources." + }, + "shared.worker.affinity": { + "default": "", + "description": "Comma-delimited list of CPU ids, one per thread specified in `shared.worker.count`. By default, threads have no CPU affinity." + }, + "shared.worker.haltOnError": { + "default": "false", + "description": "Flag that indicates if the worker thread must stop when an unexpected error occurs." 
+ } +} diff --git a/docs/reference/_tcp.config.json b/docs/reference/_tcp.config.json new file mode 100644 index 00000000..3015dda2 --- /dev/null +++ b/docs/reference/_tcp.config.json @@ -0,0 +1,114 @@ +{ + "line.tcp.enabled": { + "default": "true", + "description": "Enable or disable line protocol over TCP." + }, + "line.tcp.net.bind.to": { + "default": "0.0.0.0:9009", + "description": "IP address of the network interface to bind listener to and port. By default, TCP receiver listens on all network interfaces." + }, + "line.tcp.net.connection.limit": { + "default": "256", + "description": "The maximum number permitted for simultaneous connections to the server. This value is intended to control server memory consumption." + }, + "line.tcp.net.connection.timeout": { + "default": "300000", + "description": "Connection idle timeout in milliseconds. Connections are closed by the server when this timeout lapses." + }, + "line.tcp.net.connection.hint": { + "default": "false", + "description": "Windows specific flag to overcome OS limitations on TCP backlog size" + }, + "line.tcp.net.connection.rcvbuf": { + "default": "-1", + "description": "Maximum buffer receive size on each TCP socket. If value is -1, the socket receive buffer remains unchanged from OS default." + }, + "line.tcp.net.connection.queue.timeout": { + "default": "5000", + "description": "Amount of time in milliseconds a connection can wait in the listen backlog queue before its refused. Connections will be aggressively removed from the backlog until the active connection limit is breached." + }, + "line.tcp.auth.db.path": { + "default": "", + "description": "Path which points to the authentication db file." + }, + "line.tcp.connection.pool.capacity": { + "default": "64", + "description": "The maximum amount of pooled connections this interface may have." + }, + "line.tcp.timestamp": { + "default": "n", + "description": "Input timestamp resolution. Possible values are `n`, `u`, `ms`, `s` and `h`." + }, + "line.tcp.msg.buffer.size": { + "default": "32768", + "description": "Size of the buffer read from queue. Maximum size of write request, regardless of the number of measurements." + }, + "line.tcp.maintenance.job.interval": { + "default": "1000", + "description": "Maximum amount of time (in milliseconds) between maintenance jobs committing any uncommitted data on inactive tables." + }, + "line.tcp.min.idle.ms.before.writer.release": { + "default": "500", + "description": "Minimum amount of idle time (in milliseconds) before a table writer is released." + }, + "line.tcp.commit.interval.fraction": { + "default": "0.5", + "description": "Commit lag fraction. Used to calculate commit interval for the table according to the following formula: `commit_interval = commit_lag ∗ fraction`. The calculated commit interval defines how long uncommitted data will need to remain uncommitted." + }, + "line.tcp.commit.interval.default": { + "default": "1000", + "description": "Default commit interval in milliseconds." + }, + "line.tcp.max.measurement.size": { + "default": "32768", + "description": "Maximum size of any measurement." + }, + "line.tcp.writer.queue.size": { + "default": "128", + "description": "Size of the queue between network I/O and writer jobs. Each queue entry represents a measurement." + }, + "line.tcp.writer.worker.count": { + "default": "", + "description": "Number of dedicated I/O worker threads assigned to write data to tables. When `0`, the writer jobs will use the shared pool." 
+  },
+  "line.tcp.writer.worker.affinity": {
+    "default": "",
+    "description": "Comma-separated list of thread numbers which should be pinned for line protocol ingestion over TCP. CPU core indexes are 0-based."
+  },
+  "line.tcp.writer.worker.sleep.threshold": {
+    "default": "1000",
+    "description": "Amount of subsequent loop iterations with no work done before the worker goes to sleep."
+  },
+  "line.tcp.writer.worker.yield.threshold": {
+    "default": "10",
+    "description": "Amount of subsequent loop iterations with no work done before the worker thread yields."
+  },
+  "line.tcp.writer.queue.capacity": {
+    "default": "128",
+    "description": "Size of the queue between the IO jobs and the writer jobs, each queue entry represents a measurement."
+  },
+  "line.tcp.writer.halt.on.error": {
+    "default": "false",
+    "description": "Flag that indicates if the worker thread must stop when an unexpected error occurs."
+  },
+  "line.tcp.io.worker.count": {
+    "default": "",
+    "description": "Number of dedicated I/O worker threads assigned to parse TCP input. When `0`, the writer jobs will use the shared pool."
+  },
+  "line.tcp.io.worker.affinity": {
+    "default": "",
+    "description": "Comma-separated list of thread numbers which should be pinned for line protocol ingestion over TCP. CPU core indexes are 0-based."
+  },
+  "line.tcp.io.worker.sleep.threshold": {
+    "default": "1000",
+    "description": "Amount of subsequent loop iterations with no work done before the worker goes to sleep."
+  },
+  "line.tcp.io.worker.yield.threshold": {
+    "default": "10",
+    "description": "Amount of subsequent loop iterations with no work done before the worker thread yields."
+  },
+  "line.tcp.disconnect.on.error": {
+    "default": "true",
+    "description": "Disconnect TCP socket that sends malformed messages."
+  }
+}
diff --git a/docs/reference/_udp.config.json b/docs/reference/_udp.config.json
new file mode 100644
index 00000000..59ffeb30
--- /dev/null
+++ b/docs/reference/_udp.config.json
@@ -0,0 +1,50 @@
+{
+  "line.udp.join": {
+    "default": "232.1.2.3",
+    "description": "Multicast address receiver joins. This value is ignored when the receiver is in \"unicast\" mode."
+  },
+  "line.udp.bind.to": {
+    "default": "0.0.0.0:9009",
+    "description": "IP address of the network interface to bind listener to and port. By default, the UDP receiver listens on all network interfaces."
+  },
+  "line.udp.commit.rate": {
+    "default": "1000000",
+    "description": "For packet bursts, the number of continuously received messages after which the receiver will force a commit. The receiver will commit irrespective of this parameter when there are no messages."
+  },
+  "line.udp.msg.buffer.size": {
+    "default": "2048",
+    "description": "Buffer used to receive a single message. This value should be roughly equal to your MTU size."
+  },
+  "line.udp.msg.count": {
+    "default": "10000",
+    "description": "Only for Linux. On Linux, QuestDB will use the `recvmmsg()` system call. This is the max number of messages to receive at once."
+  },
+  "line.udp.receive.buffer.size": {
+    "default": "8388608",
+    "description": "UDP socket buffer size. A larger buffer will help reduce message loss during bursts."
+  },
+  "line.udp.enabled": {
+    "default": "false",
+    "description": "Enable or disable UDP receiver."
+  },
+  "line.udp.own.thread": {
+    "default": "false",
+    "description": "When `true`, the UDP receiver will use its own thread and busy spin on it for performance reasons. \"false\" makes the receiver use worker threads that do everything else in QuestDB."
+  },
+  "line.udp.own.thread.affinity": {
+    "default": "-1",
+    "description": "-1 does not set thread affinity: the OS will schedule the thread and it may run on random cores and jump between them. A value of 0 or higher pins the thread to the given core. This property is only valid when the UDP receiver uses its own thread."
+  },
+  "line.udp.unicast": {
+    "default": "false",
+    "description": "When `true`, UDP will use unicast. Otherwise multicast."
+  },
+  "line.udp.timestamp": {
+    "default": "n",
+    "description": "Input timestamp resolution. Possible values are `n`, `u`, `ms`, `s` and `h`."
+  },
+  "line.udp.commit.mode": {
+    "default": "nosync",
+    "description": "Commit durability. Available values are `nosync`, `sync` and `async`."
+  }
+}
diff --git a/docs/reference/_wal.config.json b/docs/reference/_wal.config.json
new file mode 100644
index 00000000..f701b617
--- /dev/null
+++ b/docs/reference/_wal.config.json
@@ -0,0 +1,34 @@
+{
+  "wal.apply.worker.count": {
+    "default": "equal to the CPU core count",
+    "description": "Number of dedicated worker threads assigned to handle WAL table data."
+  },
+  "wal.apply.worker.affinity": {
+    "default": "equal to the CPU core count",
+    "description": "Comma separated list of CPU core indexes."
+  },
+  "wal.apply.worker.haltOnError": {
+    "default": "false",
+    "description": "Flag that indicates if the worker thread must stop when an unexpected error occurs."
+  },
+  "cairo.wal.purge.interval": {
+    "default": "30000",
+    "description": "Period in ms of how often WAL-applied files are cleaned up from the disk."
+  },
+  "cairo.wal.segment.rollover.row.count": {
+    "default": "200000",
+    "description": "The number of rows written to the same WAL segment before starting a new segment."
+  },
+  "cairo.wal.commit.squash.row.limit": {
+    "default": "500000",
+    "description": "Maximum row count that can be squashed together from multiple transactions before applying to the table. A very low value can delay data visibility."
+  },
+  "cairo.wal.squash.uncommitted.rows.multiplier": {
+    "default": "20.0",
+    "description": "Multiplier for cairo.max.uncommitted.rows used to calculate the limit of rows that can be kept invisible when writing to a WAL table under heavy load, when multiple transactions are to be applied. It is used to reduce the number of Out-of-Order (O3) commits, when O3 commits are unavoidable, by squashing multiple commits together. Setting it very low can increase O3 commit frequency and decrease throughput. Setting it too high may cause excessive memory usage and increase latency."
+  },
+  "cairo.wal.max.lag.txn.count": {
+    "default": "20",
+    "description": "Maximum number of transactions that can be kept invisible when writing to a WAL table. Once the number is reached, a full commit occurs. If not set, defaults to the rounded value of cairo.wal.squash.uncommitted.rows.multiplier."
+  }
+}
diff --git a/docs/reference/api/ilp/authenticate.md b/docs/reference/api/ilp/authenticate.md
new file mode 100644
index 00000000..dcaa7595
--- /dev/null
+++ b/docs/reference/api/ilp/authenticate.md
@@ -0,0 +1,327 @@
+---
+title: Authentication
+description:
+  This page shows how to set up authentication for InfluxDB line protocol over
+  TCP using JSON web keys.
+---
+
+InfluxDB Line Protocol Authentication works by using an
+[elliptic curve P-256](https://en.wikipedia.org/wiki/Elliptic-curve_cryptography)
+JSON Web Key (JWK) to sign a server challenge.
+This page shows how to authenticate clients with QuestDB when using
+[InfluxDB line protocol](/docs/reference/api/ilp/overview/) for the TCP
+endpoint.
+
+## Prerequisites
+
+QuestDB should be running and accessible and can be started via
+[Docker](/docs/get-started/docker/), the [binaries](/docs/get-started/binaries/)
+or [Homebrew](/docs/get-started/homebrew/) for macOS users.
+
+The [jose](https://github.com/latchset/jose) package is a C-language
+implementation of the JavaScript Object Signing and Encryption standard and may
+be used for convenience to generate cryptographic keys. It's also recommended to
+[install jq](https://stedolan.github.io/jq/download/) for parsing the JSON
+output from the keys generated by `jose`.
+
+```bash
+brew install jose
+brew install jq
+```
+
+```bash
+yum install jose
+yum install jq
+```
+
+```bash
+apt install jose
+apt install jq
+```
+
+## Server configuration
+
+In order to use this feature, you need to create an authentication file using
+the following template:
+
+```bash
+testUser1 ec-p-256-sha256 fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac
+# [key/user id] [key type] {keyX keyY}
+```
+
+Only elliptic curve (P-256) keys are supported (key type `ec-p-256-sha256`). An
+authentication file can be generated using the `jose` utility with the following
+commands:
+
+```bash
+jose jwk gen -i '{"alg":"ES256", "kid": "testUser1"}' -o /var/lib/questdb/conf/full_auth.json
+
+KID=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.kid')
+X=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.x')
+Y=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.y')
+
+echo "$KID ec-p-256-sha256 $X $Y" | tee /var/lib/questdb/conf/auth.txt
+```
+
+Once you have created the file, you will need to reference it in the server
+[configuration](/docs/reference/configuration/):
+
+```ini title='/path/to/server.conf'
+line.tcp.auth.db.path=conf/auth.txt
+```
+
+## Client keys
+
+For the server configuration above, the corresponding JSON Web Key must be
+stored on the client side. When sending a fully-composed JWK, it will have the
+following keys:
+
+```json
+{
+  "kty": "EC",
+  "d": "5UjEMuA0Pj5pjK8a-fa24dyIf-Es5mYny3oE_Wmus48",
+  "crv": "P-256",
+  "kid": "testUser1",
+  "x": "fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU",
+  "y": "Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac"
+}
+```
+
+For this kind of key, the `d` property is used to generate the secret key. The
+`x` and `y` parameters are used to generate the public key (the values that we
+store in the server authentication file).
+
+## Client library configuration
+
+The server will now expect the client to send its key id (terminated with `\n`)
+straight after `connect()`. The server will respond with a challenge (printable
+characters terminated with `\n`). The client needs to sign the challenge and
+respond to the server with the `base64` encoded signature (terminated with
+`\n`). If all is good, the client can then continue; if not, the server will
+disconnect and log the failure.
+ +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + + + + + + + +```javascript +const { Socket } = require("net") +const { Crypto } = require("node-webcrypto-ossl") + +const crypto = new Crypto() + +const PORT = 9009 +const HOST = "localhost" + +const PRIVATE_KEY = "5UjEMuA0Pj5pjK8a-fa24dyIf-Es5mYny3oE_Wmus48" +const PUBLIC_KEY = { + x: "fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU", + y: "Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac", +} +const JWK = { + ...PUBLIC_KEY, + kid: "testUser1", + kty: "EC", + d: PRIVATE_KEY, + crv: "P-256", +} + +const client = new Socket() + +async function write(data) { + return new Promise((resolve) => { + client.write(data, () => { + resolve() + }) + }) +} + +async function authenticate(challenge) { + // Check for trailing \n which ends the challenge + if (challenge.slice(-1).readInt8() === 10) { + const apiKey = await crypto.subtle.importKey( + "jwk", + JWK, + { name: "ECDSA", namedCurve: "P-256" }, + true, + ["sign"], + ) + + const signature = await crypto.subtle.sign( + { name: "ECDSA", hash: "SHA-256" }, + apiKey, + challenge.slice(0, challenge.length - 1), + ) + + await write(`${Buffer.from(signature).toString("base64")}\n`) + + return true + } + + return false +} + +async function sendData() { + const rows = [ + `test,location=us temperature=22.4 ${Date.now() * 1e6}`, + `test,location=us temperature=21.4 ${Date.now() * 1e6}`, + ] + + for (row of rows) { + await write(`${row}\n`) + } +} + +async function run() { + let authenticated = false + let data + + client.on("data", async function (raw) { + data = !data ? raw : Buffer.concat([data, raw]) + + if (!authenticated) { + authenticated = await authenticate(data) + await sendData() + setTimeout(() => { + client.destroy() + }, 0) + } + }) + + client.on("ready", async function () { + await write(`${JWK.kid}\n`) + }) + + client.connect(PORT, HOST) +} + +run() +``` + + + + + + + +```go +package main + +import ( + "context" + "fmt" + "log" + "time" + + qdb "github.com/questdb/go-questdb-client" +) + +func main() { + ctx := context.TODO() + // Connect to QuestDB running on 127.0.0.1:9009 + sender, err := qdb.NewLineSender( + ctx, + // Specify keyId and key for authentication. + qdb.WithAuth("testUser1", "5UjEMuA0Pj5pjK8a-fa24dyIf-Es5mYny3oE_Wmus48"), + ) + if err != nil { + log.Fatal(err) + } + // Make sure to close the sender on exit to release resources. + defer sender.Close() + // Send a few InfluxDB Line Protocol messages. + err = sender. + Table("trades"). + Symbol("name", "test_ilp1"). + Float64Column("value", 12.4). + AtNow(ctx) + if err != nil { + log.Fatal(err) + } + err = sender. + Table("trades"). + Symbol("name", "test_ilp2"). + Float64Column("value", 11.4). + AtNow(ctx) + if err != nil { + log.Fatal(err) + } + // Make sure that the messages are sent over the network. 
+ err = sender.Flush(ctx) + if err != nil { + log.Fatal(err) + } +} +``` + + + + + +```python +# https://github.com/questdb/py-questdb-client + +from questdb.ingress import Sender, IngressError, TimestampNanos, TimestampMicros +import datetime +import sys + +HOST = 'localhost' +PORT = 9009 + + +def send_with_auth(): + try: + auth = ("YOUR_KID", "YOUR_D_KEY", "YOUR_X_KEY", "YOUR_Y_KEY") + with Sender(HOST, PORT, auth=auth, tls=True) as sender: + buffer = sender.new_buffer() + buffer.row( + 'trades', + symbols={'name': 'tls_client_timestamp'}, + columns={'value': 12.4, 'valid_from': TimestampMicros.from_datetime(datetime.datetime.utcnow())}, + at=TimestampNanos.from_datetime(datetime.datetime.utcnow())) + sender.flush(buffer) + except IngressError as e: + sys.stderr.write(f'Got error: {e}') + + +if __name__ == '__main__': + send_with_auth() +``` + + + + + diff --git a/docs/reference/api/ilp/columnset-types.md b/docs/reference/api/ilp/columnset-types.md new file mode 100644 index 00000000..367fc5ee --- /dev/null +++ b/docs/reference/api/ilp/columnset-types.md @@ -0,0 +1,303 @@ +--- +title: InfluxDB Line Protocol Columnset Value Types +sidebar_label: Columnset Value Types +description: + Describes all support value types in InfluxDB Line Protocol columnset. +--- + +This page lists the supported InfluxDB Line Protocol columnset value types and +details about type casting. + +If a target column does not exist, QuestDB will create a column using the same +type that the InfluxDB Line Protocol client sends. + +Type casts that cause data loss will cause entire line to be rejected. + +## Integer + +64-bit signed integer values, which correspond to QuestDB type `long`. The +values are required to have `i` suffix. For example: + +```shell +temps,device=cpu,location=south value=96i 1638202821000000000\n +``` + +Sometimes integer values are small and do not warrant 64 bits to store them. To +reduce storage for such values it is possible to create a table upfront with +smaller type, for example: + +```questdb-sql +CREATE TABLE temps (device SYMBOL, location SYMBOL, value SHORT); +``` + +The line above will be accepted and `96i` will be cast to `short`. + +### Cast table + +The following `cast` operations are supported when existing table column type is +not `long`: + +| | `byte` | `short` | `int` | `long` | `float` | `double` | `date` | `timestamp` | +| :-------- | :----- | :------ | :---- | :------- | :------ | :------- | :----- | :---------- | +| `integer` | cast | cast | cast | `native` | cast | cast | cast | cast | + +## Long256 + +Custom type, which correspond to QuestDB type `long256`. The values are hex +encoded 256-bit unsigned integer values with `i` suffix. For example: + +```shell +temps,device=cpu,location=south value=0x123a4i 1638202821000000000\n +``` + +When column does not exist, it will be created with type `long256`. Values +overflowing 256-bit integer will cause the entire line to be rejected. + +`long256` cannot be cast to anything else. + +## Float + +These values correspond to QuestDB type `double`. They actually do not have any +suffix, which might lead to a confusion. For example: + +```shell +trade,ticker=BTCUSD price=30 1638202821000000000\n +``` + +`price` value will be stored as `double` even though it does not look like a +conventional double value would. 
+ +### Cast table + +The following `cast` operations are supported when existing table column type is +not `double`: + +| | `float` | `double` | +| :------ | :------ | :------- | +| `float` | cast | `native` | + +## Boolean + +These value correspond to QuestDB type `boolean`. In InfluxDB Line Protocol +`boolean` values can be represented in any of the following ways: + +| Actual value | Single char lowercase | Single char uppercase | Full lowercase | Full camelcase | Full uppercase | +| :----------- | :-------------------- | :-------------------- | :------------- | :------------- | :------------- | +| `true` | `t` | `T` | `true` | `True` | `TRUE` | +| `false` | `f` | `F` | `false` | `False` | `FALSE` | + +Example: + +```shell +sensors,location=south warning=false\n +``` + +### Cast table + +The following `cast` operations are supported when existing table column type is +not `boolean`: + +| | `boolean` | `byte` | `short` | `int` | `float` | `long` | `double` | +| :-------- | :-------- | :----- | :------ | :---- | :------ | :----- | :------- | +| `boolean` | `native` | cast | cast | cast | cast | cast | cast | + +When cast to numeric type, boolean `true` is `1` and `false` is `0` + +## String + +These value correspond to QuestDB type `string`. They must be enclosed in +quotes. The following characters in values must be escaped with a `\`: `"`, +`\n`, `\r` and `\`. For example: + +```shell +trade,ticker=BTCUSD description="this is a \"rare\" value",user="John" 1638202821000000000\n +``` + +The result: + +| timestamp | ticker | description | user | +| :------------------ | :----- | :--------------------- | :--- | +| 1638202821000000000 | BTCUSD | this is a "rare" value | John | + +:::note + +String values must be UTF-8 encoded before sending. + +::: + +### Cast table + +The following `cast` operations are supported when existing table column type is +not `string`: + +| | `char` | `string` | `geohash` | `symbol` | `uuid` | +| :------- | :----- | :------- | :-------- | :------- | ------ | +| `string` | cast | `native` | cast | cast | cast | + +### Cast to CHAR + +String value can be cast to `char` type if its length is less than 2 characters. +The following example are valid lines: + +```shell +trade,ticker=BTCUSD status="A" 1638202821000000000\n +trade,ticker=BTCUSD status="" 1638202821000000001\n +``` + +The result: + +| timestamp | ticker | status | +| :------------------ | :----- | :----- | +| 1638202821000000000 | BTCUSD | A | +| 1638202821000000001 | BTCUSD | `null` | + +Casting strings with 2 or more characters to `char` will cause entire line to be +rejected. + +### Cast to GEOHASH + +String value can be cast to `geohash` type when the destination column exists +and is of a `GEOHASH` type already. Do make sure that column is created upfront. +Otherwise, InfluxDB Line Protocol will create `STRING` column regardless of the +value. + +Example: + +Upcasting is an attempt to store higher resolution `geohash` in a lower +resolution column. Let's create table before sending a message. Our `geohash` +column has resolution of 4 bits. + +```questdb-sql +CREATE TABLE tracking ( + geohash GEOHASH(4b), + ts TIMESTAMP +) TIMESTAMP(ts) PARTITION BY HOUR; +``` + +Send message including `16c` `geohash` value: + +```shell +tracking,obj=VLCC\ STEPHANIE gh="9v1s8hm7wpkssv1h" 1000000000\n +``` + +The result: the `geohash` value has been truncated to size of the column. 
+
+| ts                          | gh   |
+| :-------------------------- | :--- |
+| 1970-01-01T00:00:01.000000Z | 0100 |
+
+Sending an empty string value will insert `null` into a `geohash` column of any size:
+
+```shell
+tracking,obj=VLCC\ STEPHANIE gh="" 2000000000\n
+```
+
+| ts                          | gh     |
+| :-------------------------- | :----- |
+| 1970-01-01T00:00:02.000000Z | `null` |
+
+:::note
+
+Downcasting a `geohash` value, that is, inserting a lower resolution value into a
+higher resolution column, will cause the entire line to be rejected.
+
+:::
+
+### Cast to SYMBOL
+
+The symbol values correspond to the QuestDB type
+[`symbol`](/docs/concept/symbol/). String values can be cast to the `symbol`
+type when the destination column exists and its type is `symbol`. This gives
+clients an option to populate `symbol` columns without knowing the type of the
+columns.
+
+```questdb-sql
+CREATE TABLE trade (
+  ticker SYMBOL,
+  timestamp TIMESTAMP
+) TIMESTAMP(timestamp) PARTITION BY HOUR;
+```
+
+Send messages including `BTCUSD` as `string`:
+
+```shell
+trade ticker="BTCUSD" 1638202821000000000\n
+trade ticker="BTCUSD" 1638402821000000000\n
+```
+
+The `ticker` column is populated with `symbol` values:
+
+| timestamp                   | ticker |
+| :-------------------------- | :----- |
+| 2021-11-29T16:20:21.000000Z | BTCUSD |
+| 2021-12-01T23:53:41.000000Z | BTCUSD |
+
+We recommend sending `symbol` values directly as the `symbol` type because it
+will automatically create a `symbol` column if it doesn't exist.
+
+When `symbol` values are sent as the `string` type and the column does not exist,
+it will be created as the `string` type.
+
+### Cast to UUID
+
+String values can be cast to the `uuid` type when all the following are true:
+
+- The destination column exists.
+- The destination column type is `uuid`.
+- The `string` values are valid UUIDs.
+
+```questdb-sql
+CREATE TABLE trade (
+  ticker SYMBOL,
+  uuid UUID,
+  timestamp TIMESTAMP
+) TIMESTAMP(timestamp) PARTITION BY HOUR;
+```
+
+Send messages including the UUID value `a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11` as
+`string`:
+
+```shell
+trade,ticker="BTCUSD" uuid="a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11" 1638202821000000000\n
+trade,ticker="BTCUSD" uuid="a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11" 1638402821000000000\n
+```
+
+The `uuid` column is populated with `uuid` values:
+
+| timestamp                   | ticker | uuid                                 |
+| :-------------------------- | :----- | :----------------------------------- |
+| 2021-11-29T16:20:21.000000Z | BTCUSD | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 |
+| 2021-12-01T23:53:41.000000Z | BTCUSD | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 |
+
+When the `string` value is not a valid UUID, the entire line will be rejected.
+
+## Timestamp
+
+These values correspond to QuestDB type `timestamp`. Timestamp values are epoch
+`microseconds` suffixed with `t`. In this example we're populating a
+_non-designated_ timestamp field `ts1`:
+
+```shell
+tracking,obj=VLCC\ STEPHANIE gh="9v1s8hm7wpkssv1h",ts1=10000t 1000000000\n
+```
+
+It is possible to populate the _designated_ timestamp using `columnset`, although
+this is not recommended. Let's see how this works in practice.
Assuming the following table:
+
+```questdb-sql
+CREATE TABLE tracking (loc SYMBOL, ts TIMESTAMP) TIMESTAMP(ts) PARTITION BY DAY;
+```
+
+When we send:
+
+```shell title="Sending mixed designated timestamp values"
+tracking,loc=north ts=2000000000t 1000000000\n
+tracking,loc=south ts=3000000000t\n
+```
+
+The result: the `columnset` value always wins:
+
+| loc   | ts         |
+| :---- | :--------- |
+| north | 2000000000 |
+| south | 3000000000 |
diff --git a/docs/reference/api/ilp/overview.md b/docs/reference/api/ilp/overview.md
new file mode 100644
index 00000000..656575ff
--- /dev/null
+++ b/docs/reference/api/ilp/overview.md
@@ -0,0 +1,437 @@
+---
+title: InfluxDB Line Protocol Overview
+sidebar_label: Overview
+description: InfluxDB line protocol reference documentation.
+---
+
+QuestDB implements the
+[InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.8/write_protocols/line_protocol_tutorial/)
+to ingest data. QuestDB can listen for line protocol packets over
+[TCP](/docs/reference/api/ilp/tcp-receiver/).
+
+This page aims to provide examples for QuestDB experts setting up TCP without
+any client libraries, or for those looking to implement a new client library
+themselves.
+
+:::tip
+
+For general QuestDB users, client libraries are available for a number of
+languages:
+[InfluxDB Line Protocol client libraries](/docs/reference/clients/overview/).
+
+:::
+
+## TCP receiver overview
+
+The TCP receiver is a high-throughput ingestion-only API for QuestDB. Here are
+some key facts about the service:
+
+- ingestion only, there is no query capability
+- accepts plain text input in the form of InfluxDB Line Protocol
+- implicit transactions, batching
+- supports automatic table and column creation
+- multi-threaded, non-blocking
+- supports authentication
+- encryption requires an optional external reverse-proxy
+
+By default, QuestDB listens over TCP on `0.0.0.0:9009`. The receiver consists of
+two thread pools, which is an important design feature to be aware of when
+configuring the receiver for maximum performance. The `io worker` threads are
+responsible for parsing text input. The `writer` threads are responsible for
+persisting data in tables. We will talk more about these in the
+[capacity planning](#capacity-planning) section.
+
+## Authentication
+
+Although the original protocol does not support it, we have added authentication
+over TCP. This works by using an
+[elliptic curve P-256](https://en.wikipedia.org/wiki/Elliptic-curve_cryptography)
+JSON Web Token (JWT) to sign a server challenge. Details of authentication over
+InfluxDB Line Protocol can be found in the
+[authentication documentation](/docs/reference/api/ilp/authenticate/).
+
+## Configuration reference
+
+The TCP receiver configuration can be completely customized using
+[configuration keys](/docs/reference/configuration/#influxdb-line-protocol). You
+can use this to configure the thread pools, buffer and queue sizes, receiver IP
+address and port, load balancing, etc.
+
+## Usage
+
+This section provides usage information and details for data ingestion via
+InfluxDB Line Protocol.
+
+We provide examples in a number of programming languages. See our
+[Insert Data page](/docs/develop/insert-data/#influxdb-line-protocol) for code
+snippets.
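+
+As a rough illustration of the wire format described below — a minimal sketch
+rather than a recommended client, assuming a local QuestDB instance listening on
+the default port `9009` with authentication disabled — a single line can be sent
+over a plain TCP socket like this:
+
+```python
+# Minimal sketch: send one ILP line over raw TCP.
+# Assumes QuestDB on localhost:9009, default configuration, no authentication.
+import socket
+import time
+
+# One line: table, symbolset, columnset, trailing designated timestamp (ns), newline.
+line = f"readings,city=London,make=Omron temperature=23.5,humidity=0.343 {time.time_ns()}\n"
+
+with socket.create_connection(("localhost", 9009)) as sock:
+    sock.sendall(line.encode("utf-8"))
+```
+
+Client libraries add buffering, error handling and authentication on top of this
+primitive.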
+
+### Syntax
+
+```shell
+table_name,symbolset columnset timestamp\n
+```
+
+| Element      | Definition                                                                                                                                                                  |
+| :----------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `table_name` | Name of the table where QuestDB will write data.                                                                                                                            |
+| `symbolset`  | A set of comma-separated `name=value` pairs that will be parsed as symbol columns.                                                                                          |
+| `columnset`  | A set of comma-separated `name=value` pairs that will be parsed as non-symbol columns.                                                                                      |
+| `timestamp`  | UNIX timestamp. The default unit is nanosecond and is configurable via `line.tcp.timestamp`. The value will be truncated to microsecond resolution when parsed by QuestDB.  |
+
+`name` in the `name=value` pair always corresponds to `column name` in the
+table.
+
+:::note
+
+Each InfluxDB Line Protocol message has to end with a new line `\n` character.
+
+:::
+
+### Behavior
+
+- When the `table_name` does not correspond to an existing table, QuestDB will
+  create the table on the fly using the name provided. Column types will be
+  automatically recognized and assigned based on the data.
+- The `timestamp` column is automatically created as
+  [designated timestamp](/docs/concept/designated-timestamp/) with the
+  [partition strategy](/docs/concept/partitions/) set to `DAY`. Alternatively,
+  use [CREATE TABLE](/docs/reference/sql/create-table/) to create the table with
+  a different partition strategy before ingestion.
+- When the timestamp is empty, QuestDB will use the server timestamp.
+
+### Difference from InfluxDB
+
+The QuestDB TCP receiver uses InfluxDB Line Protocol as both the serialization
+and the transport format. InfluxDB, on the other hand, uses HTTP as the transport
+and InfluxDB Line Protocol as the serialization format. For this reason the
+existing InfluxDB client libraries will not work with QuestDB.
+
+### Generic example
+
+Let's assume the following data:
+
+| timestamp           | city    | temperature | humidity | make      |
+| :------------------ | :------ | :---------- | :------- | :-------- |
+| 1465839830100400000 | London  | 23.5        | 0.343    | Omron     |
+| 1465839830100600000 | Bristol | 23.2        | 0.443    | Honeywell |
+| 1465839830100700000 | London  | 23.6        | 0.358    | Omron     |
+
+The line protocol syntax for that table is:
+
+```shell
+readings,city=London,make=Omron temperature=23.5,humidity=0.343 1465839830100400000\n
+readings,city=Bristol,make=Honeywell temperature=23.2,humidity=0.443 1465839830100600000\n
+readings,city=London,make=Omron temperature=23.6,humidity=0.358 1465839830100700000\n
+```
+
+This would create a table similar to the following SQL statement and populate it.
+
+```questdb-sql
+CREATE TABLE readings (
+  timestamp TIMESTAMP,
+  city SYMBOL,
+  temperature DOUBLE,
+  humidity DOUBLE,
+  make SYMBOL
+) TIMESTAMP(timestamp) PARTITION BY DAY;
+```
+
+### Designated timestamp
+
+The designated timestamp is the trailing value of an InfluxDB Line Protocol
+message. It is optional, and when present, is a timestamp in epoch nanoseconds.
+When the timestamp is omitted, the server will insert each message using the
+system clock as the row timestamp. See `cairo.timestamp.locale` and
+`line.tcp.timestamp` [configuration options](/docs/reference/configuration/).
+
+:::caution
+
+- While
+  [`columnset` timestamp type units](/docs/reference/api/ilp/columnset-types/#timestamp)
+  are microseconds, the designated timestamp units are nanoseconds by default,
+  and can be overridden via the `line.tcp.timestamp` configuration property.
+ +- The native timestamp format used by QuestDB is a Unix timestamp in microsecond + resolution; timestamps in nanoseconds will be parsed and truncated to + microseconds. + +::: + +```shell title="Example of InfluxDB Line Protocol message with desginated timestamp value" +tracking,loc=north val=200i 1000000000\n +``` + +```shell title="Example of InfluxDB Line Protocol message sans timestamp" +tracking,loc=north val=200i\n +``` + +:::note + +We recommend populating designated timestamp via trailing value syntax above. + +::: + +It is also possible to populate designated timestamp via `columnset`. Please see +[mixed timestamp](/docs/reference/api/ilp/columnset-types/#timestamp) reference. + +### Irregularly-structured data + +InfluxDB line protocol makes it possible to send data under different shapes. +Each new entry may contain certain tags or fields, and others not. QuestDB +supports on-the-fly data structure changes with minimal overhead. Whilst the +example just above highlights structured data, it is possible for InfluxDB line +protocol users to send data as follows: + +```shell +readings,city=London temperature=23.2 1465839830100400000\n +readings,city=London temperature=23.6 1465839830100700000\n +readings,make=Honeywell temperature=23.2,humidity=0.443 1465839830100800000\n +``` + +This would result in the following table: + +| timestamp | city | temperature | humidity | make | +| :------------------ | :----- | :---------- | :------- | :-------- | +| 1465839830100400000 | London | 23.5 | NULL | NULL | +| 1465839830100700000 | London | 23.6 | NULL | NULL | +| 1465839830100800000 | NULL | 23.2 | 0.358 | Honeywell | + +:::tip + +Whilst we offer this function for flexibility, we recommend that users try to +minimize structural changes to maintain operational simplicity. + +::: + +### Duplicate column names + +If line contains duplicate column names, the value stored in the table will be +that from the first `name=value` pair on each line. For example: + +```shell +trade,ticker=USD price=30,price=60 1638202821000000000\n +``` + +Price `30` is stored, `60` is ignored. + +### Name restrictions + +Both table name and column names are allowed to have spaces ` `. These spaces +have to be escaped with `\`. For example both of these are valid lines. + +```shell +trade\ table,ticker=USD price=30,details="Latest price" 1638202821000000000\n +``` + +```shell +trade,symbol\ ticker=USD price=30,details="Latest price" 1638202821000000000\n +``` + +Table and column names must not contain any of the forbidden characters: +`\n`,`\r`,`?`,`,`,`:`,`"`,`'`,`\`,`/`,`\0`,`)`,`(`,`+`,`*`,`~` and `%`. + +Additionally, table name must not start or end with the `.` character. Column +name must not contain `.` and `-`. + +### Symbolset + +Area of the message that contains comma-separated set of `name=value` pairs for +symbol columns. For example in a message like this: + +```shell +trade,ticker=BTCUSD,venue=coinbase price=30,price=60 1638202821000000000\n +``` + +`symbolset` is `ticker=BTCUSD,venue=coinbase`. Please note the mandatory space +between `symbolset` and `columnset`. Naming rules for columns are subject to +[duplicate rules](#duplicate-column-names) and +[name restrictions](#name-restrictions). + +### Symbolset values + +`symbolset` values are always interpreted as [SYMBOL](/docs/concept/symbol/). +Parser takes values literally so please beware of accidentally using high +cardinality types such as `9092i` or `1.245667`. This will result in a +significant performance loss due to large mapping tables. 
+ +`symbolset` values are not quoted. They are allowed to have special characters, +such as ` ` (space), `=`, `,`, `\n`, `\r` and `\`, which must be escaped with a +`\`. Example: + +```shell +trade,ticker=BTC\\USD\,All,venue=coin\ base price=30 1638202821000000000\n +``` + +Whenever `symbolset` column does not exist, it will be added on-the-fly with +type `SYMBOL`. On other hand when the column does exist, it is expected to be of +`SYMBOL` type, otherwise the line is rejected. + +### Columnset + +Area of the message that contains comma-separated set of `name=value` pairs for +non-symbol columns. For example in a message like this: + +```shell +trade,ticker=BTCUSD priceLow=30,priceHigh=60 1638202821000000000\n +``` + +`columnset` is `priceLow=30,priceHigh=60`. Naming rules for columns are subject +to [duplicate rules](#duplicate-column-names) and +[name restrictions](#name-restrictions). + +### Columnset values + +`columnset` supports several values types, which are used to either derive type +of new column or mapping strategy when column already exists. These types are +limited by existing InfluxDB Line Protocol specification. Wider QuestDB type +system is available by creating table via SQL upfront. The following are +supported value types: +[Integer](/docs/reference/api/ilp/columnset-types/#integer), +[Long256](/docs/reference/api/ilp/columnset-types/#long256), +[Float](/docs/reference/api/ilp/columnset-types/#float), +[String](/docs/reference/api/ilp/columnset-types/#string) and +[Timestamp](/docs/reference/api/ilp/columnset-types/#timestamp) + +### Inserting NULL values + +To insert a NULL value, skip the column (or symbol) for that row. + +For example: + +```text +table1 a=10.5 1647357688714369403 +table1 b=1.25 1647357698714369403 +``` + +Will insert as: + +| a | b | timestamp | +| :----- | :----- | --------------------------- | +| 10.5 | _NULL_ | 2022-03-15T15:21:28.714369Z | +| _NULL_ | 1.25 | 2022-03-15T15:21:38.714369Z | + +### InfluxDB Line Protocol Datatypes and Casts + +#### Strings vs Symbols + +Strings may be recorded as either the `STRING` type or the `SYMBOL` type. + +Inspecting a sample message we can see how a space `' '` separator splits +`SYMBOL` columns to the left from the rest of the columns. + +```text +table_name,col1=symbol_val1,col2=symbol_val2 col3="string val",col4=10.5 + ┬ + ╰───────── separator +``` + +In this example, columns `col1` and `col2` are strings written to the database +as `SYMBOL`s, whilst `col3` is written out as a `STRING`. + +`SYMBOL`s are strings which are automatically +[interned](https://en.wikipedia.org/wiki/String_interning) by the database on a +per-column basis. You should use this type if you expect the string to be +re-used over and over, such as is common with identifiers. + +For one-off strings use `STRING` columns which aren't interned. + +#### Casts + +QuestDB types are a superset of those supported by InfluxDB Line Protocol. This +means that when sending data you should be aware of the performed conversions. + +See: + +- [QuestDB Types in SQL](/docs/reference/sql/datatypes/) +- [InfluxDB Line Protocol types and cast conversion tables](/docs/reference/api/ilp/columnset-types/) + +### Constructing well-formed messages + +Different library implementations will perform different degrees of content +validation upfront before sending messages out. 
To avoid encountering issues, +follow these guidelines: + +- **All strings must be UTF-8 encoded.** + +- **Columns should only appear once per row.** + +- **Symbol columns must be written out before other columns.** + +- **Table and column names can't have invalid characters.** These should not + contain `?`, `.`,`,`, `'`, `"`, `\`, `/`, `:`, `(`, `)`, `+`, `-`, `*`, `%`, + `~`,`' '` (space), `\0` (nul terminator), + [ZERO WIDTH NO-BREAK SPACE](https://unicode-explorer.com/c/FEFF). + +- **Write timestamp column via designated API**, or at the end of the message if + you are using raw sockets. If you have multiple timestamp columns write + additional ones as column values. + +- **Don't change column type between rows.** + +### Error handling + +QuestDB will always log any InfluxDB Line Protocol errors in its +[server logs](/docs/concept/root-directory-structure/#log-directory). + +It is recommended that sending applications reuse TCP connections. If QuestDB +receives an invalid message, it will discard invalid lines, produce an error +message in the logs and forcibly _disconnect_ the sender to prevent further data +loss. + +Data may be discarded because of: + +- missing new line characters at the end of messages +- an invalid data format such as unescaped special characters +- invalid column / table name characters +- schema mismatch with existing tables +- message size overflows on the input buffer +- system errors such as no space left on the disk + +Detecting malformed input can be achieved through QuestDB logs by searching for +`LineTcpMeasurementScheduler` and `LineTcpConnectionContext`, for example: + +```bash +2022-02-03T11:01:51.007235Z I i.q.c.l.t.LineTcpMeasurementScheduler could not create table [tableName=trades, ex=`column name contains invalid characters [colName=trade_%]`, errno=0] +``` + +The following input is tolerated by QuestDB: + +- a column is specified twice or more on the same line, QuestDB will pick the + first occurrence and ignore the rest +- missing columns, their value will be defaulted to `null`/`0.0`/`false` + depending on the type of the column +- missing designated timestamp, the current server time will be used to generate + the timestamp +- the timestamp is specified as a column instead of appending it to the end of + the line +- timestamp appears as a column and is also present at the end of the line, the + value sent as a field will be used + +With sufficient client-side validation, the lack of errors to the client and +confirmation isn't necessarily a concern: QuestDB will log out any issues and +disconnect on error. The database will process any valid lines up to that point +and insert rows. + +To resume WAL table ingestion after recovery from errors, see +[ALTER TABLE RESUME WAL](/docs/reference/sql/alter-table-resume-wal/) for more +information. + +### If you don't immediately see data + +If you don't see your inserted data, this is usually down to one of two things: + +- You prepared the messages, but forgot to call `.flush()` or similar in your + client library, so no data was sent. + +- The internal timers and buffers within QuestDB did not commit the data yet. + For development (and development only), you may want to tweak configuration + settings to commit data more frequently. + ```ini title=server.conf + cairo.max.uncommitted.rows=1 + ``` + Refer to + [InfluxDB Line Protocol's commit strategy](/docs/reference/api/ilp/tcp-receiver/#commit-strategy) + documentation for more on these configuration settings. 
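+
+As a final sanity check — assuming the HTTP endpoint is enabled on the default
+port `9000` and that you ingested into a hypothetical `readings` table — you can
+confirm that rows have landed by running a quick count query over the
+[REST API](/docs/reference/api/rest/):
+
+```python
+# Quick check sketch: count ingested rows via the REST /exec endpoint.
+# Assumes the REST API on localhost:9000 and a hypothetical "readings" table.
+import json
+import urllib.parse
+import urllib.request
+
+params = urllib.parse.urlencode({"query": "SELECT count() FROM readings;"})
+with urllib.request.urlopen(f"http://localhost:9000/exec?{params}") as response:
+    print(json.loads(response.read()))
+```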
diff --git a/docs/reference/api/ilp/tcp-receiver.md b/docs/reference/api/ilp/tcp-receiver.md new file mode 100644 index 00000000..cf18a22e --- /dev/null +++ b/docs/reference/api/ilp/tcp-receiver.md @@ -0,0 +1,44 @@ +--- +title: InfluxDB Line Protocol TCP Receiver +sidebar_label: TCP Receiver +description: InfluxDB line protocol TCP receiver reference documentation. +--- + +## Capacity planning + +TCP receiver makes use of 3 logical thread pools: + +- I/O worker pool - `line.tcp.io.worker.count`, threads responsible for handling + incoming TCP connections and parsing received InfluxDB Line Protocol messages +- writer pool - `line.tcp.writer.worker.count`, threads responsible for table + writes +- shared pool - `shared.worker.count`, threads responsible for handling out-of-order data + +Depending on the number of concurrent TCP connections `io worker pool` size +might need to be adjusted. The ideal ratio is `1:1` - a thread per connection. +In less busy environments it is possible for single `io worker` thread to handle +multiple connections simultaneously. We recommend starting with conservative +ratio, measure and increase the ratio up to `1:1`. More threads than connections +will be wasting server CPU. + +Another consideration is the number of tables updated concurrently. +`writer pool` should be tuned to increase concurrency. `writer` threads can also +handle multiple tables concurrently. `1:1` ratio is the maximum required ratio +between `writer` threads and tables. If `1:1` ratio is not an option, avoid +writing to all tables from each connection. Instead, group connections and +tables. For example, if there are 10 tables, 8 TCP connections and `writer pool` +size is set to 2, 4 TCP connections may be used to write into tables 1-5, while +4 connections may write into tables 6-10. + +:::note + +Sending updates for multiple tables from a single TCP connection might be +inefficient. Consider using multiple connections to improve performance. If a +single connection is unavoidable, keep `writer pool` size set to 1 for optimal +CPU resource utilization. + +::: + +When ingesting data out of order (O3) `shared pool` accelerates O3 tasks. It is +also responsible for SQL execution. `shared pool` size should be set to use the +remaining available CPU cores. \ No newline at end of file diff --git a/docs/reference/api/ilp/udp-receiver.md b/docs/reference/api/ilp/udp-receiver.md new file mode 100644 index 00000000..93a2fa17 --- /dev/null +++ b/docs/reference/api/ilp/udp-receiver.md @@ -0,0 +1,50 @@ +--- +title: InfluxDB Line Protocol UDP Receiver +sidebar_label: UDP Receiver +description: InfluxDB line protocol UDP receiver reference documentation. +--- + +:::note + +The UDP receiver is deprecated since QuestDB version 6.5.2. We recommend the +[TCP receiver](/docs/reference/api/ilp/overview/) instead. + +::: + +The UDP receiver can handle both single and multi row write requests. It is +currently single-threaded, and performs both network I/O and write jobs out of +one thread. The UDP worker thread can work either on its own thread or use the +common thread pool. It supports both multicast and unicast. + +## Overview + +By default, QuestDB listens for `multicast` line protocol packets over UDP on +`232.1.2.3:9009`. If you are running QuestDB with Docker, you will need to +publish the port `9009` using `-p 9009:9009` and publish multicast packets with +TTL of at least 2. This port can be customized, and you can also configure +QuestDB to listen for `unicast`. 
+ +The UDP receiver is disabled by default. To enable it, change the +`line.udp.enabled` setting in the +[server configuration](/docs/reference/configuration/). + +## Commit strategy + +Uncommitted rows are committed either: + +- after receiving a number of continuous messages equal to + `line.udp.commit.rate` or +- when UDP receiver has idle time, i.e. ingestion slows down or completely + stops. + +## Configuration + +The UDP receiver configuration can be completely customized using +[configuration keys](/docs/reference/configuration/#udp-specific-settings). You +can use this to configure the IP address and port the receiver binds to, commit +rates, buffer size, whether it should run on a separate thread etc. + +## Examples + +Find an example of how to use this in the +[InfluxDB sender library section](/docs/reference/api/java-embedded/#influxdb-sender-library). diff --git a/docs/reference/api/java-embedded.md b/docs/reference/api/java-embedded.md new file mode 100644 index 00000000..9f3db846 --- /dev/null +++ b/docs/reference/api/java-embedded.md @@ -0,0 +1,479 @@ +--- +title: Java (embedded) +description: Java embedded API reference documentation. +--- + +import CodeBlock from "@theme/CodeBlock" +import InterpolateReleaseData from "../../../src/components/InterpolateReleaseData" + +QuestDB is written in Java and can be used as any other Java library. Moreover, +it is a single JAR with no additional dependencies. + +To include QuestDB in your project, use the following: + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + + + + + + { + return ( + + {` + + org.questdb + questdb + ${release.name} + + `} + + ) + }} +/> + + { + return ( + + {` + + org.questdb + questdb + ${release.name}-jdk8 + + `} + + ) + }} +/> + + + + + + { + return ( + + implementation 'org.questdb:questdb:{release.name}' + + ) + }} +/> + + { + return ( + + implementation 'org.questdb:questdb:{release.name}-jdk8' + + ) + }} +/> + + + + + +## Writing data + +This section provides example codes to write data to WAL and non-WAL tables. See +[Write Ahead Log](/docs/concept/write-ahead-log) for details about the +differences between WAL and non-WAL tables. + +The following writers are available for data ingestion: + +- `WalWriter` for WAL tables +- `TableWriter` for non-WAL tables +- `TableWriterAPI` for both WAL and non-WAL tables as it is an interface for + `WalWriter` and `Table Writer` + +### Writing data using `WalWriter` + +The `WalWriter` facilitates table writes to WAL tables. To successfully create +an instance of `WalWriter`, the table must already exist. 
+ +```java title="Example WalWriter" +final CairoConfiguration configuration = new DefaultCairoConfiguration("data_dir"); +try (CairoEngine engine = new CairoEngine(configuration)) { + final SqlExecutionContext ctx = new SqlExecutionContextImpl(engine, 1) + .with(AllowAllSecurityContext.INSTANCE, null); + engine.ddl("CREATE TABLE testTable (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", ctx); + + // write data into WAL + final TableToken tableToken = engine.getTableTokenIfExists("testTable"); + try (WalWriter writer = engine.getWalWriter(tableToken)) { + for (int i = 0; i < 3; i++) { + TableWriter.Row row = writer.newRow(Os.currentTimeMicros()); + row.putInt(0, 123); + row.putByte(1, (byte) 1111); + row.putShort(2, (short) 222); + row.putLong(3, 333); + row.putFloat(4, 4.44f); + row.putDouble(5, 5.55); + row.putDate(6, System.currentTimeMillis()); + row.putSym(7, "xyz"); + row.putStr(8, "abc"); + row.putBool(9, true); + row.putGeoHash(10, GeoHashes.fromString("u33dr01d", 0, 8)); + row.append(); + } + writer.commit(); + } + + // apply WAL to the table + try (ApplyWal2TableJob walApplyJob = new ApplyWal2TableJob(engine, 1, 1)) { + while (walApplyJob.run(0)) ; + } +} +``` + +### Writing data using `TableWriter` + +Non-WAL tables do not allow concurrent writes via multiple interfaces. To +successfully create an instance, the table must: + +- Already exist +- Have no other open writers against it as the `TableWriter` constructor will + attempt to obtain an exclusive cross-process lock on the table. + +```java title="Example TableWriter" +try (CairoEngine engine = new CairoEngine(configuration)) { + final SqlExecutionContext ctx = new SqlExecutionContextImpl(engine, 1) + .with(AllowAllSecurityContext.INSTANCE, null); + engine.ddl("CREATE TABLE testTable (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY BYPASS WAL", ctx); + + // write data into WAL + final TableToken tableToken = engine.getTableTokenIfExists("testTable"); + try (TableWriter writer = engine.getWriter(tableToken, "test")) { + for (int i = 0; i < 3; i++) { + TableWriter.Row row = writer.newRow(Os.currentTimeMicros()); + row.putInt(0, 123); + row.putByte(1, (byte) 1111); + row.putShort(2, (short) 222); + row.putLong(3, 333); + row.putFloat(4, 4.44f); + row.putDouble(5, 5.55); + row.putDate(6, System.currentTimeMillis()); + row.putSym(7, "xyz"); + row.putStr(8, "abc"); + row.putBool(9, true); + row.putGeoHash(10, GeoHashes.fromString("u33dr01d", 0, 8)); + row.append(); + } + writer.commit(); + } +} +``` + +### Writing data using `TableWriterAPI` + +`TableWriterAPI` allows writing to both WAL and non-WAL tables by returning the +suitable `Writer` based on the table configurations. 
The table must already +exist: + +```java title="Example TableWriterAPI" +try (CairoEngine engine = new CairoEngine(configuration)) { + final SqlExecutionContext ctx = new SqlExecutionContextImpl(engine, 1) + .with(AllowAllSecurityContext.INSTANCE, null); + engine.ddl("CREATE TABLE testTable (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", ctx); + + // write data into WAL + final TableToken tableToken = engine.getTableTokenIfExists("testTable"); + try (TableWriterAPI writer = engine.getTableWriterAPI(tableToken, "test")) { + for (int i = 0; i < 3; i++) { + TableWriter.Row row = writer.newRow(Os.currentTimeMicros()); + row.putInt(0, 123); + row.putByte(1, (byte) 1111); + row.putShort(2, (short) 222); + row.putLong(3, 333); + row.putFloat(4, 4.44f); + row.putDouble(5, 5.55); + row.putDate(6, System.currentTimeMillis()); + row.putSym(7, "xyz"); + row.putStr(8, "abc"); + row.putBool(9, true); + row.putGeoHash(10, GeoHashes.fromString("u33dr01d", 0, 8)); + row.append(); + } + writer.commit(); + } + + // apply WAL to the table + try (ApplyWal2TableJob walApplyJob = new ApplyWal2TableJob(engine, 1, 1)) { + while (walApplyJob.run(0)) ; + } +} +``` + +### Detailed steps + +#### Configure Cairo engine + +`CairoEngine` is the resource manager for the embedded QuestDB. Its main +function is to facilitate concurrent access to pools of `TableReader` and +suitable writer instances. + +```java title="New CairoEngine instance" +final CairoConfiguration configuration = new DefaultCairoConfiguration("data_dir"); +try (CairoEngine engine = new CairoEngine(configuration)) { +``` + +A typical application will need only one instance of `CairoEngine`. This +instance will start when the application starts and shuts down when the +application closes. You will need to close `CairoEngine` gracefully when the +application stops. + +QuestDB provides a default configuration which only requires the data directory +to be specified. For a more advanced usage, the whole `CairoConfiguration` +interface can be overridden. + +#### Create an instance of SqlExecutionContext + +Execution context is a conduit for passing SQL execution artifacts to the +execution site. This instance is not thread-safe and it must not be shared +between threads. + +```java title="Example of execution context" +final SqlExecutionContext ctx = new SqlExecutionContextImpl(engine, 1) + .with(AllowAllSecurityContext.INSTANCE, null); +``` + +The second argument of the constructor is the number of threads that will be +helping to execute SQL statements. Unless you are building another QuestDB +server, this value should always be 1. + +#### SqlCompiler and blank table + +Before we start writing data using a writer, the target table has to exist. 
+There are several ways to create a new table and we recommend using +`CairoEngine`: + +```java title="Creating new table" +// Create a non-WAL table: + +engine.ddl("CREATE TABLE abc (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY BYPASS WAL", ctx); + +// Create a WAL table: + +engine.ddl("CREATE TABLE abc (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", ctx); +``` + +As you will be able to see below, the table field types and indexes must match +the code that is populating the table. + +Another way to create a table is to obtain a `SqlCompiler` from the engine and +use it to run the DDL statement: + +```java title="Creating new table with a SqlCompiler" +try (SqlCompiler compiler = engine.getSqlCompiler()) { + engine.ddl(compiler, "CREATE TABLE abc (" + + "a int, b byte, c short, d long, e float, g double, h date, " + + "i symbol, j string, k boolean, l geohash(8c), ts timestamp" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", ctx, null); +} +``` + +This way the obtained `SqlCompiler` can be reused to run other SQL statements. + +Note that `CairoEngine` has a number of helper methods for different types of +SQL statements. These are: + +- `CairoEngine#ddl()` - meant to execute CREATE TABLE and ALTER statements. +- `CairoEngine#insert()` - meant to execute INSERT statements. +- `CairoEngine#drop()` - meant to execute DROP TABLE statements. +- `CairoEngine#select()` - meant to execute SELECT queries. + +#### A new writer instance + +We use `CairoEngine` to obtain an instance of the writer. This will enable +reusing this writer instance later, when we use the same method of creating +table writer again. + +```java title="New table writer instance for a non-WAL table" +final TableToken tableToken = engine.getTableTokenIfExists("abc"); +try (TableWriter writer = engine.getWriter(tableToken, "test")) { +``` + +```java title="New table writer instance for a WAL table" +final TableToken tableToken = engine.getTableTokenIfExists("abc"); +try (WalWriter writer = engine.getWalWriter(tableToken)) { +``` + +```java title="New table writer instance for either a WAL or non-WAL table" +final TableToken tableToken = engine.getTableTokenIfExists("abc"); +try (TableWriterAPI writer = engine.getTableWriterAPI(tableToken, "test")) { +``` + +`TableWriter` - A non-WAL table uses `TableWriter`, which will hold an exclusive +lock on table `abc` until it is closed and `testing` will be used as the lock +reason. This lock is both intra- and inter-process. If you have two Java +applications accessing the same table only one will succeed at one time. + +`WalWriter` - A WAL table uses `WalWriter` to enable concurrent data ingestion, +data modification, and schema changes, as the table is not locked. + +`TableWriterAPI` - Both WAL and Non-WAL tables can use `TableWriterAPI`. It is +an interface implemented by both writers. + +Note the all of the writer classes are not thread-safe, so they should not be +used concurrently. + +#### Create a new row + +```java title="Creating new table row with timestamp" +TableWriter.Row row = writer.newRow(Os.currentTimeMicros()); +``` + +Although this operation semantically looks like a new object creation, the row +instance is actually being re-used under the hood. A timestamp is necessary to +determine a partition for the new row. 
Its value has to either increase or
stay the same as that of the last row. When the table is not partitioned and does
not have a designated timestamp column, the timestamp value can be omitted.
+
+```java title="Creating new table row without timestamp"
+TableWriter.Row row = writer.newRow();
+```
+
+#### Populate columns
+
+There are `put*` methods for every supported data type. Columns are updated by
+an index as opposed to by name.
+
+```java title="Populating table column"
+row.putLong(3, 333);
+```
+
+Column update order is not important and updates can be sparse. All unset
+columns will default to NULL values.
+
+#### Append a row
+
+The following method call appends the row:
+
+```java title="Appending a new row"
+row.append();
+```
+
+Appended rows are not visible to readers until they are committed. An unneeded
+row can also be canceled if required.
+
+```java title="Cancelling half-populated row"
+row.cancel();
+```
+
+A pending row is automatically cancelled when `writer.newRow()` is called.
+Consider the following scenario:
+
+```java
+TableWriter.Row row = writer.newRow(Os.currentTimeMicros());
+row.putInt(0, 123);
+row.putByte(1, (byte) 1111);
+row.putShort(2, (short) 222);
+row.putLong(3, 333);
+row = writer.newRow(Os.currentTimeMicros());
+...
+```
+
+The second `newRow()` call cancels all updates made to the row since the last
+`append()`.
+
+#### Commit changes
+
+To make changes visible to readers, the writer has to commit. `writer.commit()`
+does this job. Unlike traditional SQL databases, the size of the transaction does
+not matter. You can commit anything between 1 and 1 trillion rows. We also spent
+considerable effort to ensure `commit()` is lightweight. You can drip one row at
+a time in applications that require such behaviour.
+
+Note that WAL writer commits aren't immediately visible to readers. The
+committed data becomes visible once it has been applied by the
+`ApplyWal2TableJob` job.
+
+## Executing queries
+
+We provide a single API for executing all kinds of SQL queries. The example
+below focuses on SELECT and how to fetch data from a cursor.
+
+```java title="Compiling SQL"
+final CairoConfiguration configuration = new DefaultCairoConfiguration("data_dir");
+try (CairoEngine engine = new CairoEngine(configuration)) {
+    final SqlExecutionContext ctx = new SqlExecutionContextImpl(engine, 1)
+            .with(AllowAllSecurityContext.INSTANCE, null);
+    // the factory is the reusable, prepared form of the query
+    try (RecordCursorFactory factory = engine.select("SELECT * FROM abc", ctx)) {
+        // the cursor iterates over the result set produced by the factory
+        try (RecordCursor cursor = factory.getCursor(ctx)) {
+            final Record record = cursor.getRecord();
+            while (cursor.hasNext()) {
+                // access 'record' instance for field values
+            }
+        }
+    }
+}
+```
+
+### Detailed steps
+
+The steps to set up `CairoEngine`, the execution context and `SqlCompiler` are
+the same as those explained in the sections above. We will skip them here and
+focus on fetching data.
+
+Note that all of the classes described below are not thread-safe, so they should
+not be used concurrently.
+
+#### RecordCursorFactory
+
+You can think of `RecordCursorFactory` as a prepared statement. This is the
+entity that holds the SQL execution plan with all of the execution artefacts.
+Factories are designed to be reused and we strongly encourage caching them. You
+also need to make sure that you close factories explicitly when you no longer
+need them. Failing to do so can cause memory and/or other resource leaks.
+
+#### RecordCursor
+
+This instance allows iterating over the dataset produced by SQL.
Cursors are
relatively short-lived and do not imply fetching all the data. Note that you
have to close the cursor as soon as enough data has been fetched; this can
happen at any time.
+
+#### Record
+
+This is the cursor's data access API. The record instance is obtained from the
+cursor outside of the fetch loop.
+
+```java title="Example of fetching data from cursor"
+final Record record = cursor.getRecord();
+while (cursor.hasNext()) {
+    // access 'record' instance for field values
+}
+```
+
+The record does not hold the data. Instead, it is an API to pull data when it is
+needed. The record instance remains the same while the cursor goes over the data,
+making caching of records pointless.
+
+## InfluxDB Line Protocol client library
+
+We have a [Java Client Library](/docs/reference/clients/java_ilp/) to allow fast
+data ingestion.
diff --git a/docs/reference/api/postgres.md b/docs/reference/api/postgres.md
new file mode 100644
index 00000000..59c48551
--- /dev/null
+++ b/docs/reference/api/postgres.md
@@ -0,0 +1,93 @@
+---
+title: Postgres
+description: Postgres compatibility reference documentation.
+---
+
+QuestDB supports the Postgres wire protocol. As a result, QuestDB is capable of
+running most Postgres queries. This means that you can use your favorite
+Postgres client or driver with QuestDB, at no extra cost.
+
+The storage model used by Postgres is fundamentally different from the one used
+by QuestDB. Some features that exist for Postgres do not apply to QuestDB.
+
+## Examples
+
+We provide examples in a number of programming languages. See our "develop" docs
+for:
+
+- [Inserting](/docs/develop/insert-data/#postgresql-wire-protocol)
+- [Querying](/docs/develop/query-data/#postgresql-wire-protocol)
+- [Updating](/docs/develop/update-data/#postgres-compatibility)
+
+## Compatibility
+
+### List of supported features
+
+- Querying (all types except `BLOB`)
+- Prepared statements with bind parameters (check for specific libraries
+  [below](/docs/reference/api/postgres/#libraries--programmatic-clients))
+- `INSERT` statements with bind parameters
+- `UPDATE` statements with bind parameters
+- DDL execution
+- Batch inserts with `JDBC`
+- Plain authentication
+
+Examples which demonstrate how to use Postgres clients in a number of different
+languages can be found on the following pages:
+
+- [Insert data](/docs/develop/insert-data/#postgresql-wire-protocol) demonstrates
+  how to use parameterized queries and prepared statements to insert data.
+- [Query data](/docs/develop/query-data/#postgresql-wire-protocol) shows how to
+  run queries against tables.
+- [Update data](/docs/develop/update-data/#postgres-compatibility) shows how to
+  update tables.
+
+### List of supported connection properties
+
+| Name       | Example                    | Description                                                                                                                            |
+| ---------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
+| `database` | qdb                        | Can be set to any value, for example `qdb`; the database name is ignored as QuestDB does not have a database instance name            |
+| `user`     | admin                      | User name configured in `pg.user` or `pg.readonly.user` property in `server.conf`. Default value is `admin`                           |
+| `password` | quest                      | Password from `pg.password` or `pg.readonly.password` property in `server.conf`. Default value is `quest`                             |
+| `options`  | -c statement_timeout=60000 | The only supported option is `statement_timeout`.
It specifies maximum execution time in milliseconds for SELECT or UPDATE statement | + +### List of unsupported features + +- SSL +- Remote file upload (`COPY` from `stdin`) +- `DELETE` statements +- `BLOB` transfer + +## Recommended third party tools + +The following list of third party tools includes drivers, clients or utility +CLIs that our team has tested extensively. Picking an item from it will +guarantee that your code will work with QuestDB. + +We recognize that our community might value some features more than others. This +is why we encourage you to [open an issue on GitHub]({@githubUrl@}/issues) if +you think we are missing something important for your workflow. + +### CLIs + +#### [PSQL](https://www.postgresql.org/docs/current/app-psql.html) `12` + +Support for `SELECT`, `INSERT`, `UPDATE`, `CREATE`, `DROP`, `TRUNCATE`. + +### Libraries / Programmatic clients + +#### [node-postgres](https://node-postgres.com/) (NodeJS) `8.4` + +#### [pq](https://github.com/lib/pq) (Go) `1.8` + +#### [pq](https://www.postgresql.org/docs/12/libpq.html) (C) `12` + +#### [Psycopg](https://www.psycopg.org) (Python) `2.9.3` and `3.1` + +#### [ruby-pg](https://github.com/ged/ruby-pg) (Ruby) `1.4.3` + +#### [pg_connect](https://www.php.net/manual/en/function.pg-connect.php) (PHP) `8.1.0` + +### Drivers + +#### [JDBC](https://jdbc.postgresql.org/) `42.2` diff --git a/docs/reference/api/rest.md b/docs/reference/api/rest.md new file mode 100644 index 00000000..00ddb82c --- /dev/null +++ b/docs/reference/api/rest.md @@ -0,0 +1,512 @@ +--- +title: REST API +sidebar_label: REST +description: REST API reference documentation. +--- + +The QuestDB REST API is based on standard HTTP features and is understood by +off-the-shelf HTTP clients. It provides a simple way to interact with QuestDB +and is compatible with most programming languages. API functions are fully keyed +on the URL and they use query parameters as their arguments. + +The Web Console is the official Web client relying on the REST API. Find out +more in the section [using the Web Console](/docs/develop/web-console/). + +**Available methods** + +- [`/imp`](#imp---import-data) for importing data from `.CSV` files +- [`/exec`](#exec---execute-queries) to execute a SQL statement +- [`/exp`](#exp---export-data) to export data + +## Examples + +We provide examples in a number of programming languages. See our "develop" docs +for: + +- [Inserting](/docs/develop/insert-data/#http-rest-api) +- [Querying](/docs/develop/query-data/#http-rest-api) + +## /imp - Import data + +`/imp` streams tabular text data directly into a table. It supports CSV, TAB and +pipe (`|`) delimited inputs with optional headers. There are no restrictions on +data size. Data types and structures are detected automatically, without +additional configuration. In some cases, additional configuration can be +provided to improve the automatic detection as described in +[user-defined schema](#user-defined-schema). + +:::note + +The structure detection algorithm analyses the chunk in the beginning of the +file and relies on relative uniformity of data. When the first chunk is +non-representative of the rest of the data, automatic imports can yield errors. + +If the data follows a uniform pattern, the number of lines which are analyzed +for schema detection can be reduced to improve performance during uploads using +the `http.text.analysis.max.lines` key. 
Usage of this setting is described in +the +[HTTP server configuration](/docs/reference/configuration/#minimal-http-server) +documentation. + +::: + +### URL parameters + +`/imp` is expecting an HTTP POST request using the `multipart/form-data` +Content-Type with following optional URL parameters which must be URL encoded: + +| Parameter | Required | Default | Description | +| -------------------- | -------- | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `atomicity` | No | `skipCol` | `abort`, `skipRow` or `skipCol`. Behaviour when an error is detected in the data. `abort`: the entire file will be skipped. `skipRow`: the row is skipped. `skipCol`: the column is skipped. | +| `delimiter` | No | | URL encoded delimiter character. When set, import will try to detect the delimiter automatically. Since automatic delimiter detection requires at least two lines (rows) to be present in the file, this parameter may be used to allow single line file import. | +| `fmt` | No | `tabular` | Can be set to `json` to get the response formatted as such. | +| `forceHeader` | No | `false` | `true` or `false`. When `false`, QuestDB will try to infer if the first line of the file is the header line. When set to `true`, QuestDB will expect that line to be the header line. | +| `name` | No | Name of the file | Name of the table to create, [see below](/docs/reference/api/rest/#names). | +| `overwrite` | No | `false` | `true` or `false`. When set to true, any existing data or structure will be overwritten. | +| `partitionBy` | No | `NONE` | See [partitions](/docs/concept/partitions/#properties). | +| `o3MaxLag` | No | | Sets upper limit on the created table to be used for the in-memory out-of-order buffer. Can be also set globally via the `cairo.o3.max.lag` configuration property. | +| `maxUncommittedRows` | No | | Maximum number of uncommitted rows to be set for the created table. When the number of pending rows reaches this parameter on a table, a commit will be issued. Can be also set globally via the `cairo.max.uncommitted.rows` configuration property. | +| `skipLev` | No | `false` | `true` or `false`. Skip “Line Extra Values”, when set to true, the parser will ignore those extra values rather than ignoring entire line. An extra value is something in addition to what is defined by the header. | +| `timestamp` | No | | Name of the column that will be used as a [designated timestamp](/docs/concept/designated-timestamp/). | +| | + +```shell title="Example usage" +curl -F data=@weather.csv \ +'http://localhost:9000/imp?overwrite=true&name=new_table×tamp=ts&partitionBy=MONTH' +``` + +Further example queries with context on the source CSV file contents relative +and the generated tables are provided in the [examples section](#examples) +below. + +### Names + +Table and column names are subject to restrictions, the following list of +characters are automatically removed: + +```plain +[whitespace] +. +? +, +: +\ +/ +\\ +\0 +) +( +_ ++ +- +* +~ +% +``` + +When the header row is missing, column names are generated automatically. + +### Consistency guarantees + +`/imp` benefits from the properties of the QuestDB +[storage model](/docs/concept/storage-model#consistency-and-durability), +although Atomicity and Durability can be relaxed to meet convenience and +performance demands. 
+ +#### Atomicity + +QuestDB is fully insured against any connection problems. If the server detects +closed socket(s), the entire request is rolled back instantly and transparently +for any existing readers. The only time data can be partially imported is when +atomicity is in `relaxed` mode and data cannot be converted to column type. In +this scenario, any "defective" row of data is discarded and `/imp` continues to +stream request data into table. + +#### Consistency + +This property is guaranteed by consistency of append transactions against +QuestDB storage engine. + +#### Isolation + +Data is committed to QuestDB storage engine at end of request. Uncommitted +transactions are not visible to readers. + +#### Durability + +`/imp` streams data from network socket buffer directly into memory mapped +files. At this point data is handed over to the OS and is resilient against +QuestDB internal errors and unlikely but hypothetically possible crashes. This +is default method of appending data and it is chosen for its performance +characteristics. + +### Examples + +#### Automatic schema detection + +The following example uploads a file `ratings.csv` which has the following +contents: + +| ts | visMiles | tempF | dewpF | +| --------------------------- | -------------- | ----- | ----- | +| 2010-01-01T00:00:00.000000Z | 8.8 | 34 | 30 | +| 2010-01-01T00:51:00.000000Z | 9.100000000000 | 34 | 30 | +| 2010-01-01T01:36:00.000000Z | 8.0 | 34 | 30 | +| ... | ... | ... | ... | + +An import can be performed with automatic schema detection with the following +request: + +```shell +curl -F data=@weather.csv 'http://localhost:9000/imp' +``` + +A HTTP status code of `200` will be returned and the response will be: + +```shell ++-------------------------------------------------------------------------------+ +| Location: | weather.csv | Pattern | Locale | Errors | +| Partition by | NONE | | | | +| Timestamp | NONE | | | | ++-------------------------------------------------------------------------------+ +| Rows handled | 49976 | | | | +| Rows imported | 49976 | | | | ++-------------------------------------------------------------------------------+ +| 0 | ts | TIMESTAMP | 0 | +| 1 | visMiles | DOUBLE | 0 | +| 2 | tempF | INT | 0 | +| 3 | dewpF | INT | 0 | ++-------------------------------------------------------------------------------+ +``` + +#### User-defined schema + +To specify the schema of a table, a schema object can be provided: + +```shell +curl \ +-F schema='[{"name":"dewpF", "type": "STRING"}]' \ +-F data=@weather.csv 'http://localhost:9000/imp' +``` + +```shell title="Response" ++------------------------------------------------------------------------------+ +| Location: | weather.csv | Pattern | Locale | Errors | +| Partition by | NONE | | | | +| Timestamp | NONE | | | | ++------------------------------------------------------------------------------+ +| Rows handled | 49976 | | | | +| Rows imported | 49976 | | | | ++------------------------------------------------------------------------------+ +| 0 | ts | TIMESTAMP | 0 | +| 1 | visMiles | DOUBLE | 0 | +| 2 | tempF | INT | 0 | +| 3 | dewpF | STRING | 0 | ++------------------------------------------------------------------------------+ +``` + +**Non-standard timestamp formats** + +Given a file `weather.csv` with the following contents which contains a +timestamp with a non-standard format: + +| ts | visMiles | tempF | dewpF | +| --------------------- | -------------- | ----- | ----- | +| 2010-01-01 - 00:00:00 | 8.8 | 34 | 30 | +| 2010-01-01 - 
00:51:00 | 9.100000000000 | 34 | 30 | +| 2010-01-01 - 01:36:00 | 8.0 | 34 | 30 | +| ... | ... | ... | ... | + +The file can be imported as usual with the following request: + +```shell title="Importing CSV with non-standard timestamp" +curl -F data=@weather.csv 'http://localhost:9000/imp' +``` + +A HTTP status code of `200` will be returned and the import will be successful, +but the timestamp column is detected as a `STRING` type: + +```shell title="Response with timestamp as STRING type" ++-------------------------------------------------------------------------------+ +| Location: | weather.csv | Pattern | Locale | Errors | +| Partition by | NONE | | | | +| Timestamp | NONE | | | | ++-------------------------------------------------------------------------------+ +| Rows handled | 49976 | | | | +| Rows imported | 49976 | | | | ++-------------------------------------------------------------------------------+ +| 0 | ts | STRING | 0 | +| 1 | visMiles | DOUBLE | 0 | +| 2 | tempF | INT | 0 | +| 3 | dewpF | INT | 0 | ++-------------------------------------------------------------------------------+ +``` + +To amend the timestamp column type, this example curl can be used which has a +`schema` JSON object to specify that the `ts` column is of `TIMESTAMP` type with +the pattern `yyyy-MM-dd - HH:mm:ss` + +Additionally, URL parameters are provided: + +- `overwrite=true` to overwrite the existing table +- `timestamp=ts` to specify that the `ts` column is the designated timestamp + column for this table +- `partitionBy=MONTH` to set a + [partitioning strategy](/docs/operations/data-retention/) on the table by + `MONTH` + +```shell title="Providing a user-defined schema" +curl \ +-F schema='[{"name":"ts", "type": "TIMESTAMP", "pattern": "yyyy-MM-dd - HH:mm:ss"}]' \ +-F data=@weather.csv \ +'http://localhost:9000/imp?overwrite=true×tamp=ts&partitionBy=MONTH' +``` + +The HTTP status code will be set to `200` and the response will show `0` errors +parsing the timestamp column: + +```shell ++------------------------------------------------------------------------------+ +| Location: | weather.csv | Pattern | Locale | Errors | +| Partition by | MONTH | | | | +| Timestamp | ts | | | | ++------------------------------------------------------------------------------+ +| Rows handled | 49976 | | | | +| Rows imported | 49976 | | | | ++------------------------------------------------------------------------------+ +| 0 | ts | TIMESTAMP | 0 | +| 1 | visMiles | DOUBLE | 0 | +| 2 | tempF | INT | 0 | +| 3 | dewpF | INT | 0 | ++------------------------------------------------------------------------------+ +``` + +#### JSON response + +If you intend to upload CSV programmatically, it's easier to parse the response +as JSON. Set `fmt=json` query argument on the request. 
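For instance, a CSV upload requesting a JSON response might look like the following, assuming the `weather.csv` file from the earlier examples:

```shell
curl -F data=@weather.csv 'http://localhost:9000/imp?fmt=json'
```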
+ +Here's an example of a successful response: + +```json +{ + "status": "OK", + "location": "example_table", + "rowsRejected": 0, + "rowsImported": 3, + "header": false, + "columns": [ + { "name": "col1", "type": "SYMBOL", "size": 4, "errors": 0 }, + { "name": "col2", "type": "DOUBLE", "size": 8, "errors": 0 }, + { "name": "col3", "type": "BOOLEAN", "size": 1, "errors": 0 } + ] +} +``` + +Here is an example with request-level errors: + +```json +{ + "status": "not enough lines [table=example_table]" +} +``` + +Here is an example with column-level errors due to unsuccessful casts: + +```json +{ + "status": "OK", + "location": "example_table2", + "rowsRejected": 0, + "rowsImported": 3, + "header": false, + "columns": [ + { "name": "col1", "type": "DOUBLE", "size": 8, "errors": 3 }, + { "name": "col2", "type": "SYMBOL", "size": 4, "errors": 0 }, + { "name": "col3", "type": "BOOLEAN", "size": 1, "errors": 0 } + ] +} +``` + +## /exec - Execute queries + +`/exec` compiles and executes the SQL query supplied as a parameter and returns +a JSON response. + +:::note + +The query execution terminates automatically when the socket connection is +closed. + +::: + +### Overview + +#### Parameters + +`/exec` is expecting an HTTP GET request with following query parameters: + +| Parameter | Required | Default | Description | +| --------------- | -------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `count` | No | `false` | `true` or `false`. Counts the number of rows and returns this value. | +| `limit` | No | | Allows limiting the number of rows to return. `limit=10` will return the first 10 rows (equivalent to `limit=1,10`), `limit=10,20` will return row numbers 10 through to 20 inclusive. | +| `nm` | No | `false` | `true` or `false`. Skips the metadata section of the response when set to `true`. | +| `query` | Yes | | URL encoded query text. It can be multi-line. | +| `timings` | No | `false` | `true` or `false`. When set to `true`, QuestDB will also include a `timings` property in the response which gives details about the execution times. | +| `explain` | No | `false` | `true` or `false`. When set to `true`, QuestDB will also include an `explain` property in the response which gives details about the execution plan. | +| `quoteLargeNum` | No | `false` | `true` or `false`. When set to `true`, QuestDB will surround `LONG` type numbers with double quotation marks that will make them parsed as strings. | + +The parameters must be URL encoded. 
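As a quick, illustrative sketch of combining the optional flags above, the following request also asks for execution timings (the `weather` table from the examples below is assumed):

```shell
curl -G \
  --data-urlencode "query=SELECT count() FROM weather;" \
  --data-urlencode "timings=true" \
  http://localhost:9000/exec
```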
+ +#### Headers + +Supported HTTP headers: + +| Header | Required | Description | +| ------------------- | -------- | ------------------------------------------------------------------------- | +| `Statement-Timeout` | No | Query timeout in milliseconds, overrides default timeout from server.conf | + +### Examples + +#### SELECT query example: + +```shell +curl -G \ + --data-urlencode "query=SELECT timestamp, tempF FROM weather LIMIT 2;" \ + --data-urlencode "count=true" \ + http://localhost:9000/exec +``` + +A HTTP status code of `200` is returned with the following response body: + +```json +{ + "query": "SELECT timestamp, tempF FROM weather LIMIT 2;", + "columns": [ + { + "name": "timestamp", + "type": "TIMESTAMP" + }, + { + "name": "tempF", + "type": "INT" + } + ], + "dataset": [ + ["2010-01-01T00:00:00.000000Z", 34], + ["2010-01-01T00:51:00.000000Z", 34] + ], + "count": 2 +} +``` + +SELECT query returns response in the following format: + +```json +{ + "query": string, + "columns": Array<{ "name": string, "type": string }> + "dataset": Array>, + "count": Optional, + "timings": Optional<{ compiler: number, count: number, execute: number }>, + "explain": Optional<{ jitCompiled: boolean }> +} +``` + +You can find the exact list of types in the +[dedicated page](/docs/reference/sql/datatypes/). + +#### UPDATE query example: + +This request executes an update of table `weather` setting 2 minutes query +timeout + +```shell +curl -G \ + -H "Statement-Timeout: 120000" \ + --data-urlencode "query=UPDATE weather SET tempF = tempF + 0.12 WHERE tempF > 60" \ + http://localhost:9000/exec +``` + +A HTTP status code of `200` is returned with the following response body: + +```json +{ + "ddl": "OK", + "updated": 34 +} +``` + +## /exp - Export data + +This endpoint allows you to pass url-encoded queries but the request body is +returned in a tabular form to be saved and reused as opposed to JSON. + +### Overview + +`/exp` is expecting an HTTP GET request with following parameters: + +| Parameter | Required | Description | +| :-------- | :------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `limit` | No | Paging opp parameter. For example, `limit=10,20` will return row numbers 10 through to 20 inclusive and `limit=20` will return first 20 rows, which is equivalent to `limit=0,20`. `limit=-20` will return the last 20 rows. | +| `query` | Yes | URL encoded query text. It can be multi-line. | + +The parameters must be URL encoded. 
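As an illustration of the negative `limit` form described above, the following sketch exports only the last five rows of a table (the table name is assumed from the `/exec` examples):

```shell
curl -G \
  --data-urlencode "query=SELECT * FROM weather;" \
  --data-urlencode "limit=-5" \
  http://localhost:9000/exp
```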
+ +### Examples + +Considering the query: + +```shell +curl -G \ + --data-urlencode "query=SELECT AccidentIndex2, Date, Time FROM 'Accidents0514.csv'" \ + --data-urlencode "limit=5" \ + http://localhost:9000/exp +``` + +A HTTP status code of `200` is returned with the following response body: + +```shell +"AccidentIndex","Date","Time" +200501BS00001,"2005-01-04T00:00:00.000Z",17:42 +200501BS00002,"2005-01-05T00:00:00.000Z",17:36 +200501BS00003,"2005-01-06T00:00:00.000Z",00:15 +200501BS00004,"2005-01-07T00:00:00.000Z",10:35 +200501BS00005,"2005-01-10T00:00:00.000Z",21:13 +``` + +## Error responses + +### Malformed queries + +A successful call to `/exec` or `/exp` which also contains a malformed query +will return response bodies with the following format: + +```json +{ + "query": string, + "error": string, + "position": number +} +``` + +The `position` field is the character number from the beginning of the string +where the error was found. + +Considering the query: + +```shell +curl -G \ + --data-urlencode "query=SELECT * FROM table;" \ + http://localhost:9000/exp +``` + +A HTTP status code of `400` is returned with the following response body: + +```json +{ + "query": "SELECT * FROM table;", + "error": "function, literal or constant is expected", + "position": 8 +} +``` diff --git a/docs/reference/clients/java_ilp.md b/docs/reference/clients/java_ilp.md new file mode 100644 index 00000000..3cceb006 --- /dev/null +++ b/docs/reference/clients/java_ilp.md @@ -0,0 +1,158 @@ +--- +title: Java InfluxDB Line Protocol client +description: Introducing QuestDB Java InfluxDB Line Protocol Client +--- + + + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" +import CodeBlock from "@theme/CodeBlock" +import InterpolateReleaseData from "../../../src/components/InterpolateReleaseData" +import { RemoteRepoExample } from '@theme/RemoteRepoExample' + + + +## Quickstart + +Add a QuestDB as a dependency to your build project: + + + + + + ( + + {` + org.questdb + questdb + ${release.name} +`} + + )} + /> + + + ( + + {`compile group: 'org.questdb', name: 'questdb', version: '${release.name}'`} + + )} + /> + + + + + + +The code bellow creates an instance of a client, configures it to connect to a +QuestDB server running on localhost on a TCP port 9009. Then it sends two rows, +each with one [symbol](/docs/concept/symbol/), long column and one string +column. It instructs the QuestDB server to assign a timestamp by using a local +wall-clock. + + + +## Example with TLS and Authentication enabled + +This sample configures a client to use TLS encryption for a connection to a +QuestDB server. It also instructs the client to authenticate. + + + +This configuration also works with [QuestDB Cloud](https://questdb.io/cloud/). +If you are using a self-managed QuestDB with +[authentication enabled](/docs/reference/api/ilp/authenticate/) then `authToken` +is the `d` portion of a JWK. + +## General usage pattern + +1. Create a client instance via `Sender.builder()`. +2. Use `table(CharSequence)` to select a table for inserting a new row. +3. Use `symbol(CharSequence, CharSequence)` to add all symbols. You must add + symbols before adding other column type. +4. Use the following options to add all the remaining columns: + + - `stringColumn(CharSequence, CharSequence)` + - `longColumn(CharSequence, long)` + - `doubleColumn(CharSequence, double)` + - `boolColumn(CharSequence, boolean)` + - `timestampColumn(CharSequence, long)` + +5. Use `at(long)` or `atNow()` to set a designated timestamp. +6. 
Optionally: You can use `flush()` to send locally buffered data into a + server. +7. Go to the step no. 2 to start a new row. +8. Use `close()` to dispose the Sender after you no longer need it. + +## Designated timestamp considerations + +The following options determine how a +[designated timestamp](/docs/concept/designated-timestamp/) is assigned: + +- `atNow()` automatically assigns a timestamp based on a server wall-clock upon + receiving a row. +- `at(long)` assigns a specific timestamp. + +The code samples above use QuestDB to assign timestamp automatically: `atNow()` +instructs the server to assign a timestamp by using a local wall-clock. To +assign a specific timestamp, use `at(long)` instead of `atNow()`. + +:::note + +QuestDB works best when rows are ingested in chronological order. This means +rows with older timestamps are ingested before rows with newer timestamps. + +For QuestDB 6.6 and later versions, the out-of-order data ingestion has been optimized and automated. + + + +::: + +Please note that the client does not interpret the timestamp in any way. It +merely passes the timestamp to a server, and it is up to the server to interpret +the timestamp. The default behavior of a QuestDB server is to treat the +designated timestamp as a number of nanoseconds since 1st Jan 1970 UTC. See +[Timestamps](/docs/develop/insert-data/#timestamps) for more details. + +:::caution + +By default, QuestDB's engine treats a designated timestamp as nanoseconds, but +this does not mean that `System.nanoTime()` can be used to get the current time +and pass it to `at(long)`. `System.nanoTime()` is only useful for measuring +elapsed time, and it is not related to any notion of system or wall-clock time. + +::: + +## Other considerations + +- The Sender is not thread-safe. For multiple threads to send data to QuestDB, + each thread should have its own Sender instance. An object pool can also be + used to re-use Sender instances. +- The Sender instance has to be closed after it is no longer in use. The Sender + implements the `java.lang.AutoCloseable` interface, and therefore the + [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) + pattern can be used to ensure that the Sender is closed. +- A client buffers row data internally and sends them to a server in batches + when the buffer is full. This improves performance significantly, but it also + means that during a period of quiescence, some written rows may be retained in + the buffer and are not sent to a server until the buffer are full. +- The method `flush()` can be called to force sending the internal buffer to a + server, even when the buffer is not full yet. + +## Configuration + +The client offers a builder API to configure all supported options. See +`Sender.builder()`. + +## Limitations + +The client supports TCP only. It does not support UDP as transport. diff --git a/docs/reference/clients/overview.md b/docs/reference/clients/overview.md new file mode 100644 index 00000000..4d473888 --- /dev/null +++ b/docs/reference/clients/overview.md @@ -0,0 +1,27 @@ +--- +title: Client Libraries +description: Introducing QuestDB Client Libraries for different languages +--- + +import { ILPClientsTable } from "@theme/ILPClientsTable" + +Current Client Libraries only +[implement InfluxDB Line Protocol](/docs/reference/api/ilp/overview/) for now. +They are suitable only for data ingestion. Follow this +[guide for querying data](/docs/develop/query-data/). 
+ +InfluxDB Line Protocol Client Libraries allow fast data ingestion while +abstracting you away from the details of the wire protocol. It's the recommended +way to ingest data into QuestDB. Clients also support authentication and TLS +encryption. + + + +For other languages, we have examples and a +[protocol reference](/docs/reference/api/ilp/overview). Please +[let us know](https://github.com/questdb/questdb/issues/new/) if you cannot find +a client for your favourite language! + +:::note InfluxDB Line Protocol clients are for data ingestion only. You cannot +use a client to query database. If you are looking for ways to query QuestDB +then see [this page](/docs/develop/query-data/). ::: diff --git a/docs/reference/command-line-options.md b/docs/reference/command-line-options.md new file mode 100644 index 00000000..4323fc6e --- /dev/null +++ b/docs/reference/command-line-options.md @@ -0,0 +1,322 @@ +--- +title: Command-line options +description: Command-line options reference documentation. +--- + +QuestDB may be started, stopped and passed configuration options from the +command line. On Windows, the QuestDB server can also start an +[interactive session](#interactive-session-windows). + +## Options + +The following sections describe the options that may be passed to QuestDB when +starting the server from the command line. + + + +import Tabs from "@theme/Tabs" +import TabItem from "@theme/TabItem" + + + + + + + + +```shell +./questdb.sh [start|stop|status] [-d dir] [-f] [-t tag] +``` + + + + + + + +```shell +questdb [start|stop|status] [-d dir] [-f] [-t tag] +``` + + + + + + + +```shell +questdb.exe [start|stop|status|install|remove] \ + [-d dir] [-f] [-j JAVA_HOME] [-t tag] +``` + + + + + + + +### Start + +`start` - starts QuestDB as a service. + +| Option | Description | +| ------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `-d` | Expects a `dir` directory value which is a folder that will be used as QuestDB's root directory. For more information and the default values, see the [default root](#default-root-directory) section below. | +| `-t` | Expects a `tag` string value which will be as a tag for the service. This option allows users to run several QuestDB services and manage them separately. If this option is omitted, the default tag will be `questdb`. | +| `-f` | Force re-deploying the Web Console. Without this option, the Web Console is cached and deployed only when missing. | +| `-j` | **Windows only!** This option allows to specify a path to `JAVA_HOME`. | + +:::note + +- When running multiple QuestDB services, a tag must be used to disambiguate +between services for `start` and `stop` commands. There will be conflicting +ports and root directories if only the tag flag is specified when starting +multiple services. Each new service should have its own config file or should +be started with separate port and root directory options. + +- When running QuestDB as Windows service you can check status in both: + - Windows Event Viewer - look for events with "QuestDB" source in Windows Logs | Application . 
+ - service log file - `$dataDir\log\service-%Y-%m-%dT%H-%M-%S.txt` (default is `C:\Windows\System32\qdbroot\log\service-%Y-%m-%dT%H-%M-%S.txt` ) + +::: + + + + + + + + + + + +```shell +./questdb.sh start [-d dir] [-f] [-t tag] +``` + + + + + + + +```shell +questdb start [-d dir] [-f] [-t tag] +``` + + + + + + + +```shell +questdb.exe start [-d dir] [-f] [-j JAVA_HOME] [-t tag] +``` + + + + + + + +#### Default root directory + +By default, QuestDB's [root directory](/docs/concept/root-directory-structure/) +will be the following: + + + + + + + + + + +```shell +$HOME/.questdb +``` + + + + + + +Path on Macs with Apple Silicon (M1 or M2) chip: + +```shell +/opt/homebrew/var/questdb +``` + +Path on Macs with Intel chip: + +```shell +/usr/local/var/questdb +``` + + + + + + + +```shell +C:\Windows\System32\qdbroot +``` + + + + + + + +### Stop + +`stop` - stops a service. + +| Option | Description | +| ------ | ------------------------------------------------------------------------------------------------------------------ | +| `-t` | Expects a `tag` string value which to stop a service by tag. If this is omitted, the default tag will be `questdb` | + + + + + + + + + + +```shell +./questdb.sh stop +``` + + + + + + + +```shell +questdb stop +``` + + + + + + + +```shell +questdb.exe stop +``` + + + + + + + +### Status + +`status` - shows the status for a service. + +| Option | Description | +| ------ | -------------------------------------------------------------------------------------------------------------- | +| `-t` | Expects a `tag` string value which to stop a service by tag. If this is omitted, the default will be `questdb` | + + + + + + + + + + +```shell +./questdb.sh status +``` + + + + + + + +```shell +questdb status +``` + + + + + + + +```shell +questdb.exe status +``` + + + + + + + +### Install (Windows) + +`install` - installs the Windows QuestDB service. The service will start +automatically at startup. + +```shell +questdb.exe install +``` + +### Remove (Windows) + +`remove` - removes the Windows QuestDB service. It will no longer start at +startup. + +```shell +questdb.exe remove +``` + +## Interactive session (Windows) + +You can start QuestDB interactively by running `questdb.exe`. This will launch +QuestDB interactively in the active `Shell` window. QuestDB will be stopped when +the Shell is closed. + +### Default root directory + +When started interactively, QuestDB's root directory defaults to the `current` +directory. + +### Stop + +To stop, press Ctrl+C in the terminal or close it +directly. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md new file mode 100644 index 00000000..66fb1dcc --- /dev/null +++ b/docs/reference/configuration.md @@ -0,0 +1,546 @@ +--- +title: Configuration +description: Server configuration keys reference documentation. +--- + +import { ConfigTable } from "@theme/ConfigTable" +import sharedWorkerConfig from "./_shared-worker.config.json" +import httpMinimalConfig from "./_http-minimal.config.json" +import httpConfig from "./_http.config.json" +import cairoConfig from "./_cairo.config.json" +import parallelSqlConfig from "./_parallel-sql.config.json" +import walConfig from "./_wal.config.json" +import csvImportConfig from "./_csv-import.config.json" +import postgresConfig from "./_postgres.config.json" +import tcpConfig from "./_tcp.config.json" +import udpConfig from "./_udp.config.json" + +This page describes methods for configuring QuestDB server settings. 
+Configuration can be set either: + +- In the `server.conf` configuration file available in the + [root directory](/docs/concept/root-directory-structure/) +- Using environment variables + +When a key is absent from both the configuration file and the environment +variables, the default value is used. Configuration of logging is handled +separately and details of configuring this behavior can be found at the +[logging section](#logging) below. + +## Environment variables + +All settings in the configuration file can be set or overridden using +environment variables. If a key is set in both the `server.conf` file and via an +environment variable, the environment variable will take precedence and the +value in the server configuration file will be ignored. + +To make these configuration settings available to QuestDB via environment +variables, they must be in the following format: + +```shell +QDB_ +``` + +Where `` is equal to the configuration key name. To +properly format a `server.conf` key as an environment variable it must have: + +1. `QDB_` prefix +2. uppercase characters +3. all `.` period characters replaced with `_` underscore + +For example, the server configuration key for shared workers must be passed as +described below: + +| `server.conf` key | env var | +| --------------------- | ------------------------- | +| `shared.worker.count` | `QDB_SHARED_WORKER_COUNT` | + +:::note + +QuestDB applies these configuration changes on startup and a running instance +must be restarted in order for configuration changes to take effect + +::: + +### Examples + +The following configuration property customizes the number of worker threads +shared across the application: + +```shell title="conf/server.conf" +shared.worker.count=5 +``` + +```shell title="Customizing the worker count via environment variable" +export QDB_SHARED_WORKER_COUNT=5 +``` + +## Docker + +This section describes how to configure QuestDB server settings when running +QuestDB in a Docker container. A command to run QuestDB via Docker with default +interfaces is as follows: + +```shell title="Example of running docker container with built-in storage" +docker run -p 9000:9000 \ + -p 9009:9009 \ + -p 8812:8812 \ + -p 9003:9003 \ + questdb/questdb +``` + +This publishes the following ports: + +- `-p 9000:9000` - [REST API](/docs/reference/api/rest/) and + [Web Console](/docs/develop/web-console/) +- `-p 9009:9009` - [InfluxDB line protocol](/docs/reference/api/ilp/overview/) +- `-p 8812:8812` - [Postgres wire protocol](/docs/reference/api/postgres/) +- `-p 9003:9003` - + [Min health server and Prometheus metrics](#minimal-http-server/) + +The examples in this section change the default HTTP and REST API port from +`9000` to `4000` for illustrative purposes, and demonstrate how to publish this +port with a non-default property. + +### Environment variables + +Server configuration can be passed to QuestDB running in Docker by using the +`-e` flag to pass an environment variable to a container: + +```bash +docker run -p 4000:4000 -e QDB_HTTP_BIND_TO=0.0.0.0:4000 questdb/questdb +``` + +### Mounting a volume + +A server configuration file can be provided by mounting a local directory in a +QuestDB container. Given the following configuration file which overrides the +default HTTP bind property: + +```shell title="./server.conf" +http.bind.to=0.0.0.0:4000 +``` + +Running the container with the `-v` flag allows for mounting the current +directory to QuestDB's `conf` directory in the container. 
With the server +configuration above, HTTP ports for the web console and REST API will be +available on `localhost:4000`: + +```bash +docker run -v "$(pwd):/var/lib/questdb/conf" -p 4000:4000 questdb/questdb +``` + +To mount the full root directory of QuestDB when running in a Docker container, +provide a the configuration in a `conf` directory: + +```shell title="./conf/server.conf" +http.bind.to=0.0.0.0:4000 +``` + +Mount the current directory using the `-v` flag: + +```bash +docker run -v "$(pwd):/var/lib/questdb/" -p 4000:4000 questdb/questdb +``` + +The current directory will then have data persisted to disk: + +```bash title="Current directory contents" +├── conf +│ └── server.conf +├── db +└── public +``` + +## Keys and default values + +This section lists the configuration keys available to QuestDB by topic or +subsystem. Parameters for specifying buffer and memory page sizes are provided +in the format `n`, where `` can be one of the following: + +- `m` for **MB** +- `k` for **kB** + +For example: + +```ini title="Setting maximum send buffer size to 2MB per TCP socket" +http.net.connection.sndbuf=2m +``` + +### Shared worker + +Shared worker threads service SQL execution subsystems and (in the default +configuration) every other subsystem. + + + +### Minimal HTTP server + +This server runs embedded in a QuestDB instance by default and enables health +checks of an instance via HTTP. It responds to all requests with a HTTP status +code of `200` unless the QuestDB process dies. + +:::tip + +Port `9003` also provides a `/metrics` endpoint with Prometheus metrics exposed. +Examples of how to use the min server and Prometheus endpoint can be found on +the [health monitoring page](/docs/operations/health-monitoring/). + +::: + + + +### HTTP server + +This section describes configuration settings for the Web Console and the REST +API available by default on port `9000`. For details on the use of this +component, refer to the [web console documentation](/docs/develop/web-console/) +page. + + + +### Cairo engine + +This section describes configuration settings for the Cairo SQL engine in +QuestDB. + + + +### WAL table configurations + +The following WAL tables settings on parallel threads are configurable for +applying WAL data to the table storage: + + + +### CSV import + +This section describes configuration settings for using `COPY` to import large +CSV files. + +Mandatory settings to enable `COPY`: + + + +Optional settings for `COPY`: + + + +#### CSV import configuration for Docker + +For QuestDB instances using Docker: + +- `cairo.sql.copy.root` must be defined using one of the following settings: + - The environment variable `QDB_CAIRO_SQL_COPY_ROOT`. + - The `cairo.sql.copy.root` in `server.conf`. +- The path for the source CSV file is mounted. +- The source CSV file path and the path defined by `QDB_CAIRO_SQL_COPY_ROOT` are + identical. +- It is optional to define `QDB_CAIRO_SQL_COPY_WORK_ROOT`. + +The following is an example command to start a QuestDB instance on Docker, in +order to import a CSV file: + +```shell +docker run -p 9000:9000 \ +-v "/tmp/questdb:/var/lib/questdb" \ +-v "/tmp/questdb/my_input_root:/var/lib/questdb/questdb_import" \ +-e QDB_CAIRO_SQL_COPY_ROOT=/var/lib/questdb/questdb_import \ +questdb/questdb +``` + +Where: + +- `-v "/tmp/questdb/my_input_root:/var/lib/questdb/questdb_import"`: Defining a + source CSV file location to be `/tmp/questdb/my_input_root` on local machine + and mounting it to `/var/lib/questdb/questdb_import` in the container. 
+- `-e QDB_CAIRO_SQL_COPY_ROOT=/var/lib/questdb/questdb_import`: Defining the + copy root directory to be `/var/lib/questdb/questdb_import`. + +It is important that the two path are identical +(`/var/lib/questdb/questdb_import` in the example). + +### Parallel SQL execution + +This section describes settings that can affect parallelism level of SQL +execution and therefore performance. + + + +### Postgres wire protocol + +This section describes configuration settings for client connections using +PostgresSQL wire protocol. + + + +### InfluxDB line protocol + +This section describes ingestion settings for incoming messages using InfluxDB +line protocol. + +| Property | Default | Description | +| ------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| line.default.partition.by | DAY | Table partition strategy to be used with tables that are created automatically by InfluxDB Line Protocol. Possible values are: `HOUR`, `DAY`, `WEEK`, `MONTH`, and `YEAR`. | + +#### TCP specific settings + + + +#### UDP specific settings + +:::note + +The UDP receiver is deprecated since QuestDB version 6.5.2. We recommend the +[InfluxDB Line Protocol TCP receiver](/docs/reference/api/ilp/overview/) +instead. + +::: + + + +### Config Validation + +The database startup phase checks for configuration issues, such as invalid or +deprecated settings. Issues may be classified as advisories or errors. Advisory +issues are [logged](/docs/concept/root-directory-structure/#log-directory) +without causing the database to stop its startup sequence: These are usually +setting deprecation warnings. Configuration errors can optionally cause the +database to fail its startup. + +| Property | Default | Description | +| ------------------------ | ------- | -------------------------------------------------------------- | +| config.validation.strict | false | When enabled, startup fails if there are configuration errors. | + +_We recommended enabling strict validation._ + +### Telemetry + +QuestDB sends anonymous telemetry data with information about usage which helps +us improve the product over time. We do not collect any personally-identifying +information, and we do not share any of this data with third parties. + +| Property | Default | Description | +| ------------------------ | ------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| telemetry.enabled | true | Enable or disable anonymous usage metrics collection. | +| telemetry.hide.tables | false | Hides telemetry tables from `select * from tables()` output. As a result, telemetry tables will not be visible in the Web Console table view. | +| telemetry.queue.capacity | 512 | Capacity of the internal telemetry queue, which is the gateway of all telemetry events. This queue capacity does not require tweaking. | + +### Metrics + +QuestDB exposes a `/metrics` endpoint providing internal system metrics in the +Prometheus format. To use this functionality and get started with an example +configuration, refer to the +[Prometheus documentation](/docs/third-party-tools/prometheus/). + +| Property | Default | Description | +| --------------- | ------- | ----------------------------------- | +| metrics.enabled | false | Enable or disable metrics endpoint. 
| + +## Logging + +The logging behavior of QuestDB may be set in dedicated configuration files or +by environment variables. This section describes how to configure logging using +these methods. + +### Configuration file + +Logs may be configured via a dedicated configuration file `log.conf`. + +```shell title="log.conf" +# list of configured writers +writers=file,stdout + +# file writer +#w.file.class=io.questdb.log.LogFileWriter +#w.file.location=questdb-debug.log +#w.file.level=INFO,ERROR + +# rolling file writer +#w.file.class=io.questdb.log.LogRollingFileWriter +#w.file.location=${log.dir}/questdb-rolling.log.${date:yyyyMMdd} +#w.file.level=INFO,ERROR +#rollEvery accepts: day, hour, minute, month +#w.file.rollEvery=day +#rollSize specifies size at which to roll a new log file: a number followed by k, m, g (KB, MB, GB respectively) +#w.file.rollSize=128m +#lifeDuration accepts: a number followed by s, m, h, d, w, M, y for seconds, minutes, hours, etc. +#w.file.lifeDuration=1d +#sizeLimit is the max fileSize of the log directory. Follows same format as rollSize +#w.file.sizeLimit=1g + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO,ERROR +``` + +QuestDB will look for `/log.conf` first in `conf/` directory and then on the +classpath unless this name is overridden via a command line property: +`-Dout=/something_else.conf`. QuestDB will create `conf/log.conf` using default +values If `-Dout` is not set and file doesn't exist . + +On Windows log messages go to depending on run mode : + +- interactive session - console and `$dataDir\log\stdout-%Y-%m-%dT%H-%M-%S.txt` + (default is `.\log\stdout-%Y-%m-%dT%H-%M-%S.txt` ) +- service - `$dataDir\log\service-%Y-%m-%dT%H-%M-%S.txt` (default is + `C:\Windows\System32\qdbroot\log\service-%Y-%m-%dT%H-%M-%S.txt` ) + +### Environment variables + +Values in the log configuration file can be overridden with environment +variables. All configuration keys must be formatted as described in the +[environment variables](#environment-variables) section above. + +For example, to set logging on `ERROR` level only: + +```shell title="Setting log level to ERROR in log-stdout.conf" +w.stdout.level=ERROR +``` + +This can be passed as an environment variable as follows: + +```shell title="Setting log level to ERROR via environment variable" +export QDB_LOG_W_STDOUT_LEVEL=ERROR +``` + +### Configuring Docker logging + +When mounting a volume to a Docker container, a logging configuration file may +be provided in the container located at `./conf/log.conf`. For example, a file +with the following contents can be created: + +```shell title="./conf/log.conf" +# list of configured writers +writers=file,stdout,http.min + +# file writer +w.file.class=io.questdb.log.LogFileWriter +w.file.location=questdb-docker.log +w.file.level=INFO,ERROR,DEBUG + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# min http server, used monitoring +w.http.min.class=io.questdb.log.LogConsoleWriter +w.http.min.level=ERROR +w.http.min.scope=http-min-server +``` + +The current directory can be mounted: + +```shell title="Mount the current directory to a QuestDB container" +docker run -p 9000:9000 -v "$(pwd):/var/lib/questdb/" questdb/questdb +``` + +The container logs will be written to disk using the logging level and file name +provided in the `./conf/log.conf` file, in this case in `./questdb-docker.log`. 
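One way to confirm the file writer is active, assuming the mount shown above, is to follow the log file from the host:

```shell
tail -f ./questdb-docker.log
```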
+ +### Prometheus Alertmanager + +QuestDB includes a log writer that sends any message logged at critical level +(logger.critical("may-day")) to Prometheus Alertmanager over a TCP/IP socket. +Details for configuring this can be found in the +[Prometheus documentation](/docs/third-party-tools/prometheus). To configure +this writer, add it to the `writers` config alongside other log writers. + +```ini title="log.conf" +# Which writers to enable +writers=stdout,alert + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# Prometheus Alerting +w.alert.class=io.questdb.log.LogAlertSocketWriter +w.alert.level=CRITICAL +w.alert.location=/alert-manager-tpt.json +w.alert.alertTargets=localhost:9093,localhost:9096,otherhost:9093 +w.alert.defaultAlertHost=localhost +w.alert.defaultAlertPort=9093 + +# The `inBufferSize` and `outBufferSize` properties are the size in bytes for the +# socket write buffers. +w.alert.inBufferSize=2m +w.alert.outBufferSize=4m +# Delay in milliseconds between two consecutive attempts to alert when +# there is only one target configured +w.alert.reconnectDelay=250 +``` + +Of all properties, only `w.alert.class` and `w.alert.level` are required, the +rest assume default values as stated above (except for `w.alert.alertTargets` +which is empty by default). + +Alert targets are specified using `w.alert.alertTargets` as a comma-separated +list of up to 12 `host:port` TCP/IP addresses. Specifying a port is optional and +defaults to the value of `defaultAlertHost`. One of these alert managers is +picked at random when QuestDB starts, and a connection is created. + +All alerts will be sent to the chosen server unless it becomes unavailable. If +it is unavailable, the next server is chosen. If there is only one server +configured and a fail-over cannot occur, a delay of 250 milliseconds is added +between send attempts. + +The `w.alert.location` property refers to the path (absolute, otherwise relative +to `-d database-root`) of a template file. By default, it is a resource file +which contains: + +```json title="/alert-manager-tpt.json" +[ + { + "Status": "firing", + "Labels": { + "alertname": "QuestDbInstanceLogs", + "service": "QuestDB", + "category": "application-logs", + "severity": "critical", + "version": "${QDB_VERSION}", + "cluster": "${CLUSTER_NAME}", + "orgid": "${ORGID}", + "namespace": "${NAMESPACE}", + "instance": "${INSTANCE_NAME}", + "alertTimestamp": "${date: yyyy/MM/ddTHH:mm:ss.SSS}" + }, + "Annotations": { + "description": "ERROR/cl:${CLUSTER_NAME}/org:${ORGID}/ns:${NAMESPACE}/db:${INSTANCE_NAME}", + "message": "${ALERT_MESSAGE}" + } + } +] +``` + +Four environment variables can be defined, and referred to with the +`${VAR_NAME}` syntax: + +- _ORGID_ +- _NAMESPACE_ +- _CLUSTER_NAME_ +- _INSTANCE_NAME_ + +Their default value is `GLOBAL`, they mean nothing outside a cloud environment. + +In addition, `ALERT_MESSAGE` is a placeholder for the actual `critical` message +being sent, and `QDB_VERSION` is the runtime version of the QuestDB instance +sending the alert. The `${date: }` syntax can be used to produce a +timestamp at the time of sending the alert. + +### Debug + +QuestDB logging can be quickly forced globally to `DEBUG` via either providing +the java option `-Debug` or setting the environment variable `QDB_DEBUG=true`. 
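For example, with the Docker image the variable can be passed like any other configuration key (a sketch for debugging only, not a recommended production setting):

```shell
docker run -p 9000:9000 -e QDB_DEBUG=true questdb/questdb
```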
diff --git a/docs/reference/function/aggregation.md b/docs/reference/function/aggregation.md new file mode 100644 index 00000000..5ec2d31e --- /dev/null +++ b/docs/reference/function/aggregation.md @@ -0,0 +1,469 @@ +--- +title: Aggregate functions +sidebar_label: Aggregate +description: Aggregate functions reference documentation. +--- + +This page describes the available functions to assist with performing aggregate +calculations. + +## avg + +`avg(value)` calculates simple average of values ignoring missing data (e.g +`null` values). + +### Arguments + +- `value` is any numeric value. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql title="Average transaction amount" +SELECT avg(amount) FROM transactions; +``` + +| avg | +| :--- | +| 22.4 | + +```questdb-sql title="Average transaction amount by payment_type" +SELECT payment_type, avg(amount) FROM transactions; +``` + +| payment_type | avg | +| :----------- | :---- | +| cash | 22.1 | +| card | 27.4 | +| null | 18.02 | + +## count + +- `count()` or `count(*)` - counts the number of rows irrespective of underlying data. +- `count(column_name)` - counts the number of non-null values in a given column. + +### Arguments + +- `count()` does not require arguments. +- `count(column_name)` - supports the following data types: + - `double` + - `float` + - `integer` + - `character` + - `short` + - `byte` + - `timestamp` + - `date` + - `long` + - `long256` + - `geohash` + - `string` + - `symbol` + +### Return value + +Return value type is `long`. + +### Examples + +Count of rows in the `transactions` table: + +```questdb-sql +SELECT count() FROM transactions; +``` + +| count | +| :---- | +| 100 | + +Count of rows in the `transactions` table aggregated by the `payment_type` value: + +```questdb-sql +SELECT payment_type, count() FROM transactions; +``` + +| payment_type | count | +| :----------- | :---- | +| cash | 25 | +| card | 70 | +| null | 5 | + +Count non-null transaction amounts: + +```questdb-sql +SELECT count(amount) FROM transactions; +``` + +| count | +|:------| +| 95 | + +Count non-null transaction amounts by `payment_type`: + +```questdb-sql +SELECT payment_type, count(amount) FROM transactions; +``` + +| payment_type | count | +|:-------------|:------| +| cash | 24 | +| card | 67 | +| null | 4 | + + +:::note + +`null` values are aggregated with `count()`, but not with `count(column_name)` + +::: + +## count_distinct + +`count_distinct(column_name)` - counts distinct values in `string`, `symbol`, +`long256`, `long`, or `int` columns. + +### Return value + +Return value type is `long`. + +### Examples + +- Count of distinct sides in the transactions table. Side column can either be + `BUY` or `SELL` or `null` + +```questdb-sql +SELECT count_distinct(side) FROM transactions; +``` + +| count_distinct | +| :------------- | +| 2 | + +- Count of distinct counterparties in the transactions table aggregated by + `payment_type` value. + +```questdb-sql +SELECT payment_type, count_distinct(counterparty) FROM transactions; +``` + +| payment_type | count_distinct | +| :----------- | :------------- | +| cash | 3 | +| card | 23 | +| null | 5 | + +:::note + +`null` values are not counted in the `count_distinct` function. + +::: + +## first/last + +- `first(column_name)` - returns the first value of a column. +- `last(column_name)` - returns the last value of a column. 
+ +Supported column datatype: `double`, `float`, `integer`, `IPv4`, `character`, `short`, +`byte`, `timestamp`, `date`, `long`, `geohash`, `symbol`, `uuid`. + +If a table has a [designated timestamp](/docs/concept/designated-timestamp/), +then the first row is always the row with the lowest timestamp (oldest) and the +last row is always the one with the highest (latest) timestamp. For a table +without a designated timestamp column, `first` returns the first row and `last` +returns the last inserted row, regardless of any timestamp column. + +### Return value + +Return value type is `string`. + +### Examples + +Given a table `sensors`, which has a designated timestamp column: + +| device_id | temperature | ts | +| :--------- | :---------- | :-------------------------- | +| arduino-01 | 12 | 2021-06-02T14:33:19.970258Z | +| arduino-02 | 10 | 2021-06-02T14:33:21.703934Z | +| arduino-03 | 18 | 2021-06-02T14:33:23.707013Z | + +The following query returns oldest value for the `device_id` column: + +```questdb-sql +SELECT first(device_id) FROM sensors; +``` + +| first | +| :--------- | +| arduino-01 | + +The following query returns the latest symbol value for the `device_id` column: + +```questdb-sql +SELECT last(device_id) FROM sensors; +``` + +| last | +| :--------- | +| arduino-03 | + +Without selecting a designated timestamp column, the table may be unordered and +the query may return different result. Given an unordered table +`sensors_unordered`: + +| device_id | temperature | ts | +| :--------- | :---------- | :-------------------------- | +| arduino-01 | 12 | 2021-06-02T14:33:19.970258Z | +| arduino-03 | 18 | 2021-06-02T14:33:23.707013Z | +| arduino-02 | 10 | 2021-06-02T14:33:21.703934Z | + +The following query returns the first record for the `device_id` column: + +```questdb-sql +SELECT first(device_id) FROM sensors_unordered; +``` + +| first | +| :--------- | +| arduino-01 | + +The following query returns the last record for the `device_id` column: + +```questdb-sql +SELECT last(device_id) FROM sensors_unordered; +``` + +| last | +| :--------- | +| arduino-02 | + +## haversine_dist_deg + +`haversine_dist_deg(lat, lon, ts)` - calculates the traveled distance for a +series of latitude and longitude points. + +### Arguments + +- `lat` is the latitude expressed as degrees in decimal format (`double`) +- `lon` is the longitude expressed as degrees in decimal format (`double`) +- `ts` is the `timestamp` for the data point + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql title="Calculate the aggregate traveled distance for each car_id" +SELECT car_id, haversine_dist_deg(lat, lon, k) + FROM table rides +``` + +## ksum + +`ksum(value)` - adds values ignoring missing data (e.g `null` values). Values +are added using the + +[Kahan compensated sum algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm). +This is only beneficial for floating-point values such as `float` or `double`. + +### Arguments + +- `value` is any numeric value. + +### Return value + +Return value type is the same as the type of the argument. + +### Examples + +```questdb-sql +SELECT ksum(a) +FROM (SELECT rnd_double() a FROM long_sequence(100)); +``` + +| ksum | +| :---------------- | +| 52.79143968514029 | + +## max + +`max(value)` - returns the highest value ignoring missing data (e.g `null` +values). + +### Arguments + +- `value` is any numeric value + +### Return value + +Return value type is the same as the type of the argument. 
+ +### Examples + +```questdb-sql title="Highest transaction amount" +SELECT max(amount) FROM transactions; +``` + +| max | +| :--- | +| 55.3 | + +```questdb-sql title="Highest transaction amount by payment_type" +SELECT payment_type, max(amount) FROM transactions; +``` + +| payment_type | amount | +| :----------- | :----- | +| cash | 31.5 | +| card | 55.3 | +| null | 29.2 | + +## min + +`min(value)` - returns the lowest value ignoring missing data (e.g `null` +values). + +### Arguments + +- `value` is any numeric value + +### Return value + +Return value type is the same as the type of the argument. + +### Examples + +```questdb-sql title="Lowest transaction amount" +SELECT min(amount) FROM transactions; +``` + +| min | +| :--- | +| 12.5 | + +```questdb-sql title="Lowest transaction amount, by payment_type" +SELECT payment_type, min(amount) FROM transactions; +``` + +| payment_type | min | +| :----------- | :--- | +| cash | 12.5 | +| card | 15.3 | +| null | 22.2 | + +## nsum + +`nsum(value)` - adds values ignoring missing data (e.g `null` values). Values +are added using the +[Neumaier sum algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements). +This is only beneficial for floating-point values such as `float` or `double`. + +### Arguments + +- `value` is any numeric value. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql +SELECT nsum(a) +FROM (SELECT rnd_double() a FROM long_sequence(100)); +``` + +| nsum | +| :--------------- | +| 49.5442334742831 | + +## stddev_samp + +`stddev_samp(value)` - calculates the sample standard deviation of values +ignoring missing data (e.g `null` values). + +### Arguments + +- `value` is any numeric value. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql +SELECT stddev_samp(x) +FROM (SELECT x FROM long_sequence(100)); +``` + +| stddev_samp | +| :-------------- | +| 29.011491975882 | + +## sum + +`sum(value)` - adds values ignoring missing data (e.g `null` values). + +### Arguments + +- `value` is any numeric value. + +### Return value + +Return value type is the same as the type of the argument. + +### Examples + +```questdb-sql title="Sum all quantities in the transactions table" +SELECT sum(quantity) FROM transactions; +``` + +| sum | +| :-- | +| 100 | + +```questdb-sql title="Sum all quantities in the transactions table, aggregated by item" +SELECT item, sum(quantity) FROM transactions; +``` + +| item | count | +| :----- | :---- | +| apple | 53 | +| orange | 47 | + +### Overflow + +`sum` does not perform overflow check. To avoid overflow, you can cast the +argument to wider type. + +```questdb-sql title="Cast as long to avoid overflow" +SELECT sum(cast(a AS LONG)) FROM table; +``` +## to_uuid + +`to_uuid(value, value)` combines two 64-bit `long` into a single 128-bit `uuid`. + +### Arguments + +- `value` is any `long` + +### Return value + +Return value type is `uuid`. + +### Examples + +```questdb-sql +SELECT to_uuid(2, 1) +AS uuid FROM long_sequence(1); +``` + +Returns: + +``` +00000000-0000-0001-0000-000000000002 +``` diff --git a/docs/reference/function/analytic.md b/docs/reference/function/analytic.md new file mode 100644 index 00000000..2099f71e --- /dev/null +++ b/docs/reference/function/analytic.md @@ -0,0 +1,159 @@ +--- +title: Analytic functions +sidebar_label: Analytic +description: Analytic functions reference documentation. +--- + +This page describes the available analytic functions. 
QuestDB is working on +adding more analytic functions. + +An analytic function performs a calculation across a set of table rows that are +somehow related to the current row. + +## Syntax + +![Flow chart showing the syntax of an analytic function](/img/docs/diagrams/analyticFunction.svg) + +Analytic functions are used with an `OVER` clause to define the way data is +grouped and processed. The `OVER` clause is used with `PARTITION BY` and +`ORDER BY` to set unique parameters and organize the rows. + +## rank + +`rank()` - returns the rank of the current row, with gaps; that is, the +`row_number` of the first row in its peer group. + +**Arguments:** + +- `rank()` does not require arguments. + +**Return value:** + +Return value type is `long`. + +**Description** + +`rank()` returns values dynamically and there is no guarantee that the rows +returned will be ordered exactly the same with each execution of the query. +Hence, an `ORDER BY` outside of the `OVER()` clause can be used to ensure the +output order. + +**Examples:** + +For a given table `housing`: + +```questdb-sql +CREATE TABLE 'housing' ( + id INT, + price DOUBLE, + rating INT, + location STRING, + date_sold TIMESTAMP +); +``` + +| id | price | rating | location | date_sold | +| --- | -------- | ------ | -------------- | --------------------------- | +| 2 | 246.3393 | 1 | alcatraz_ave | 2021-02-01T00:00:00.000000Z | +| 10 | 69.2601 | 5 | alcatraz_ave | 2021-02-01T04:00:00.000000Z | +| 15 | 616.2569 | 3 | westbrae | 2021-02-01T08:00:00.000000Z | +| 3 | 112.7856 | 5 | south_side | 2021-02-01T12:00:00.000000Z | +| 17 | 993.3345 | 1 | south_side | 2021-02-01T16:00:00.000000Z | +| 8 | 937.4274 | 1 | berkeley_hills | 2021-02-01T20:00:00.000000Z | +| 4 | 207.7797 | 1 | alcatraz_ave | 2021-02-02T00:00:00.000000Z | +| 17 | 352.3193 | 3 | downtown | 2021-02-02T04:00:00.000000Z | +| 3 | 140.0437 | 1 | westbrae | 2021-02-02T08:00:00.000000Z | +| 15 | 971.7142 | 1 | westbrae | 2021-02-02T12:00:00.000000Z | + +The following query uses `rank()` to display output based on the rating: + +```questdb-sql +SELECT location, price, date_sold, rating, + rank() OVER (ORDER BY rating ASC) rank +FROM 'housing' +ORDER BY rank; +``` + +| location | price | date_sold | rating | rank | +| -------------- | -------- | --------------------------- | ------ | ---- | +| westbrae | 971.7142 | 2021-02-02T12:00:00.000000Z | 1 | 1 | +| westbrae | 140.0437 | 2021-02-02T08:00:00.000000Z | 1 | 1 | +| alcatraz_ave | 207.7797 | 2021-02-02T00:00:00.000000Z | 1 | 1 | +| berkeley_hills | 937.4274 | 2021-02-01T20:00:00.000000Z | 1 | 1 | +| south_side | 993.3345 | 2021-02-01T16:00:00.000000Z | 1 | 1 | +| alcatraz_ave | 246.3393 | 2021-02-01T00:00:00.000000Z | 1 | 1 | +| downtown | 352.3193 | 2021-02-02T04:00:00.000000Z | 3 | 7 | +| westbrae | 616.2569 | 2021-02-01T08:00:00.000000Z | 3 | 7 | +| south_side | 112.7856 | 2021-02-01T12:00:00.000000Z | 5 | 9 | +| alcatraz_ave | 69.2601 | 2021-02-01T04:00:00.000000Z | 5 | 9 | + +## row_number + +`row_number()` - assigns a row number to each row in a result set. For each +partition, the row number starts with one and increments by one. + +**Arguments:** + +- `row_number` does not require arguments. + +**Return value:** + +Return value type is `long`. + +**Description** + +`row_number()` returns values dynamically and there is no guarantee that the +rows returned will be ordered exactly the same with each execution of the query. +Hence, an `ORDER BY` outside of the `OVER()` clause can be used to ensure the +output order. 
+ +**Examples:** + +Given a table `trades`, the queries below use `row_number()` with a `WHERE` +clause to filter trading records added within one day. + +The following query assigns row numbers and orders output based on them: + +```questdb-sql +SELECT +symbol, +side, +price, +amount, +row_number() OVER () AS row_num +FROM trades +WHERE timestamp > dateadd('d', -1, now()) +ORDER BY row_num ASC; +-- The ORDER BY clause arranges the output based on the assigned row_num. +``` + +| symbol | side | price | amount | row_num | +| :------ | :--- | :------- | :--------- | :------ | +| BTC-USD | sell | 20633.47 | 0.17569298 | 1 | +| ETH-USD | sell | 1560.04 | 1.3289 | 2 | +| ETH-USD | sell | 1560.04 | 0.3 | 3 | +| ETH-USD | sell | 1560 | 1.40426786 | 4 | +| BTC-USD | buy | 20633.48 | 0.00179092 | 5 | + +The following query groups the table based on `symbol` and assigns row numbers +to each group based on `price`: + +```questdb-sql +SELECT +symbol, +side, +price, +amount, +row_number() OVER (PARTITION BY symbol ORDER BY price) AS row_num +FROM trades +WHERE timestamp > dateadd('d', -1, now()) +ORDER BY row_num ASC; +-- The ORDER BY clause arranges the output based on the assigned row_num. +``` + +| symbol | side | price | amount | row_num | +| :------ | :--- | :------ | :--------- | :------ | +| BTC-USD | Sell | 1479.41 | 0.10904633 | 1 | +| ETH-USD | Sell | 20000 | 0.1 | 1 | +| BTC-USD | Sell | 1479.45 | 0.02 | 2 | +| ETH-USD | Sell | 20000 | 0.000249 | 2 | diff --git a/docs/reference/function/binary.md b/docs/reference/function/binary.md new file mode 100644 index 00000000..7b4905a1 --- /dev/null +++ b/docs/reference/function/binary.md @@ -0,0 +1,36 @@ +--- +title: Binary functions +sidebar_label: Binary +description: Binary function reference documentation. +--- + +This page describes the available functions to assist with working with binary data. + +## base64 + +`base64(data, maxLength)` encodes raw binary data using the base64 encoding into +a string with a maximum length defined by `maxLength`. + +**Arguments:** + +- `data` is the binary data to be encoded. +- `maxLength` is the intended maximum length of the encoded string. + +**Return value:** + +Return value type is `string`. + +**Example:** + +```questdb-sql +SELECT base64(rnd_bin(), 20); +-- `rnd_bin` can be used to generate random binary data. +``` + +| base64 | +| ---------------------------- | +| q7QDHliR4V1OsAEUVCFwDDTerbI= | + +## See also + +[`rnd_bin`](/docs/reference/function/random-value-generator/#rnd_bin) can be used to generate random binary data. diff --git a/docs/reference/function/boolean.md b/docs/reference/function/boolean.md new file mode 100644 index 00000000..3c9d0b48 --- /dev/null +++ b/docs/reference/function/boolean.md @@ -0,0 +1,58 @@ +--- +title: Boolean functions +sidebar_label: Boolean +description: Boolean function reference documentation. +--- + +This page describes the available functions to assist with performing boolean +calculations on numeric and timestamp types. + +## isOrdered + +`isOrdered(column)` return a `boolean` indicating whether the column values are +ordered in a table. + +**Arguments:** + +- `column` is a column name of numeric or timestamp type. + +**Return value:** + +Return value type is `boolean`. 
+ +**Examples:** + +Given a table with the following contents: + +|numeric_sequence|ts | +|:---------------|:--------------------------| +|1 |2021-05-01T11:00:00.000000Z| +|2 |2021-05-01T12:00:00.000000Z| +|3 |2021-05-01T13:00:00.000000Z| + +```questdb-sql +SELECT isOrdered(numeric_sequence) is_num_ordered, + isOrdered(ts) is_ts_ordered +FROM my_table +``` + +|is_num_ordered|is_ts_ordered| +|:-------------|:------------| +|true |true | + +Adding an integer and timestamp rows out-of-order + +|numeric_sequence|ts | +|:---------------|:--------------------------| +|1 |2021-05-01T11:00:00.000000Z| +|2 |2021-05-01T12:00:00.000000Z| +|3 |2021-05-01T13:00:00.000000Z| +|2 |2021-05-01T12:00:00.000000Z| + +```questdb-sql +SELECT isOrdered(numeric_sequence) FROM my_table +``` + +|is_num_ordered|is_ts_ordered| +|:-------------|:------------| +|false |false | diff --git a/docs/reference/function/conditional.md b/docs/reference/function/conditional.md new file mode 100644 index 00000000..93c0bf32 --- /dev/null +++ b/docs/reference/function/conditional.md @@ -0,0 +1,96 @@ +--- +title: Conditional functions +sidebar_label: Conditional +description: Conditional functions reference documentation. +--- + +Conditional functions allow for conditionally selecting input values. For +instance, the `coalesce()` function is useful for handling null data values and +providing replacement values. + +## coalesce + +`coalesce(value [, ...])` - returns the first non-null argument in a provided +list of arguments in cases where null values should not appear in query results. + +This function is an implementation of the `COALESCE` expression in PostgreSQL +and as such, should follow the expected behavior described in the +[coalesce PostgreSQL documentation](https://www.postgresql.org/docs/current/functions-conditional.html#FUNCTIONS-COALESCE-NVL-IFNULL) + +**Arguments:** + +- `coalesce(value [, ...])` `value` and subsequent comma-separated list of + arguments which may be of any type except binary. If the provided arguments + are of different types, one should be `CAST`able to another. + +**Return value:** + +The returned value is the first non-null argument passed. + +**Examples:** + +Given a table with the following records: + +|timestamp |amount| +|:--------------------------|:-----| +|2021-02-11T09:39:16.332822Z|1 | +|2021-02-11T09:39:16.333481Z|null | +|2021-02-11T09:39:16.333511Z|3 | + +The following example demonstrates how to use `coalesce()` to return a default +value of `0` for an expression if the `amount` column contains `null` values. + +```questdb-sql +SELECT timestamp, + coalesce(amount, 0) as amount_not_null +FROM transactions +``` + +|timestamp |amount_not_null| +|:--------------------------|:--------------| +|2021-02-11T09:39:16.332822Z|1 | +|2021-02-11T09:39:16.333481Z|0 | +|2021-02-11T09:39:16.333511Z|3 | + +## nullif + +`nullif(value1, value2)` - returns a null value if `value1` is equal to `value2` +or otherwise returns `value1`. + +This function is an implementation of the `NULLIF` expression in PostgreSQL and +as such, should follow the expected behavior described in the +[nullif PostgreSQL documentation](https://www.postgresql.org/docs/current/functions-conditional.html#FUNCTIONS-COALESCE-NVL-IFNULL). + +**Arguments:** + +- `value1` is any numeric, char, or string value. +- `value2` is any numeric, char, or string value. + +**Return value:** + +The returned value is either `NULL`, or the first argument passed. 
+ +**Examples:** + +Given a table with the following records: + +|timestamp |amount| +|:--------------------------|:-----| +|2021-02-11T09:39:16.332822Z|0 | +|2021-02-11T09:39:16.333481Z|11 | +|2021-02-11T09:39:16.333511Z|3 | + +The following example demonstrates how to use `nullif()` to return a `null` if +the `amount` column contains `0` values. + +```questdb-sql +SELECT timestamp, + nullif(amount, 0) as amount_null_if_zero +FROM transactions +``` + +|timestamp |amount_null_if_zero| +|:--------------------------|:------------------| +|2021-02-11T09:39:16.332822Z|null | +|2021-02-11T09:39:16.333481Z|11 | +|2021-02-11T09:39:16.333511Z|3 | diff --git a/docs/reference/function/date-time.md b/docs/reference/function/date-time.md new file mode 100644 index 00000000..1fa5bfc8 --- /dev/null +++ b/docs/reference/function/date-time.md @@ -0,0 +1,1222 @@ +--- +title: Date and time functions +sidebar_label: Date and time +description: Date and time functions reference documentation. +--- + +This page describes the available functions to assist with performing time-based +calculations. + +## Date and timestamp format + +The date and timestamp format is formed by units and arbitrary text. A unit is a +combination of letters representing a date or time component, as defined by the +table below. The letters used to form a unit are case-sensitive. + +See +[Timestamps in QuestDB](/docs/guides/working-with-timestamps-timezones/#timestamps-in-questdb) +for more examples of how the units are used to parse inputs. + +| Unit | Date or Time Component | Presentation | Examples | +| ------ | -------------------------------------------------------------------------------------------------------------- | ------------------ | ------------------------------------- | +| `G` | Era designator | Text | AD | +| `y` | `y` single digit or greedy year, depending on the input digit number | Year | 1996; 96; 999; 3 | +| `yy` | Two digit year of the current century | Year | 96 (interpreted as 2096) | +| `yyy` | Three-digit year | Year | 999 | +| `yyyy` | Four-digit year | Year | 1996 | +| `M` | Month in year | Month | July; Jul; 07 | +| `w` | Week in year | Number | 27 | +| `ww` | ISO week of year | Number | 2 | +| `D` | Day in year | Number | 189 | +| `d` | Day in month | Number | 10 | +| `F` | Day of week in month | Number | 2 | +| `E` | Day name in week | Text | Tuesday; Tue | +| `u` | Day number of week (1 = Monday, ..., 7 = Sunday) | Number | 1 | +| `a` | Am/pm marker | Text | PM | +| `H` | Hour in day (0-23) | Number | 0 | +| `k` | Hour in day (1-24) | Number | 24 | +| `K` | Hour in am/pm (0-11) | Number | 0 | +| `h` | Hour in am/pm (1-12) | Number | 12 | +| `m` | Minute in hour | Number | 30 | +| `s` | Second in minute | Number | 55 | +| `SSS` | 3-digit millisecond | Number | 978 | +| `S` | Millisecond up to 3 digits: `S` parses 1 digit when followed by another `unit`. Otherwise, it parses 3 digits. | Number | 900 | +| `z` | Time zone | General time zone | Pacific Standard Time; PST; GMT-08:00 | +| `Z` | Time zone | RFC 822 time zone | -0800 | +| `X` | Time zone | ISO 8601 time zone | -08; -0800; -08:00 | +| `UUU` | 3-digit microsecond | Number | 698 | +| `U` | Microsecond up to 3 digits: `U` parses 1 digit when followed by another `unit`. Otherwise, it parses 3 digits. | Number | 600 | +| `U+` | 6-digit microsecond | Number | 600 | +| `N` | Nanosecond. QuestDB provides microsecond resolution so the parsed nanosecond will be truncated. | Number | N/A (truncated) | +| `N+` | 9-digit nanosecond. 
QuestDB provides microsecond resolution so the parsed nanosecond will be truncated. | Number | N/A (truncated) | + +### Examples for greedy year format `y` + +The interpretation of `y` depends on the input digit number: + +- If the input year is a two-digit number, the output timestamp assumes the + current century. +- Otherwise, the number is interpreted as it is. + +| Input year | Timestamp value interpreted by `y-M` | Notes | +| ---------- | ------------------------------------ | ---------------------------------------------------- | +| `5-03` | `0005-03-01T00:00:00.000000Z` | Greedily parsing the number as it is | +| `05-03` | `2005-03-01T00:00:00.000000Z` | Greedily parsing the number assuming current century | +| `005-03` | `0005-03-01T00:00:00.000000Z` | Greedily parsing the number as it is | +| `0005-03` | `0005-03-01T00:00:00.000000Z` | Greedily parsing the number as it is | + +## date_trunc + +`date_trunc(unit, timestamp)` - returns a timestamps truncated to the selected precision + +**Arguments:** + +- `unit` is one of the following: + + - `millennium` + - `decade` + - `century` + - `year` + - `quarter` + - `month` + - `week` + - `day` + - `hour` + - `minute` + - `second` + - `milliseconds` + - `microseconds` + + +- `timestamp` is any timestamp value. + +**Return value:** + +Return value type is `timestamp` + +**Examples:** + +```questdb-sql +SELECT date_trunc('hour', '2022-03-11T22:00:30.555555Z') hour, +date_trunc('month', '2022-03-11T22:00:30.555555Z') month, +date_trunc('year','2022-03-11T22:00:30.555555Z') year; +``` + +| hour | month | year | +|--------------------------------|--------------------------------|--------------------------------| +| 2022-03-11T22:00:00.000000Z | 2022-03-01T00:00:00.000000Z | 2022-01-01T00:00:00.000000Z | + + +## dateadd + +`dateadd(period, n, startDate)` - adds `n` `period` to `startDate`. + +**Arguments:** + +- `period` is a `char`. Period to be added. Available periods are: + + - `s`: second + - `m`: minute + - `h`: hour + - `d`: day + - `w`: week + - `M`: month + - `y`: year + +- `n` is an `int` indicating the number of periods to add. +- `startDate` is a timestamp or date indicating the timestamp to add the period to. + +**Return value:** + +Return value type is `timestamp` + +**Examples:** + +```questdb-sql title="Adding hours" +SELECT systimestamp(), dateadd('h', 2, systimestamp()) +FROM long_sequence(1); +``` + +| systimestamp | dateadd | +| :-------------------------- | :-------------------------- | +| 2020-04-17T00:30:51.380499Z | 2020-04-17T02:30:51.380499Z | + +```questdb-sql title="Adding days" +SELECT systimestamp(), dateadd('d', 2, systimestamp()) +FROM long_sequence(1); +``` + +| systimestamp | dateadd | +| :-------------------------- | :-------------------------- | +| 2020-04-17T00:30:51.380499Z | 2020-04-19T00:30:51.380499Z | + +```questdb-sql title="Adding months" +SELECT systimestamp(), dateadd('M', 2, systimestamp()) +FROM long_sequence(1); +``` + +| systimestamp | dateadd | +| :-------------------------- | :-------------------------- | +| 2020-04-17T00:30:51.380499Z | 2020-06-17T00:30:51.380499Z | + +## datediff + +`datediff(period, date1, date2)` - returns the absolute number of `period` +between `date1` and `date2`. + +**Arguments:** + +- `period` is a char. Period to be added. Available periods are: + + - `s`: second + - `m`: minute + - `h`: hour + - `d`: day + - `w`: week + - `M`: month + - `y`: year + +- `date1` and `date2` are date or timestamp defining the dates to compare. 
+ +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Difference in days" +SELECT datediff('d', '2020-01-23', '2020-01-27'); +``` + +| datediff | +| :------- | +| 4 | + +```questdb-sql title="Difference in months" +SELECT datediff('M', '2020-01-23', '2020-02-27'); +``` + +| datediff | +| :------- | +| 1 | + + +## day + +`day(value)` - returns the `day` of month for a given date or timestamp from `1` +to `31`. + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Day of the month" +SELECT day(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss')) +FROM long_sequence(1); +``` + +| day | +| :-- | +| 01 | + +```questdb-sql title="Using in an aggregation" +SELECT day(ts), count() FROM transactions; +``` + +| day | count | +| :-- | :---- | +| 1 | 2323 | +| 2 | 6548 | +| ... | ... | +| 30 | 9876 | +| 31 | 2567 | + +## day_of_week + +`day_of_week(value)` - returns the day number in a week from `1` (Monday) to `7` +(Sunday) + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql +SELECT to_str(ts,'EE'),day_of_week(ts) FROM myTable; +``` + +| day | day_of_week | +| :-------- | :---------- | +| Monday | 1 | +| Tuesday | 2 | +| Wednesday | 3 | +| Thursday | 4 | +| Friday | 5 | +| Saturday | 6 | +| Sunday | 7 | + +## day_of_week_sunday_first + +`day_of_week_sunday_first(value)` - returns the day number in a week from `1` +(Sunday) to `7` (Saturday) + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql +SELECT to_str(ts,'EE'),day_of_week_sunday_first(ts) FROM myTable; +``` + +| day | day_of_week_sunday_first | +| :-------- | :----------------------- | +| Monday | 2 | +| Tuesday | 3 | +| Wednesday | 4 | +| Thursday | 5 | +| Friday | 6 | +| Saturday | 7 | +| Sunday | 1 | + +## extract + +`extract (unit, timestamp)` - returns the selected time unit from the input timestamp. + +**Arguments:** + +- `unit` is one of the following: + + - `millennium` + - `epoch` + - `decade` + - `century` + - `year` + - `isoyear` + - `doy` (day of year) + - `quarter` + - `month` + - `week` + - `dow` (day of week) + - `isodow` + - `day` + - `hour` + - `minute` + - `second` + - `microseconds` + - `milliseconds` + +- `timestamp` is any timestamp value. + +**Return value:** + +Return value type is `integer`. 
+
+**Examples:**
+
+```questdb-sql
+SELECT extract(millennium from '2023-03-11T22:00:30.555555Z') millennium,
+extract(year from '2023-03-11T22:00:30.555555Z') year,
+extract(month from '2023-03-11T22:00:30.555555Z') month,
+extract(week from '2023-03-11T22:00:30.555555Z') week,
+extract(hour from '2023-03-11T22:00:30.555555Z') hour,
+extract(second from '2023-03-11T22:00:30.555555Z') second;
+```
+
+| millennium | year | month | week | hour | second |
+|------------|------|-------|------|------|--------|
+| 3 | 2023 | 3 | 10 | 22 | 30 |
+
+## hour
+
+`hour(value)` - returns the `hour` of day for a given date or timestamp from `0`
+to `23`
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `int`
+
+**Examples:**
+
+```questdb-sql title="Hour of the day"
+SELECT hour(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss'))
+FROM long_sequence(1);
+```
+
+| hour |
+| :--- |
+| 15 |
+
+```questdb-sql title="Using in an aggregation"
+SELECT hour(ts), count() FROM transactions;
+```
+
+| hour | count |
+| :--- | :---- |
+| 0 | 2323 |
+| 1 | 6548 |
+| ... | ... |
+| 22 | 9876 |
+| 23 | 2567 |
+
+## is_leap_year
+
+`is_leap_year(value)` - returns `true` if the `year` of `value` is a leap year,
+`false` otherwise.
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `boolean`
+
+**Examples:**
+
+```questdb-sql
+SELECT year(ts), is_leap_year(ts) FROM myTable;
+```
+
+| year | is_leap_year |
+| :--- | :----------- |
+| 2020 | true |
+| 2021 | false |
+| 2022 | false |
+| 2023 | false |
+| 2024 | true |
+| 2025 | false |
+
+## days_in_month
+
+`days_in_month(value)` - returns the number of days in a month from a provided
+timestamp or date.
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `int`
+
+**Examples:**
+
+```questdb-sql
+SELECT month(ts), days_in_month(ts) FROM myTable;
+```
+
+| month | days_in_month |
+| :---- | :------------ |
+| 4 | 30 |
+| 5 | 31 |
+| 6 | 30 |
+| 7 | 31 |
+| 8 | 31 |
+
+## micros
+
+`micros(value)` - returns the `micros` of the millisecond for a given date or
+timestamp from `0` to `999`
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `int`
+
+**Examples:**
+
+```questdb-sql title="Micros of the millisecond"
+SELECT micros(to_timestamp('2020-03-01:15:43:21.123456', 'yyyy-MM-dd:HH:mm:ss.SSSUUU'))
+FROM long_sequence(1);
+```
+
+| micros |
+| :----- |
+| 456 |
+
+```questdb-sql title="Parsing 3 digits when no unit is added after U"
+SELECT micros(to_timestamp('2020-03-01:15:43:21.123456', 'yyyy-MM-dd:HH:mm:ss.SSSU'))
+FROM long_sequence(1);
+```
+
+| micros |
+| :----- |
+| 456 |
+
+```questdb-sql title="Using in an aggregation"
+SELECT micros(ts), count() FROM transactions;
+```
+
+| micros | count |
+| :----- | :---- |
+| 0 | 2323 |
+| 1 | 6548 |
+| ... | ...
| +| 998 | 9876 | +| 999 | 2567 | + +## millis + +`millis(value)` - returns the `millis` of the second for a given date or +timestamp from `0` to `999` + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Millis of the second" +SELECT millis( + to_timestamp('2020-03-01:15:43:21.123456', 'yyyy-MM-dd:HH:mm:ss.SSSUUU')) +FROM long_sequence(1); +``` + +| millis | +| :----- | +| 123 | + +```questdb-sql title="Parsing 3 digits when no unit is added after S" +SELECT millis(to_timestamp('2020-03-01:15:43:21.123', 'yyyy-MM-dd:HH:mm:ss.S')) +FROM long_sequence(1); +``` + +| millis | +| :----- | +| 123 | + +```questdb-sql title="Using in an aggregation" +SELECT millis(ts), count() FROM transactions; +``` + +| second | count | +| :----- | :---- | +| 0 | 2323 | +| 1 | 6548 | +| ... | ... | +| 998 | 9876 | +| 999 | 2567 | + +## minute + +`minute(value)` - returns the `minute` of the hour for a given date or timestamp +from `0` to `59` + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Minute of the hour" +SELECT minute(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss')) +FROM long_sequence(1); +``` + +| minute | +| :----- | +| 43 | + +```questdb-sql title="Using in an aggregation" +SELECT minute(ts), count() FROM transactions; +``` + +| minute | count | +| :----- | :---- | +| 0 | 2323 | +| 1 | 6548 | +| ... | ... | +| 58 | 9876 | +| 59 | 2567 | + +## month + +`month(value)` - returns the `month` of year for a given date or timestamp from +`1` to `12` + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Month of the year" +SELECT month(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss')) +FROM long_sequence(1); +``` + +| month | +| :---- | +| 03 | + +```questdb-sql title="Using in an aggregation" +SELECT month(ts), count() FROM transactions; +``` + +| month | count | +| :---- | :---- | +| 1 | 2323 | +| 2 | 6548 | +| ... | ... | +| 11 | 9876 | +| 12 | 2567 | + + +## now + +`now()` - offset from UTC Epoch in microseconds. + +Calculates `UTC timestamp` using system's real time clock. Unlike +`systimestamp()`, it does not change within the query execution timeframe and +should be used in WHERE clause to filter designated timestamp column relative to +current time, i.e.: + +- `SELECT now() FROM long_sequence(200)` will return the same timestamp for all + rows +- `SELECT systimestamp() FROM long_sequence(200)` will have new timestamp values + for each row + +**Arguments:** + +- `now()` does not accept arguments. + +**Return value:** + +Return value type is `timestamp`. 
+ +**Examples:** + +```questdb-sql title="Filter records to created within last day" +SELECT created, origin FROM telemetry +WHERE created > dateadd('d', -1, now()); +``` + +| created | origin | +| :-------------------------- | :----- | +| 2021-02-01T21:51:34.443726Z | 1 | + +```questdb-sql title="Query returns same timestamp in every row" +SELECT now() FROM long_sequence(3) +``` + +| now | +| :-------------------------- | +| 2021-02-01T21:51:34.443726Z | +| 2021-02-01T21:51:34.443726Z | +| 2021-02-01T21:51:34.443726Z | + +```questdb-sql title="Query based on last minute" +SELECT * FROM readings +WHERE date_time > now() - 60000000L; +``` + +## pg_postmaster_start_time + +`pg_postmaster_start_time()` - returns the time when the server started. + +**Arguments** + +- `pg_postmaster_start_time()` does not accept arguments. + +**Return value:** + +Return value type is `timestamp` + +**Examples** + +```questdb-sql +SELECT pg_postmaster_start_time(); +``` + +| pg_postmaster_start_time | +| :--------------------------:| +| 2023-03-30T16:20:29.763961Z | + +## second + +`second(value)` - returns the `second` of the minute for a given date or +timestamp from `0` to `59` + +**Arguments:** + +- `value` is any `timestamp` or `date` + +**Return value:** + +Return value type is `int` + +**Examples:** + +```questdb-sql title="Second of the minute" +SELECT second(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss')) +FROM long_sequence(1); +``` + +| second | +| :----- | +| 43 | + +```questdb-sql title="Using in an aggregation" +SELECT second(ts), count() FROM transactions; +``` + +| second | count | +| :----- | :---- | +| 0 | 2323 | +| 1 | 6548 | +| ... | ... | +| 58 | 9876 | +| 59 | 2567 | + + +## systimestamp + +`systimestamp()` - offset from UTC Epoch in microseconds. Calculates +`UTC timestamp` using system's real time clock. The value is affected by +discontinuous jumps in the system time (e.g., if the system administrator +manually changes the system time). + +`systimestamp()` value can change within the query execution timeframe and +should **NOT** be used in WHERE clause to filter designated timestamp column. + +:::tip + +Use now() with WHERE clause filter. + +::: + +**Arguments:** + +- `systimestamp()` does not accept arguments. + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql title="Insert current system timestamp" +INSERT INTO readings +VALUES(systimestamp(), 123.5); +``` + +| ts | reading | +| :-------------------------- | :------ | +| 2020-01-02T19:28:48.727516Z | 123.5 | + +## sysdate + +`sysdate()` - returns the timestamp of the host system as a `date` with +`millisecond` precision. + +Calculates `UTC date` with millisecond precision using system's real time clock. +The value is affected by discontinuous jumps in the system time (e.g., if the +system administrator manually changes the system time). + + +`sysdate()` value can change within the query execution timeframe and +should **NOT** be used in WHERE clause to filter designated timestamp column. + +:::tip + +Use `now()` with WHERE clause filter. + +::: + +**Arguments:** + +- `sysdate()` does not accept arguments. + +**Return value:** + +Return value type is `date`. 
+ +**Examples:** + +```questdb-sql title="Insert current system date along with a value" +INSERT INTO readings +VALUES(sysdate(), 123.5); +``` + +| sysdate | reading | +| :-------------------------- | :------ | +| 2020-01-02T19:28:48.727516Z | 123.5 | + +```questdb-sql title="Query based on last minute" +SELECT * FROM readings +WHERE date_time > sysdate() - 60000000L; +``` + +## timestamp_ceil + +`timestamp_ceil(unit, timestamp)` - performs a ceiling calculation on a +timestamp by given unit. + +A unit must be provided to specify which granularity to perform rounding. + +**Arguments:** + +`timestamp_ceil(unit, timestamp)` has the following arguments: + +`unit` - may be one of the following: + +- `T` milliseconds +- `s` seconds +- `m` minutes +- `h` hours +- `d` days +- `M` months +- `y` year + +`timestamp` - any timestamp value + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql +WITH t AS (SELECT cast('2016-02-10T16:18:22.862145Z' AS timestamp) ts) +SELECT + ts, + timestamp_ceil('T', ts) c_milli, + timestamp_ceil('s', ts) c_second, + timestamp_ceil('m', ts) c_minute, + timestamp_ceil('h', ts) c_hour, + timestamp_ceil('d', ts) c_day, + timestamp_ceil('M', ts) c_month, + timestamp_ceil('y', ts) c_year + FROM t +``` + +| ts | c_milli | c_second | c_minute | c_hour | c_day | c_month | c_year | +| :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :--------------------------- | +| 2016-02-10T16:18:22.862145Z | 2016-02-10T16:18:22.863000Z | 2016-02-10T16:18:23.000000Z | 2016-02-10T16:19:00.000000Z | 2016-02-10T17:00:00.000000Z | 2016-02-11T00:00:00.000000Z | 2016-03-01T00:00:00.000000Z | 2017-01-01T00:00:00.000000Z" | + +## timestamp_floor + +`timestamp_floor(unit, timestamp)` - performs a floor calculation on a timestamp +by given unit. + +A unit must be provided to specify which granularity to perform rounding. + +**Arguments:** + +`timestamp_floor(unit, timestamp)` has the following arguments: + +`unit` - may be one of the following: + +- `T` milliseconds +- `s` seconds +- `m` minutes +- `h` hours +- `d` days +- `M` months +- `y` year + +`timestamp` - any timestamp value + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql +WITH t AS (SELECT cast('2016-02-10T16:18:22.862145Z' AS timestamp) ts) +SELECT + ts, + timestamp_floor('T', ts) f_milli, + timestamp_floor('s', ts) f_second, + timestamp_floor('m', ts) f_minute, + timestamp_floor('h', ts) f_hour, + timestamp_floor('d', ts) f_day, + timestamp_floor('M', ts) f_month, + timestamp_floor('y', ts) f_year + FROM t +``` + +| ts | f_milli | f_second | f_minute | f_hour | f_day | f_month | f_year | +| :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | +| 2016-02-10T16:18:22.862145Z | 2016-02-10T16:18:22.862000Z | 2016-02-10T16:18:22.000000Z | 2016-02-10T16:18:00.000000Z | 2016-02-10T16:00:00.000000Z | 2016-02-10T00:00:00.000000Z | 2016-02-01T00:00:00.000000Z | 2016-01-01T00:00:00.000000Z | + + +## timestamp_shuffle + +`timestamp_shuffle(timestamp_1, timestamp_2)` - generates a random timestamp inclusively between the two input timestamps. 
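+
+For instance, it can be paired with `long_sequence()` to scatter rows across a
+recent window when generating test data. The sketch below is illustrative only;
+the one-day window and row count are arbitrary and not part of the reference:
+
+```questdb-sql
+-- Sketch: 10 random timestamps spread over the last 24 hours.
+SELECT timestamp_shuffle(dateadd('d', -1, now()), now()) AS random_ts
+FROM long_sequence(10);
+```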
+ +**Arguments:** + +- `timestamp_1` - any timestamp value +- `timestamp_2` - a timestamp value that is not equal to `timestamp_1` + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql +SELECT timestamp_shuffle('2023-03-31T22:00:30.555998Z', '2023-04-01T22:00:30.555998Z'); +``` + +| timestamp_shuffle | +| :-------------------------- | +| 2023-04-01T11:44:41.893394Z | + +## to_date + +`to_date(string, format)` - converts string to `date` by using the supplied +`format` to extract the value. + +Will convert a `string` to `date` using the format definition passed as an +argument. When the `format` definition does not match the `string` input, the +result will be `null`. + +For more information about recognized timestamp formats, see the +[date and timestamp format section](#date-and-timestamp-format). + +**Arguments:** + +- `string` is any string that represents a date and/or time. +- `format` is a string that describes the `date format` in which `string` is + expressed. + +**Return value:** + +Return value type is `date` + +**Examples:** + +```questdb-sql title="string matches format" +SELECT to_date('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss') +FROM long_sequence(1); +``` + +| to_date | +| :----------------------- | +| 2020-03-01T15:43:21.000Z | + +```questdb-sql title="string does not match format" +SELECT to_date('2020-03-01:15:43:21', 'yyyy') +FROM long_sequence(1); +``` + +| to_date | +| :------ | +| null | + +```questdb-sql title="Using with INSERT" +INSERT INTO measurements +values(to_date('2019-12-12T12:15', 'yyyy-MM-ddTHH:mm'), 123.5); +``` + +| date | value | +| :----------------------- | :---- | +| 2019-12-12T12:15:00.000Z | 123.5 | + +## to_str + +`to_str(value, format)` - converts date or timestamp value to a string in the +specified format + +Will convert a date or timestamp value to a string using the format definition +passed as an argument. When elements in the `format` definition are +unrecognized, they will be passed-through as string. + +For more information about recognized timestamp formats, see the +[date and timestamp format section](#date-and-timestamp-format). + +**Arguments:** + +- `value` is any `date` or `timestamp` +- `format` is a timestamp format. + +**Return value:** + +Return value type is `string` + +**Examples:** + +- Basic example + +```questdb-sql +SELECT to_str(systimestamp(), 'yyyy-MM-dd') FROM long_sequence(1); +``` + +| to_str | +| :--------- | +| 2020-03-04 | + +- With unrecognized timestamp definition + +```questdb-sql +SELECT to_str(systimestamp(), 'yyyy-MM-dd gooD DAY 123') FROM long_sequence(1); +``` + +| to_str | +| :---------------------- | +| 2020-03-04 gooD DAY 123 | + +## to_timestamp + +`to_timestamp(string, format)` - converts `string` to `timestamp` by using the +supplied `format` to extract the value with microsecond precision. + +When the `format` definition does not match the `string` input, the result will +be `null`. + +For more information about recognized timestamp formats, see the +[date and timestamp format section](#date-and-timestamp-format). + +**Arguments:** + +- `string` is any string that represents a date and/or time. +- `format` is a string that describes the timestamp format in which `string` is + expressed. + +**Return value:** + +Return value type is `timestamp`. QuestDB provides `timestamp` with microsecond +resolution. Input strings with nanosecond precision will be parsed but lose the +precision. 
+ +**Examples:** + +```questdb-sql title="Pattern matching with microsecond precision" +SELECT to_timestamp('2020-03-01:15:43:21.127329', 'yyyy-MM-dd:HH:mm:ss.SSSUUU') +FROM long_sequence(1); +``` + +| to_timestamp | +| :-------------------------- | +| 2020-03-01T15:43:21.127329Z | + +```questdb-sql title="Precision loss when pattern matching with nanosecond precision" +SELECT to_timestamp('2020-03-01:15:43:00.000000001Z', 'yyyy-MM-dd:HH:mm:ss.SSSUUUNNNZ') +FROM long_sequence(1); +``` + +| to_timestamp | +| :-------------------------- | +| 2020-03-01T15:43:00.000000Z | + +```questdb-sql title="String does not match format" +SELECT to_timestamp('2020-03-01:15:43:21', 'yyyy') +FROM long_sequence(1); +``` + +| to_timestamp | +| :----------- | +| null | + +```questdb-sql title="Using with INSERT" +INSERT INTO measurements +values(to_timestamp('2019-12-12T12:15', 'yyyy-MM-ddTHH:mm'), 123.5); +``` + +| timestamp | value | +| :-------------------------- | :---- | +| 2019-12-12T12:15:00.000000Z | 123.5 | + +Note that conversion of ISO timestamp format is optional. QuestDB automatically +converts `string` to `timestamp` if it is a partial or full form of +`yyyy-MM-ddTHH:mm:ss.SSSUUU` or `yyyy-MM-dd HH:mm:ss.SSSUUU` with a valid time +offset, `+01:00` or `Z`. See more examples in +[Native timestamp](/docs/reference/sql/where/#native-timestamp-format) + + +## to_timezone + +`to_timezone(timestamp, timezone)` - converts a timestamp value to a specified +timezone. For more information on the time zone database used for this function, +see the +[QuestDB time zone database documentation](/docs/guides/working-with-timestamps-timezones/). + +**Arguments:** + +- `timestamp` is any `timestamp` as Unix timestamp or string equivalent +- `timezone` may be `Country/City` tz database name, time zone abbreviation such + as `PST` or in UTC offset in string format. + +**Return value:** + +Return value type is `timestamp` + +**Examples:** + +- Unix UTC timestamp in microseconds to `Europe/Berlin` + +```questdb-sql +SELECT to_timezone(1623167145000000, 'Europe/Berlin') +``` + +| to_timezone | +| :-------------------------- | +| 2021-06-08T17:45:45.000000Z | + +- Unix UTC timestamp in microseconds to PST by UTC offset + +```questdb-sql +SELECT to_timezone(1623167145000000, '-08:00') +``` + +| to_timezone | +| :-------------------------- | +| 2021-06-08T07:45:45.000000Z | + +- Timestamp as string to `PST` + +```questdb-sql +SELECT to_timezone('2021-06-08T13:45:45.000000Z', 'PST') +``` + +| to_timezone | +| :-------------------------- | +| 2021-06-08T06:45:45.000000Z | + +## to_utc + +`to_utc(timestamp, timezone)` - converts a timestamp by specified timezone to +UTC. May be provided a timezone in string format or a UTC offset in hours and +minutes. For more information on the time zone database used for this function, +see the +[QuestDB time zone database documentation](/docs/guides/working-with-timestamps-timezones/). + +**Arguments:** + +- `timestamp` is any `timestamp` as Unix timestamp or string equivalent +- `timezone` may be `Country/City` tz database name, time zone abbreviation such + as `PST` or in UTC offset in string format. 
+
+**Return value:**
+
+Return value type is `timestamp`
+
+**Examples:**
+
+- Convert a Unix timestamp in microseconds from the `Europe/Berlin` timezone to
+  UTC
+
+```questdb-sql
+SELECT to_utc(1623167145000000, 'Europe/Berlin')
+```
+
+| to_utc |
+| :-------------------------- |
+| 2021-06-08T13:45:45.000000Z |
+
+- Convert a Unix timestamp in microseconds from a `-08:00` UTC offset to UTC
+
+```questdb-sql
+SELECT to_utc(1623167145000000, '-08:00')
+```
+
+| to_utc |
+| :-------------------------- |
+| 2021-06-08T23:45:45.000000Z |
+
+- Convert a timestamp given as a string in `PST` to UTC
+
+```questdb-sql
+SELECT to_utc('2021-06-08T13:45:45.000000Z', 'PST')
+```
+
+| to_utc |
+| :-------------------------- |
+| 2021-06-08T20:45:45.000000Z |
+
+## week_of_year
+
+`week_of_year(value)` - returns the week number within the year for a given date
+or timestamp
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `int`
+
+**Examples:**
+
+```questdb-sql
+SELECT week_of_year('2023-03-31T22:00:30.555998Z');
+```
+
+| week_of_year |
+| :-----------:|
+| 13 |
+
+## year
+
+`year(value)` - returns the `year` for a given date or timestamp
+
+**Arguments:**
+
+- `value` is any `timestamp` or `date`
+
+**Return value:**
+
+Return value type is `int`
+
+**Examples:**
+
+```questdb-sql title="Year"
+SELECT year(to_timestamp('2020-03-01:15:43:21', 'yyyy-MM-dd:HH:mm:ss'))
+FROM long_sequence(1);
+```
+
+| year |
+| :--- |
+| 2020 |
+
+```questdb-sql title="Using in an aggregation"
+SELECT year(ts), count() FROM transactions;
+```
+
+| year | count |
+| :--- | :---- |
+| 2015 | 2323 |
+| 2016 | 9876 |
+| 2017 | 2567 |
diff --git a/docs/reference/function/ipv4.md b/docs/reference/function/ipv4.md
new file mode 100644
index 00000000..31f8d146
--- /dev/null
+++ b/docs/reference/function/ipv4.md
@@ -0,0 +1,37 @@
+---
+title: IPv4 Supported SQL Functions
+sidebar_label: IPv4
+description: A list of SQL functions supported by the IPv4 data type.
+---
+
+The IPv4 data type supports a wide range of SQL functions.
+
+For the full list of IPv4 operators and more detailed syntax, see the
+[IPv4 Operators page](/docs/reference/operators/ipv4/).
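+
+As a quick illustration, the sketch below creates a table with an IPv4 column
+and aggregates traffic for a subnet. It is only a sketch: the `example` table
+and its `ip`, `bytes`, and `ts` columns are hypothetical, chosen to mirror the
+names used in the statements listed below.
+
+```questdb-sql
+-- Hypothetical table with an IPv4 column.
+CREATE TABLE example (
+  ip ipv4,
+  bytes LONG,
+  ts TIMESTAMP
+) TIMESTAMP(ts) PARTITION BY DAY;
+
+-- Total bytes per address, restricted to an illustrative subnet.
+SELECT ip, SUM(bytes)
+FROM example
+WHERE ip << '192.168.0.0/16';
+```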
+ +## Supported SQL Functions + +| Command | Example SQL Statement | +|---------|---------------| +| **CASE** | `SELECT ip, bytes, CASE WHEN ip << '2.65.32.1/2' THEN 1 ELSE 0 END FROM example` | +| **COUNT** | `SELECT COUNT(ip), bytes FROM example` | +| **FIRST** | `SELECT FIRST(ip) FROM example` | +| **LAST** | `SELECT LAST(ip) FROM example` | +| **FULL JOIN** | `SELECT a.count, a.ip, b.ip2, b.count2 FROM '*!*example' a JOIN '*!*example2' b ON b.ip2 = a.ip` | +| **GROUP BY** | `SELECT COUNT(count), ip FROM example GROUP BY ip` | +| **COUNT_DISTINCT** | `SELECT COUNT_DISTINCT(ip) FROM example` | +| **DISTINCT** | `SELECT DISTINCT ip FROM example ORDER BY ip` | +| **SELECT** | `SELECT ip FROM x EXCEPT SELECT ip2 FROM y` | +| **INTERSECT** | `SELECT ip FROM x INTERSECT SELECT ip2 FROM y` | +| **ISORDERED** | `SELECT ISORDERED(ip) FROM example` | +| **NULLIF** | `SELECT k, NULLIF(ip, '0.0.0.5') FROM example` | +| **RANK** | `SELECT ip, bytes, RANK() OVER (ORDER BY ip ASC) rank FROM example ORDER BY rank` | +| **UNION** | `SELECT ip FROM x UNION SELECT ip2 FROM y` | +| **UNION ALL** | `SELECT ip FROM x UNION ALL SELECT ip2 FROM y` | +| **INNER JOIN** | `SELECT example.count, example2.count2 FROM example INNER JOIN example2 ON example2.ip2 = example.ip` | +| **LATEST BY** | `SELECT * FROM example LATEST BY ip` | +| **LEFT JOIN** | `SELECT example.ip, example2.ip2, example.count, example2.count2 FROM example LEFT JOIN example2 ON example2.ip2 = example.ip` | +| **MAX** | `SELECT MAX(ip) FROM example` | +| **MIN** | `SELECT MIN(ip) FROM example` | +| **ORDER BY** | `SELECT * FROM example ORDER BY ip, bytes, ts` | +| **SAMPLE BY** | `SELECT ip, ts, SUM(bytes) FROM example SAMPLE BY 1y ORDER BY 2,1` | +| **WHERE** | `SELECT * FROM example WHERE ip = '0.0.0.1'` | \ No newline at end of file diff --git a/docs/reference/function/meta.md b/docs/reference/function/meta.md new file mode 100644 index 00000000..ed35b34a --- /dev/null +++ b/docs/reference/function/meta.md @@ -0,0 +1,263 @@ +--- +title: Meta functions +sidebar_label: Meta +description: Table and database metadata function reference documentation. +--- + +These functions provide table, column and partition information including +metadata. They are particularly useful for checking the table settings for: + +- [Designated timestamp](/docs/concept/designated-timestamp/) column +- [Attached, detached, or attachable](/docs/reference/sql/alter-table-attach-partition/) + partitions +- Partition storage size on disk + +## tables + +`tables()` returns all tables in the database including table metadata. + +**Arguments:** + +- `tables()` does not require arguments. + +**Return value:** + +Returns a `table`. 
+ +**Examples:** + +```questdb-sql title="List all tables" +tables(); +``` + +| id | name | designatedTimestamp | partitionBy | maxUncommittedRows | o3MaxLag | walEnabled | directoryName | dedup | +| --- | ----------- | ------------------- | ----------- | ------------------ | ---------- | ---------- | ---------------- | ----- | +| 1 | my_table | ts | DAY | 500000 | 30000000 0 | false | my_table | false | +| 2 | device_data | null | NONE | 10000 | 30000000 | false | device_data | false | +| 3 | short_lived | null | HOUR | 10000 | 30000000 | false | short_lived (->) | false | + +```questdb-sql title="All tables in reverse alphabetical order" +tables() ORDER BY name DESC; +``` + +| id | name | designatedTimestamp | partitionBy | maxUncommittedRows | o3MaxLag | walEnabled | directoryName | dedup | +| --- | ----------- | ------------------- | ----------- | ------------------ | --------- | ---------- | ---------------- | ----- | +| 2 | device_data | null | NONE | 10000 | 30000000 | false | device_data | false | +| 1 | my_table | ts | DAY | 500000 | 300000000 | false | my_table | false | +| 3 | short_lived | ts | HOUR | 10000 | 30000000 | false | short_lived (->) | false | + +:::note + +`(->)` means the table was created using the +[IN VOLUME](/docs/reference/sql/create-table/#table-target-volume) clause. + +::: + +```questdb-sql title="All tables with a daily partitioning strategy" +tables() WHERE partitionBy = 'DAY' +``` + +| id | name | designatedTimestamp | partitionBy | maxUncommittedRows | walEnabled | directoryName | dedup | +| --- | -------- | ------------------- | ----------- | ------------------ | ---------- | ------------- | ----- | +| 1 | my_table | ts | DAY | 500000 | true | my_table | false | + +## wal_tables + +`wal_tables()` returns the WAL status for all +[WAL tables](/docs/concept/write-ahead-log/) in the database. + +**Arguments:** + +- `wal_tables()` does not require arguments. + +**Return value:** + +Returns a `table` including the following information: + +- `name` - table name +- `suspended` - suspended status flag +- `writerTxn` - the last committed transaction in TableWriter +- `writerLagTxnCount` - the number of transactions that are kept invisible when + writing to the table; these transactions will be eventually moved to the table + data and become visible for readers +- `sequencerTxn` - the last committed transaction in the sequencer + +**Examples:** + +```questdb-sql title="List all tables" +wal_tables(); +``` + +| name | suspended | writerTxn | writerLagTxnCount | sequencerTxn | +| ----------- | --------- | --------- | ----------------- | ------------ | +| sensor_wal | false | 2 | 1 | 4 | +| weather_wal | false | 3 | 0 | 3 | +| test_wal | true | 7 | 1 | 9 | + +## table_columns + +`table_columns('tableName')` returns the schema of a table. + +**Arguments:** + +- `tableName` is the name of an existing table as a string. 
+ +**Return value:** + +Returns a `table` with the following columns: + +- `column` - name of the available columns in the table +- `type` - type of the column +- `indexed` - if indexing is applied to this column +- `indexBlockCapacity` - how many row IDs to store in a single storage block on + disk +- `symbolCached` - whether this `symbol` column is cached +- `symbolCapacity` - how many distinct values this column of `symbol` type is + expected to have +- `designated` - if this is set as the designated timestamp column for this + table +- `upsertKey` - if this column is a part of UPSERT KEYS list for table + [deduplication](/docs/concept/deduplication) + +For more details on the meaning and use of these values, see the +[CREATE TABLE](/docs/reference/sql/create-table/) documentation. + +**Examples:** + +```questdb-sql title="Get all columns in a table" +table_columns('my_table') +``` + +| column | type | indexed | indexBlockCapacity | symbolCached | symbolCapacity | designated | upsertKey | +| ------ | --------- | ------- | ------------------ | ------------ | -------------- | ---------- | --------- | +| symb | SYMBOL | true | 1048576 | false | 256 | false | false | +| price | DOUBLE | false | 0 | false | 0 | false | false | +| ts | TIMESTAMP | false | 0 | false | 0 | true | false | +| s | STRING | false | 0 | false | 0 | false | false | + +```questdb-sql title="Get designated timestamp column" +SELECT column, type, designated FROM table_columns('my_table') WHERE designated = true; +``` + +| column | type | designated | +| ------ | --------- | ---------- | +| ts | TIMESTAMP | true | + +```questdb-sql title="Get the count of column types" +SELECT type, count() FROM table_columns('my_table'); +``` + +| type | count | +| --------- | ----- | +| SYMBOL | 1 | +| DOUBLE | 1 | +| TIMESTAMP | 1 | +| STRING | 1 | + +## table_partitions + +`table_partitions('tableName')` returns information for the partitions of a +table with the option to filter the partitions. + +**Arguments:** + +- `tableName` is the name of an existing table as a string. + +**Return value:** + +Returns a `table` with the following columns: + +- `index` - _INTEGER_, index of the partition (_NaN_ when the partition is not + attached) +- `partitionBy` - _STRING_, one of _NONE_, _HOUR_, _DAY_, _WEEK_, _MONTH_ and + _YEAR_ +- `name` - _STRING_, name of the partition, e.g. 
`2023-03-14`, + `2023-03-14.detached`, `2023-03-14.attachable` +- `minTimestamp` - _LONG_, min timestamp of the partition (_NaN_ when the table + is not partitioned) +- `maxTimestamp` - _LONG_, max timestamp of the partition (_NaN_ when the table + is not partitioned) +- `numRows` - _LONG_, number of rows in the partition +- `diskSize` - _LONG_, size of the partition in bytes +- `diskSizeHuman` - _STRING_, size of the partition meant for humans to read + (same output as function + [size_pretty](/docs/reference/function/numeric/#size_pretty)) +- `readOnly` - _BOOLEAN_, true if the partition is + [attached via soft link](/docs/reference/sql/alter-table-attach-partition/#symbolic-links) +- `active` - _BOOLEAN_, true if the partition is the last partition, and whether + we are writing to it (at least one record) +- `attached` - _BOOLEAN_, true if the partition is + [attached](/docs/reference/sql/alter-table-attach-partition/) +- `detached` - _BOOLEAN_, true if the partition is + [detached](/docs/reference/sql/alter-table-detach-partition/) (`name` of the + partition will contain the `.detached` extension) +- `attachable` - _BOOLEAN_, true if the partition is detached and can be + attached (`name` of the partition will contain the `.attachable` extension) + +**Examples:** + +```questdb-sql title="Create table my_table" +CREATE TABLE my_table AS ( + SELECT + rnd_symbol('EURO', 'USD', 'OTHER') symbol, + rnd_double() * 50.0 price, + rnd_double() * 20.0 amount, + to_timestamp('2023-01-01', 'yyyy-MM-dd') + x * 6 * 3600 * 100000L timestamp + FROM long_sequence(700) +), INDEX(symbol capacity 32) TIMESTAMP(timestamp) PARTITION BY WEEK; +``` + +```questdb-sql title="Get all partitions from my_table" +table_partitions('my_table'); +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | -------- | --------------------- | --------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | WEEK | 2022-W52 | 2023-01-01 00:36:00.0 | 2023-01-01 23:24:00.0 | 39 | 98304 | 96.0 KiB | false | false | true | false | false | +| 1 | WEEK | 2023-W01 | 2023-01-02 00:00:00.0 | 2023-01-08 23:24:00.0 | 280 | 98304 | 96.0 KiB | false | false | true | false | false | +| 2 | WEEK | 2023-W02 | 2023-01-09 00:00:00.0 | 2023-01-15 23:24:00.0 | 280 | 98304 | 96.0 KiB | false | false | true | false | false | +| 3 | WEEK | 2023-W03 | 2023-01-16 00:00:00.0 | 2023-01-18 12:00:00.0 | 101 | 83902464 | 80.0 MiB | false | true | true | false | false | + +```questdb-sql title="Get size of a table in disk" +SELECT size_pretty(sum(diskSize)) FROM table_partitions('my_table') +``` + +| size_pretty | +| ----------- | +| 80.3 MB | + +```questdb-sql title="Get active partition of a table" +SELECT * FROM table_partitions('my_table') WHERE active = true +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | -------- | --------------------- | --------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 3 | WEEK | 2023-W03 | 2023-01-16 00:00:00.0 | 2023-01-18 12:00:00.0 | 101 | 83902464 | 80.0 MiB | false | true | true | false | false | + +## version/pg_catalog.version + +`version()` or `pg_catalog.version()` returns the supported version of the +PostgreSQL Wire Protocol. 
+ +**Arguments:** + +- `version()` or `pg_catalog.version()` does not require arguments. + +**Return value:** + +Returns `string`. + +**Examples:** + +```questdb-sql + +SELECT version(); + +--The above equals to: + +SELECT pg_catalog.version(); +``` + +| version | +| ------------------------------------------------------------------- | +| PostgreSQL 12.3, compiled by Visual C++ build 1914, 64-bit, QuestDB | diff --git a/docs/reference/function/numeric.md b/docs/reference/function/numeric.md new file mode 100644 index 00000000..6b0add56 --- /dev/null +++ b/docs/reference/function/numeric.md @@ -0,0 +1,305 @@ +--- +title: Numeric functions +sidebar_label: Numeric +description: Numeric function reference documentation. +--- + +This page describes the available functions to assist with performing numeric +calculations. + +## abs + +`abs(value)` return the absolute value. The behavior of `abs` is as follows: + +- When the input `value` is positive, `abs` returns `value` +- When the input `value` is negative, `abs` returns `- value` +- When the input `value` is `0`, `abs` returns `0` + +**Arguments:** + +- `value` is any numeric value. + +**Return value:** + +Return value type is the same as the type of the argument. + +**Examples:** + +```questdb-sql +SELECT + x - 2 a, + abs(x -2) +FROM long_sequence(3); +``` + +| a | abs | +| --- | --- | +| -1 | 1 | +| 0 | 0 | +| 1 | 1 | + +## log + +`log(value)` return the natural logarithm (**log*e***) of a given number. + +**Arguments:** + +- `value` is any numeric value. + +**Return value:** + +Return value type is `double`. + +**Examples:** + +```questdb-sql +SELECT log(4.123) +``` + +| log | +| ------------ | +| 1.4165810537 | + +## power + +`power(base, exponent)` returns the value of a number `base` raised to the +power defined by `exponent`. + +**Arguments:** + +- `base` is any numeric value. +- `exponent` is any numeric value. + +**Return value:** + +Return value type is `double`. + +**Examples:** + +```questdb-sql +SELECT power(2, 3); +``` + +| power | +| ----- | +| 8 | + +## round + +`round(value, scale)` returns the **closest** value in the specified scale. It +uses the "half up" tie-breaking method when the value is exactly halfway between +the `round_up` and `round_down` values. + +**Arguments:** + +- `value` is any numeric value. +- `scale` is the number of decimal points returned. A negative scale means the + rounding will occur to a digit to the left of the decimal point. For example, + -1 means the number will be rounded to the nearest tens and +1 to the nearest + tenths. + +**Return value:** + +Return value type is `double`. + +**Examples:** + +```questdb-sql +SELECT + d, + round(d, -2), + round(d, -1), + round(d,0), + round(d,1), + round(d,2) +FROM dbl; +``` + +| d | round-2 | round-1 | round0 | round1 | round2 | +| ------------ | ------- | ------- | ------ | ------ | ------- | +| -0.811905406 | 0 | 0 | -1 | -0.8 | -0.81 | +| -5.002768547 | 0 | -10 | -5 | -5 | -5 | +| -64.75487334 | -100 | -60 | -65 | -64.8 | -64.75 | +| -926.531695 | -900 | -930 | -927 | -926.5 | -926.53 | +| 0.069361448 | 0 | 0 | 0 | 0.1 | 0.07 | +| 4.003627053 | 0 | 0 | 4 | 4 | 4 | +| 86.91359825 | 100 | 90 | 87 | 86.9 | 86.91 | +| 376.3807766 | 400 | 380 | 376 | 376.4 | 376.38 | + +## round_down + +`round_down(value, scale)` - rounds a value down to the specified scale + +**Arguments:** + +- `value` is any numeric value. +- `scale` is the number of decimal points returned. A negative scale means the + rounding will occur to a digit to the left of the decimal point. 
For example,
+  -1 means the number will be rounded to the nearest tens and +1 to the nearest
+  tenths.
+
+**Return value:**
+
+Return value type is `double`.
+
+**Examples:**
+
+```questdb-sql
+SELECT
+  d,
+  round_down(d, -2),
+  round_down(d, -1),
+  round_down(d,0),
+  round_down(d,1),
+  round_down(d,2)
+FROM dbl;
+```
+
+| d | r_down-2 | r_down-1 | r_down0 | r_down1 | r_down2 |
+| ------------ | -------- | -------- | ------- | ------- | ------- |
+| -0.811905406 | 0 | 0 | 0 | -0.8 | -0.81 |
+| -5.002768547 | 0 | 0 | -5 | -5 | -5 |
+| -64.75487334 | 0 | -60 | -64 | -64.7 | -64.75 |
+| -926.531695 | -900 | -920 | -926 | -926.5 | -926.53 |
+| 0.069361448 | 0 | 0 | 0 | 0 | 0.06 |
+| 4.003627053 | 0 | 0 | 4 | 4 | 4 |
+| 86.91359825 | 0 | 80 | 86 | 86.9 | 86.91 |
+| 376.3807766 | 400 | 370 | 376 | 376.3 | 376.38 |
+
+## round_half_even
+
+`round_half_even(value, scale)` - returns the **closest** value in the specified
+scale. It uses the "half even" (banker's rounding) tie-breaking method when the
+value is exactly halfway between the `round_up` and `round_down` values.
+
+**Arguments:**
+
+- `value` is any numeric value.
+- `scale` is the number of decimal points returned. A negative scale means the
+  rounding will occur to a digit to the left of the decimal point. For example,
+  -1 means the number will be rounded to the nearest tens and +1 to the nearest
+  tenths.
+
+**Return value:**
+
+Return value type is `double`.
+
+**Examples:**
+
+```questdb-sql title="Tie-breaker behavior"
+SELECT
+  round_half_even(5.55, 1),
+  round_half_even(5.65, 1)
+FROM long_sequence(1);
+```
+
+| round_half_even | round_half_even |
+| --------------- | --------------- |
+| 5.6 | 5.6 |
+
+```questdb-sql title="More examples"
+SELECT
+  d,
+  round_half_even(d, -2),
+  round_half_even(d, -1),
+  round_half_even(d,0),
+  round_half_even(d,1),
+  round_half_even(d,2)
+FROM dbl;
+```
+
+| d | r_h_e-2 | r_h_e-1 | r_h_e0 | r_h_e1 | r_h_e2 |
+| ------------ | ------- | ------- | ------ | ------ | ------- |
+| -0.811905406 | 0 | 0 | -1 | -0.8 | -0.81 |
+| -5.002768547 | 0 | 0 | -5 | -5 | -5 |
+| -64.75487334 | -100 | -60 | -65 | -64.8 | -64.75 |
+| -926.531695 | -900 | -930 | -927 | -926.5 | -926.53 |
+| 0.069361448 | 0 | 0 | 0 | 0.1 | 0.07 |
+| 4.003627053 | 0 | 0 | 4 | 4 | 4 |
+| 86.91359825 | 100 | 90 | 87 | 86.9 | 86.91 |
+| 376.3807766 | 400 | 380 | 376 | 376.4 | 376.38 |
+
+## round_up
+
+`round_up(value, scale)` - rounds a value up to the specified scale
+
+**Arguments:**
+
+- `value` is any numeric value.
+- `scale` is the number of decimal points returned. A negative scale means the
+  rounding will occur to a digit to the left of the decimal point. For example,
+  -1 means the number will be rounded to the nearest tens and +1 to the nearest
+  tenths.
+
+**Return value:**
+
+Return value type is `double`.
+ +**Examples:** + +```questdb-sql +SELECT + d, + round_up(d, -2), + round_up(d, -1), + round_up(d,0), + round_up(d,1), + round_up(d,2) +FROM dbl; +``` + +| d | r_up-2 | r_up-1 | r_up0 | r_up1 | r_up2 | +| ------------ | ------ | ------ | ----- | ------ | ------- | +| -0.811905406 | -100 | -10 | -1 | -0.9 | -0.82 | +| -5.002768547 | -100 | -10 | -6 | -5.1 | -5.01 | +| -64.75487334 | -100 | -70 | -65 | -64.8 | -64.76 | +| -926.531695 | -1000 | -930 | -927 | -926.6 | -926.54 | +| 0.069361448 | 100 | 10 | 1 | 0.1 | 0.07 | +| 4.003627053 | 100 | 10 | 5 | 4.1 | 4.01 | +| 86.91359825 | 100 | 90 | 87 | 87 | 86.92 | +| 376.3807766 | 400 | 380 | 377 | 376.4 | 376.39 | + +## size_pretty + +`size_pretty(value)` returns a human-readable string equivalent to the input value. + +**Arguments:** + +- `value` is a `long` value that represents size in bytes. + +**Return value:** + +Return value type is `string`. The string contains the size as a floating point with +one significant figure followed by the scale [in base 1024](https://en.wikipedia.org/wiki/Byte#Multiple-byte_units). + +**Examples:** + +```questdb-sql +SELECT size_pretty(400032); +``` + +| size_pretty | +|-------------| +| 390.7 KiB | + +## sqrt + +`sqrt(value)` return the square root of a given number. + +**Arguments:** + +- `value` is any numeric value. + +**Return value:** + +Return value type is `double`. + +**Examples:** + +```questdb-sql +SELECT sqrt(4000.32) +``` + +| log | +| ---------------- | +| 63.2480829749013 | \ No newline at end of file diff --git a/docs/reference/function/random-value-generator.md b/docs/reference/function/random-value-generator.md new file mode 100644 index 00000000..6727362f --- /dev/null +++ b/docs/reference/function/random-value-generator.md @@ -0,0 +1,559 @@ +--- +title: Random value generator +sidebar_label: Random value generator +description: Random value generator function reference documentation. +--- + +The following functions have been created to help with our test suite. They are +also useful for users testing QuestDB on specific workloads in order to quickly +generate large test datasets that mimic the structure of their actual data. + +Values can be generated either: + +- Pseudo randomly +- [Deterministically](/docs/reference/function/row-generator/#long_sequence) + when specifying a `seed` + +QuestDB supports the following random generation functions: + +- [rnd_boolean](#rnd_boolean) +- [rnd_byte](#rnd_byte) +- [rnd_short](#rnd_short) +- [rnd_int](#rnd_int) +- [rnd_long](#rnd_long) +- [rnd_long256](#rnd_long256) +- [rnd_float](#rnd_float) +- [rnd_double](#rnd_double) +- [rnd_date](#rnd_date) +- [rnd_timestamp](#rnd_timestamp) +- [rnd_char](#rnd_char) +- [rnd_symbol](#rnd_symbol) +- [rnd_str](#rnd_str) +- [rnd_bin](#rnd_bin) +- [rnd_uuid4](#rnd_uuid4) + +## Usage + +Random functions should be used for populating test tables only. They do not +hold values in memory and calculations should not be performed at the same time +as the random numbers are generated. + +For example, running +`SELECT round(a,2), a FROM (SELECT rnd_double() a FROM long_sequence(10));` is +bad practice and will return inconsistent results. + +A better approach would be to populate a table and then run the query. So for +example + +1. **create** - `CREATE TABLE test(val double);` +2. **populate** - + `INSERT INTO test SELECT * FROM (SELECT rnd_double() FROM long_sequence(10));` +3. **query** - `SELECT round(val,2) FROM test;` + +## Generating sequences + +This page describes the functions to generate values. 
To generate sequences of +values, please refer the page about +[row generators](/docs/reference/function/row-generator/). + +## rnd_boolean + +`rnd_boolean()` - generates a random `boolean` value, either `true` or `false`, +both having equal probability. + +**Return value:** + +Return value type is `boolean`. + +**Examples:** + +```questdb-sql title="Random boolean" +SELECT + value a, + count() b +FROM (SELECT rnd_boolean() value FROM long_sequence(100)); +``` + +| a | b | +| ----- | --- | +| true | 47 | +| false | 53 | + +## rnd_byte + +- `rnd_byte()` - returns a random integer which can take any value between `0` + and `127`. +- `rnd_byte(min, max)` - generates byte values in a specific range (for example + only positive, or between 1 and 10). + +**Arguments:** + +- `min`: is a `byte` representing the lowest possible generated value + (inclusive). +- `max`: is a `byte` representing the highest possible generated value + (inclusive). + +**Return value:** + +Return value type is `byte`. + +**Examples:** + +```questdb-sql title="Random byte" +SELECT rnd_byte() FROM long_sequence(5); +SELECT rnd_byte(-1,1) FROM long_sequence(5); +``` + +``` +122,34,17,83,24 +0,1,-1,-1,0 +``` + +## rnd_short + +- `rnd_short()` - returns a random integer which can take any value between + `-32768` and `32767`. +- `rnd_short(min, max)` - returns short values in a specific range (for example + only positive, or between 1 and 10). Supplying `min` above `max` will result + in an `invalid range` error. + +**Arguments:** + +- `min`: is a `short` representing the lowest possible generated value + (inclusive). +- `max`: is a `short` representing the highest possible generated value + (inclusive). + +**Return value:** + +Return value type is `short`. + +**Examples:** + +```questdb-sql title="Random short" +SELECT rnd_short() FROM long_sequence(5); +SELECT rnd_short(-1,1) FROM long_sequence(5); +``` + +``` +-27434,234,-12977,8843,24 +0,1,-1,-1,0 +``` + +## rnd_int + +- `rnd_int()` is used to return a random integer which can take any value + between `-2147483648` and `2147483647`. +- `rnd_int(min, max, nanRate)` is used to generate int values in a specific + range (for example only positive, or between 1 and 10), or to get occasional + `NaN` values along with int values. + +**Arguments:** + +- `min`: is an `int` representing the lowest possible generated value + (inclusive). +- `max`: is an `int` representing the highest possible generated value + (inclusive). +- `nanRate` is an `int` defining the frequency of occurrence of `NaN` values: + - `0`: No `NaN` will be returned. + - `1`: Will only return `NaN`. + - `N > 1`: On average, one in N generated values will be NaN. + +**Return value:** + +Return value type is `int`. + +**Examples:** + +```questdb-sql title="Random int" +SELECT rnd_int() FROM long_sequence(5) +SELECT rnd_int(1,4,0) FROM long_sequence(5); +SELECT rnd_int(1,4,1) FROM long_sequence(5); +SELECT rnd_int(1,4,2) FROM long_sequence(5); +``` + +``` +1822685476, 1173192835, -2808202361, 78121757821, 44934191 +1,4,3,1,2 +null,null,null,null,null +1,null,4,null,2 +``` + +## rnd_long + +- `rnd_long()` is used to return a random signed integer between + `0x8000000000000000L` and `0x7fffffffffffffffL`. +- `rnd_long(min, max, nanRate)` is used to generate long values in a specific + range (for example only positive, or between 1 and 10), or to get occasional + `NaN` values along with int values. + +**Arguments:** + +- `min`: is a `long` representing the lowest possible generated value + (inclusive). 
+- `max`: is a `long` representing the highest possible generated value + (inclusive). +- `nanRate` is an `int` defining the frequency of occurrence of `NaN` values: + - `0`: No `NaN` will be returned. + - `1`: Will only return `NaN`. + - `N > 1`: On average, one in N generated values will be `NaN`. + +**Return value:** + +Return value type is `long`. + +**Examples:** + +```questdb-sql title="Random long" +SELECT rnd_long() FROM long_sequence(5); +SELECT rnd_long(1,4,0) FROM long_sequence(5); +SELECT rnd_long(1,4,1) FROM long_sequence(5); +SELECT rnd_long(-10000000,10000000,2) FROM long_sequence(5); +``` + +```questdb-sql +1,4,3,1,2 +null,null,null,null,null +-164567594, -323331140, 26846334, -892982893, -351053301 +300291810703592700, 2787990010234796000, 4305203476273459700, -8518907563589124000, 8443756723558216000 +``` + +## rnd_long256 + +- `rnd_long256()` - generates a random `long256` value between 0 and 2^256. + +**Return value:** + +Return value type is `long256`. + +**Examples:** + +```questdb-sql title="Random long256" +SELECT rnd_long256() FROM long_sequence(5); +``` + +``` +0x5dd94b8492b4be20632d0236ddb8f47c91efc2568b4d452847b4a645dbe4871a, +0x55f256188b3474aca83ccc82c597668bb84f36d3f5b25afd9e194c1867625918, +0x630c6f02c1c2e0c2aa4ac80ab684aa36d91dd5233cc185bb7097400fa12e7de0, +0xa9eeaa5268f911f4bcac2e89b621bd28bba90582077fc9fb9f14a53fcf6368b7, +0x7c80546eea2ec093a5244e39efad3f39c5489d2337007fd0b61d8b141058724d +``` + +## rnd_float + +- `rnd_float()` - generates a random **positive** `float` between 0 and 1. +- `rnd_float(nanRate)` - generates a random **positive** `float` between 0 and 1 + which will be `NaN` at a frequency defined by `nanRate`. + +**Arguments:** + +- `nanRate` is an `int` defining the frequency of occurrence of `NaN` values: +- `0`: No `NaN` will be returned. +- `1`: Will only return `NaN`. +- `N > 1`: On average, one in N generated values will be `NaN`. + +**Return value:** + +Return value type is `float`. + +**Examples:** + +```questdb-sql title="Random float" +SELECT rnd_float() FROM long_sequence(5); +SELECT rnd_float(2) FROM long_sequence(6); +``` + +``` +0.3821478, 0.5162148, 0.22929084, 0.03736937, 0.39675003 +0.08108246, 0.7082644, null, 0.6784522, null, 0.5711276 +``` + +## rnd_double + +- `rnd_double()` - generates a random **positive** `double` between 0 and 1. +- `rnd_double(nanRate)` - generates a random **positive** `double` between 0 and + 1 which will be `NaN` at a frequency defined by `nanRate`. + +**Arguments:** + +- `nanRate` is an `int` defining the frequency of occurrence of `NaN` values: +- `0`: No `NaN` will be returned. +- `1`: Will only return `NaN`. +- `N > 1`: On average, one in N generated values will be `NaN`. + +**Return value:** + +Return value type is `double`. + +**Examples:** + +```questdb-sql title="Random double" +SELECT rnd_double() FROM long_sequence(5); +SELECT rnd_double(2) FROM long_sequence(5); +``` + +``` +0.99115364871, 0.31011470271, 0.10776479191, 0.53938281731, 0.89820403511 +0.99115364871, null, null, 0.53938281731, 0.89820403511 +``` + +## rnd_date() + +- `rnd_date()` generates a random date between `start` and `end` dates (both + inclusive). IT will also generate `NaN` values at a frequency defined by + `nanRate`. When `start` or `end` are invalid dates, or when `start` is + superior to `end`, it will return `invalid range` error. When `nanRate` is + inferior to 0, it will return `invalid NAN rate` error. 
+ +**Arguments:** + +- `start` is a `date` defining the minimum possible generated date (inclusive) +- `end` is a `date` defining the maximum possible generated date (inclusive) +- `nanRate` defines the frequency of occurrence of `NaN` values: + - `0`: No `NaN` will be returned. + - `1`: Will only return `NaN`. + - `N > 1`: On average, one in N generated values will be NaN. + +**Return value:** + +Return value type is `date`. + +**Examples:** + +```questdb-sql title="Random date" +SELECT rnd_date( + to_date('2015', 'yyyy'), + to_date('2016', 'yyyy'), + 0) +FROM long_sequence(5); +``` + +```questdb-sql +2015-01-29T18:00:17.402Z, 2015-11-15T20:22:14.112Z, +2015-12-08T09:26:04.483Z, 2015-05-28T02:22:47.022Z, +2015-10-13T19:16:37.034Z +``` + +## rnd_timestamp() + +- `rnd_timestamp(start, end, nanRate)` generates a random timestamp between + `start` and `end` timestamps (both inclusive). It will also generate `NaN` + values at a frequency defined by `nanRate`. When `start` or `end` are invalid + timestamps, or when `start` is superior to `end`, it will return + `invalid range` error. When `nanRate` is inferior to 0, it will return + `invalid NAN rate` error. + +**Arguments:** + +- `start` is a `timestamp` defining the minimum possible generated timestamp + (inclusive) +- `end` is a `timestamp` defining the maximum possible generated timestamp + (inclusive) +- `nanRate` defines the frequency of occurrence of `NaN` values: + - `0`: No `NaN` will be returned. + - `1`: Will only return `NaN`. + - `N > 1`: On average, one in N generated values will be NaN. + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql title="Random timestamp" +SELECT rnd_timestamp( + to_timestamp('2015', 'yyyy'), + to_timestamp('2016', 'yyyy'), + 0) +FROM long_sequence(5); +``` + +```questdb-sql +2015-01-29T18:00:17.402762Z, 2015-11-15T20:22:14.112744Z, +2015-12-08T09:26:04.483039Z, 2015-05-28T02:22:47.022680Z, +2015-10-13T19:16:37.034203Z +``` + +#### Sequences + +To generate increasing timestamps, please refer the page about +[row generators](/docs/reference/function/row-generator/). + +## rnd_char + +- `rnd_char()` is used to generate a random `char` which will be an uppercase + character from the 26-letter A to Z alphabet. Letters from A to Z will be + generated with equal probability. + +**Return value:** + +Return value type is `char`. + +**Examples:** + +```questdb-sql title="Random char" +SELECT rnd_char() FROM long_sequence(5); +``` + +``` +G, P, E, W, K +``` + +## rnd_symbol + +- `rnd_symbol(symbolList)` is used to choose a random `symbol` from a list + defined by the user. It is useful when looking to generate specific symbols + from a finite list (e.g `BUY, SELL` or `AUTUMN, WINTER, SPRING, SUMMER`. + Symbols are randomly chosen from the list with equal probability. When only + one symbol is provided in the list, this symbol will be chosen with 100% + probability, in which case it is more efficient to use + `cast('your_symbol' as symbol` +- `rnd_symbol(list_size, minLength, maxLength, nullRate)` generated a finite + list of distinct random symbols and chooses one symbol from the list at + random. The finite list is of size `list_size`. The generated symbols length + is between `minLength` and `maxLength` (both inclusive). The function will + also generate `null` values at a rate defined by `nullRate`. + +**Arguments:** + +- `symbolList` is a variable-length list of possible `symbol` values expressed + as a comma-separated list of strings. 
For example,
+  `'a', 'bcd', 'efg123', '行'`
+- `list_size` is the number of distinct `symbol` values to be generated
+- `minLength` is an `int` defining the minimum length of a generated symbol
+  (inclusive)
+- `maxLength` is an `int` defining the maximum length of a generated symbol
+  (inclusive)
+- `nullRate` is an `int` defining the frequency of occurrence of `null` values:
+  - `0`: No `null` will be returned.
+  - `1`: Will only return `null`.
+  - `N > 1`: On average, one in N generated values will be `null`.
+
+**Return value:**
+
+Return value type is `symbol`.
+
+**Examples:**
+
+```questdb-sql title="Random symbol from a list"
+SELECT rnd_symbol('ABC','def', '123')
+FROM long_sequence(5);
+```
+
+```
+'ABC', '123', 'def', '123', 'ABC'
+```
+
+```questdb-sql title="Random symbol, randomly generated"
+SELECT rnd_symbol(2, 3, 4, 0)
+FROM long_sequence(5);
+```
+
+```
+'ABC', 'DEFG', 'ABC', 'DEFG', 'DEFG'
+```
+
+## rnd_str
+
+- `rnd_str(stringList)` is used to choose a random `string` from a list defined
+  by the user. It is useful when looking to generate specific strings from a
+  finite list (e.g. `BUY, SELL` or `AUTUMN, WINTER, SPRING, SUMMER`). Strings
+  are randomly chosen from the list with equal probability. When only one
+  string is provided in the list, this string will be chosen with 100%
+  probability.
+- `rnd_str(list_size, minLength, maxLength, nullRate)` generates a finite list
+  of distinct random strings and chooses one string from the list at random.
+  The finite list is of size `list_size`. The length of the generated strings
+  is between `minLength` and `maxLength` (both inclusive). The function will
+  also generate `null` values at a rate defined by `nullRate`.
+
+**Arguments:**
+
+- `stringList` is a variable-length list of possible `string` values expressed
+  as a comma-separated list of strings. For example, `'a', 'bcd', 'efg123', '行'`
+- `list_size` is the number of distinct `string` values to be generated
+- `minLength` is an `int` defining the minimum length of a generated string
+  (inclusive)
+- `maxLength` is an `int` defining the maximum length of a generated string
+  (inclusive)
+- `nullRate` is an `int` defining the frequency of occurrence of `null` values:
+  - `0`: No `null` will be returned.
+  - `1`: Will only return `null`.
+  - `N > 1`: On average, one in N generated values will be `null`.
+
+**Return value:**
+
+Return value type is `string`.
+
+**Examples:**
+
+```questdb-sql title="Random string from a list"
+SELECT rnd_str('ABC','def', '123')
+FROM long_sequence(5);
+```
+
+```
+'ABC', '123', 'def', '123', 'ABC'
+```
+
+```questdb-sql title="Random string, randomly generated"
+SELECT rnd_str(3, 2, 2, 4)
+FROM long_sequence(8);
+```
+
+```
+'AB', 'CD', null, 'EF', 'CD', 'EF', null, 'AB'
+```
+
+## rnd_bin
+
+- `rnd_bin()` generates random binary data of a size up to `32` bytes.
+- `rnd_bin(minBytes, maxBytes, nullRate)` generates random binary data of a size
+  between `minBytes` and `maxBytes` and returns `null` at a rate defined by
+  `nullRate`.
+
+**Arguments:**
+
+- `minBytes` is a `long` defining the minimum size in bytes of the generated
+  binary data (inclusive)
+- `maxBytes` is a `long` defining the maximum size in bytes of the generated
+  binary data (inclusive)
+- `nullRate` is an `int` defining the frequency of occurrence of `null` values:
+  - `0`: No `null` will be returned.
+  - `1`: Will only return `null`.
+  - `N > 1`: On average, one in N generated values will be `null`.
+
+**Return value:**
+
+Return value type is `binary`.
+ +**Examples:** + +```questdb-sql title="Random binary" +SELECT rnd_bin() FROM long_sequence(5); +SELECT rnd_bin(2, 5, 2) FROM long_sequence(5); +``` + +## rnd_uuid4 + +- `rnd_uuid4()` is used to generate a random [UUID](/docs/reference/sql/datatypes/#the-uuid-type). +- The generated UUIDs are [version 4](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) as per the + [RFC 4122](https://tools.ietf.org/html/rfc4122#section-4.4) specification. +- Generated UUIDs do not use a cryptographically strong random generator and should not be used for + security purposes. + +**Return value:** + +Return value type is `uuid`. + +**Examples:** + +```questdb-sql title="Random char" +SELECT rnd_uuid4() FROM long_sequence(3); +``` + +``` +deca0b0b-b14b-4d39-b891-9e1e786a48e7 +2f113ebb-d36e-4e58-b804-6ece2263abe4 +6eddd24a-8889-4345-8001-822cc2d41951 +``` \ No newline at end of file diff --git a/docs/reference/function/row-generator.md b/docs/reference/function/row-generator.md new file mode 100644 index 00000000..fdf43edd --- /dev/null +++ b/docs/reference/function/row-generator.md @@ -0,0 +1,96 @@ +--- +title: Row generator +sidebar_label: Row generator +description: Row generator function reference documentation. +--- + +The `long_sequence()` function may be used as a row generator to create table +data for testing. Basic usage of this function involves providing the number of +iterations required. Deterministic pseudo-random behavior can be achieved by +providing seed values when calling the function. + +This function is commonly used in combination with +[random generator functions](/docs/reference/function/random-value-generator/) +to produce mock data. + +## long_sequence + +- `long_sequence(iterations)` - generates rows +- `long_sequence(iterations, seed1, seed2)` - generates rows deterministically + +**Arguments:** + +-`iterations`: is a `long` representing the number of rows to generate. -`seed1` +and `seed2` are `long64` representing both parts of a `long128` seed. + +### Row generation + +The `long_sequence()` function can be used to generate very large datasets for +testing e.g. billions of rows. + +`long_sequence(iterations)` is used to: + +- Generate a number of rows defined by `iterations`. +- Generate a column `x:long` of monotonically increasing long integers starting + from 1, which can be accessed for queries. + +### Random number seed + +When `long_sequence` is used conjointly with +[random generators](/docs/reference/function/random-value-generator/), these +values are usually generated at random. The function supports a seed to be +passed in order to produce deterministic results. + +:::note + +Deterministic procedural generation makes it easy to test on vasts amounts of +data without actually moving large files around across machines. Using the same +seed on any machine at any time will consistently produce the same results for +all random functions. 
+ +::: + +**Examples:** + +```questdb-sql title="Generating multiple rows" +SELECT x, rnd_double() +FROM long_sequence(5); +``` + +| x | rnd_double | +| --- | ------------ | +| 1 | 0.3279246687 | +| 2 | 0.8341038236 | +| 3 | 0.1023834675 | +| 4 | 0.9130602021 | +| 5 | 0.718276777 | + +```questdb-sql title="Accessing row_number using the x column" +SELECT x, x*x +FROM long_sequence(5); +``` + +| x | x\*x | +| --- | ---- | +| 1 | 1 | +| 2 | 4 | +| 3 | 9 | +| 4 | 16 | +| 5 | 25 | + +```questdb-sql title="Using with a seed" +SELECT rnd_double() +FROM long_sequence(2,128349234,4327897); +``` + +:::note + +The results below will be the same on any machine at any time as long as they +use the same seed in long_sequence. + +::: + +| rnd_double | +| ------------------ | +| 0.8251337821991485 | +| 0.2714941145110299 | diff --git a/docs/reference/function/spatial.md b/docs/reference/function/spatial.md new file mode 100644 index 00000000..1e9cb67c --- /dev/null +++ b/docs/reference/function/spatial.md @@ -0,0 +1,77 @@ +--- +title: Geospatial functions +sidebar_label: Spatial +description: Geospatial functions reference documentation. +--- + +Spatial functions allow for operations relating to the geohash types which +provide geospatial data support. For more information on this type of data, see +the [geohashes documentation](/docs/concept/geohashes/) and the +[operators](/docs/reference/operators/spatial/) which help with filtering data. + +## rnd_geohash + +`rnd_geohash(bits)` returns a random geohash of variable precision. + +**Arguments:** + +`bits` - an integer between `1` and `60` which determines the precision of the +generated geohash. + +**Return value:** + +Returns a `geohash` + +**Examples:** + +```questdb-sql +SELECT rnd_geohash(7) g7, + rnd_geohash(10) g10, + rnd_geohash(30) g30, + rnd_geohash(29) g29, + rnd_geohash(60) g60 +FROM long_sequence(5); +``` + +| g7 | g10 | g30 | g29 | g60 | +| ------- | --- | ------ | ----------------------------- | ------------ | +| 1101100 | 4h | hsmmq8 | 01110101011001101111110111011 | rjtwedd0z72p | +| 0010011 | vf | f9jc1q | 10101111100101111111101101101 | fzj09w97tj1h | +| 0101011 | kx | fkhked | 01110110010001001000110001100 | v4cs8qsnjkeh | +| 0000001 | 07 | qm99sm | 11001010011011000010101100101 | hrz9gq171nc5 | +| 0101011 | 6t | 3r8jb5 | 11011101010111001010010001010 | fm521tq86j2c | + +## make_geohash + +`make_geohash(lon, lat, bits)` returns a geohash equivalent of latitude and +longitude, with precision specified in bits. + +`make_geohash()` is intended to be used via SQL over HTTP / PostgreSQL wire +protocol, for use within Java (embedded) scenario, see the +[Java embedded documentation for geohashes](/docs/concept/geohashes/#java-embedded-usage). + +**Arguments:** + +- `lon` - longitude coordinate as a floating point value with up to eight + decimal places +- `lat` - latitude coordinate as a floating point value with up to eight decimal + places +- `bits` - an integer between `1` and `60` which determines the precision of the + generated geohash. + +The latitude and longitude arguments may be constants, column values or the +results of a function which produces them. + +**Return value:** + +Returns a `geohash`. + +- If latitude and longitude comes from constants and is incorrect, an error is + thrown +- If column values have invalid lat / long coordinates, this produces `null`. 
+ +**Examples:** + +```questdb-sql +SELECT make_geohash(142.89124148, -12.90604153, 40) +``` diff --git a/docs/reference/function/text.md b/docs/reference/function/text.md new file mode 100644 index 00000000..bf5e7565 --- /dev/null +++ b/docs/reference/function/text.md @@ -0,0 +1,291 @@ +--- +title: Text functions +sidebar_label: Text +description: Text function reference documentation. +--- + +This page describes the available functions to assist with performing text +manipulation such as concatenation, case conversion, and string length +calculation. + +## concat + +`concat(str, ...)` - concatenates a string from one or more input values. + +```questdb-sql title="Example" +SELECT firstName, lastName, concat(firstName, ' ', lastName) FROM names; +``` + +| firstName | lastName | concat | +| --------- | -------- | ------------- | +| Tim | Thompson | Tim Thompson | +| Anna | Thompson | Anna Thompson | +| Anna | Mason | Anna Mason | +| Tom | Johnson | Tom Johnson | +| Tim | Smith | Tim Smith | + +:::tip + +`concat()` can be used to generate `line protocol`. See an example below. + +::: + +```questdb-sql title="Generating line protocol" +SELECT +concat( + 'trades,instrument=', rnd_str(2,2,0), + ',side=', rnd_str('B', 'S'), + ' price=', abs(cast(rnd_double(0)*100000 AS INT)), + ',quantity=', abs(cast(rnd_double(0)*10000 AS INT)), + ' ', + 1571270400000 + (x-1) * 100 +) +FROM long_sequence(5) x; +``` + +```title="Result" +trades,instrument=CR,side=B price=70867,quantity=9192 1571270400000 +trades,instrument=LN,side=S price=37950,quantity=1439 1571270400100 +trades,instrument=ZJ,side=S price=82829,quantity=8871 1571270400200 +trades,instrument=EW,side=S price=10427,quantity=1945 1571270400300 +trades,instrument=MI,side=B price=99348,quantity=8450 1571270400400 +``` + +## length + +`length(string)` - reads length of `string` value type (result is `int`) + +`length(symbol)` - reads length of `symbol` value type (result is `int`) + +`length(blob)` - reads length of `binary` value type (result is `long`) + +- a `string` +- a `symbol` +- a `binary` blob + +```questdb-sql title="Example" +SELECT name a, length(name) b FROM names limit 4 +``` + +| a | b | +| ------ | --- | +| AARON | 5 | +| AMELIE | 6 | +| TOM | 3 | +| null | -1 | + +## left + +`left(string, count)` - extracts a substring of the given length from a string +(starting from left). + +**Arguments:** + +- `string` is a string to extract from. +- `count` is an integer specifying the count of characters to be extracted into + a substring. + +**Return value:** + +Returns a string with the extracted characters. + +**Examples:** + +```questdb-sql title="Example" +SELECT name, left('Thompson', 3) l FROM names LIMIT 3 +``` + +| name | l | +| ------ | --- | +| AARON | AAR | +| AMELIE | AME | +| TOM | TOM | + +## right + +`right(string, count)` - extracts a substring of the given length from a string +(starting from right). + +**Arguments:** + +- `string` is a string to extract from. +- `count` is an integer specifying the count of characters to be extracted into + a substring. + +**Return value:** + +Returns a string with the extracted characters. + +**Examples:** + +```questdb-sql title="Example" +SELECT name, right('Thompson', 2) r FROM names LIMIT 3 +``` + +| name | l | +| ------ | --- | +| AARON | ON | +| AMELIE | IE | +| TOM | OM | + +## strpos / position + +`strpos(string, substring)` or `position(string, substring)` - searches for the first substring occurrence in a string, and returns +the index position of the starting character. 
If the substring is not found, this function returns `0`. The +performed search is case-sensitive. + +**Arguments:** + +- `string` is a string to search in. +- `substring` is a string to search for. + +**Return value:** + +Returns an integer for the substring position. Positions start from `1`. + +**Examples:** + +```questdb-sql title="Example" +SELECT name, strpos(name, 'Thompson') idx +FROM full_names +LIMIT 4; + +-- This is equal to: +SELECT name, position(name, 'Thompson') idx +FROM full_names +LIMIT 4; +``` + +| name | idx | +| ------------- | --- | +| Tim Thompson | 5 | +| Anna Thompson | 6 | +| Anna Mason | 0 | +| Tom Johnson | 0 | + +Assuming we have a table `example_table` with a single string type column `col`: + +| col | +| ---------- | +| apple,pear | +| cat,dog | +| ... | + +As a more advanced example, we can use `strpos()` or `position()` to split the string values of +`col`, in this case splitting at the comma character, `,` . By using +`left()`/`right()` functions, we can choose the string values at the left and +right of the comma: + +```questdb-sql title="Splitting string into two separate columns" +SELECT col, + left(col, strpos(col, ',') - 1) as col1, + right(col, length(col) - strpos(col, ',')) as col2 +FROM example_table; + +-- This is equal to: + +SELECT col, + left(col, position(col, ',') - 1) as col1, + right(col, length(col) - position(col, ',')) as col2 +FROM example_table; +``` + +| col | col1 | col2 | +| ---------- | ----- | ---- | +| apple,pear | apple | pear | +| cat,dog | cat | dog | + +## substring + +`substring(string, start, length)` - extracts a substring from the given string. + +**Arguments:** + +- `string` is a string to extract from. +- `start` is an integer specifying the position of the first character to be + extracted. Positions start from `1`. +- `length` is an integer specifying the count of characters to be extracted. + Should be non-negative. + +**Return value:** + +Returns a string with the extracted characters. If any part the arguments is +`null`, the function returns `null`. + +**Examples:** + +```questdb-sql title="Example" +SELECT id, substring(id, 1, 2) country FROM orders LIMIT 3 +``` + +| id | country | +| --------------- | ------- | +| UK2022072619373 | UK | +| UK2022072703162 | UK | +| US2022072676246 | US | + +If the `start` argument is negative, the output depends on the value of +`start+length`: + +- If `start+length` is greater than 1, the substring stops at position + `start+length - 1`. +- If `start+length` is zero, the output is empty string. +- If `start+length` is less than zero, the output is `null`. + +```questdb-sql title="Example" +SELECT substring('Lorem ipsum dolor sit amet', -5, 9) +``` + +| substring | +| --------- | +| Lor | + +## to_lowercase / lower + +- `to_lowercase(string)` or `lower(string)` - converts all upper case string + characters to lowercase + +### Arguments + +`string` is the input strong to be converted. + +### Return value + +Return value type is `string`. + +### Examples + +```questdb-sql +SELECT lower('questDB'); +-- This is equal to: +SELECT to_lowercase('questDB'); +``` + +| to_lowercase | +| ------------ | +| questdb | + +## to_uppercase / upper + +- `to_uppercase(string)` or `upper(string)` - converts all lower case string + characters to uppercase + +### Arguments + +`string` is the input strong to be converted. + +### Return value + +Return value type is `string`. 
+ +### Examples + +```questdb-sql +SELECT upper('questDB'); +-- This is equal to: +SELECT to_uppercase('questDB'); +``` + +| to_uppercase | +| ------------ | +| QUESTDB | diff --git a/docs/reference/function/timestamp-generator.md b/docs/reference/function/timestamp-generator.md new file mode 100644 index 00000000..4917d9db --- /dev/null +++ b/docs/reference/function/timestamp-generator.md @@ -0,0 +1,66 @@ +--- +title: Timestamp generator +sidebar_label: Timestamp generator +description: Timestamp generator function reference documentation. +--- + +The `timestamp_sequence()` function may be used as a timestamp generator to +create data for testing. Pseudo-random steps can be achieved by providing a +[random function](/docs/reference/function/random-value-generator/) to the +`step` argument. A `seed` value may be provided to a random function if the +randomly-generated `step` should be deterministic. + +## timestamp_sequence + +- `timestamp_sequence(startTimestamp, step)` generates a sequence of `timestamp` + starting at `startTimestamp`, and incrementing by a `step` set as a `long` + value in microseconds. This `step` can be either; + + - a static value, in which case the growth will be monotonic, or + + - a randomized value, in which case the growth will be randomized. This is + done using + [random value generator functions](/docs/reference/function/random-value-generator/). + +**Arguments:** + +- `startTimestamp`: is a `timestamp` representing the starting (i.e lowest) + generated timestamp in the sequence. +- `step`: is a `long` representing the interval between 2 consecutive generated + timestamps in `microseconds`. + +**Return value:** + +Return value type is `timestamp`. + +**Examples:** + +```questdb-sql title="Monotonic timestamp increase" +SELECT x, timestamp_sequence( + to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss'), + 100000L) +FROM long_sequence(5); +``` + +| x | timestamp_sequence | +| --- | --------------------------- | +| 1 | 2019-10-17T00:00:00.000000Z | +| 2 | 2019-10-17T00:00:00.100000Z | +| 3 | 2019-10-17T00:00:00.200000Z | +| 4 | 2019-10-17T00:00:00.300000Z | +| 5 | 2019-10-17T00:00:00.400000Z | + +```questdb-sql title="Randomized timestamp increase" +SELECT x, timestamp_sequence( + to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss'), + rnd_short(1,5) * 100000L) +FROM long_sequence(5); +``` + +| x | timestamp_sequence | +| --- | --------------------------- | +| 1 | 2019-10-17T00:00:00.000000Z | +| 2 | 2019-10-17T00:00:00.100000Z | +| 3 | 2019-10-17T00:00:00.600000Z | +| 4 | 2019-10-17T00:00:00.900000Z | +| 5 | 2019-10-17T00:00:01.300000Z | diff --git a/docs/reference/function/timestamp.md b/docs/reference/function/timestamp.md new file mode 100644 index 00000000..ae91bf98 --- /dev/null +++ b/docs/reference/function/timestamp.md @@ -0,0 +1,90 @@ +--- +title: Timestamp function +sidebar_label: Timestamp +description: Timestamp function reference documentation. 
+--- + +`timestamp(columnName)` elects a +[designated timestamp](/docs/concept/designated-timestamp/): + +- during a [CREATE TABLE](/docs/reference/sql/create-table/#timestamp) operation +- during a [SELECT](/docs/reference/sql/select#timestamp/) operation + (`dynamic timestamp`) +- when ingesting data via InfluxDB Line Protocol, for tables that do not already exist in QuestDB, + partitions are applied automatically by day by default with a `timestamp` + column + +:::note + +- Checking if tables contain a designated timestamp column can be done via the + `tables()` and `table_columns()` functions which are described in the + [meta functions](/docs/reference/function/meta/) documentation page. + +- The native timestamp format used by QuestDB is a Unix timestamp in microsecond + resolution. See + [Timestamps in QuestDB](/docs/guides/working-with-timestamps-timezones/#timestamps-in-questdb) + for more details. + +::: + +## Syntax + +### During a CREATE operation + +Create a [designated timestamp](/docs/concept/designated-timestamp/) column +during table creation. For more information, refer to the +[CREATE TABLE](/docs/reference/sql/create-table/) section. + +![Flow chart showing the syntax of the TIMESTAMP keyword](/img/docs/diagrams/timestamp.svg) + +### During a SELECT operation + +Creates a [designated timestamp](/docs/concept/designated-timestamp/) column in +the result of a query. Assigning a timestamp in a `SELECT` statement +(`dynamic timestamp`) allows for time series operations such as `LATEST BY`, +`SAMPLE BY` or `LATEST BY` on tables which do not have a `designated timestamp` +assigned. + +![Flow chart showing the syntax of the timestamp function](/img/docs/diagrams/dynamicTimestamp.svg) + +## Examples + +### During a CREATE operation + +The following creates a table with +[designated timestamp](/docs/concept/designated-timestamp/). + +```questdb-sql title="Create table" +CREATE TABLE +temperatures(ts timestamp, sensorID symbol, sensorLocation symbol, reading double) +timestamp(ts); +``` + +### During a SELECT operation + +The following will query a table and assign a +[designated timestamp](/docs/concept/designated-timestamp/) to the output. Note +the use of brackets to ensure the timestamp clause is applied to the result of +the query instead of the whole `readings` table. + +```questdb-sql title="Dynamic timestamp" +(SELECT cast(dateTime AS TIMESTAMP) ts, device, value FROM readings) timestamp(ts); +``` + +Although the `readings` table does not have a designated timestamp, we are able +to create one on the fly. Now, we can use this into a subquery to perform +timestamp operations. + +```questdb-sql title="Dynamic timestamp subquery" +SELECT ts, avg(value) FROM +(SELECT cast(dateTime AS TIMESTAMP) ts, value FROM readings) timestamp(ts) +SAMPLE BY 1d; +``` + +If the data is unordered, it is important to order it first. + +```questdb-sql title="Dynamic timestamp - unordered data" +SELECT ts, avg(value) FROM +(SELECT ts, value FROM unordered_readings ORDER BY ts) timestamp(ts) +SAMPLE BY 1d; +``` diff --git a/docs/reference/function/touch.md b/docs/reference/function/touch.md new file mode 100644 index 00000000..e11d7344 --- /dev/null +++ b/docs/reference/function/touch.md @@ -0,0 +1,80 @@ +--- +title: Touch function +sidebar_label: Touch +description: Touch functions reference documentation. +--- + +The `touch()` function loads a table from disk to memory. 
Useful for triggering +a "hot" start from conditions where data may be "cold", such as after a restart +or any condition which caused disk cache to flush. A "hot" start provides the +usual fast and expected query performance, as no caching or movement from disk +to memory is required prior to an initial query. + +### Arguments: + +Wraps a SQL statement. + +### Return value + +Returns an `object` representing index state. + +```json +{ + "data_pages": number, + "index_key_pages": number, + "index_values_pages": number +} +``` + +See the [Index documentation](/docs/concept/indexes/#index-capacity-1) for more +information on how the above values are determined. + +### General example + +Consider a table with an indexed symbol column: + +```sql +CREATE TABLE x AS ( + SELECT + rnd_geohash(40) g, + rnd_double(0)* 100 a, + rnd_symbol(5, 4, 4, 1) b, + timestamp_sequence(0, 100000000000) k + FROM + long_sequence(20) +), +index(b) timestamp(k) PARTITION BY DAY; +``` + +Run `touch()` to "warm up" the table: + +```sql +SELECT touch(SELECT * FROM x WHERE k IN '1970-01-22'); +``` + +On success, an object is returned with the state of the index. + +```json +{ + "data_pages": 4, + "index_key_pages": 1, + "index_values_pages": 1 +} +``` + +### Practical example + +Many people use scripts to restart QuestDB. + +Use `touch()` after startup via the REST API: + +```shell +curl -G \ + --data-urlencode "SELECT touch(SELECT * FROM x WHERE k IN '1970-01-22');" \ + http://localhost:9000/exec +``` + +All subsequent queries will be within performance expectations, without +additional latency added for "warming up" the data. Touch simulates a query +without transferring data over the network, apart from the object as +confirmation. diff --git a/docs/reference/function/trigonometric.md b/docs/reference/function/trigonometric.md new file mode 100644 index 00000000..11ce971e --- /dev/null +++ b/docs/reference/function/trigonometric.md @@ -0,0 +1,348 @@ +--- +title: Trigonometric functions +sidebar_label: Trigonometric +description: Trigonometric function reference documentation. +--- + +This page describes the available functions to assist with performing +trigonometric calculations. + +:::tip + +Positive and negative infinity values are expressed as `'Infinity'` or +`'-Infinity'` in QuestDB. + +::: + +## sin + +`sin(angleRadians)` returns the trigonometric sine of an angle. + +### Arguments + +- `angleRadians` is a numeric value indicating the angle in radians. + +### Return value + +Return value type is `double`. + +### Description + +Special case: if the argument is `NaN` or an infinity, then the result is `Null`. + +### Examples + +```questdb-sql +SELECT pi()/2 angle, sin(pi()/2) sin; +``` + +| angle | sin | +| -------------- | --- | +| 1.570796326794 | 1 | + +## cos + +`cos(angleRadians)` returns the trigonometric cosine of an angle. + +### Arguments + +- `angleRadians` numeric value for the angle, in radians. + +### Return value + +Return value type is `double`. + +### Description + +Special case: if the argument is `NaN` or an infinity, then the result is `Null`. + +### Examples + +```questdb-sql +SELECT pi()/2 angle, cos(pi()/2) cos; +``` + +| angle | cos | +| -------------- | --------------------- | +| 1.570796326794 | 6.123233995736766e-17 | + +## tan + +`tan(angleRadians)` returns the trigonometric tangent of an angle. + +### Arguments + +- `angleRadians` numeric value for the angle, in radians. + +### Return value + +Return value type is `double`. 
+ +### Description + +Special case: if the argument is `NaN` or an infinity, then the result is `Null`. + +### Examples + +```questdb-sql +SELECT pi()/2 angle, tan(pi()/2) tan; +``` + +| angle | tan | +| -------------- | ----------------- | +| 1.570796326794 | 16331239353195370 | + +## cot + +`cot(angleRadians)` returns the trigonometric cotangent of an angle. + +### Arguments + +- `angleRadians` numeric value for the angle, in radians. + +### Return value + +Return value type is `double`. + +### Description + +Special case: if the argument is `NaN`, 0, or an infinity, then the result is `Null`. + + +### Examples + +```questdb-sql +SELECT pi()/2 angle, cot(pi()/2) cot; +``` + +| angle | cot | +| -------------- | --------------------- | +| 1.570796326794 | 6.123233995736766e-17 | + +## asin + +`asin(value)` the arcsine of a value. + +### Arguments + +- `value` is a numeric value whose arcsine is to be returned. + +### Return value + +Return value type is `double`. The returned angle is between -pi/2 and pi/2 +inclusively. + +### Description + +Special case: if the argument is `NaN` or an infinity, then the result is `Null`. + +### Examples + +```questdb-sql +SELECT asin(1.0) asin; +``` + +| asin | +| -------------- | +| 1.570796326794 | + +## acos + +`acos(value)` returns the arccosine of a value. + +### Arguments + +- `value` is a numeric value whose arccosine is to be returned. The returned + angle is between 0.0 and pi inclusively. + +### Return value + +Return value type is `double`. + +### Description + +Special cases: if the argument is `NaN` or its absolute value is greater than 1, then the +result is `Null`. + +### Examples + +```questdb-sql +SELECT acos(0.0) acos; +``` + +| acos | +| -------------- | +| 1.570796326794 | + +## atan + +`atan(value)` returns the arctangent of a value. + +### Arguments + +- `value` is a numeric value whose arctangent is to be returned. + +### Return value + +Return value type is `double`. The returned angle is between -pi/2 and pi/2 +inclusively. + +### Description + +Special cases: + +- If the argument is `NaN`, then the result is `Null`. +- If the argument is infinity, then the result is the closest value to pi/2 with + the same sign as the input. + +### Examples + +Special case where input is `'-Infinity'`: + +```questdb-sql +SELECT atan('-Infinity'); +``` + +Returns the closest value to pi/2 with the same sign as the input: + +| atan | +| --------------- | +| -1.570796326794 | + +```questdb-sql +SELECT atan(1.0) atan; +``` + +| atan | +| -------------- | +| 0.785398163397 | + +## atan2 + +`atan2(valueY, valueX)` returns the angle _theta_ from the conversion of +rectangular coordinates (x, y) to polar (r, theta). This function computes +_theta_ (the phase) by computing an arctangent of y/x in the range of -pi to pi +inclusively. + +### Arguments + +- `valueY` numeric ordinate coordinate. +- `valueX` numeric abscissa coordinate. + +:::note + +The arguments to this function pass the y-coordinate first and the x-coordinate +second. + +::: + +### Return value + +Return value type is `double` between -pi and pi inclusively. 
+ +### Description: + +`atan2(valueY, valueX)` measures the counterclockwise angle _theta_, in radians, +between the positive x-axis and the point (x, y): + +![Atan2 trigonometric function](/img/docs/atan2.svg) + +Special cases: + +| input `valueY` | input `valueX` | `atan2` return value | +| --------------------- | -------------- | ---------------------------------- | +| 0 | Positive value | 0 | +| Positive finite value | 'Infinity' | 0 | +| -0 | Positive value | 0 | +| Negative finite value | 'Infinity' | 0 | +| 0 | Negative value | Double value closest to pi | +| Positive finite value | '-Infinity' | Double value closest to pi | +| -0 | Negative value | Double value closest to -pi | +| Negative finite value | '-Infinity' | Double value closest to -pi | +| Positive value | 0 or -0 | Double value closest to pi/2 | +| 'Infinity' | Finite value | Double value closest to pi/2 | +| Negative value | 0 or -0 | Double value closest to -pi/2 | +| '-Infinity' | Finite value | Double value closest to -pi/2 | +| 'Infinity' | 'Infinity' | Double value closest to pi/4 | +| 'Infinity' | '-Infinity' | Double value closest to 3/4 \* pi | +| '-Infinity' | 'Infinity' | Double value closest to -pi/4 | +| '-Infinity' | '-Infinity' | Double value closest to -3/4 \* pi | + +### Examples + +```questdb-sql +SELECT atan2(1.0, 1.0) atan2; +``` + +| atan2 | +| -------------- | +| 0.785398163397 | + +## radians + +`radians(angleDegrees)` converts an angle measured in degrees to the equivalent +angle measured in radians. + +### Arguments + +- `angleDegrees` numeric value for the angle in degrees. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql +SELECT radians(180); +``` + +| radians | +| -------------- | +| 3.141592653589 | + +## degrees + +`degrees(angleRadians)` converts an angle measured in radians to the equivalent +angle measured in degrees. + +### Arguments + +- `angleRadians` numeric value for the angle in radians. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql +SELECT degrees(pi()); +``` + +| degrees | +| ------- | +| 180 | + +## pi + +`pi()` returns the constant pi as a double. + +### Arguments + +None. + +### Return value + +Return value type is `double`. + +### Examples + +```questdb-sql +SELECT pi(); +``` + +| pi | +| -------------- | +| 3.141592653589 | diff --git a/docs/reference/logging.md b/docs/reference/logging.md new file mode 100644 index 00000000..bb508804 --- /dev/null +++ b/docs/reference/logging.md @@ -0,0 +1,217 @@ +--- +title: Logging +description: Create various logs and customize a log.conf within QuestDB. +--- + +The logging behavior of QuestDB may be set in dedicated configuration files or +by environment variables. + +This section describes how to configure logging using these methods. + +## Enable debug log + +QuestDB `DEBUG` logging can be set globally. + +1. Provide the java option `-Debug` on startup +2. Setting the `QDB_DEBUG=true` as an environment variable + +## Configure log.conf + +Logs may be configured via a dedicated configuration file `log.conf`. + +QuestDB will look for `/log.conf` first in `conf/` directory and then on the +classpath, unless this name is overridden via a command line property: +`-Dout=/something_else.conf`. + +QuestDB will create `conf/log.conf` using default values if `-Dout` is not set +and file doesn't exist . 
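+
+For example, assuming QuestDB is started directly through the JVM (a sketch;
+adjust the invocation to match how you actually launch QuestDB, and note that
+the file name below is illustrative), a custom configuration can be selected
+at startup:
+
+```shell title="Selecting a custom log configuration (illustrative)"
+# -Dout must be passed as a JVM option; /log-custom.conf is a hypothetical file
+java -Dout=/log-custom.conf \
+  -p questdb.jar -m io.questdb/io.questdb.ServerMain \
+  -d /var/lib/questdb
+```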
+ +On Windows log messages go to depending on run mode : + +- interactive session - console and `$dataDir\log\stdout-%Y-%m-%dT%H-%M-%S.txt` + (default is `.\log\stdout-%Y-%m-%dT%H-%M-%S.txt` ) +- service - `$dataDir\log\service-%Y-%m-%dT%H-%M-%S.txt` (default is + `C:\Windows\System32\qdbroot\log\service-%Y-%m-%dT%H-%M-%S.txt` ) + +The possible values to enable within the `log.conf` appear as such: + +```shell title="log.conf" +# list of configured writers +writers=file,stdout,http.min + + +# rolling file writer +w.file.class=io.questdb.log.LogRollingFileWriter +w.file.location=${log.dir}/questdb-rolling.log.${date:yyyyMMdd} +w.file.level=INFO,ERROR +w.file.rollEvery=day +w.file.rollSize=1g + +# Optionally, use a single log +# w.file.class=io.questdb.log.LogFileWriter +# w.file.location=questdb-docker.log +# w.file.level=INFO,ERROR,DEBUG + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# min http server, used for error monitoring +w.http.min.class=io.questdb.log.LogConsoleWriter +w.http.min.level=ERROR +w.http.min.scope=http-min-server +``` + +### Log writer types + +There are four types of writer. + +Which one you need depends on your use case. + +| Available writers | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| file | Select from one of the two above patterns. Write to a single log that will grow indefinitely, or write a rolling log. Rolling logs can be split into `minute`, `hour`, `day`, `month` or `year`. | +| stdout | Writes logs to standard output. | +| http.min | REST API exposed for additional error monitoring. Enabled at port `9003` by default. For more detail, see [Health Monitoring](/docs/operations/health-monitoring/#min-health-server) under Operations. | + +## Environment variables + +Values in the log configuration file can be overridden with environment +variables. All configuration keys must be formatted as described in the +[environment variables](#environment-variables) section above. + +For example, to set logging on `ERROR` level only: + +```shell title="Setting log level to ERROR in log-stdout.conf" +w.stdout.level=ERROR +``` + +This can be passed as an environment variable as follows: + +```shell title="Setting log level to ERROR via environment variable" +export QDB_LOG_W_STDOUT_LEVEL=ERROR +``` + +## Docker logging + +When mounting a volume to a Docker container, a logging configuration file may +be provided in the container located at `./conf/log.conf`. For example, a file +with the following contents can be created: + +```shell title="./conf/log.conf" +# list of configured writers +writers=file,stdout,http.min + +# file writer +w.file.class=io.questdb.log.LogFileWriter +w.file.location=questdb-docker.log +w.file.level=INFO,ERROR,DEBUG + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# min http server, used monitoring +w.http.min.class=io.questdb.log.LogConsoleWriter +w.http.min.level=ERROR +w.http.min.scope=http-min-server +``` + +The current directory can be mounted: + +```shell title="Mount the current directory to a QuestDB container" +docker run -p 9000:9000 -v "$(pwd):/var/lib/questdb/" questdb/questdb +``` + +The container logs will be written to disk using the logging level and file name +provided in the `./conf/log.conf` file, in this case in `./questdb-docker.log`. 
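+
+Individual writer settings can also be overridden for the container through
+environment variables, as described above. For example, to raise the `stdout`
+writer level to `ERROR` (a sketch combining the documented variable with the
+`docker run` command above):
+
+```shell title="Overriding the stdout log level for a container"
+docker run -p 9000:9000 \
+  -e QDB_LOG_W_STDOUT_LEVEL=ERROR \
+  -v "$(pwd):/var/lib/questdb/" questdb/questdb
+```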
+ +## Prometheus Alertmanager + +QuestDB includes a log writer that sends any message logged at critical level +(logger.critical("may-day")) to Prometheus Alertmanager over a TCP/IP socket. +Details for configuring this can be found in the +[Prometheus documentation](/docs/third-party-tools/prometheus). To configure +this writer, add it to the `writers` config alongside other log writers. + +```ini title="log.conf" +# Which writers to enable +writers=stdout,alert + +# stdout +w.stdout.class=io.questdb.log.LogConsoleWriter +w.stdout.level=INFO + +# Prometheus Alerting +w.alert.class=io.questdb.log.LogAlertSocketWriter +w.alert.level=CRITICAL +w.alert.location=/alert-manager-tpt.json +w.alert.alertTargets=localhost:9093,localhost:9096,otherhost:9093 +w.alert.defaultAlertHost=localhost +w.alert.defaultAlertPort=9093 + +# The `inBufferSize` and `outBufferSize` properties are the size in bytes for the +# socket write buffers. +w.alert.inBufferSize=2m +w.alert.outBufferSize=4m +# Delay in milliseconds between two consecutive attempts to alert when +# there is only one target configured +w.alert.reconnectDelay=250 +``` + +Of all properties, only `w.alert.class` and `w.alert.level` are required, the +rest assume default values as stated above (except for `w.alert.alertTargets` +which is empty by default). + +Alert targets are specified using `w.alert.alertTargets` as a comma-separated +list of up to 12 `host:port` TCP/IP addresses. Specifying a port is optional and +defaults to the value of `defaultAlertHost`. One of these alert managers is +picked at random when QuestDB starts, and a connection is created. + +All alerts will be sent to the chosen server unless it becomes unavailable. If +it is unavailable, the next server is chosen. If there is only one server +configured and a fail-over cannot occur, a delay of 250 milliseconds is added +between send attempts. + +The `w.alert.location` property refers to the path (absolute, otherwise relative +to `-d database-root`) of a template file. By default, it is a resource file +which contains: + +```json title="/alert-manager-tpt.json" +[ + { + "Status": "firing", + "Labels": { + "alertname": "QuestDbInstanceLogs", + "service": "QuestDB", + "category": "application-logs", + "severity": "critical", + "version": "${QDB_VERSION}", + "cluster": "${CLUSTER_NAME}", + "orgid": "${ORGID}", + "namespace": "${NAMESPACE}", + "instance": "${INSTANCE_NAME}", + "alertTimestamp": "${date: yyyy/MM/ddTHH:mm:ss.SSS}" + }, + "Annotations": { + "description": "ERROR/cl:${CLUSTER_NAME}/org:${ORGID}/ns:${NAMESPACE}/db:${INSTANCE_NAME}", + "message": "${ALERT_MESSAGE}" + } + } +] +``` + +Four environment variables can be defined, and referred to with the +`${VAR_NAME}` syntax: + +- _ORGID_ +- _NAMESPACE_ +- _CLUSTER_NAME_ +- _INSTANCE_NAME_ + +Their default value is `GLOBAL`, they mean nothing outside a cloud environment. + +In addition, `ALERT_MESSAGE` is a placeholder for the actual `critical` message +being sent, and `QDB_VERSION` is the runtime version of the QuestDB instance +sending the alert. The `${date: }` syntax can be used to produce a +timestamp at the time of sending the alert. diff --git a/docs/reference/operators/bitwise.md b/docs/reference/operators/bitwise.md new file mode 100644 index 00000000..e124b965 --- /dev/null +++ b/docs/reference/operators/bitwise.md @@ -0,0 +1,77 @@ +--- +title: Bitwise operators +sidebar_label: Bitwise +description: Bitwise operators reference documentation. 
+--- + +This page describes the available operators to assist with performing bitwise operations on numeric values. + +Precedence of these operators is as follows: + +1. `~` NOT +2. `&` AND +3. `^` XOR +4. `|` OR + +## ~ NOT + +`~` is a unary operation that performs logical negation on each bit. Bits that +are 0 become 1, and those that are 1 become 0. Expects a value of `long` or `int` type. + +**Examples:** + +```questdb-sql +SELECT ~1024 +``` + +| column | +| ------ | +| -1025 | + +## & AND + +`&` is a binary operation that takes two equal-length binary representations and +performs the logical AND operation on each pair of the corresponding bits. +Expects values of `long` or `int` type. + +**Examples:** + +```questdb-sql +SELECT 5 & 3 +``` + +| column | +| ------ | +| 1 | + +## ^ XOR + +`^` is a binary operation that takes two bit patterns of equal length and +performs the logical exclusive OR (XOR) operation on each pair of corresponding bits. +Expects a value of `long` or `int` type. + +**Examples:** + +```questdb-sql +SELECT 5 ^ 3 +``` + +| column | +| ------ | +| 6 | + +## | OR + +`|` is a binary operation that takes two bit patterns of equal length and +performs the logical inclusive OR operation on each pair of corresponding bits. +Expects a value of `long` or `int` type. + +**Examples:** + +```questdb-sql +SELECT 5 | 3 +``` + +| column | +| ------ | +| 7 | diff --git a/docs/reference/operators/comparison.md b/docs/reference/operators/comparison.md new file mode 100644 index 00000000..02c651a7 --- /dev/null +++ b/docs/reference/operators/comparison.md @@ -0,0 +1,131 @@ +--- +title: Comparison operators +sidebar_label: Comparison +description: Comparison operators reference documentation. +--- + +This page describes the available operators to assist with comparison +operations. + +If `string` or `char` values are used in the input, they are converted to `int` +using the [ASCII Table](https://www.asciitable.com/) for comparison. + +## = (equal to) + +`(value1) = (value2)` - returns true if the two values are the same. + +`=` can be used with [`>` or `<`](#description). + +### Arguments + +- `value1` is any data type. +- `value2` is any data type. + +### Return value + +Return value type is boolean. + +### Examples + +```questdb-sql + +SELECT '5' = '5'; +-- Returns true + +SELECT 5 = 5; +-- Returns true + +SELECT '5' = '3'; +-- Returns false + +SELECT 5 = 3; +-- Returns false +``` + +## > (greater than) and < (less than) + +- `(value1) > (value2)` - returns true if `value1` is greater than `value2`. +- `(value1) < (value2)` - returns true if `value1` is less than `value2`. + +### Arguments + +- `value1` and `value2` are one of the following data types: + - any numeric data type + - `char` + - `date` + - `timestamp` + - `symbol` + - `string` + +### Description + +`>` and `<` can be used in combination with `=` for the following comparison: + +- `>=` - greater than or equal to +- `<=` - less than or equal to + +### Return value + +Return value type is boolean. + +### Examples + +```questdb-sql + +SELECT 'abc' > 'def'; +-- Returns false + +SELECT '123' < '456'; +-- Returns true + +SELECT '5' > '5'; +-- Returns false + +SELECT '5' >= '5'; +-- Returns true + +SELECT 5 < 5; +-- Returns false + +SELECT 5 <= 5; +-- Returns true + +SELECT 'a' > 'b'; +-- Returns false + +SELECT 5 < 3; +-- Returns false +``` + +## <\> or != (not equal to) + +`(value1) <> (value2)` - returns true if `value1` is not equal to `value2`. + +`!=` is an alias of `<>`. + +### Arguments + +- `value1` is any data type. 
+- `value2` is any data type. + +### Return value + +Return value type is boolean. + +### Examples + +```questdb-sql + +SELECT '5' <> '5'; +-- Returns false + +SELECT 5 <> 5; +-- Returns false + +SELECT 'a' <> 'b'; +-- Returns true + +SELECT 5 <> 3; +-- Returns true + +``` diff --git a/docs/reference/operators/ipv4.md b/docs/reference/operators/ipv4.md new file mode 100644 index 00000000..b62127ce --- /dev/null +++ b/docs/reference/operators/ipv4.md @@ -0,0 +1,280 @@ +--- +title: IPv4 Operations +sidebar_label: IPv4 +description: Operator examples with IPv4 type data. +--- + +This document outlines the IPv4 data type operators. + +They are useful when dealing with IP addresses. + +The IP addresses can be in the range of `0.0.0.1` - `255.255.255.255`. + +The address: `0.0.0.0` is interpreted as null. + +The following operators support `string` type arguments to permit the passing of netmasks: + +* [<< Strict IP address contained by](/docs/reference/operators/ipv4/#-strict-ip-address-contained-by) +* [<<= IP address contained by or equal](/docs/reference/operators/ipv4/#--ip-address-contained-by-or-equal) +* [rnd_ipv4(string, int)](/docs/reference/operators/ipv4/#random-address-range-generator---rnd_ipv4string-int) +* [netmask()](/docs/reference/operators/ipv4/#return-netmask---netmask) + +See [IPv4 SQL functions](/docs/reference/function/ipv4/) for more guidance around IPv4 beyond operators. + +## Operators + +### < Less than + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is less than another. + +```sql +ipv4 '33.1.8.43' < ipv4 '200.6.38.9' -> T +``` + +### <= Less than or equal + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is less than or equal to another. + +```sql +ipv4 '33.1.8.43' <= ipv4 '33.1.8.43' -> T +``` + +### > Greater than + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is greater than another. + +```sql +ipv4 '33.1.8.43' > ipv4 '200.6.38.9' -> F +``` + +### >= Greater than or equal + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is greater than or equal to another. + +```sql +ipv4 '33.1.8.43' >= ipv4 '200.6.38.9' -> F +``` + +### = Equals + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is equal to another. + +```sql +ipv4 '44.8.9.10' = ipv4 '6.2.90.1' -> F +``` + +### != Does not equal + +Takes two IPv4 arguments. + +Returns a boolean. + +#### Example + +Use case: testing to see if one IP address is not equal to another. + +```sql +ipv4 '44.8.9.10' != ipv4 '6.2.90.1' -> T +``` + +### << Strict IP address contained by + +Takes one IPv4 argument and one string argument. + +The string argument can accept IPv4 addresses with a subnet mask, the IPv4 argument cannot. + +Returns a boolean. + +#### Example + +Use case: searching ip addresses by subnet + +```sql +ipv4 '35.24.65.11' << '35.24.65.2/16' -> T +ipv4 '35.24.65.11' << '35.24.65.2/32' -> F +``` + +### <<= IP address contained by or equal + +Takes one IPv4 argument and one string argument + +The string argument can accept IPv4 addresses with a subnet mask, the IPv4 argument cannot. + +Returns a boolean. 
+ +#### Example + +Use case: searching ip addresses by subnet + +```sql +ipv4 '35.24.65.11' << '35.24.65.2/16' -> T +ipv4 '35.24.65.11' << '35.24.65.2/32' -> T +``` + +### & Bitwise AND + +Takes two IPv4 arguments. + +Returns an IPv4 address. + +#### Example + +Use case: separating an ip address into its network and host portions + +```sql +ipv4 '215.53.40.9' & ipv4 '255.255.0.0' -> 215.53.0.0 +ipv4 '99.8.63.41' & ipv4 '0.0.63.41' -> 0.0.63.41 +``` +### ~ Bitewise NOT + +Takes one IPv4 argument. + +Returns an IPv4 address. + +#### Example + +Use case: computing broadcast address' bitmask from a netmask + +```sql +~ ipv4 '255.255.0.0' -> 0.0.255.255 +``` + +### | Bitwise OR + +Takes two IPv4 arguments. + +Returns an IPv4 address. + +#### Example + +Use case: computing an ip address' broadcast address + +```sql +ipv4 '92.11.8.40' | '0.0.255.255' -> 92.11.255.255 +``` + +### + Add offset to an IP address + +Takes one IPv4 argument and one integer argument. + +Returns an IPv4 address. + +#### Example + +Use case: altering an ip address + +```sql +ipv4 '92.11.8.40' + 5 -> 92.11.8.45 +10 + ipv4 '2.6.43.8' -> 2.6.43.18 +``` + +### - Subtract offset from IP address + +Takes one IPv4 argument and one integer argument. + +Returns an IPv4 address. + +#### Example + +```sql +ipv4 '92.11.8.40' - 5 -> 92.11.8.35 +``` + +### - Difference between two IP addresses + +Takes two IPv4 arguments. + +Returns a long. + +#### Example + +Use case: calculating the range of unique addresses between two ip addresses + +```sql +ipv4 '92.11.8.40' - ipv4 '92.11.8.0' -> 40 +``` + +### Return netmask - netmask(string) + +Takes a `string` IPv4 argument as either: + +* ipv4 address with a netmask `22.59.138.9/8` +* subnet with netmask: `2.2/16` + +Returns an IPv4 addresses' netmask (`255.0.0.0`) in IPv4 format. + +#### Example + +Use case: Obtaining the broadcast bitmask for an ip address via performing bitwise NOT on the netmask. + +Apply a bitwise OR to this result to obtain the broadcast address of an ip address. + +```sql +~ netmask('68.11.9.2/8')) | ipv4 '68.11.9.2' -> 68.255.255.255 +``` + +### Random address generator - rnd_ipv4() + +Random address generator for a single address. + +Returns a single IPv4 address. + +Useful for testing. + +#### Example + +```sql +rnd_ipv4() +/* Return address between 0.0.0.1 - 255.255.255.255 */ +97.29.14.22 +``` + +### Random address range generator - rnd_ipv4(string, int) + +Generates a random ip address within the bounds of a given subnet. + +The integer argument dictates how many null values will be generated. + +Returns an IPv4 address within specified range. + +#### Example + +```sql +rnd_ipv4('22.43.200.9/16', 0) +/* Return address between 22.43.0.0 - 22.43.255.25 */ +22.43.200.12 +``` + +## Limitations + +IPv4 column types cannot be created via InfluxDB Line Protocol as the protocol lacks support for IPv4. As a result, the server cannot distinguish between string and IPv4 data. However, InfluxDB Line Protocol can still insert string data into a pre-existing column of type IPv4. diff --git a/docs/reference/operators/pattern-matching.md b/docs/reference/operators/pattern-matching.md new file mode 100644 index 00000000..ce08a771 --- /dev/null +++ b/docs/reference/operators/pattern-matching.md @@ -0,0 +1,128 @@ +--- +title: Pattern matching operators +sidebar_label: Pattern matching +description: Pattern matching operators reference documentation. +--- + +This page describes the available operators to assist with performing pattern +matching. 
For operators using regular expressions (`regex` in the syntax), QuestDB uses +[Java regular expression implementation](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html). + +## ~ (match) and !~ (does not match) + +- `(string) ~ (regex)` - returns true if the `string` value matches a regular expression, `regex`, otherwise returns false (case sensitive match). +- `(string) !~ (regex)` - returns true if the `string` value fails to match a regular expression, `regex`, otherwise returns false (case sensitive match). + +### Arguments + +- `string` is an expression that evaluates to the `string` data type. +- `regex` is any regular expression pattern. + +### Return value + +Return value type is `boolean`. + +## LIKE/ILIKE + +- `(string) LIKE (pattern)` - returns true if the `string` value matches `pattern`, otherwise returns false (case sensitive match). +- `(string) ILIKE (pattern)` - returns true if the `string` value matches `pattern`, otherwise returns false (case insensitive match). + +### Arguments + +- `string` is an expression that evaluates to the `string` data type. +- `pattern` is a pattern which can contain wildcards like `_` and `%`. + +### Return value + +Return value type is `boolean`. + +### Description + +If the pattern doesn't contain wildcards, then the pattern represents the string itself. + +The wildcards which can be used in pattern are interpreted as follows: +- `_` - matches any single character. +- `%` - matches any sequence of zero or more characters. + +Wildcards can be used as follows: + +```questdb-sql +SELECT 'quest' LIKE 'quest' ; +-- Returns true +SELECT 'quest' LIKE 'ques_'; +-- Returns true +SELECT 'quest' LIKE 'que%'; +-- Returns true +SELECT 'quest' LIKE '_ues_'; +-- Returns true +SELECT 'quest' LIKE 'q_' +-- Returns false +``` + + +`ILIKE` performs a case insensitive match as follows: + +```questdb-sql +SELECT 'quest' ILIKE 'QUEST'; +-- Returns true +SELECT 'qUeSt' ILIKE 'QUEST'; +-- Returns true +SELECT 'quest' ILIKE 'QUE%'; +-- Returns true +SELECT 'QUEST' ILIKE '_ues_'; +-- Returns true +``` +### Examples + +#### LIKE + +```questdb-sql +SELECT * FROM trades +WHERE symbol LIKE '%-USD' +LATEST ON timestamp PARTITION BY symbol; +``` + +| symbol | side | price | amount | timestamp | +| --- | --- | --- | --- | --- | +| ETH-USD | sell | 1348.13 | 3.22455108 | 2022-10-04T15:25:58.834362Z | +| BTC-USD | sell | 20082.08 | 0.16591219 | 2022-10-04T15:25:59.742552Z | + +#### ILIKE + +```questdb-sql +SELECT * FROM trades +WHERE symbol ILIKE '%-usd' +LATEST ON timestamp PARTITION BY symbol; +``` + +| symbol | side | price | amount | timestamp | +| --- | --- | --- | --- | --- | +| ETH-USD | sell | 1348.13 | 3.22455108 | 2022-10-04T15:25:58.834362Z | +| BTC-USD | sell | 20082.08 | 0.16591219 | 2022-10-04T15:25:59.742552Z | + +## regexp_replace + +`regexp_replace (string1, regex , string2 )` - provides substitution of new text +for substrings that match regular expression patterns. + +### Arguments: + +- `string1` is a source `string` value to be manipulated. +- `regex` is a regular expression pattern. +- `string2` is any `string` value to replace part or the whole of the source value. + +### Return value + +Return value type is `string`. The source string is returned unchanged if there is no match to the pattern. If +there is a match, the source string is returned with the replacement string +substituted for the matching substring. 
+ +### Examples + +```questdb-sql title="Example description - regexp_replace" +SELECT regexp_replace('MYSQL is a great database', '^(\S*)', 'QuestDB'); +``` + +``` +QuestDB is a great database +``` diff --git a/docs/reference/operators/spatial.md b/docs/reference/operators/spatial.md new file mode 100644 index 00000000..60c1b900 --- /dev/null +++ b/docs/reference/operators/spatial.md @@ -0,0 +1,74 @@ +--- +title: Spatial operators +sidebar_label: Spatial +description: Spatial operators reference documentation. +--- + +This page describes the available operators to assist with performing spatial +calculations. For more information on this type of data, see the +[geohashes documentation](/docs/concept/geohashes/) and the +[spatial functions](/docs/reference/function/spatial/) which have been added to +help with filtering and generating data. + +## within + +`within(geohash, ...)` - evaluates whether a comma-separated list of geohashes is +equal to or within another geohash: + +- The `within` operator can only be used in `LATEST ON` queries and all symbol + columns within the query **must be indexed**. + +- Only **geohash literals** (`#ezzn5kxb`) are supported as opposed to geohashes + passed as strings (`'ezzn5kxb'`). + +- Filtering happens logically after `LATEST ON`. + +- Apart from the `within` operator, only simple filters on the designated timestamp are allowed in the `WHERE` clause. + +**Arguments:** + +- `geohash` is a geohash type in text or binary form. + +**Return value:** + +- evaluates to `true` if geohash values are a prefix or complete match based on + the geohashes passed as arguments. + +**Examples:** + +Given a table with the following contents: + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:07.721444Z | device_2 | e | ezzn5kxb | +| 2021-09-02T14:20:08.241489Z | device_1 | u | u33w4r2w | +| 2021-09-02T14:20:08.241489Z | device_3 | u | u33d8b1b | + +The `within` operator can be used to filter results by geohash: + +```questdb-sql +SELECT * FROM pos +WHERE g8c within(#ezz, #u33d8) +LATEST ON ts PARTITION BY device_id; +``` + +This yields the following results: + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:07.721444Z | device_2 | e | ezzn5kxb | +| 2021-09-02T14:20:08.241489Z | device_3 | u | u33d8b1b | + +Additionally, prefix-like matching can be performed to evaluate if geohashes +exist within a larger grid: + +```questdb-sql +SELECT * FROM pos +WHERE g8c within(#u33) +LATEST ON ts PARTITION BY device_id; +``` + +| ts | device_id | g1c | g8c | +| --------------------------- | --------- | --- | -------- | +| 2021-09-02T14:20:08.241489Z | device_1 | u | u33w4r2w | +| 2021-09-02T14:20:08.241489Z | device_3 | u | u33d8b1b | diff --git a/docs/reference/sql/alter-table-add-column.md b/docs/reference/sql/alter-table-add-column.md new file mode 100644 index 00000000..fa31da5f --- /dev/null +++ b/docs/reference/sql/alter-table-add-column.md @@ -0,0 +1,46 @@ +--- +title: ALTER TABLE ADD COLUMN +sidebar_label: ADD COLUMN +description: ALTER TABLE ADD COLUMN SQL keyword reference documentation. +--- + +Adds a new column of a specified type to an existing table. + +The new column is not back-populated even if the table contains data. While a +single column is added atomically, adding multiple columns is not an atomic +operation. QuestDB will stop adding the remaining columns on the list on the +first failure.
It is therefore possible to add some columns and not others. + +:::caution + +- New column names may only consist of letters, numbers and underscores `_` + +- Adding a new column does not lock the table for reading and does not wait on + any reads to finish. + +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of ALTER TABLE with ADD COLUMN keyword](/img/docs/diagrams/alterTableAddColumn.svg) + +## Examples + +Add a new column called `comment` of type `STRING` type to the table `ratings` + +```questdb-sql title="New column" +ALTER TABLE ratings ADD COLUMN comment STRING; +``` + +When adding a column of `Symbol` type, optional keywords may be passed which are +unique to this type. These keywords are described in the +[Symbol type](/docs/reference/sql/create-table/#symbol) section of the +`CREATE TABLE` documentation. + +The following example shows how to add a new `SYMBOL` column with `NOCACHE` and +`INDEX` keywords: + +```questdb-sql title="New symbol column" +ALTER TABLE ratings ADD COLUMN comment SYMBOL NOCACHE INDEX; +``` diff --git a/docs/reference/sql/alter-table-alter-column-add-index.md b/docs/reference/sql/alter-table-alter-column-add-index.md new file mode 100644 index 00000000..4fb574a1 --- /dev/null +++ b/docs/reference/sql/alter-table-alter-column-add-index.md @@ -0,0 +1,21 @@ +--- +title: ALTER TABLE COLUMN ADD INDEX +sidebar_label: ADD INDEX +description: ADD INDEX SQL keyword reference documentation. +--- + +Indexes an existing [`symbol`](/docs/concept/symbol/) column. + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of the ALTER TABLE with ADD INDEX keyword](/img/docs/diagrams/alterTableAddIndex.svg) + +Adding an [index](/docs/concept/indexes/) is an atomic, non-blocking, and non-waiting operation. Once +complete, the SQL optimizer will start using the new index for SQL executions. + +## Example + +```questdb-sql title="Adding an index" +ALTER TABLE trades ALTER COLUMN instrument ADD INDEX; +``` diff --git a/docs/reference/sql/alter-table-alter-column-cache.md b/docs/reference/sql/alter-table-alter-column-cache.md new file mode 100644 index 00000000..4866459b --- /dev/null +++ b/docs/reference/sql/alter-table-alter-column-cache.md @@ -0,0 +1,23 @@ +--- +title: ALTER TABLE COLUMN CACHE | NOCACHE +sidebar_label: CACHE | NOCACHE +--- + +`ALTER TABLE ALTER COLUMN CACHE | NOCACHE` changes the cache setting for a +[symbol](/docs/concept/symbol/) column. + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of NOCACHE | CACHE](/img/docs/diagrams/alterTableNoCache.svg) + +- `columnName` is the `symbol` data type. +- By default, a symbol column is cached. +- Refer to the [Guide on symbol](/docs/concept/symbol/#symbol-columns) for the advantages of + caching `symbols`. + +## Examples + +```questdb-sql +ALTER TABLE 'taxi_ride' ALTER COLUMN passenger_count NOCACHE; +``` diff --git a/docs/reference/sql/alter-table-alter-column-drop-index.md b/docs/reference/sql/alter-table-alter-column-drop-index.md new file mode 100644 index 00000000..b27c828f --- /dev/null +++ b/docs/reference/sql/alter-table-alter-column-drop-index.md @@ -0,0 +1,36 @@ +--- +title: ALTER TABLE COLUMN DROP INDEX +sidebar_label: DROP INDEX +description: DROP INDEX SQL keyword reference documentation. 
+--- + +Removes an existing index from a column of type [symbol](/docs/concept/symbol/). + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE with DROP INDEX keyword](/img/docs/diagrams/alterTableDropIndex.svg) + +Removing an index is an atomic, non-blocking, and non-waiting operation. Once +the operation is completed, the SQL engine stops using the index for SQL +executions, and all its associated files are deleted. + +This operation is similar to: + +```sql +UPDATE tab SET column=column; +``` + +Where `column` is a symbol column that has an [index](/docs/concept/indexes/) before the operation, and no +index afterwards. Readers of the table might be using the index in transaction +A; in the meantime, a writer creates transaction B containing the new version of +the column, minus the index (metadata is set to not have an index, and index files +are not copied across to the newer version). When the readers are finished, +QuestDB automatically deletes all the files pertaining to the version of the +column in transaction A (QuestDB uses hardlinks internally to avoid an actual +copy operation of the data files, as they do not change at all). + +## Example + +```questdb-sql title="Removing an index" +ALTER TABLE trades ALTER COLUMN instrument DROP INDEX; +``` diff --git a/docs/reference/sql/alter-table-attach-partition.md b/docs/reference/sql/alter-table-attach-partition.md new file mode 100644 index 00000000..a6c9d6af --- /dev/null +++ b/docs/reference/sql/alter-table-attach-partition.md @@ -0,0 +1,211 @@ +--- +title: ALTER TABLE ATTACH PARTITION +sidebar_label: ATTACH PARTITION +description: ATTACH PARTITION SQL keyword reference documentation. +--- + +Restores one or more partitions that were previously detached from the table +using the SQL +[ALTER TABLE DETACH PARTITION](/docs/reference/sql/alter-table-detach-partition/) +statement. + +This feature is part of the manual S3/cold storage solution, allowing data to be +restored manually. + +## Syntax + +![Flow chart showing the syntax of ALTER TABLE with ATTACH PARTITION keyword](/img/docs/diagrams/alterTableAttachPartition.svg) + +The `WHERE` clause is not supported when attaching partitions. + +## Description + +Before executing `ATTACH PARTITION`, the partition folders to be attached must +be made available to QuestDB using one of the following methods: + +- Copying the partition folders manually +- Using a [symbolic link](https://en.wikipedia.org/wiki/Symbolic_link) + +This section describes the details of each method. + +### Manual copy + +Partition folders can be manually moved from where they are stored into the +table folder in `db`. To make the partitions available for the attach operation, +the partition folders need to be renamed with the `.attachable` suffix. + +For example, in a table partitioned by year, given a partition folder named +`2020.detached`, rename it as `2020.attachable`, and move it to the table +folder. + +### Symbolic links + +[Symbolic links](https://en.wikipedia.org/wiki/Symbolic_link) can be used to +attach partition folders that may exist in a different volume used as cold +storage. The partitions attached in this way will be **read-only**. To make +detached partition folders in cold storage available for attaching, for each +partition folder, create a symbolic link named `<partition_name>.attachable` +in the table's folder, and set the target +path to the detached partition folder. + +On Windows, symbolic links require admin privileges, so this method is not +recommended.
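+ +For example, on Linux or macOS, a single detached partition could be linked as follows (the table name `x`, the partition `2020`, and the `/cold_storage` path are illustrative): + +```shell +# From the table's folder inside the QuestDB data directory +cd /var/lib/questdb/db/x + +# Link the detached partition in cold storage under an .attachable name +ln -s /cold_storage/x/2020.detached 2020.attachable +```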
+ +:::note + +SQL statements that hit partitions attached via symbolic links may run more +slowly if their volumes are on slower disks. + +::: + +#### Properties using symbolic links + +Partitions attached via the symbolic link approach are **read-only** for the +following operations: + +- [`DETACH PARTITION`](/docs/reference/sql/alter-table-detach-partition/) and + [`DROP PARTITION`](/docs/reference/sql/alter-table-drop-partition/): Once the + partition folders are unlinked, the symbolic links are removed, but the + content remains. Detaching a partition that was attached via symbolic link + does not create a `<partition_name>.detached` copy. +- [`UPDATE`](/docs/reference/sql/update/): Attempts to update the read-only + partitions result in an error. +- [`INSERT`](/docs/reference/sql/insert/): Attempts to insert data into a + read-only partition result in a critical-level log message being logged by the + server, and the insertion is a no-op. If + [Prometheus monitoring](/docs/third-party-tools/prometheus/) is configured, an + alert will be triggered. + +For read-only partitions, the following operations are supported: + +- [`ADD COLUMN`](/docs/reference/sql/alter-table-add-column/) +- [`DROP COLUMN`](/docs/reference/sql/alter-table-drop-column/) +- [`RENAME COLUMN`](/docs/reference/sql/alter-table-rename-column/) +- [`ADD INDEX`](/docs/reference/sql/alter-table-alter-column-add-index/) +- [`DROP INDEX`](/docs/reference/sql/alter-table-alter-column-drop-index/) + +## Example + +### Manual copy + +Assuming the QuestDB data directory is `/var/lib/questdb/db`, for a table `x` +with AWS S3 for cold storage: + +1. Copy files from S3: + + ```bash + cd /var/lib/questdb/db/x + # Table x is the original table that the partitions were detached from. + + mkdir 2019-02-01.attachable && aws s3 cp s3://questdb-internal/blobs/20190201.tar.gz - | tar xvfz - -C 2019-02-01.attachable --strip-components 1 + mkdir 2019-02-02.attachable && aws s3 cp s3://questdb-internal/blobs/20190202.tar.gz - | tar xvfz - -C 2019-02-02.attachable --strip-components 1 + ``` + +2. Execute the SQL `ALTER TABLE ATTACH PARTITION` command: + + ```questdb-sql + ALTER TABLE x ATTACH PARTITION LIST '2019-02-01', '2019-02-02'; + ``` + +3. After the SQL is executed, the partitions will be available to read. + +### Symbolic link + +The following example creates a table `tab` with some data, detaches all but the +last partition, and demonstrates how to attach the partitions using symbolic +links.
+ +These SQL statements create the table `tab` partitioned by year and insert seven +rows that result in a total of seven partitions: + +```sql +CREATE TABLE tab (name STRING, age INT, dob TIMESTAMP) TIMESTAMP(dob) PARTITION BY YEAR; + +INSERT INTO tab VALUES('B', 1, '2022-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('C', 2, '2023-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('D', 3, '2024-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('E', 4, '2025-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('F', 5, '2026-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('A', 0, '2027-11-08T12:00:00.000000Z'); +INSERT INTO tab VALUES('0', 0, '2028-11-08T12:00:00.000000Z'); +``` + +This SQL statement detaches partitions 2022, 2023, 2024, 2025, 2026, and 2027: + +```sql +ALTER TABLE tab DETACH PARTITION WHERE dob < '2028'; +``` + +Assuming QuestDB's root directory is `/opt/homebrew/var/questdb/db`, the +content of the table folder is: + +```shell +2022.detached +2023.detached +2024.detached +2025.detached +2026.detached +2027.detached +2028.5 +_cv +_meta +_todo_ +_txn +_txn_scoreboard +seq +``` + +You can now move those `*.detached` folders to a different path, +potentially a different volume: + +```shell +mv /opt/homebrew/var/questdb/db/tab/*.detached /cold_storage/tab +``` + +When you want to attach these partitions back, create a symlink for every +partition to be attached in the table folder +`/opt/homebrew/var/questdb/db/tab`: + +```shell +ln -s /cold_storage/tab/2022.detached 2022.attachable +ln -s /cold_storage/tab/2023.detached 2023.attachable +ln -s /cold_storage/tab/2024.detached 2024.attachable +ln -s /cold_storage/tab/2025.detached 2025.attachable +ln -s /cold_storage/tab/2026.detached 2026.attachable +ln -s /cold_storage/tab/2027.detached 2027.attachable +``` + +The content of the table folder should look like this now: + +```shell +2022.attachable -> /cold_storage/tab/2022.detached +2023.attachable -> /cold_storage/tab/2023.detached +2024.attachable -> /cold_storage/tab/2024.detached +2025.attachable -> /cold_storage/tab/2025.detached +2026.attachable -> /cold_storage/tab/2026.detached +2027.attachable -> /cold_storage/tab/2027.detached +2028.5 +_cv +_meta +_todo_ +_txn +_txn_scoreboard +seq +``` + +After the symbolic links have been created, the partitions can be attached with +the following SQL statement: + +```sql +ALTER TABLE tab ATTACH PARTITION LIST '2022', '2023', '2024', '2025', '2026', '2027'; +``` + +The SQL reference to the partitions does not include the suffix `.attachable`. + +## Limitations + +- S3/Cold storage interaction is manual. Partitions can only be attached to the + same table they were detached from. The table name must be the same. Moving + partitions between tables or database instances is not supported. +- The operation will fail if a partition already exists. We are working on + functionality to allow merging data in the same partition for attaching. diff --git a/docs/reference/sql/alter-table-detach-partition.md b/docs/reference/sql/alter-table-detach-partition.md new file mode 100644 index 00000000..f392c8c4 --- /dev/null +++ b/docs/reference/sql/alter-table-detach-partition.md @@ -0,0 +1,42 @@ +--- +title: ALTER TABLE DETACH PARTITION +sidebar_label: DETACH PARTITION +description: DETACH PARTITION SQL keyword reference documentation. +--- + +Makes partition data unavailable for reads and prepares the partition directory for transportation.
A partition detached by this SQL keyword can be "re-attached" using the complementary SQL keyword [ALTER TABLE ATTACH PARTITION](/docs/reference/sql/alter-table-attach-partition/). + +## Syntax + +![Flow chart showing the syntax of ALTER TABLE with DETACH LIST PARTITION keyword](/img/docs/diagrams/alterTableDetachPartition.svg) + +## Example + +To detach one or more partitions, let's assume a table `x` with three partitions, `2019-02-01`, `2019-02-02`, and `2019-02-03`: + +1. Detach two partitions using the SQL `ALTER TABLE DETACH PARTITION` command: + + ```questdb-sql + ALTER TABLE x DETACH PARTITION LIST '2019-02-01', '2019-02-02'; + + -- It is also possible to use a WHERE clause to define the partition list: + + ALTER TABLE x DETACH PARTITION WHERE timestamp < '2019-02-03T00'; + ``` + + +2. Users can then move the partitions, for example, to an S3 bucket: + + ```bash + cd /var/lib/questdb/db/x/ + tar cfz - '2019-02-01.detached' | aws s3 cp - s3://questdb-internal/blobs/20190201.tar.gz + tar cfz - '2019-02-02.detached' | aws s3 cp - s3://questdb-internal/blobs/20190202.tar.gz + ``` + The table directory is nested in the root directory. The root directory is set by `cairo.root` and is set to `db` by default. The detached partition folders have the suffix `.detached`. + +## Limitations + +- QuestDB does not compress partitions after detaching nor does it change partition format significantly. In most cases, compression will have to be done manually before partitions are transported to cold storage. +- The operation does not support detaching: + - An active (the last) partition. + - The only partition of a non-partitioned table. diff --git a/docs/reference/sql/alter-table-disable-deduplication.md b/docs/reference/sql/alter-table-disable-deduplication.md new file mode 100644 index 00000000..716cc146 --- /dev/null +++ b/docs/reference/sql/alter-table-disable-deduplication.md @@ -0,0 +1,21 @@ +--- +title: ALTER TABLE DEDUP DISABLE +sidebar_label: DEDUP DISABLE +description: DISABLE DEDUPLICATION SQL command reference documentation. +--- + +Disables storage-level data deduplication on inserts. + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE DISABLE DEDUP statement](/img/docs/diagrams/disableDedup.svg) + +## Example + +Disable deduplication on the table `TICKER_PRICE`: + +```sql +ALTER TABLE TICKER_PRICE DEDUP DISABLE +``` + +See more examples on the [data deduplication](/docs/concept/deduplication#example) page \ No newline at end of file diff --git a/docs/reference/sql/alter-table-drop-column.md b/docs/reference/sql/alter-table-drop-column.md new file mode 100644 index 00000000..252337d0 --- /dev/null +++ b/docs/reference/sql/alter-table-drop-column.md @@ -0,0 +1,35 @@ +--- +title: ALTER TABLE DROP COLUMN +sidebar_label: DROP COLUMN +description: DROP COLUMN SQL keyword reference documentation. +--- + +Deletes a column from an existing table. + +Dropping columns will also attempt to remove files belonging to the column from +all partitions, thus freeing up disk space immediately. If this is not +immediately possible on Windows, the file remove operation is postponed until +files are released by all threads. The logical drop column will succeed on +Windows in the presence of active readers. + +:::caution + +Use `DROP COLUMN` with care, as QuestDB **cannot recover data from dropped +columns**!
+ +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) + +![Flow chart showing the syntax of ALTER TABLE with DROP COLUMN keyword](/img/docs/diagrams/alterTableDropColumn.svg) + +## Example + +The following example deletes the column called `comment` from the table +`ratings`: + +```questdb-sql title="Dropping a column" +ALTER TABLE ratings DROP COLUMN comment; +``` diff --git a/docs/reference/sql/alter-table-drop-partition.md b/docs/reference/sql/alter-table-drop-partition.md new file mode 100644 index 00000000..0bb69b14 --- /dev/null +++ b/docs/reference/sql/alter-table-drop-partition.md @@ -0,0 +1,63 @@ +--- +title: ALTER TABLE DROP PARTITION +sidebar_label: DROP PARTITION +description: DROP PARTITION SQL keyword reference documentation. +--- + +Drops one or more partitions from an existing table. + +Similar to dropping columns, dropping partitions is a non-blocking and +non-waiting operation. While atomic for single partitions, dropping multiple +partitions is not atomic. The operation will exit on the first failure +and will not continue through a list of partitions if one fails to be dropped. + +:::caution + +Use `DROP PARTITION` with care, as QuestDB **cannot recover data from dropped +partitions**! + +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of ALTER TABLE with DROP PARTITION keyword](/img/docs/diagrams/alterTableDropPartition.svg) + +## Drop partition by name + +The partition name must match the name of the directory for the given partition. +The naming convention is detailed in [Partitions](/docs/concept/partitions/). + +### Examples + +```questdb-sql title="Drop a single partition" +--DAY +ALTER TABLE measurements DROP PARTITION LIST '2019-05-18'; +--MONTH +ALTER TABLE measurements DROP PARTITION LIST '2019-05'; +--YEAR +ALTER TABLE measurements DROP PARTITION LIST '2019'; +``` + +```questdb-sql title="Drop multiple partitions" +ALTER TABLE measurements DROP PARTITION LIST '2018','2019'; +``` + +## Drop partitions using a boolean expression + +Drops partitions based on a boolean expression on the designated timestamp +column. + +### Examples + +```questdb-sql title="Drop one partition" +ALTER TABLE measurements +DROP PARTITION +WHERE timestamp = to_timestamp('2019-01-01:00:00:00', 'yyyy-MM-dd:HH:mm:ss'); +``` + +```questdb-sql title="Drop all partitions older than 2018" +ALTER TABLE measurements +DROP PARTITION +WHERE timestamp < to_timestamp('2018-01-01:00:00:00', 'yyyy-MM-dd:HH:mm:ss'); +``` diff --git a/docs/reference/sql/alter-table-enable-deduplication.md b/docs/reference/sql/alter-table-enable-deduplication.md new file mode 100644 index 00000000..6586c287 --- /dev/null +++ b/docs/reference/sql/alter-table-enable-deduplication.md @@ -0,0 +1,40 @@ +--- +title: ALTER TABLE DEDUP ENABLE +sidebar_label: DEDUP ENABLE +description: ENABLE DEDUPLICATION SQL command reference documentation. +--- + +Enables storage-level data deduplication on inserts and configures `UPSERT KEYS`. + +:::note +- Deduplication can only be enabled for [Write-Ahead Log (WAL)](/docs/concept/write-ahead-log) tables. +- Enabling deduplication does not have any effect on the existing data and only applies to newly inserted data. This means that a table with deduplication enabled can still contain duplicate data. +- Enabling deduplication does not have any effect on modifying data with `UPDATE` statements.
+::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE DEDUP ENABLE statement](/img/docs/diagrams/enableDedup.svg) + +`UPSERT KEYS` list can include one or more columns with the following rules: + +- The [designated Timestamp](/docs/concept/designated-timestamp) column must be included in the `UPSERT KEYS` list. +- Columns of [STRING and BINARY](/docs/reference/sql/datatypes) types cannot be used in the `UPSERT KEYS` list. + +Running `ALTER TABLE DEDUP ENABLE` on a table that already has deduplication enabled is not an error. + +In such cases, the `UPSERT KEYS` list overrides the previously set key column list. + +## Example + +To enable deduplication on the `TICKER_PRICE` table for the `ts` and `ticker` columns, where `ts` is the designated timestamp for the table, use the following command: + +```sql +ALTER TABLE TICKER_PRICE DEDUP ENABLE UPSERT KEYS(ts, ticker) +``` + +See more example at [data deduplication](/docs/concept/deduplication#example) page + +## See also + +[ALTER TABLE DEDUP DISABLE](/docs/reference/sql/alter-table-disable-deduplication) \ No newline at end of file diff --git a/docs/reference/sql/alter-table-rename-column.md b/docs/reference/sql/alter-table-rename-column.md new file mode 100644 index 00000000..3515a5e1 --- /dev/null +++ b/docs/reference/sql/alter-table-rename-column.md @@ -0,0 +1,27 @@ +--- +title: ALTER TABLE RENAME COLUMN +sidebar_label: RENAME COLUMN +description: RENAME COLUMN SQL keyword reference documentation. +--- + +Rename a column in an existing table. + +:::caution + +- New column names may only consist of letters, numbers and underscores `_` + +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of the ALTER TABLE RENAME COLUMN keywords](/img/docs/diagrams/alterTableRenameColumn.svg) + +## Example + +The following example renames an existing column called `sensor` to +`hum_sensor_1` from the table `measurements`: + +```questdb-sql title="Renaming a column" +ALTER TABLE measurements RENAME COLUMN sensor TO hum_sensor_1; +``` diff --git a/docs/reference/sql/alter-table-resume-wal.md b/docs/reference/sql/alter-table-resume-wal.md new file mode 100644 index 00000000..87cf4357 --- /dev/null +++ b/docs/reference/sql/alter-table-resume-wal.md @@ -0,0 +1,66 @@ +--- +title: ALTER TABLE RESUME WAL +sidebar_label: RESUME WAL +description: ALTER TABLE RESUME WAL SQL keyword reference documentation. +--- + +Restarts transactions of a [WAL table](/docs/concept/write-ahead-log/) after +recovery from errors. + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of ALTER TABLE with RESUME WAL keyword](/img/docs/diagrams/alterTableResumeWal.svg) + +## Description + +`sequencerTxn` is the unique `txn` identification that the Sequencer issues to +transactions. + +When `sequencerTxn` is not specified, the operation resumes the WAL apply job +from the next uncommitted transaction, including the failed one. + +When `sequencerTxn` is not specified, the operation resumes the WAL apply job +from the provided `sequencerTxn` number explicitly. + +`ALTER TABLE RESUME WAL` is used to restart WAL table transactions after +resolving errors. When transactions are stopped, the `suspended` status from the +[`wal_tables()`](/docs/reference/function/meta/#wal_tables) function is marked as +`true`, and the `sequencerTxn` value indicates the last successful commit in the +Sequencer. 
Once the error is resolved, `ALTER TABLE RESUME WAL` restarts the +suspended WAL transactions from the failed transaction. Alternatively, an +optional `sequencerTxn` value can be provided to skip the failed transaction. + +## Examples + +Using the [`wal_tables()`](/docs/reference/function/meta/#wal_tables) function to +investigate the table status: + +```questdb-sql title="List all tables" +wal_tables(); +``` + +| name | suspended | writerTxn | sequencerTxn | +| ----------- |-----------|-----------|--------------| +| sensor_wal | false | 6 | 6 | +| weather_wal | true | 3 | 5 | + +The table `weather_wal` is suspended. The last successful commit in the +table is `3`. + +The following query restarts transactions from the failed transaction, `4`: + +```questdb-sql +ALTER TABLE weather_wal RESUME WAL; +``` + +Alternatively, specifying the `sequencerTxn` to skip the failed commit (`4` in +this case): + +```questdb-sql +ALTER TABLE weather_wal RESUME WAL FROM TRANSACTION 5; + +-- This is equivalent to + +ALTER TABLE weather_wal RESUME WAL FROM TXN 5; +``` diff --git a/docs/reference/sql/alter-table-set-param.md b/docs/reference/sql/alter-table-set-param.md new file mode 100644 index 00000000..31410fb3 --- /dev/null +++ b/docs/reference/sql/alter-table-set-param.md @@ -0,0 +1,99 @@ +--- +title: ALTER TABLE SET PARAM +sidebar_label: SET PARAM +description: SET PARAM SQL keyword reference documentation. +--- + +`ALTER TABLE SET PARAM` sets table parameters via SQL. + +:::note + +- Checking table metadata can be done via the `tables()` and `table_columns()` + functions, as described in the + [meta functions](/docs/reference/function/meta/) documentation page. + +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of the ALTER TABLE SET PARA keywords](/img/docs/diagrams/alterTableSetParam.svg) + +`maxUncommittedRows` - defines the maximum number of uncommitted rows per-table +to keep in memory before triggering a commit for a specific table. + +The purpose of specifying maximum uncommitted rows per table is to reduce the +occurrences of resource-intensive commits when ingesting out-of-order data. + +The global setting for the same parameter is `cairo.max.uncommitted.rows`. + +## Example + +The values for `maximum uncommitted rows` can be changed per each table with the +following SQL: + +```questdb-sql title="Altering out-of-order parameters via SQL" +ALTER TABLE my_table SET PARAM maxUncommittedRows = 10000 +``` + +Checking the values per-table may be done using the `tables()` function: + +```questdb-sql title="List table metadata" +SELECT id, name, maxUncommittedRows FROM tables(); +``` + +| id | name | maxUncommittedRows | +| --- | -------- | ------------------ | +| 1 | my_table | 10000 | + +For more details on retrieving table and column information, see the +[meta functions documentation](/docs/reference/function/meta/). + +## Parameters for QuestDB 6.5.5 and earlier versions + +:::note + +**Deprecated content** + +- For QuestDB 6.5.5 and earlier versions, the following keywords are useful for + configuring InfluxDB Line Protocol data ingestion on a per-table basis. For + more information on more details and when to apply them, see the documentation + for + [InfluxDB Line Protocol commit strategy](/docs/reference/api/ilp/tcp-receiver/#commit-strategy). + +- From QuestDB 6.6 onwards, the database adjusts relevant settings automatically + and provides maximum ingestion speed. 
+ +::: + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) +![Flow chart showing the syntax of the ALTER TABLE SET PARAM with commit lag keywords](/img/docs/diagrams/alterTableSetParamCommitLag.svg) + +For context on InfluxDB Line Protocol, see the +[Commit Strategy page](/docs/reference/api/ilp/tcp-receiver/#commit-strategy). + +`commitLag` allows for specifying the expected maximum _lag_ of late-arriving +records when ingesting out-of-order data. The purpose of specifying a commit lag +per table is to reduce the occurrences of resource-intensive commits when +ingesting out-of-order data. Incoming records will be kept in memory for +the duration specified by _lag_; then all records up to the boundary will be +ordered and committed. + +`commitLag` expects a value with a modifier to specify the unit of time for the +value: + +| unit | description | +| ---- | ------------ | +| us | microseconds | +| s | seconds | +| m | minutes | +| h | hours | +| d | days | + +To set the `commitLag` value to 20 seconds: + +```questdb-sql +ALTER TABLE my_table SET PARAM commitLag = 20s; +``` diff --git a/docs/reference/sql/alter-table-set-type.md b/docs/reference/sql/alter-table-set-type.md new file mode 100644 index 00000000..dc7ccd72 --- /dev/null +++ b/docs/reference/sql/alter-table-set-type.md @@ -0,0 +1,38 @@ +--- +title: ALTER TABLE SET TYPE +sidebar_label: SET TYPE +description: ALTER TABLE SET TYPE SQL keyword reference documentation. +--- + +Converts a non-WAL table to WAL, or a WAL table to non-WAL. + +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) + +![Flow chart showing the syntax of ALTER TABLE with SET TYPE keyword](/img/docs/diagrams/setType.svg) + +## Description + +The command schedules the conversion of the specified table to WAL or non-WAL type. +The actual conversion takes place on the next restart of the server. + +If the command is issued more than once before the restart, the last command overrides all previous ones. + +If the target type of the conversion is the same as the current type of the table, the conversion request is ignored. + +## Examples + +To convert a non-WAL table to WAL: + +``` +ALTER TABLE weather SET TYPE WAL; +restart instance +``` + +To convert a WAL table to non-WAL: + +``` +ALTER TABLE weather SET TYPE BYPASS WAL; +restart instance +``` diff --git a/docs/reference/sql/alter-table-squash-partitions.md b/docs/reference/sql/alter-table-squash-partitions.md new file mode 100644 index 00000000..a49a4c1e --- /dev/null +++ b/docs/reference/sql/alter-table-squash-partitions.md @@ -0,0 +1,52 @@ +--- +title: ALTER TABLE SQUASH PARTITIONS +sidebar_label: SQUASH PARTITIONS +description: ALTER TABLE SQUASH PARTITIONS SQL keyword reference documentation. +--- + +Merges partition parts back into the physical partition. + +This SQL keyword is designed for downgrading QuestDB to a version earlier +than 7.2, where +[partition split](/docs/concept/partitions/#splitting-and-squashing-time-partitions) +was introduced. Squashing partition parts makes the database compatible with +earlier QuestDB versions.
+ +## Syntax + +![Flow chart showing the syntax of the ALTER TABLE keyword](/img/docs/diagrams/alterTable.svg) + +![Flow chart showing the syntax of ALTER TABLE with SQUASH PARTITIONS keyword](/img/docs/diagrams/alterTableSquashPartitions.svg) + +## Examples + +The SQL keyword [SHOW PARTITIONS](/docs/reference/sql/show/) can be used to +display partition split details. + +For example, Let's consider the following table `x` containing split partitions: + +``` +SHOW PARTITIONS FROM x; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | ------------------------ | --------------------------- | --------------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | DAY | 2023-02-04 | 2023-02-04T00:00:00.000000Z | 2023-02-04T23:59:59.940000Z | 1440000 | 71281136 | 68.0 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 1 | DAY | 2023-02-05 | 2023-02-05T00:00:00.000000Z | 2023-02-05T20:59:59.880000Z | 1259999 | 65388544 | 62.4 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 2 | DAY | 2023-02-05T205959-880001 | 2023-02-05T20:59:59.940000Z | 2023-02-05T21:59:59.940000Z | 60002 | 83886080 | 80.0 MiB | FALSE | TRUE | TRUE | FALSE | FALSE | + +The table is partition by day and there are two partitions for `2023-02-05` as a +result of partition split. + +To merge the two partitions: + +```questdb-sql +ALTER TABLE x SQUASH PARTITIONS; + +SHOW PARTITIONS FROM x; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +| ----- | ----------- | ---------- | --------------------------- | --------------------------- | ------- | -------- | ------------- | -------- | ------ | -------- | -------- | ---------- | +| 0 | DAY | 2023-02-04 | 2023-02-04T00:00:00.000000Z | 2023-02-04T23:59:59.940000Z | 1440000 | 71281136 | 68.0 MiB | FALSE | FALSE | TRUE | FALSE | FALSE | +| 1 | DAY | 2023-02-05 | 2023-02-05T00:00:00.000000Z | 2023-02-05T21:59:59.940000Z | 1320001 | 65388544 | 62.4 MiB | FALSE | TRUE | TRUE | FALSE | FALSE | diff --git a/docs/reference/sql/backup.md b/docs/reference/sql/backup.md new file mode 100644 index 00000000..75fdd182 --- /dev/null +++ b/docs/reference/sql/backup.md @@ -0,0 +1,76 @@ +--- +title: BACKUP keyword +sidebar_label: BACKUP +description: BACKUP SQL keyword reference documentation. +--- + +Creates a backup for one, several, or all database tables. + +## Syntax + +![Flow chart showing the syntax of the BACKUP keyword](/img/docs/diagrams/backup.svg) + +## Backup directory + +Backing up a database or tables requires a **backup directory** which is set +using the `cairo.sql.backup.root` +[configuration key](/docs/reference/configuration/) in a +[server.conf](/docs/concept/root-directory-structure/#serverconf) file: + +```shell title="server.conf" +cairo.sql.backup.root=/Users/UserName/Desktop +``` + +The **backup directory** can be on a disk local to the server, a remote disk or +a remote filesystem. QuestDB will enforce that the backup is only written in a +location relative to the `backup directory`. This is a security feature to +disallow random file access by QuestDB. + +The tables will be written in a directory with today's date with the default +format `yyyy-MM-dd` (e.g., `2020-04-20`). 
A custom date format can be specified +using the `cairo.sql.backup.dir.datetime.format` +[configuration key](/docs/reference/configuration/): + +```shell title="server.conf" +cairo.sql.backup.dir.datetime.format=yyyy-dd-MM +``` + +Given a `BACKUP` query run on `2021-02-25`, the data and metadata files will be +written following the +[db directory structure](/docs/concept/root-directory-structure/#db) + +```filestructure title="/path/to/backup_directory" +├── 2021-02-25 +│   ├── table1 +│   │   ├── ... +│   ├── table2 +│   │   ├── ... +│   ├── table3 +│   ... +``` + +If a user performs several backups on the same date, each backup will be written +a new directory. Subsequent backups on the same date will look as follows: + +```filestructure title="/path/to/backup_directory" +├── 2021-02-22 'first' +├── 2021-02-22.1 'second' +├── 2021-02-22.2 'third' +├── 2021-02-24 'first new date' +├── 2021-02-24.1 'first new date' +│   ... +``` + +## Examples + +```questdb-sql title="Single table" +BACKUP TABLE table1; +``` + +```questdb-sql title="Multiple tables" +BACKUP TABLE table1, table2, table3; +``` + +```questdb-sql title="All tables" +BACKUP DATABASE; +``` diff --git a/docs/reference/sql/case.md b/docs/reference/sql/case.md new file mode 100644 index 00000000..c56011c7 --- /dev/null +++ b/docs/reference/sql/case.md @@ -0,0 +1,65 @@ +--- +title: CASE keyword +sidebar_label: CASE +description: CASE SQL keyword reference documentation. +--- + +## Syntax + +![Flow chart showing the syntax of CASE](/img/docs/diagrams/case.svg) + +## Description + +`CASE` goes through a set of conditions and returns a value corresponding to the +first condition met. Each new condition follows the `WHEN condition THEN value` +syntax. The user can define a return value when no condition is met using +`ELSE`. If `ELSE` is not defined and no conditions are met, then case returns +`null`. + +## Examples + +Assume the following data + +| name | age | +| ----- | --- | +| Tom | 4 | +| Jerry | 19 | +| Anna | 25 | +| Jack | 8 | + +```questdb-sql title="CASE with ELSE" +SELECT +name, +CASE + WHEN age > 18 THEN 'major' + ELSE 'minor' +END +FROM my_table +``` + +Result + +| name | case | +| ----- | ----- | +| Tom | minor | +| Jerry | major | +| Anna | major | +| Jack | minor | + +```questdb-sql title="CASE without ELSE" +SELECT +name, +CASE + WHEN age > 18 THEN 'major' +END +FROM my_table +``` + +Result + +| name | case | +| ----- | ----- | +| Tom | null | +| Jerry | major | +| Anna | major | +| Jack | null | diff --git a/docs/reference/sql/cast.md b/docs/reference/sql/cast.md new file mode 100644 index 00000000..0268df72 --- /dev/null +++ b/docs/reference/sql/cast.md @@ -0,0 +1,109 @@ +--- +title: CAST keyword +sidebar_label: CAST +description: CAST SQL keyword reference documentation. +--- + +Type conversion. Can be either: + +- [Explicit](#explicit-conversion) via `cast()` +- [Implicit](#implicit-conversion), in which case it will be automatically + performed when required by the context. + +## Syntax + +![Flow chart showing the syntax of the CAST keyword](/img/docs/diagrams/cast.svg) + +where: + +- `expression` can be a constant, a column, or an expression that evaluates to a + value. +- `type` refers to the desired [data type](/docs/reference/sql/datatypes/). + +`cast` can be used a part of arithmetic expression as normal + +## Explicit conversion + +Types can be converted from one to another using the `cast()` function. 
+ +## Examples + +```questdb-sql title="Queries" +SELECT +cast(3L + 2L AS INT), +cast(1578506142000000 AS TIMESTAMP), +cast('10.2' AS DOUBLE), +cast('行' AS INT); +``` + +| cast | cast1 | cast2 | cast3 | +| ---- | --------------------------- | ----- | ----- | +| 5 | 2020-01-08T17:55:42.000000Z | 10.2 | 34892 | + +Explicit casting of an expression to a smaller +[data type](/docs/reference/sql/datatypes/) may result in loss of data when the +output data type is smaller than the expression. + +- Casting a decimal number type (`float` or `double`) to an integer number type + (`long`, `int`, `short`) will result in decimals drop. +- If the integer part being cast is larger than the resulting data type, it will + be resized by truncating bits. +- Conversions from `char` to a number type will return the corresponding + `unicode` number and vice versa. + +### Precision loss examples + +```questdb-sql title="Queries" +SELECT +cast(3.5 + 2 AS INT), +cast(7234623 AS SHORT), +cast(2334444.323 AS SHORT); +``` + +| cast | cast1 | cast2 | +| ---- | ----- | ------ | +| 5 | 25663 | -24852 | + +When casting numbers into a smaller data type, QuestDB will truncate the higher +bits of this number. + +## Implicit conversion + +Type casting may be necessary in certain context such as + +- Operations involving various different types +- Inserting values where the originating type is different from the destination + column type. + +QuestDB will attempt to convert to the data type required by the context. This +is called `implicit cast` and does not require using the `cast()` function. + +:::note + +QuestDB will only perform implicit cast when they would not result in data being +truncated or precision being lost. + +::: + +The below chart illustrates the explicit and implicit cast available in QuestDB. + +![Table showing the different possibilities the cast function supports, those are defined by an input and output types](/img/docs/castmap.jpg) + +:::note + +Implicit casting prevents data loss. When an operation involves multiple types, +the resulting type will be the smallest possible type so that no data is lost. + +::: + +```questdb-sql title="Queries" +SELECT +1234L + 567, +1234L + 0.567, +to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss') + 323, +to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss') + 0.323; +``` + +| column | column1 | column2 | column3 | +| ------ | -------- | --------------------------- | ---------------- | +| 1801 | 1234.567 | 2019-10-17T00:00:00.000323Z | 1571270400000000 | diff --git a/docs/reference/sql/copy.md b/docs/reference/sql/copy.md new file mode 100644 index 00000000..24846f71 --- /dev/null +++ b/docs/reference/sql/copy.md @@ -0,0 +1,195 @@ +--- +title: COPY keyword +sidebar_label: COPY +description: COPY SQL keyword reference documentation. +--- + +:::caution + +For partitioned tables, the best `COPY` performance can be achieved only on a +machine with a local, physically attached SSD. It is possible to use a network +block storage, such as an AWS EBS volume to perform the operation, with the +following impact: + +- Users need to configure the maximum IOPS and throughput setting values for the + volume. +- The required import time is likely to be 5-10x longer. + +::: + +## Syntax + +![Flow chart showing the syntax of the COPY keyword](/img/docs/diagrams/copy.svg) + +## Description + +Copies tables from a delimited text file saved in the defined root directory +into QuestDB. 
`COPY` has the following import modes: + +- Parallel import, used for copying partitioned tables: + + - The parallel level is based on partition granularity. It is important to + choose the timestamp column and partition type correctly for the data to be + imported. The higher the granularity of the partitions, the faster an import + operation can be completed. + - If the target table exists and is partitioned, the target table must be + empty. + - If the target table does not exist, both `TIMESTAMP` and `PARTITION BY` + options must be defined to create a partitioned table. The `PARTITION BY` + value should not be `NONE`. + - When the target table exists and is not empty, import is not supported. + +- Serial import, used for copying non-partitioned tables: + + - If the target table exists and is not partitioned, the data is appended + provided the file structure matches the table. + - If the target table does not exist, then it is created using metadata + derived from the file data. + +:::note + +`COPY` takes up all the available resources. While one import is running, new +requests will be rejected. + +::: + +`COPY '<id>' CANCEL` cancels the copying operation defined by the import `id`, +while an import is taking place. + +### Root directory + +`COPY` requires a defined root directory where CSV files are saved and copied +from. A CSV file must be saved to the root directory before starting the `COPY` +operation. There are two root directories to be defined: + +- `cairo.sql.copy.root` is used for storing regular files to be imported. +- `cairo.sql.copy.work.root` is used for storing temporary files like indexes or + temporary partitions. Unless otherwise specified, it points to the + `root_directory/tmp` directory. + +Use the [configuration keys](/docs/reference/configuration/) to edit these +properties in +[`COPY` configuration settings](/docs/reference/configuration/#bulk-csv-import): + +```shell title="Example" +cairo.sql.copy.root=/Users/UserName/Desktop +``` + +`cairo.sql.copy.root` and `cairo.sql.copy.work.root` can be on a disk local to +the server, on a remote disk, or a remote filesystem. QuestDB enforces that +tables are only written from files located in a directory relative to these +root directories. This is a security feature preventing random file access by +QuestDB. + +:::note + +For macOS users, using a directory under `/Users` may prevent import due to +permission problems. It is preferable to save the CSV file in a folder outside of +the `/Users` tree and set the root directory accordingly.
+ +::: + +### Log table + +`COPY` generates a log table,`sys.text_import_log`, tracking `COPY` operation +for the last three days with the following information: + +| Column name | Data type | Notes | +| ------------- | --------- | ----------------------------------------------------------------------------- | +| ts | timestamp | The log event timestamp | +| id | string | Import id | +| table | symbol | Destination table name | +| file | symbol | The source csv file | +| phase | symbol | Import phase.\* Available only in intermediate log records of parallel import | +| status | symbol | The event status: started, finished, failed, cancelled | +| message | string | The error message for when status is failed | +| rows_handled | long | The counters for the total number of scanned lines in the file | +| | | The counters are shown in the final log row for the given import | +| rows_imported | long | The counters for the total number of imported rows | +| | | The counters are shown in the final log row for the given import | +| errors | long | The number of errors for the given phase | + +\* Available phases for parallel import are: + +- setup +- boundary_check +- indexing +- partition_import +- symbol_table_merge +- update_symbol_keys +- build_symbol_index +- move_partitions +- attach_partitions +- analyze_file_structure +- cleanup + +Log table row retention is configurable through +`cairo.sql.copy.log.retention.days` setting, and is three days by default. + +`COPY` returns `id` value from `sys.text_import_log` to track the import +progress. + +## Options + +- `HEADER true/false`: When `true`, QuestDB automatically assumes the first row + is a header. Otherwise, schema recognition is used to determine whether the + first row is used as header. The default setting is `false`. +- `TIMESTAMP`: Define the name of the timestamp column in the file to be + imported. +- `FORMAT`: Timestamp column format when the format is not the default + (`yyyy-MM-ddTHH:mm:ss.SSSUUUZ`) or cannot be detected. See + [Date and Timestamp format](/docs/reference/function/date-time/#date-and-timestamp-format) + for more information. +- `DELIMITER`: Default setting is `,`. +- `PARTITION BY`: Partition unit. +- `ON ERROR`: Define responses to data parsing errors. The valid values are: + - `SKIP_ROW`: Skip the entire row + - `SKIP_COLUMN`: Skip column and use the default value (`null` for nullable + types, `false` for boolean, `0` for other non-nullable types) + - `ABORT`: Abort whole import on first error, and restore the pre-import table + status + +## Examples + +For more details on parallel import, please also see +[Importing data in bulk via CSV](/docs/guides/importing-data/). 
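+ +As a minimal sketch of a parallel import into a partitioned table (the `trips` table, the `trips.csv` file, and the `pickup_datetime` column are hypothetical names), both the `TIMESTAMP` and `PARTITION BY` options are set so that the file can be imported into day partitions: + +```questdb-sql title="Parallel import (sketch)" +COPY trips FROM 'trips.csv' WITH HEADER true TIMESTAMP 'pickup_datetime' FORMAT 'yyyy-MM-ddTHH:mm:ss' PARTITION BY DAY ON ERROR SKIP_ROW; +```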
+ +```questdb-sql title="COPY" +COPY weather FROM 'weather.csv' WITH HEADER true FORMAT 'yyyy-MM-ddTHH:mm:ss.SSSUUUZ' ON ERROR SKIP_ROW; +``` + +Starts an import asynchronously and returns an import id string: + +| id | +| ---------------- | +| 55ca24e5ba328050 | + +The log can be accessed by querying: + +```questdb-sql +SELECT * FROM 'sys.text_import_log' WHERE id = '55ca24e5ba328050'; +``` + +A sample log table: + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| --------------------------- | ---------------- | ------- | ----------- | ----- | ------- | ------- | ------------ | ------------- | ------ | +| 2022-08-03T10:40:25.586455Z | 55ca24e5ba328050 | weather | weather.csv | | started | | | | 0 | +| | | | | | | | | | | + +While it is running, import can be cancelled with: + +```questdb-sql +COPY '55ca24e5ba328050' CANCEL; +``` + +Within a few seconds import should stop and message with 'cancelled' status +should appear in text_import_log, e.g.: + +```questdb-sql +SELECT * FROM 'sys.text_import_log' WHERE id = '55ca24e5ba328050' LIMIT -1; +``` + +| ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | +| :-------------------------- | ---------------- | ------- | ----------- | ----- | --------- | ---------------------------------------------------------- | ------------ | ------------- | ------ | +| 2022-08-03T14:04:42.268502Z | 55ca24e5ba328050 | weather | weather.csv | null | cancelled | import cancelled [phase=partition_import, msg=`Cancelled`] | 0 | 0 | 0 | diff --git a/docs/reference/sql/create-table.md b/docs/reference/sql/create-table.md new file mode 100644 index 00000000..768887de --- /dev/null +++ b/docs/reference/sql/create-table.md @@ -0,0 +1,408 @@ +--- +title: CREATE TABLE reference +sidebar_label: CREATE TABLE +description: CREATE TABLE SQL keywords reference documentation. +--- + +To create a new table in the database, the `CREATE TABLE` keywords followed by +column definitions are used. + +## Syntax + +To create a table by manually entering parameters and settings: + +![Flow chart showing the syntax of the CREATE TABLE keyword](/img/docs/diagrams/createTable.svg) + +:::note + +Checking table metadata can be done via the `tables()` and `table_columns()` +functions which are described in the +[meta functions](/docs/reference/function/meta/) documentation page. + +::: + +To create a table by cloning the metadata of an existing table: + +![Flow chart showing the syntax of the CREATE TABLE LIKE keyword](/img/docs/diagrams/createTableLike.svg) + +## IF NOT EXISTS + +An optional `IF NOT EXISTS` clause may be added directly after the +`CREATE TABLE` keywords to indicate that a new table should be created if one +with the desired table name does not already exist. + +```questdb-sql +CREATE TABLE IF NOT EXISTS test_table(price DOUBLE, ts TIMESTAMP) timestamp(ts); +``` + +## Table name + +Internally the table name is used as a directory name on the file system. It can +contain both ASCII and Unicode characters. The table name **must be unique** and +an error is returned if a table already exists with the requested name. Table +names containing spaces or period `.` character must be enclosed in **double +quotes**, for example: + +```questdb-sql +CREATE TABLE "example out of.space" (a INT); +INSERT INTO "example out of.space" values (1); +``` + +## Column name + +As with table names, the column name is used for file names internally. 
Although +it does support both ASCII and Unicode characters, character restrictions +specific to the file system still apply. Tables may have up to **2,147,483,647** +columns. + +:::note + +Column names must be unique within each table and **must not** contain a period +`.` character. + +::: + +## Type definition + +When specifying a column, a name and +[type definition](/docs/reference/sql/datatypes/) must be provided. The `symbol` +type may have additional optional parameters applied. + +![Flow chart showing the syntax of the different column types](/img/docs/diagrams/columnTypeDef.svg) + +### Symbols + +Optional keywords and parameters may follow the `symbol` type which allow for +further optimization on the handling of this type. For more information on the +benefits of using this type, see the [symbol](/docs/concept/symbol/) overview. + +#### Symbol capacity + +`CAPACITY` is an optional keyword used when defining a symbol type on table +creation to indicate how many distinct values this column is expected to have. +When `distinctValueEstimate` is not explicitly specified, a default value of +`cairo.default.symbol.capacity` is used. + +`distinctValueEstimate` - the value used to size data structures for +[symbols](/docs/concept/symbol/). + +```questdb-sql +CREATE TABLE my_table(symb SYMBOL CAPACITY 128, price DOUBLE, ts TIMESTAMP), + INDEX (symb) timestamp(ts); +``` + +The symbol capacity is not to be confused with **index capacity** described in +[column indexes](#column-indexes) below. + +```questdb-sql +CREATE TABLE my_table + (symb SYMBOL capacity 128 NOCACHE INDEX capacity 256, price DOUBLE, ts TIMESTAMP) +timestamp(ts); +``` + +#### Symbol caching + +`CACHE | NOCACHE` is used to specify whether a symbol should be cached. The +default value is `CACHE` unless otherwise specified. + +```questdb-sql +CREATE TABLE my_table + (symb SYMBOL CAPACITY 128 NOCACHE, price DOUBLE, ts TIMESTAMP) +timestamp(ts); +``` + +### Casting types + +`castDef` - casts the type of a specific column. `columnRef` must reference +existing column in the `selectSql` + +![Flow chart showing the syntax of the cast function](/img/docs/diagrams/castDef.svg) + +```questdb-sql +CREATE TABLE test AS (SELECT CAST(x as DOUBLE) x FROM long_sequence(10)); +``` +### IPv4 addresses + +Create an IPv4-friendly table using the following pattern: + +```sql +CREATE TABLE traffic (ts timestamp, src ipv4, dst ipv4) timestamp(ts) PARTITION BY DAY; +``` + +This demonstrates: + +* creation of a table called `traffic` +* columns for timestamp, source ipv4 address, destination ipv4 address +* a partition based on a day + +## Column indexes + +Index definitions (`indexDef`) are used to create an +[index](/docs/concept/indexes/) for a table column. The referenced table column +must be of type [symbol](/docs/concept/symbol/). + +![Flow chart showing the syntax of the index function](/img/docs/diagrams/indexDef.svg) + +```questdb-sql +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP), + INDEX (symb) TIMESTAMP(ts); +``` + +An index capacity may be provided for the index by defining the index storage +parameter, `valueBlockSize`: + +```questdb-sql +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP), + INDEX (symb CAPACITY 128) TIMESTAMP(ts); +-- equivalent to +CREATE TABLE my_table(symb SYMBOL INDEX CAPACITY 128, price DOUBLE, ts TIMESTAMP), + TIMESTAMP(ts); +``` + +See [Index](/docs/concept/indexes/#how-indexes-work) for more information about +index capacity. 
 + +## CREATE TABLE AS + +When the SQL (`selectSQL`) is `SELECT * FROM tab`, or any other arbitrary SQL query, the selected column names and their data types are cloned to the new table. + +```questdb-sql title="Create table as select" +CREATE TABLE new_table AS( + SELECT + rnd_int() a, + rnd_double() b, + rnd_symbol('ABB', 'CDD') c + FROM + long_sequence(100) + WHERE false +); +``` + +The data type of a column can be changed: + +```questdb-sql title="Clone an existing wide table and change type of cherry-picked columns" +CREATE TABLE new_table AS (SELECT * FROM source_table WHERE false), + CAST(price AS LONG), + CAST(instrument as SYMBOL); +``` + +Here we changed the type of `price` (assuming it was `INT`) to `LONG` and the type of `instrument` to [symbol](/docs/concept/symbol/). + +## Designated timestamp + +The timestamp function allows for specifying which column (which must be of `timestamp` type) should be a designated timestamp for the table. For more information, see the [designated timestamp](/docs/concept/designated-timestamp/) reference. + +The designated timestamp column **cannot be changed** after the table has been created. + +## Partitioning + +`PARTITION BY` allows for specifying the [partitioning strategy](/docs/concept/partitions/) for the table. Tables created via SQL are not partitioned by default (`NONE`). Tables can be partitioned by one of the following: + +- `NONE`: the default when partition is not defined. +- `YEAR` +- `MONTH` +- `WEEK` +- `DAY` +- `HOUR` + +The partitioning strategy **cannot be changed** after the table has been created. + +## WAL table parameter + +It is possible to create a [WAL table](/docs/concept/write-ahead-log/), allowing concurrent data ingestion and modification through multiple interfaces: + +- `WAL` creates a WAL table. A WAL table must have a [partitioning strategy](#partitioning) other than `NONE`. +- `BYPASS WAL` creates a non-WAL table. +- When neither option is specified, [the server configuration](/docs/reference/configuration/#cairo-engine), `cairo.wal.enabled.default`, is used: + - `true`: creates a WAL table. + - `false`: creates a non-WAL table. + +## Deduplication + +When [Deduplication](/docs/concept/deduplication) is enabled, QuestDB only inserts rows that do not match the existing data. When rows are inserted into a table with the deduplication option configured, QuestDB searches for existing rows to match using the specified `UPSERT KEYS`. If a match is found, the existing row is replaced with the new row. If no match is found, the new row is inserted into the table. + +Deduplication can only be enabled for [Write-Ahead Log (WAL)](/docs/concept/write-ahead-log/) tables. + +It is possible to include multiple columns of different types in the `UPSERT KEYS` list, as shown in the sketch below.
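+ +A minimal sketch of enabling deduplication at table creation time (the `trades` schema below is illustrative only): + +```questdb-sql title="WAL table with deduplication" +-- Rows repeating the same (ts, symbol) pair replace the existing row +-- instead of being appended as duplicates. +CREATE TABLE trades ( + ts TIMESTAMP, + symbol SYMBOL, + price DOUBLE +) TIMESTAMP(ts) PARTITION BY DAY WAL +DEDUP UPSERT KEYS(ts, symbol); +```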
+ +However, there are a few limitations to keep in mind: + +- The designated timestamp column must be included in the list of columns +- Columns of [STRING and BINARY](/docs/reference/sql/datatypes) types cannot be used in `UPSERT KEYS` list + +After table creation the deduplication configuration can be changed at any time using `ALTER` table: + +- Enable deduplication and change `UPSERT KEYS` with [`ALTER TABLE ENABLE`](/docs/reference/sql/alter-table-enable-deduplication/) +- Disable deduplication with using [`ALTER TABLE DISABLE`](/docs/reference/sql/alter-table-disable-deduplication/) + +## WITH table parameter + +![Flow chart showing the syntax of keyword to specify WITH table parameter](/img/docs/diagrams/createTableWithMaxRowParam.svg) + +The parameter influences how often commits of out-of-order data occur. It may be +set during table creation using the `WITH` keyword. + +`maxUncommittedRows` - defines the maximum number of uncommitted rows per-table +to keep in memory before triggering a commit for a specific table. + +The purpose of specifying maximum uncommitted rows per table is to reduce the +occurrences of resource-intensive commits when ingesting out-of-order data. + +The global setting for the same parameter is `cairo.max.uncommitted.rows`. + +```questdb-sql title="Setting out-of-order table parameters via SQL" +CREATE TABLE my_table (timestamp TIMESTAMP) TIMESTAMP(timestamp) +PARTITION BY DAY WITH maxUncommittedRows=250000; +``` + +Checking the values per-table may be done using the `tables()` function: + +```questdb-sql title="List all tables" +SELECT id, name, maxUncommittedRows FROM tables(); +``` + +| id | name | maxUncommittedRows | +| :-- | :---------- | :----------------- | +| 1 | my_table | 250000 | +| 2 | device_data | 10000 | + +## Table target volume + +The `IN VOLUME` clause is used to create a table in a different volume than the standard. The table +is created in the specified target volume, and a symbolic link is created in the table's standard +volume to point to it. + +![Flow chart showing the syntax of keywords to specify a table target volume](/img/docs/diagrams/tableTargetVolumeDef.svg) + +The use of the comma (`,`) depends on the existence of the `WITH` clause: + +- If the `WITH` clause is present, a comma is mandatory before + `IN VOLUME`: + + ```questdb-sql + CREATE TABLE my_table (i symbol, ts timestamp), index(i capacity 32) WITH maxUncommittedRows=7, IN VOLUME SECONDARY_VOLUME; + ``` + +- If no `WITH` clause is used, the comma must not be added for the `IN VOLUME` + segment: + + ```questdb-sql + CREATE TABLE my_table (i symbol, ts timestamp) IN VOLUME SECONDARY_VOLUME; + ``` + +The use of quotation marks (`'`) depends on the alias: + +- If the alias contains spaces, the quotation marks are required: + + ```questdb-sql + CREATE TABLE my_table (i symbol, ts timestamp), index(i capacity 32) IN VOLUME 'SECONDARY VOLUME'; + ``` +- If the alias does not contain spaces, no quotation mark is necessary: + + ```questdb-sql + CREATE TABLE my_table (i symbol, ts timestamp), index(i capacity 32) IN VOLUME SECONDARY_VOLUME; + ``` + +### Description + +The table behaves the same way as if it had been created in the standard (default) +volume, with the exception that [`DROP TABLE`](/docs/reference/sql/drop/) +removes the symbolic link from the standard volume but the content pointed to is +left intact in its volume. 
A table using the same name in the same +volume cannot be created again as a result, it requires manual intervention +to either remove or rename the table's directory in its volume. + +### Configuration + +The secondary table target volume is defined by +`cairo.volumes` in +[`server.conf`](/docs/reference/configuration/#cairo-engine). The default setting +contains an empty list, which means the feature is not enabled. + +To enable the feature, define as many volume pairs as you need, with syntax +_alias -> volume-root-path_, and separate different pairs with a comma. For example: + +``` +cairo.volumes=SECONDARY_VOLUME -> /Users/quest/mounts/secondary, BIN -> /var/bin +``` + +Additional notes about defining the alias and volume root paths: + +- Aliases are case-insensitive. +- Volume root paths must be valid and exist at bootstrap time and at the time when the table is created. +- Aliases and/or volume root paths can be single quoted, it is not required. + +## CREATE TABLE LIKE + +The `LIKE` keyword clones the table schema of an existing table without copying +the data. Table settings and parameters such as designated timestamp, symbol +column indexes, and index capacity will be cloned, too. + +```questdb-sql title="Create table like" +CREATE TABLE new_table (LIKE my_table); +``` + +## Examples + +The following examples demonstrate creating tables from basic statements, and +introduce features such as partitioning and designated timestamps. For more +information on the concepts introduced to below, see + +- [designated timestamp](/docs/concept/designated-timestamp/) reference on + electing a timestamp column +- [partition](/docs/concept/partitions/) documentation which describes how + partitions work in QuestDB +- [symbol](/docs/concept/symbol/) reference for using the `symbol` data type + +This example will create a table without a designated timestamp and does not +have a partitioning strategy applied. + +```questdb-sql title="Basic example" +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP, s STRING); +``` + +The same table can be created and a designated timestamp may be specified. + +```questdb-sql title="Adding a designated timestamp" +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP, s STRING) + TIMESTAMP(ts); +``` + +```questdb-sql title="Adding a partitioning strategy by DAY" +CREATE TABLE my_table(symb SYMBOL, price DOUBLE, ts TIMESTAMP, s STRING) + TIMESTAMP(ts) +PARTITION BY DAY; +``` + +```questdb-sql title="Adding parameters for symbol type" +CREATE TABLE + my_table(symb SYMBOL CAPACITY 256 NOCACHE INDEX CAPACITY 1048576, + price DOUBLE, ts TIMESTAMP, s STRING) + TIMESTAMP(ts) +PARTITION BY DAY; +``` + +Let's assume we imported a text file into the table `taxi_trips_unordered` and +now we want to turn this data into time series through ordering trips by +`pickup_time`, assign dedicated timestamp and partition by month: + +```questdb-sql title="Create table as select with data manipulation" +CREATE TABLE taxi_trips AS( + SELECT * FROM taxi_trips_unordered ORDER BY pickup_time +) TIMESTAMP(pickup_time) +PARTITION BY MONTH; +``` diff --git a/docs/reference/sql/datatypes.md b/docs/reference/sql/datatypes.md new file mode 100644 index 00000000..fe1da0ae --- /dev/null +++ b/docs/reference/sql/datatypes.md @@ -0,0 +1,142 @@ +--- +title: Data types +sidebar_label: Data types +description: Data types reference documentation. +--- + +The type system is derived from Java types. 
+ +| Type Name | Storage bits | Nullable | Description | +| ----------------- | ------------ | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `boolean` | `1` | No | Boolean `true` or `false`. | +| `ipv4` | `32` | Yes | `0.0.0.1` to `255.255.255. 255` | +| `byte` | `8` | No | Signed integer `-128` to `127`. | +| `short` | `16` | No | Signed integer `-32768` to `32767`. | +| `char` | `16` | Yes | `unicode` character. | +| `int` | `32` | Yes | Signed integer `0x80000000` to `0x7fffffff`. | +| `float` | `32` | Yes | Single precision IEEE 754 floating point value. | +| `symbol` | `32` | Yes | Symbols are stored as 32-bit signed indexes from symbol table. Each index will have a corresponding `string` value. Translation from index to string value is done automatically when data is being written or read. Symbol table is stored separately from column. | +| `string` | `32+n*16` | Yes | Length-prefixed sequence of UTF-16 encoded characters whose length is stored as signed 32-bit integer with maximum value of `0x7fffffff`. | +| `long` | `64` | Yes | Signed integer `0x8000000000000000L` to `0x7fffffffffffffffL`. | +| `date` | `64` | Yes | Signed offset in **milliseconds** from [Unix Epoch](https://en.wikipedia.org/wiki/Unix_time). | +| `timestamp` | `64` | Yes | Signed offset in **microseconds** from [Unix Epoch](https://en.wikipedia.org/wiki/Unix_time). | +| `double` | `64` | Yes | Double precision IEEE 754 floating point value. | +| `uuid` | `128` | Yes | [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) values. See also [the UUID type](#the-uuid-type). | +| `binary` | `64+n*8` | Yes | Length-prefixed sequence of bytes whose length is stored as signed 64-bit integer with maximum value of `0x7fffffffffffffffL`. | +| `long256` | `256` | Yes | Unsigned 256-bit integer. Does not support arbitrary arithmetic operations, but only equality checks. Suitable for storing hash code, such as crypto public addresses. | +| `geohash()` | `8`-`64` | Yes | Geohash with precision specified as a number followed by `b` for bits, `c` for chars. See [the geohashes documentation](/docs/concept/geohashes/) for details on use and storage. | + +## Variable-sized type limitations + +`BINARY` field size is limited either by 64-Bit signed int (8388608 peta bytes) +or disk size, whichever is smaller. + +`STRING` field size is limited by either 32-bit signed int (1073741824 +characters) or disk size, whichever is smaller. + +## Type nullability + +Nullable types use a specific value to mark `NULL` values: + +| Type Name | Null value | Description | +| ---------------- | -------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | +| `float` | `NaN` | As defined by IEEE 754 (`java.lang.Float.NaN`). | +| `double` | `NaN` | As defined by IEEE 754 (`java.lang.Double.NaN`). | +| `long256` | `0x8000000000000000800000000000000080000000000000008000000000000000` | The value equals four consecutive `long` null literals. | +| `long` | `0x8000000000000000L` | Minimum possible value a `long` can take -2^63. | +| `date` | `0x8000000000000000L` | Minimum possible value a `long` can take -2^63. 
| +| `timestamp` | `0x8000000000000000L` | Minimum possible value a `long` can take -2^63. | +| `int` | `0x80000000` | Minimum possible value an `int` can take, -2^31. | +| `uuid` | `80000000-0000-0000-8000-000000000000` | Both the highest 64 bits and the lowest 64 bits are set to -2^63. | +| `char` | `0x0000` | 0. | +| `geohash(byte)` | `0xff` | Geohashes from 1 up to and including 7 bits. | +| `geohash(short)` | `0xffff` | Geohashes from 8 up to and including 15 bits. | +| `geohash(int)` | `0xffffffff` | Geohashes from 16 up to and including 31 bits. | +| `geohash(long)` | `0xffffffffffffffff` | Geohashes from 32 up to and including 60 bits. | +| `symbol` | `0x80000000` | Symbols are stored as `int` offsets in a lookup file. | +| `string` | `0xffffffff` | Strings are length-prefixed; the length is an `int` and `-1` marks it `NULL` (no further storage is used). | +| `binary` | `0xffffffffffffffff` | Binary columns are also length-prefixed; the length is a `long` and `-1` marks it `NULL` (no further storage is used). | +| `ipv4` | `null` | IPv4 addresses are stored as `int`. | + +To filter columns that contain, or don't contain, `NULL` values, use a filter like: + +```questdb-sql +SELECT * FROM tab WHERE col = NULL; +SELECT * FROM tab WHERE col != NULL; +``` + +Alternatively, from version 6.3, use the NULL equality operator aliases: + +```questdb-sql +SELECT * FROM tab WHERE col IS NULL; +SELECT * FROM tab WHERE col IS NOT NULL; +``` + +:::note + +`NULL` values still occupy disk space. + +::: + +## The UUID type + +QuestDB natively supports the `UUID` type, which should be used for `UUID` columns instead of storing `UUIDs` as `strings`. `UUID` columns are stored internally as 128-bit integers, allowing for more efficient filtering and sorting. Strings inserted into a `UUID` column are permitted, but the data will be converted to the `UUID` type. + +```questdb-sql title="Inserting strings into a UUID column" +CREATE TABLE my_table ( + id UUID +); +[...] +INSERT INTO my_table VALUES ('a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'); +[...] +SELECT * FROM my_table WHERE id = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'; +``` + +If you use the [PGWire protocol](/docs/reference/api/postgres/), you can use the `uuid` type in your queries. The JDBC API does not distinguish the UUID type, but the Postgres JDBC driver supports it in prepared statements: + +```java +UUID uuid = UUID.randomUUID(); +PreparedStatement ps = connection.prepareStatement("INSERT INTO my_table VALUES (?)"); +ps.setObject(1, uuid); +``` + +[QuestDB Client Libraries](https://questdb.io/docs/reference/clients/overview/) can send `UUIDs` as `strings` to be converted to UUIDs by the server. + +## IPv4 + +QuestDB supports the IPv4 data type. It adds validity checks and type-specific functions that are useful when working with IP addresses. + +IPv4 addresses exist within the range of `0.0.0.1` to `255.255.255.255`. A full-zero address, `0.0.0.0`, is interpreted as null. + +Columns may be created with the IPv4 data type like so: + +```sql +-- Creating a table named traffic with two ipv4 columns: src and dst. +CREATE TABLE traffic (ts timestamp, src ipv4, dst ipv4) timestamp(ts) PARTITION BY DAY; +``` + +IPv4 addresses also support a wide range of existing SQL functions and contain their own operators: + +- For a table of supported SQL functions, see + [IPv4 Supported SQL Functions](/docs/reference/function/ipv4). + +- For a full list of operators, see + [IPv4 Operators](/docs/reference/operators/ipv4/). + +### Limitations + +IPv4 column types cannot be created via InfluxDB Line Protocol as the protocol lacks support for IPv4. As a result, the server cannot distinguish between string and IPv4 data. However, InfluxDB Line Protocol can still insert string data into a pre-existing column of type IPv4. diff --git a/docs/reference/sql/distinct.md b/docs/reference/sql/distinct.md new file mode 100644 index 00000000..0ca1a11a --- /dev/null +++ b/docs/reference/sql/distinct.md @@ -0,0 +1,35 @@ +--- +title: DISTINCT keyword +sidebar_label: DISTINCT +description: DISTINCT SQL keyword reference documentation. +--- + +`SELECT DISTINCT` is used to return only distinct (i.e. different) values from a column as part of a [SELECT statement](/docs/reference/sql/select/). + +## Syntax + +![Flow chart showing the syntax of the DISTINCT keyword](/img/docs/diagrams/distinct.svg) + +## Examples + +The following query will return a list of all unique ratings in the table. + +```questdb-sql title="Simple query" +SELECT DISTINCT movieId +FROM ratings; +``` + +SELECT DISTINCT can be used in conjunction with more advanced queries and filters.
+ +```questdb-sql title="With aggregate" +SELECT DISTINCT movieId, count() +FROM ratings; +``` + +```questdb-sql title="With filter" +SELECT DISTINCT movieId, count() +FROM ratings +WHERE score > 3; +``` diff --git a/docs/reference/sql/drop.md b/docs/reference/sql/drop.md new file mode 100644 index 00000000..d7543cae --- /dev/null +++ b/docs/reference/sql/drop.md @@ -0,0 +1,45 @@ +--- +title: DROP TABLE keyword +sidebar_label: DROP TABLE +description: DROP TABLE SQL keyword reference documentation. +--- + +`DROP TABLE` permanently deletes a table and its contents. +`DROP ALL TABLES` permanently deletes all the tables and their contents. + +## Syntax + +![Flow chart showing the syntax of the DROP TABLE keyword](/img/docs/diagrams/dropTable.svg) + +### IF EXISTS + +An optional `IF EXISTS` clause may be added directly after the `DROP TABLE` +keywords to indicate that the selected table should be dropped if it exists. + +## Description + +This command irremediably deletes the data in the target table. Unless the table +was created in a different volume than the standard, see +[CREATE TABLE IN VOLUME](/docs/reference/sql/create-table/#table-target-volume), +in which case the table is only logically removed and data remains intact in its +volume. In doubt, make sure you have created +[backups](/docs/reference/sql/backup/) of your data. + +Disk space is reclaimed asynchronously after the table is dropped. Ongoing table +reads might delay space reclamation. + +## Example + +```questdb-sql +DROP TABLE ratings; +``` + +```questdb-sql +DROP ALL TABLES; +``` + +## See also + +To delete the data inside a table but keep the table and its structure, use +[TRUNCATE](/docs/reference/sql/truncate/). + diff --git a/docs/reference/sql/explain.md b/docs/reference/sql/explain.md new file mode 100644 index 00000000..cb68d96e --- /dev/null +++ b/docs/reference/sql/explain.md @@ -0,0 +1,265 @@ +--- +title: EXPLAIN keyword +sidebar_label: EXPLAIN +description: EXPLAIN SQL keyword reference documentation. +--- + +`EXPLAIN` displays the execution plan of an `INSERT`, `SELECT`, or `UPDATE` +statement. + +## Syntax + +![Flow chart showing the syntax of the EXPLAIN keyword](/img/docs/diagrams/explain.svg) + +### Description + +A query execution plan shows how a statement will be implemented: which table is +going to be accessed and how, what join method are employed, and which +predicates are JIT-compiled etc. +`EXPLAIN` output is a tree of nodes containing properties and subnodes (aka +child nodes). + +In a plan such as: + +| QUERY PLAN | +| -------------------------------------------------------------------------- | +| Async JIT Filter | +|   filter: 100 100.0; +``` + +| QUERY PLAN | +| ----------------------------------------------------------------------------- | +| Async JIT Filter | +|   filter: 100.0 `DataFrameRecordCursorFactory` +- `Async JIT Filter` -> `AsyncJitFilteredRecordCursorFactory` +- `SampleByFillNoneNotKeyed` -> `SampleByFillNoneNotKeyedRecordCursorFactory` + while some are a bit harder to identify, e.g. +- `GroupByRecord vectorized: false` -> + `io.questdb.griffin.engine.groupby.GroupByRecordCursorFactory` +- `GroupByRecord vectorized: true` -> + `io.questdb.griffin.engine.groupby.vect.GroupByRecordCursorFactory` + +Other classes can be identified by searching for the node name in the `toPlan()` +methods. 
+ +::: + +## See also + +This section includes links to additional information such as tutorials: + +- [EXPLAIN Your SQL Query Plan](/blog/explain-sql-query-plan/) +- [Exploring Query Plan Scan Nodes with SQL EXPLAIN](/blog/exploring-query-plan-scan-nodes-sql-explain/) diff --git a/docs/reference/sql/fill.md b/docs/reference/sql/fill.md new file mode 100644 index 00000000..0a87d8ca --- /dev/null +++ b/docs/reference/sql/fill.md @@ -0,0 +1,17 @@ +--- +title: FILL keyword +sidebar_label: FILL +description: FILL SQL keyword reference documentation. +--- + +Queries using a [SAMPLE BY](/docs/reference/sql/sample-by/) aggregate on data +which has missing records may return a discontinuous series of results. The +`FILL` keyword allows for specifying a fill behavior for results which have +missing aggregates due to missing rows. + +Details for the `FILL` keyword can be found on the +[SAMPLE BY](/docs/reference/sql/sample-by/) page. + +To specify a default handling for `null` values within queries, see the +[coalesce() function](/docs/reference/function/conditional/#coalesce) +documentation. diff --git a/docs/reference/sql/group-by.md b/docs/reference/sql/group-by.md new file mode 100644 index 00000000..b4dec5c8 --- /dev/null +++ b/docs/reference/sql/group-by.md @@ -0,0 +1,71 @@ +--- +title: GROUP BY keyword +sidebar_label: GROUP BY +description: GROUP BY SQL keyword reference documentation. +--- + +Groups aggregation calculations by one or several keys. In QuestDB, this clause +is [optional](/docs/concept/sql-extensions/#optionality-of-group-by/). + +## Syntax + +![Flow chart showing the syntax of the GROUP BY keyword](/img/docs/diagrams/groupBy.svg) + +:::note + +QuestDB groups aggregation results implicitly and does not require the GROUP BY +keyword. It is only supported for convenience. Using the GROUP BY clause +explicitly will return the same results as if the clause was omitted. + +::: + +## Examples + +The below queries perform aggregations on a single key. Using `GROUP BY` +explicitly or implicitly yields the same results: + +```questdb-sql title="Single key aggregation, explicit GROUP BY" +SELECT sensorId, avg(temp) +FROM readings +GROUP BY sensorId; +``` + +```questdb-sql title="Single key aggregation, implicit GROUP BY" +SELECT sensorId, avg(temp) +FROM readings; +``` + +The below queries perform aggregations on multiple keys. Using `GROUP BY` +explicitly or implicitly yields the same results: + +```questdb-sql title="Multiple key aggregation, explicit GROUP BY" +SELECT sensorId, sensorType, avg(temp) +FROM readings +GROUP BY sensorId,sensorType; +``` + +```questdb-sql title="Multiple key aggregation, implicit GROUP BY" +SELECT sensorId, sensorType, avg(temp) +FROM readings; +``` + +When used explicitly, the list of keys in the `GROUP BY` clause must match the +list of keys in the `SELECT` clause, otherwise an error will be returned: + +```questdb-sql title="Error - Column b is missing in the GROUP BY clause" +SELECT a, b, avg(temp) +FROM tab +GROUP BY a; +``` + +```questdb-sql title="Error - Column b is missing in the SELECT clause" +SELECT a, avg(temp) +FROM tab +GROUP BY a, b; +``` + +```questdb-sql title="Success - Columns match" +SELECT a, b, avg(temp) +FROM tab +GROUP BY a, b; +``` diff --git a/docs/reference/sql/insert.md b/docs/reference/sql/insert.md new file mode 100644 index 00000000..4d925339 --- /dev/null +++ b/docs/reference/sql/insert.md @@ -0,0 +1,109 @@ +--- +title: INSERT keyword +sidebar_label: INSERT +description: INSERT SQL keyword reference documentation. 
+--- + +`INSERT` ingests selected data into a database table. + +## Syntax + +Inserting values directly or using sub-queries: + +![Flow chart showing the syntax of the INSERT keyword](/img/docs/diagrams/insert.svg) + +Inserting using sub-query alias: + +![Flow chart showing the syntax of the WITH AS INSERT keyword](/img/docs/diagrams/withAsInsert.svg) + +### Description + +:::note + +If the target partition is +[attached by a symbolic link](/docs/reference/sql/alter-table-attach-partition/#symbolic-links), +the partition is read-only. `INSERT` operation on a read-only partition triggers +a critical-level log in the server, and the insert is a no-op. + +::: + +Inserting values directly or using sub-queries: + +- `VALUE`: Directly defines the values to be inserted. +- `SELECT`: Inserts values based on the result of a + [SELECT](/docs/reference/sql/select/) query + +Setting sub-query alias: + +- `WITH AS`: Inserts values based on a sub-query, to which an alias is given by + using [WITH](/docs/reference/sql/with/). + +Parameter: + +- `batch` expects a `batchCount` (integer) value defining how many records to + process at any one time. + +## Examples + +```questdb-sql title="Inserting all columns" +INSERT INTO trades +VALUES( + '2021-10-05T11:31:35.878Z', + 'AAPL', + 255, + 123.33, + 'B'); +``` + +```questdb-sql title="Bulk inserts" +INSERT INTO trades +VALUES + ('2021-10-05T11:31:35.878Z', 'AAPL', 245, 123.4, 'C'), + ('2021-10-05T12:31:35.878Z', 'AAPL', 245, 123.3, 'C'), + ('2021-10-05T13:31:35.878Z', 'AAPL', 250, 123.1, 'C'), + ('2021-10-05T14:31:35.878Z', 'AAPL', 250, 123.0, 'C'); +``` + +```questdb-sql title="Specifying schema" +INSERT INTO trades (timestamp, symbol, quantity, price, side) +VALUES( + to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss'), + 'AAPL', + 255, + 123.33, + 'B'); +``` + +:::note + +Columns can be omitted during `INSERT` in which case the value will be `NULL` + +::: + +```questdb-sql title="Inserting only specific columns" +INSERT INTO trades (timestamp, symbol, price) +VALUES(to_timestamp('2019-10-17T00:00:00', 'yyyy-MM-ddTHH:mm:ss'),'AAPL','B'); +``` + +### Inserting query results + +This method allows you to insert as many rows as your query returns at once. + +```questdb-sql title="Insert as select" +INSERT INTO confirmed_trades + SELECT timestamp, instrument, quantity, price, side + FROM unconfirmed_trades + WHERE trade_id = '47219345234'; +``` + +Using the [`WITH` keyword](/docs/reference/sql/with/) to set up an alias for a +`SELECT` sub-query: + +```questdb-sql title="Insert with sub-query" +WITH confirmed_id AS ( + SELECT * FROM unconfirmed_trades + WHERE trade_id = '47219345234' +) +INSERT INTO confirmed_trades +SELECT * FROM confirmed_id; +``` diff --git a/docs/reference/sql/join.md b/docs/reference/sql/join.md new file mode 100644 index 00000000..d557186d --- /dev/null +++ b/docs/reference/sql/join.md @@ -0,0 +1,506 @@ +--- +title: JOIN keyword +sidebar_label: JOIN +description: JOIN SQL keyword reference documentation. +--- + +QuestDB supports the type of joins you can frequently find in +[relational databases](/glossary/relational-database/): `INNER`, `LEFT (OUTER)`, +`CROSS`. Additionally, it implements joins which are particularly useful for +time-series analytics: `ASOF`, `LT`, and `SPLICE`. `FULL` joins are not yet +implemented and are on our roadmap. + +All supported join types can be combined in a single SQL statement; QuestDB +SQL's optimizer determines the best execution order and algorithms. 
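+ +For example, a time-series join and a relational join can be mixed in one statement. The sketch below assumes hypothetical `trades`, `quotes`, and `instruments` tables and simply illustrates combining join types: + +```questdb-sql title="Combining ASOF and INNER joins" +-- For each trade, take the most recent quote at or before the trade time, +-- then enrich the row with static instrument metadata. +SELECT t.timestamp, t.symbol, t.price, q.bid, q.ask, i.sector +FROM trades t +ASOF JOIN quotes q ON (symbol) +INNER JOIN instruments i ON t.symbol = i.symbol; +```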
+ +There are no known limitations on the size of tables or sub-queries used in +joins and there are no limitations on the number of joins, either. + +## Syntax + +High-level overview: + +![Flow chart showing the syntax of the high-level syntax of the JOIN keyword](/img/docs/diagrams/joinOverview.svg) + +- `selectClause` - see [SELECT](/docs/reference/sql/select/) for more + information. +- `whereClause` - see [WHERE](/docs/reference/sql/where/) for more information. +- The specific syntax for `joinClause` depends on the type of `JOIN`: + + - `INNER` and `LEFT` `JOIN` has a mandatory `ON` clause allowing arbitrary + `JOIN` predicates, `operator`: + + ![Flow chart showing the syntax of the INNER, LEFT JOIN keyword](/img/docs/diagrams/InnerLeftJoin.svg) + + - `ASOF`, `LT`, and `SPLICE` `JOIN` has optional `ON` clause allowing only the + `=` predicate: + + ![Flow chart showing the syntax of the ASOF, LT, and SPLICE JOIN keyword](/img/docs/diagrams/AsofLtSpliceJoin.svg) + + - `CROSS JOIN` does not allow any `ON` clause: + + ![Flow chart showing the syntax of the CROSS JOIN keyword](/img/docs/diagrams/crossJoin.svg) + +Columns from joined tables are combined in a single row. Columns with the same +name originating from different tables will be automatically aliased to create a +unique column namespace of the resulting set. + +Though it is usually preferable to explicitly specify join conditions, QuestDB +will analyze `WHERE` clauses for implicit join conditions and will derive +transient join conditions where necessary. + +## Execution order + +Join operations are performed in order of their appearance in a SQL query. The +following query performs a join on a table with one million rows based on a +column from a smaller table with one hundred rows: + +```questdb-sql +SELECT * FROM 1_million_rows +INNER JOIN 1_hundred_rows +ON 1_million_rows.customer_id = 1_hundred_rows.referral_id; +``` + +The performance of this query can be improved by rewriting the query as follows: + +```questdb-sql +SELECT * FROM 1_hundred_rows +INNER JOIN 1_million_rows +ON 1_million_rows.referral_id = 1_hundred_rows.customer_id; +``` + +## Implicit joins + +It is possible to join two tables using the following syntax: + +```questdb-sql +SELECT * +FROM a, b +WHERE a.id = b.id; +``` + +The type of join as well as the column are inferred from the `WHERE` clause, and +may be either an `INNER` or `CROSS` join. For the example above, the equivalent +explicit statement would be: + +```questdb-sql +SELECT * +FROM a +JOIN b ON (id); +``` + +## Using the `ON` clause for the `JOIN` predicate + +When tables are joined on a column that has the same name in both tables you can +use the `ON (column)` shorthand. + +When the `ON` clause is permitted (all except `CROSS JOIN`), it is possible to +join multiple columns. 
+ +For example, the following two tables contain identical column names `last_name` +and `first_name`: + +`order_records`: + +| last_name | first_name | total | order_date | +| --------- | ---------- | ------- | --------------------------- | +| Tom | Smith | 34.5000 | 2023-01-05T11:31:35.808000Z | +| Jane | Austen | 4.5000 | 2023-01-05T15:34:25.378000Z | +| Eliot | Flint | 89.9000 | 2023-01-05T17:00:37.872000Z | + +`customer_records`: + +| last_name | first_name | cust_id | +| --------- | ---------- | ------- | +| Jane | Austen | 101 | +| Tom | Smith | 201 | +| Eliot | Flint | 301 | + +It is possible to add multiple JOIN ON condition: + +```questdb-sql + +SELECT + * +FROM + 'order_records' + JOIN customer_records ON ( + order_records.last_name = customer_records.last_name + AND order_records.first_name = customer_records.first_name + ); +``` + +The query can be simplified further since the column names are identical: + +```questdb-sql + +SELECT * FROM 'order_records' +JOIN customer_records +ON (last_name, first_name); +``` + +The result of both queries is the following: + +| last_name | first_name | total | order_date | last_name | first_name | cust_id | +| --------- | ---------- | ------- | --------------------------- | --------- | ---------- | ------- | +| Tom | Smith | 34.5000 | 2023-01-05T11:31:35.808000Z | Jane | Austen | 101 | +| Jane | Austen | 4.5000 | 2023-01-05T15:34:25.378000Z | Tom | Smith | 201 | +| Eliot | Flint | 89.9000 | 2023-01-05T17:00:37.872000Z | Eliot | Flint | 301 | + +## (INNER) JOIN + +`(INNER) JOIN` returns rows from two tables where the records on the compared +column have matching values in both tables. `JOIN` is interpreted as +`INNER JOIN` by default, making the `INNER` keyword implicit. + +The following query returns the `movieId` and the average rating from table +`ratings`. It also adds a column for the `title` from the table `movies`. The +corresponding title will be identified based on the `movieId` in the `ratings` +table matching an `id` in the `movies` table. + +```questdb-sql title="INNER JOIN ON" +SELECT movieId a, title, avg(rating) +FROM ratings +INNER JOIN (SELECT movieId id, title FROM movies) +ON ratings.movieId = id; + +-- Omitting 'INNER' makes the query equivalent: +SELECT movieId a, title, avg(rating) +FROM ratings +JOIN (SELECT movieId id, title FROM movies) +ON ratings.movieId = id; +``` + +## LEFT (OUTER) JOIN + +`LEFT OUTER JOIN` or simply `LEFT JOIN` returns **all** records from the left +table, and if matched, the records of the right table. When there is no match +for the right table, it returns `NULL` values in right table fields. + +The general syntax is as follows: + +```questdb-sql title="LEFT JOIN ON" +SELECT tab1.colA, tab2.colB +FROM table1 tab1 +LEFT OUTER JOIN table2 tab2 +ON tab1.colA = tab2.colB; + +-- Omitting 'OUTER' makes the query equivalent: +SELECT tab1.colA, tab2.colB +FROM table1 tab1 +LEFT JOIN table2 tab2 +ON tab1.colA = tab2.colB; +``` + +A `LEFT OUTER JOIN` query can also be used to select all rows in the left table +that do not exist in the right table. + +```questdb-sql +SELECT tab1.colA, tab2.colB +FROM table1 tab1 +LEFT OUTER JOIN table2 tab2 +ON tab1.colA = tab2.colB +WHERE tab2.colB = NULL; +``` + +## CROSS JOIN + +`CROSS JOIN` returns the Cartesian product of the two tables being joined and +can be used to create a table with all possible combinations of columns. 
The following query returns all possible combinations of `starters` and `deserts`: + +```questdb-sql +SELECT * +FROM starters +CROSS JOIN deserts; +``` + +:::note + +`CROSS JOIN` does not have an `ON` clause. + +::: + +## ASOF JOIN + +`ASOF JOIN` joins two different time-series. For each row in the first time-series, the `ASOF JOIN` takes from the second time-series a timestamp that meets both of the following criteria: + +- The timestamp is the closest to the first timestamp. +- The timestamp is **prior to or equal to** the first timestamp. + +### Example + +Given the following tables: + +Table `bids` (the left table):
+ + +| ts | bid | +| --------------------------- | --- | +| 2019-10-17T00:00:00.000000Z | 100 | +| 2019-10-17T00:00:00.100000Z | 101 | +| 2019-10-17T00:00:00.300000Z | 102 | +| 2019-10-17T00:00:00.500000Z | 103 | +| 2019-10-17T00:00:00.600000Z | 104 | + +
+ + +The `asks` table (the right table): + +
+ + +| ts | ask | +| --------------------------- | --- | +| 2019-10-17T00:00:00.100000Z | 100 | +| 2019-10-17T00:00:00.300000Z | 101 | +| 2019-10-17T00:00:00.400000Z | 102 | + +
+ + +An `ASOF JOIN` query can look like the following: + +```questdb-sql +SELECT bids.ts timebid, asks.ts timeask, bid, ask +FROM bids +ASOF JOIN asks; +``` + +This is the JOIN result: + +
+ +| timebid | timeask | bid | ask | +| --------------------------- | --------------------------- | --- | ---- | +| 2019-10-17T00:00:00.000000Z | NULL | 100 | NULL | +| 2019-10-17T00:00:00.100000Z | 2019-10-17T00:00:00.100000Z | 101 | 100 | +| 2019-10-17T00:00:00.300000Z | 2019-10-17T00:00:00.300000Z | 102 | 101 | +| 2019-10-17T00:00:00.500000Z | 2019-10-17T00:00:00.400000Z | 103 | 102 | +| 2019-10-17T00:00:00.600000Z | 2019-10-17T00:00:00.400000Z | 104 | 102 |
+ + +The result has all rows from the `bids` table joined with rows from the `asks` +table. For each timestamp from the `bids` table, the query looks for a timestamp +that is equal or prior to it from the `asks` table. If no matching timestamp is +found, NULL is inserted. + +### Using `ON` for matching column value + +An additional `ON` clause can be used to join the tables based on the value of a +selected column. + +The query above does not use the optional `ON` clause. If both tables store data +for multiple stocks, `ON` clause provides a way to find asks for bids with +matching stock value. + +Table `bids` (the left table): + +| ts | bid | stock | +| --------------------------- | --- | :---- | +| 2019-10-17T00:00:00.000000Z | 500 | AAPL | +| 2019-10-17T00:00:00.100000Z | 101 | GOOG | +| 2019-10-17T00:00:00.200000Z | 102 | GOOG | +| 2019-10-17T00:00:00.300000Z | 501 | AAPL | +| 2019-10-17T00:00:00.500000Z | 103 | GOOG | +| 2019-10-17T00:00:00.600000Z | 502 | AAPL | +| 2019-10-17T00:00:00.600000Z | 200 | IBM | + +Table `asks` (the right table): + +| ts | ask | stock | +| --------------------------- | --- | :---- | +| 2019-10-17T00:00:00.000000Z | 500 | AAPL | +| 2019-10-17T00:00:00.100000Z | 501 | AAPL | +| 2019-10-17T00:00:00.100000Z | 100 | GOOG | +| 2019-10-17T00:00:00.400000Z | 502 | AAPL | +| 2019-10-17T00:00:00.700000Z | 200 | IBM | + +Notice how both tables have a new column `stock` that stores the stock name. The +`ON` clause allows you to match the value of the `stock` column in the `bids` +table with that in the `asks` table: + +```questdb-sql +SELECT bids.stock stock, bids.ts timebid, asks.ts timeask, bid, ask +FROM bids +ASOF JOIN asks ON (stock); +``` + +The above query returns these results: + +| stock | timebid | timeask | bid | ask | +| :---- | --------------------------- | --------------------------- | --- | ---- | +| AAPL | 2019-10-17T00:00:00.000000Z | 2019-10-17T00:00:00.000000Z | 500 | 500 | +| GOOG | 2019-10-17T00:00:00.100000Z | 2019-10-17T00:00:00.100000Z | 101 | 100 | +| GOOG | 2019-10-17T00:00:00.200000Z | 2019-10-17T00:00:00.100000Z | 102 | 100 | +| AAPL | 2019-10-17T00:00:00.300000Z | 2019-10-17T00:00:00.100000Z | 501 | 501 | +| GOOG | 2019-10-17T00:00:00.500000Z | 2019-10-17T00:00:00.100000Z | 103 | 100 | +| AAPL | 2019-10-17T00:00:00.600000Z | 2019-10-17T00:00:00.400000Z | 502 | 502 | +| IBM | 2019-10-17T00:00:00.600000Z | NULL | 200 | NULL | + +This query returns all rows from the `bids` table joined with records from the +`asks` table that meet both the following criterion: + +- The `stock` column of the two tables has the same value +- The timestamp of the `asks` record is prior to or equal to the timestamp of + the `bids` record. + +The IBM record in the `bids` table is not joined with any record in the `asks` +table because there is no record in the `asks` table with the same stock name +and a timestamp prior to or equal to the timestamp of the IBM record. The asks +table has a record with the IBM stock name but its timestamp is +`2019-10-17T00:00:00.700000Z` which is after the timestamp of the IBM record in +the `bids` table and therefore not joined. + +### Timestamp considerations + +`ASOF` join can be performed only on tables or result sets that are ordered by +time. When a table is created with a +[designated timestamp](/docs/concept/designated-timestamp/) the order of records +is enforced and the timestamp column name is in the table metadata. `ASOF` join +uses this timestamp column from metadata. 
+ +In case tables do not have a designated timestamp column, but data is in +chronological order, timestamp columns can be specified at runtime: + +```questdb-sql +SELECT bids.ts timebid, bid, ask +FROM (bids timestamp(ts)) +ASOF JOIN (asks timestamp (ts)); +``` + +:::caution + +`ASOF` join does not check timestamp order, if data is not in chronological +order, the join result is non-deterministic. + +::: + +## LT JOIN + +Similar to `ASOF JOIN`, `LT JOIN` joins two different time-series measured. For +each row in the first time-series, the `LT JOIN` takes from the second +time-series a timestamp that meets both of the following criteria: + +- The timestamp is the closest to the first timestamp. +- The timestamp is **strictly prior to** the first timestamp. + +In other words: `LT JOIN` won't join records with equal timestamps. + +### Example + +Consider the following tables: + +Table `bids`: + +| ts | bid | +| --------------------------- | --- | +| 2019-10-17T00:00:00.000000Z | 101 | +| 2019-10-17T00:00:00.300000Z | 102 | +| 2019-10-17T00:00:00.500000Z | 103 | + +Table `asks`: + +| ts | ask | +| --------------------------- | --- | +| 2019-10-17T00:00:00.000000Z | 100 | +| 2019-10-17T00:00:00.300000Z | 101 | +| 2019-10-17T00:00:00.400000Z | 102 | + +An `LT JOIN` can be built using the following query: + +```questdb-sql +SELECT bids.ts timebid, asks.ts timeask, bid, ask +FROM bids +LT JOIN asks; +``` + +The query above returns the following results: + +| timebid | timeask | bid | ask | +| --------------------------- | --------------------------- | --- | ---- | +| 2019-10-17T00:00:00.000000Z | NULL | 101 | NULL | +| 2019-10-17T00:00:00.300000Z | 2019-10-17T00:00:00.000000Z | 102 | 100 | +| 2019-10-17T00:00:00.500000Z | 2019-10-17T00:00:00.400000Z | 103 | 102 | + +Notice how the first record in the `bids` table is not joined with any record in +the `asks` table. This is because there is no record in the `asks` table with a +timestamp prior to the timestamp of the first record in the `bids` table. + +Similarly, the second record in the `bids` table is joined with the first record +in the `asks` table because the timestamp of the first record in the `asks` +table is prior to the timestamp of the second record in the `bids` table. + +:::note + +`LT` join is often useful to join a table to itself in order to get preceding +values for every row. + +::: + +## SPLICE JOIN + +`SPLICE JOIN` is a full `ASOF JOIN`. It will return all the records from both +tables. For each record from left table splice join will find prevailing record +from right table and for each record from right table - prevailing record from +left table. 
+ +Considering the following tables: + +Table `asks`: + +| ts | ask | +| --------------------------- | --- | +| 2019-10-17T00:00:00.000000Z | 100 | +| 2019-10-17T00:00:00.200000Z | 101 | +| 2019-10-17T00:00:00.400000Z | 102 | + +Table `bids`: + +| ts | bid | +| --------------------------- | --- | +| 2019-10-17T00:00:00.100000Z | 101 | +| 2019-10-17T00:00:00.300000Z | 102 | +| 2019-10-17T00:00:00.500000Z | 103 | + +A `SPLICE JOIN` can be built as follows: + +```questdb-sql +SELECT bids.ts timebid, bid, ask +FROM bids +SPLICE JOIN asks; +``` + +This query returns the following results: + +| timebid | bid | ask | +| --------------------------- | ---- | --- | +| null | null | 100 | +| 2019-10-17T00:00:00.100000Z | 101 | 100 | +| 2019-10-17T00:00:00.100000Z | 101 | 101 | +| 2019-10-17T00:00:00.300000Z | 102 | 101 | +| 2019-10-17T00:00:00.300000Z | 102 | 102 | +| 2019-10-17T00:00:00.500000Z | 103 | 102 | + +Note that the above query does not use the optional `ON` clause. In case you +need additional filtering on the two tables, the `ON` clause can be used as +follows: + +```questdb-sql +SELECT ts timebid, stock bidStock, bid, ask +FROM bids +SPLICE JOIN + ( + SELECT ts timesask, stock askStock, ask ask + FROM asks + ) + ON bidStock=askStock; +``` diff --git a/docs/reference/sql/latest-on.md b/docs/reference/sql/latest-on.md new file mode 100644 index 00000000..d36aa389 --- /dev/null +++ b/docs/reference/sql/latest-on.md @@ -0,0 +1,255 @@ +--- +title: LATEST ON keyword +sidebar_label: LATEST ON +description: + Reference documentation for using LATEST ON keywords with examples for + illustration. +--- + +Retrieves the latest entry by timestamp for a given key or combination of keys, +for scenarios where multiple time series are stored in the same table. + +## Syntax + +![Flow chart showing the syntax of the LATEST ON keyword](/img/docs/diagrams/latestOn.svg) + +where: + +- `columnName` used in the `LATEST ON` part of the clause is a `TIMESTAMP` + column. +- `columnName` list used in the `PARTITION BY` part of the clause is a list of + columns of one of the following types: `SYMBOL`, `STRING`, `BOOLEAN`, `SHORT`, + `INT`, `LONG`, `LONG256`, `CHAR`. + +## Description + +`LATEST ON` is used as part of a [SELECT statement](/docs/reference/sql/select/) +for returning the most recent records per unique time series identified by the +`PARTITION BY` column values. + +`LATEST ON` requires a +[designated timestamp](/docs/concept/designated-timestamp/) column. Use +[sub-queries](#latest-on-over-sub-query) for tables without the designated +timestamp. + +The query syntax has an impact on the [execution order](#execution-order) of the +`LATEST ON` clause and the `WHERE` clause. + +To illustrate how `LATEST ON` is intended to be used, consider the `trips` table +[in the QuestDB demo instance](https://demo.questdb.io/). This table has a +`payment_type` column as `SYMBOL` type which specifies the method of payment per +trip. 
We can find the most recent trip for each unique method of payment with +the following query: + +```questdb-sql +SELECT payment_type, pickup_datetime, trip_distance +FROM trips +LATEST ON pickup_datetime PARTITION BY payment_type; +``` + +| payment_type | pickup_datetime | trip_distance | +| ------------ | --------------------------- | ------------- | +| Dispute | 2014-12-31T23:55:27.000000Z | 1.2 | +| Voided | 2019-06-27T17:56:45.000000Z | 1.9 | +| Unknown | 2019-06-30T23:57:42.000000Z | 3.9 | +| No Charge | 2019-06-30T23:59:30.000000Z | 5.2 | +| Cash | 2019-06-30T23:59:54.000000Z | 2 | +| Card | 2019-06-30T23:59:56.000000Z | 1 | + +The above query returns the latest value within each time series stored in the +table. Those time series are determined based on the values in the column(s) +specified in the `LATEST ON` clause. In our example those time series are +represented by different payment types. Then the column used in the `LATEST ON` +part of the clause stands for the designated timestamp column for the table. +This allows the database to find the latest value within each time series. + +## Examples + +For the next examples, we can create a table called `balances` with the +following SQL: + +```questdb-sql +CREATE TABLE balances ( + cust_id SYMBOL, + balance_ccy SYMBOL, + balance DOUBLE, + ts TIMESTAMP +) TIMESTAMP(ts) PARTITION BY DAY; + +insert into balances values ('1', 'USD', 600.5, '2020-04-21T16:03:43.504432Z'); +insert into balances values ('2', 'USD', 950, '2020-04-21T16:08:34.404665Z'); +insert into balances values ('2', 'EUR', 780.2, '2020-04-21T16:11:22.704665Z'); +insert into balances values ('1', 'USD', 1500, '2020-04-21T16:11:32.904234Z'); +insert into balances values ('1', 'EUR', 650.5, '2020-04-22T16:11:32.904234Z'); +insert into balances values ('2', 'USD', 900.75, '2020-04-22T16:12:43.504432Z'); +insert into balances values ('2', 'EUR', 880.2, '2020-04-22T16:18:34.404665Z'); +insert into balances values ('1', 'USD', 330.5, '2020-04-22T16:20:14.404997Z'); +``` + +This provides us with a table with the following content: + +| cust_id | balance_ccy | balance | ts | +| ------- | ----------- | ------- | --------------------------- | +| 1 | USD | 600.5 | 2020-04-21T16:01:22.104234Z | +| 2 | USD | 950 | 2020-04-21T16:03:43.504432Z | +| 2 | EUR | 780.2 | 2020-04-21T16:08:34.404665Z | +| 1 | USD | 1500 | 2020-04-21T16:11:22.704665Z | +| 1 | EUR | 650.5 | 2020-04-22T16:11:32.904234Z | +| 2 | USD | 900.75 | 2020-04-22T16:12:43.504432Z | +| 2 | EUR | 880.2 | 2020-04-22T16:18:34.404665Z | +| 1 | USD | 330.5 | 2020-04-22T16:20:14.404997Z | + +### Single column + +When a single `symbol` column is specified in `LATEST ON` queries, the query +will end after all distinct symbol values are found. + +```questdb-sql title="Latest records by customer ID" +SELECT * FROM balances +LATEST ON ts PARTITION BY cust_id; +``` + +The query returns two rows with the most recent records per unique `cust_id` +value: + +| cust_id | balance_ccy | balance | ts | +| ------- | ----------- | ------- | --------------------------- | +| 2 | EUR | 880.2 | 2020-04-22T16:18:34.404665Z | +| 1 | USD | 330.5 | 2020-04-22T16:20:14.404997Z | + +### Multiple columns + +When multiple columns are specified in `LATEST ON` queries, the returned results +are the most recent **unique combinations** of the column values. 
This example +query returns `LATEST ON` customer ID and balance currency: + +```questdb-sql title="Latest balance by customer and currency" +SELECT cust_id, balance_ccy, balance +FROM balances +LATEST ON ts PARTITION BY cust_id, balance_ccy; +``` + +The results return the most recent records for each unique combination of +`cust_id` and `balance_ccy`. + +| cust_id | balance_ccy | balance | inactive | ts | +| ------- | ----------- | ------- | -------- | --------------------------- | +| 1 | EUR | 650.5 | FALSE | 2020-04-22T16:11:32.904234Z | +| 2 | USD | 900.75 | FALSE | 2020-04-22T16:12:43.504432Z | +| 2 | EUR | 880.2 | FALSE | 2020-04-22T16:18:34.404665Z | +| 1 | USD | 330.5 | FALSE | 2020-04-22T16:20:14.404997Z | + +#### Performance considerations + +When the `LATEST ON` clause contains a single `symbol` column, QuestDB will know +all distinct values upfront and stop scanning table contents once the latest +entry has been found for each distinct symbol value. + +When the `LATEST ON` clause contains multiple columns, QuestDB has to scan the +entire table to find distinct combinations of column values. + +Although scanning is fast, performance will degrade on hundreds of millions of +records. If there are multiple columns in the `LATEST ON` clause, this will +result in a full table scan. + +### LATEST ON over sub-query + +For this example, we can create another table called `unordered_balances` with +the following SQL: + +```questdb-sql +CREATE TABLE unordered_balances ( + cust_id SYMBOL, + balance_ccy SYMBOL, + balance DOUBLE, + ts TIMESTAMP +); + +insert into unordered_balances values ('2', 'USD', 950, '2020-04-21T16:08:34.404665Z'); +insert into unordered_balances values ('1', 'USD', 330.5, '2020-04-22T16:20:14.404997Z'); +insert into unordered_balances values ('2', 'USD', 900.75, '2020-04-22T16:12:43.504432Z'); +insert into unordered_balances values ('1', 'USD', 1500, '2020-04-21T16:11:32.904234Z'); +insert into unordered_balances values ('1', 'USD', 600.5, '2020-04-21T16:03:43.504432Z'); +insert into unordered_balances values ('1', 'EUR', 650.5, '2020-04-22T16:11:32.904234Z'); +insert into unordered_balances values ('2', 'EUR', 880.2, '2020-04-22T16:18:34.404665Z'); +insert into unordered_balances values ('2', 'EUR', 780.2, '2020-04-21T16:11:22.704665Z'); +``` + +Note that this table doesn't have a designated timestamp column and also +contains time series that are unordered by `ts` column. + +Due to the absent designated timestamp column, we can't use `LATEST ON` directly +on this table, but it's possible to use `LATEST ON` over a sub-query: + +```questdb-sql title="Latest balance by customer over unordered data" +(SELECT * FROM unordered_balances) +LATEST ON ts PARTITION BY cust_id; +``` + +Just like with the `balances` table, the query returns two rows with the most +recent records per unique `cust_id` value: + +| cust_id | balance_ccy | balance | ts | +| ------- | ----------- | ------- | --------------------------- | +| 2 | EUR | 880.2 | 2020-04-22T16:18:34.404665Z | +| 1 | USD | 330.5 | 2020-04-22T16:20:14.404997Z | + +### Execution order + +The following queries illustrate how to change the execution order in a query by +using brackets. + +#### WHERE first + +```questdb-sql +SELECT * FROM balances +WHERE balance > 800 +LATEST ON ts PARTITION BY cust_id; +``` + +This query executes `WHERE` before `LATEST ON` and returns the most recent +balance which is above 800. 
The execution order is as follows: + +- filter out all balances below 800 +- find the latest balance by `cust_id` + +| cust_id | balance_ccy | balance | ts | +| ------- | ----------- | ------- | --------------------------- | +| 1 | USD | 1500 | 2020-04-22T16:11:22.704665Z | +| 2 | EUR | 880.2 | 2020-04-22T16:18:34.404665Z | + +#### LATEST ON first + +```questdb-sql +(SELECT * FROM balances LATEST ON ts PARTITION BY cust_id) --note the brackets +WHERE balance > 800; +``` + +This query executes `LATEST ON` before `WHERE` and returns the most recent +records, then filters out those below 800. The steps are: + +1. Find the latest balances by customer ID. +2. Filter out balances below 800. Since the latest balance for customer 1 is + equal to 330.5, it is filtered out in this step. + +| cust_id | balance_ccy | balance | inactive | ts | +| ------- | ----------- | ------- | -------- | --------------------------- | +| 2 | EUR | 880.2 | FALSE | 2020-04-22T16:18:34.404665Z | + +#### Combination + +It's possible to combine a time-based filter with the balance filter from the +previous example to query the latest values for the `2020-04-21` date and filter +out those below 800. + +```questdb-sql +(balances WHERE ts in '2020-04-21' LATEST ON ts PARTITION BY cust_id) +WHERE balance > 800; +``` + +Since QuestDB allows you to omit the `SELECT * FROM` part of the query, we +omitted it to keep the query compact. + +Such a combination is very powerful since it allows you to find the latest +values for a time slice of the data and then apply a filter to them in a single +query. diff --git a/docs/reference/sql/limit.md b/docs/reference/sql/limit.md new file mode 100644 index 00000000..0d0bf394 --- /dev/null +++ b/docs/reference/sql/limit.md @@ -0,0 +1,46 @@ +--- +title: LIMIT keyword +sidebar_label: LIMIT +description: LIMIT SQL keyword reference documentation. +--- + +Specify the number and position of records returned by a +[SELECT statement](/docs/reference/sql/select/). + +In other implementations of SQL, this is sometimes replaced by statements such +as `OFFSET` or `ROWNUM` Our implementation of `LIMIT` encompasses both in one +statement. + +## Syntax + +![Flow chart showing the syntax of the LIMIT keyword](/img/docs/diagrams/limit.svg) + +- `numberOfRecords` is the number of records to return. +- `upperBound` and `lowerBound` is the return range. `lowerBound` is + **exclusive** and `upperBound` is **inclusive**. + +A `positive` number will return the `first` n records. A `negative` number will +return the `last` n records. + +## Examples + +```questdb-sql title="First 5 results" +SELECT * FROM ratings LIMIT 5; +``` + +```questdb-sql title="Last 5 results" +SELECT * FROM ratings LIMIT -5; +``` + +```questdb-sql title="Range results - this will return records 3, 4 and 5" +SELECT * FROM ratings LIMIT 2,5; +``` + +`negative` range parameters will return results from the bottom of the table. +Assuming a table with `n` records, the following will return records between n-7 +(exclusive) and n-3 (inclusive), i.e {n-6, n-5, n-4, n-3}. Both `upperBound` and +`lowerBound` must be negative numbers, in this case: + +```questdb-sql title="Range results (negative)" +SELECT * FROM ratings LIMIT -7, -3; +``` diff --git a/docs/reference/sql/order-by.md b/docs/reference/sql/order-by.md new file mode 100644 index 00000000..bba601ba --- /dev/null +++ b/docs/reference/sql/order-by.md @@ -0,0 +1,34 @@ +--- +title: ORDER BY keyword +sidebar_label: ORDER BY +description: ORDER BY SQL keyword reference documentation. 
+--- + +Sort the results of a query in ascending or descending order. + +## Syntax + +![Flow chart showing the syntax of the ORDER BY keyword](/img/docs/diagrams/orderBy.svg) + +Default order is `ASC`. You can omit to order in ascending order. + +## Notes + +Ordering data requires holding it in RAM. For large operations, we suggest you +check you have sufficient memory to perform the operation. + +## Examples + +```questdb-sql title="Omitting ASC will default to ascending order" +ratings ORDER BY userId; +``` + +```questdb-sql title="Ordering in descending order" +ratings ORDER BY userId DESC; +``` + +```questdb-sql title="Multi-level ordering" +ratings ORDER BY userId, rating DESC; +``` + + diff --git a/docs/reference/sql/reindex.md b/docs/reference/sql/reindex.md new file mode 100644 index 00000000..7255f52d --- /dev/null +++ b/docs/reference/sql/reindex.md @@ -0,0 +1,40 @@ +--- +title: REINDEX +sidebar_label: REINDEX +description: REINDEX SQL keyword reference documentation. +--- + +Rebuilds one or more [index](/docs/concept/indexes/) columns of the given table. +This operation is intended to be used after a hardware or software crash, when the index data are corrupted and the table cannot be opened for writes. + +The operation can only be performed when there is no other reader and writer working on the table. During the operation, the table is locked and no read and write should be performed on the selected table. + +## Syntax + +![Flow chart showing the syntax of the REINDEX keyword](/img/docs/diagrams/reindex.svg) + +## Options + +By default, `REINDEX` rebuilds all indexes in the selected table. The following options can be used to narrow down the scope of the operation: + +- `COLUMN`: When defined, `REINDEX` rebuilds the index for the selected column. +- `PARTITION`: When defined, `REINDEX` rebuilds index files in the selected partition only. The partition name must match the name of the directory for the given partition. The naming convention is detailed in [Partitions](/docs/concept/partitions/). + +## Example + +Rebuilding all the indexes in the table `trades`: + +```questdb-sql title="Rebuilding an index" +REINDEX TABLE trades LOCK EXCLUSIVE; +``` + +Rebuilding the index in the column `instruments`: + +```questdb-sql title="Rebuilding an index" +REINDEX TABLE trades COLUMN instruments LOCK EXCLUSIVE; +``` +Rebuilding one partition (`2021-12-17`) of the index in the column `instruments`: + +```questdb-sql title="Rebuilding an index" +REINDEX TABLE trades COLUMN instruments PARTITION '2021-12-17' LOCK EXCLUSIVE; +``` \ No newline at end of file diff --git a/docs/reference/sql/rename.md b/docs/reference/sql/rename.md new file mode 100644 index 00000000..20d18fdb --- /dev/null +++ b/docs/reference/sql/rename.md @@ -0,0 +1,17 @@ +--- +title: RENAME TABLE keyword +sidebar_label: RENAME TABLE +description: RENAME TABLE SQL keyword reference documentation. +--- + +`RENAME TABLE` is used to change the name of a table. + +## Syntax + +![Flow chart showing the syntax of the RENAME TABLE keyword](/img/docs/diagrams/renameTable.svg) + +## Example + +```questdb-sql +RENAME TABLE 'test.csv' TO 'myTable'; +``` diff --git a/docs/reference/sql/sample-by.md b/docs/reference/sql/sample-by.md new file mode 100644 index 00000000..0837bf28 --- /dev/null +++ b/docs/reference/sql/sample-by.md @@ -0,0 +1,418 @@ +--- +title: SAMPLE BY keyword +sidebar_label: SAMPLE BY +description: SAMPLE BY SQL keyword reference documentation. 
+--- + +`SAMPLE BY` is used on time-series data to summarize large datasets into +aggregates of homogeneous time chunks as part of a +[SELECT statement](/docs/reference/sql/select/). + +To use `SAMPLE BY`, a table column needs to be specified as a +[designated timestamp](/docs/concept/designated-timestamp/). + +Users performing `SAMPLE BY` queries on datasets **with missing data** may make +use of the [FILL](#fill-options) keyword to specify a fill behavior. + +## Syntax + +![Flow chart showing the syntax of the SAMPLE BY keywords](/img/docs/diagrams/sampleBy.svg) +![Flow chart showing the syntax of the ALIGN TO keywords](/img/docs/diagrams/alignToCalTimeZone.svg) +![Flow chart showing the syntax of the FILL keyword](/img/docs/diagrams/fill.svg) + +## Sample units + +The size of sampled groups are specified with the following syntax: + +```questdb-sql +SAMPLE BY n{units} +``` + +Where the unit for sampled groups may be one of the following: + +| unit | description | +| ---- | ----------- | +| `U` | microsecond | +| `T` | millisecond | +| `s` | second | +| `m` | minute | +| `h` | hour | +| `d` | day | +| `M` | month | +| `y` | year | + +For example, given a table `trades`, the following query returns the number of +trades per hour: + +```questdb-sql +SELECT ts, count() FROM trades SAMPLE BY 1h +``` + +## Fill options + +The `FILL` keyword is optional and expects one or more `fillOption` strategies +which will be applied to one or more aggregate columns. The following +restrictions apply: + +- Keywords denoting fill strategies may not be combined. Only one option from + `NONE`, `NULL`, `PREV`, `LINEAR` and constants may be used. +- `LINEAR` strategy is not supported for keyed queries, i.e. queries that + contain non-aggregated columns other than the timestamp in the SELECT clause. +- The `FILL` keyword must precede alignment described in the + [sample calculation section](#sample-calculation), i.e.: + + ```questdb-sql + SELECT ts, max(price) max FROM prices + SAMPLE BY 1h FILL(LINEAR) + ALIGN TO ... + ``` + +| fillOption | Description | +| ---------- | ------------------------------------------------------------------------------------------------------------------------- | +| `NONE` | No fill applied. If there is no data, the time sample will be skipped in the results. A table could be missing intervals. | +| `NULL` | Fills with `NULL` values. | +| `PREV` | Fills using the previous value. | +| `LINEAR` | Fills by linear interpolation of the 2 surrounding points. | +| `x` | Fills with a constant value - where `x` is the desired value, for example `FILL(100.05)`. | + +Consider an example table named `prices` which has no records during the entire +third hour (`2021-01-01T03`): + +| ts | price | +| --------------------------- | ----- | +| 2021-01-01T01:00:00.000000Z | p1 | +| 2021-01-01T02:00:00.000000Z | p2 | +| 2021-01-01T04:00:00.000000Z | p4 | +| 2021-01-01T05:00:00.000000Z | p5 | + +The following query returns the maximum price per hour. 
As there are missing +values, an aggregate cannot be calculated: + +```questdb-sql +SELECT ts, max(price) max FROM prices SAMPLE BY 1h; +``` + +A row is missing for the `2021-01-01T03:00:00.000000Z` sample: + +| ts | max | +| --------------------------- | ---- | +| 2021-01-01T01:00:00.000000Z | max1 | +| 2021-01-01T02:00:00.000000Z | max2 | +| 2021-01-01T04:00:00.000000Z | max4 | +| 2021-01-01T05:00:00.000000Z | max5 | + +A `FILL` strategy can be employed which fills with the previous value using +`PREV`: + +```questdb-sql +SELECT ts, max(price) max FROM prices SAMPLE BY 1h FILL(PREV); +``` + +| ts | max | +| ------------------------------- | -------- | +| 2021-01-01T01:00:00.000000Z | max1 | +| 2021-01-01T02:00:00.000000Z | max2 | +| **2021-01-01T03:00:00.000000Z** | **max2** | +| 2021-01-01T04:00:00.000000Z | max4 | +| 2021-01-01T05:00:00.000000Z | max5 | + +Linear interpolation is done using the `LINEAR` fill option: + +```questdb-sql +SELECT ts, max(price) max FROM prices SAMPLE BY 1h FILL(LINEAR); +``` + +| ts | max | +| ------------------------------- | ----------------- | +| 2021-01-01T01:00:00.000000Z | max1 | +| 2021-01-01T02:00:00.000000Z | max2 | +| **2021-01-01T03:00:00.000000Z** | **(max2+max4)/2** | +| 2021-01-01T04:00:00.000000Z | max4 | +| 2021-01-01T05:00:00.000000Z | max5 | + +A constant value can be used as a `fillOption`: + +```questdb-sql +SELECT ts, max(price) max FROM prices SAMPLE BY 1h FILL(100.5); +``` + +| ts | max | +| ------------------------------- | --------- | +| 2021-01-01T01:00:00.000000Z | max1 | +| 2021-01-01T02:00:00.000000Z | max2 | +| **2021-01-01T03:00:00.000000Z** | **100.5** | +| 2021-01-01T04:00:00.000000Z | max4 | +| 2021-01-01T05:00:00.000000Z | max5 | + +Finally, `NULL` may be used as a `fillOption`: + +```questdb-sql +SELECT ts, max(price) max FROM prices SAMPLE BY 1h FILL(NULL); +``` + +| ts | max | +| ------------------------------- | -------- | +| 2021-01-01T01:00:00.000000Z | max1 | +| 2021-01-01T02:00:00.000000Z | max2 | +| **2021-01-01T03:00:00.000000Z** | **null** | +| 2021-01-01T04:00:00.000000Z | max4 | +| 2021-01-01T05:00:00.000000Z | max5 | + +### Multiple fill values + +`FILL()` accepts a list of values where each value corresponds to a single +aggregate column in the SELECT clause order: + +```questdb-sql +SELECT min(price), max(price), avg(price), ts +FROM prices +SAMPLE BY 1h +FILL(NULL, 10, PREV); +``` + +In the above query `min(price)` aggregate will get `FILL(NULL)` strategy +applied, `max(price)` will get `FILL(10)`, and `avg(price)` will get +`FILL(PREV)`. + +## Sample calculation + +The default time calculation of sampled groups is an absolute value, in other +words, sampling by one day is a 24 hour range which is not bound to calendar +dates. To align sampled groups to calendar dates, the `ALIGN TO` keywords can be +used and are described in the [ALIGN TO CALENDAR](#align-to-calendar) section +below. + +Consider a table `sensors` with the following data spanning three calendar days: + +| ts | val | +| --------------------------- | --- | +| 2021-05-31T23:10:00.000000Z | 10 | +| 2021-06-01T01:10:00.000000Z | 80 | +| 2021-06-01T07:20:00.000000Z | 15 | +| 2021-06-01T13:20:00.000000Z | 10 | +| 2021-06-01T19:20:00.000000Z | 40 | +| 2021-06-02T01:10:00.000000Z | 90 | +| 2021-06-02T07:20:00.000000Z | 30 | + +The following query can be used to sample the table by day. 
Note that the
+default sample calculation can be made explicit in a query using
+`ALIGN TO FIRST OBSERVATION`:
+
+```questdb-sql
+SELECT ts, count() FROM sensors
+SAMPLE BY 1d
+
+-- Equivalent to
+SELECT ts, count() FROM sensors
+SAMPLE BY 1d
+ALIGN TO FIRST OBSERVATION
+```
+
+This query will return two rows:
+
+| ts                          | count |
+| --------------------------- | ----- |
+| 2021-05-31T23:10:00.000000Z | 5     |
+| 2021-06-01T23:10:00.000000Z | 2     |
+
+The timestamp value for each 24 hour group starts at the first-observed
+timestamp.
+
+## ALIGN TO CALENDAR
+
+This option aligns data to calendar dates, with two optional parameters:
+
+- [TIME ZONE](#time-zone)
+- [WITH OFFSET](#with-offset)
+
+### TIME ZONE
+
+A time zone may be provided for sampling with calendar alignment. Details on the
+options for specifying time zones with available formats are provided in the
+guide for
+[working with timestamps and time zones](/docs/guides/working-with-timestamps-timezones/).
+
+```questdb-sql
+SELECT ts, count() FROM sensors
+SAMPLE BY 1d
+ALIGN TO CALENDAR TIME ZONE 'Europe/Berlin'
+```
+
+In this case, the 24 hour samples begin at `2021-05-31T01:00:00.000000Z`:
+
+| ts                          | count |
+| --------------------------- | ----- |
+| 2021-05-31T01:00:00.000000Z | 1     |
+| 2021-06-01T01:00:00.000000Z | 4     |
+| 2021-06-02T01:00:00.000000Z | 2     |
+
+Additionally, an offset may be applied when aligning the sample calculation to
+the calendar:
+
+```questdb-sql
+SELECT ts, count() FROM sensors
+SAMPLE BY 1d
+ALIGN TO CALENDAR TIME ZONE 'Europe/Berlin' WITH OFFSET '00:45'
+```
+
+In this case, the 24 hour samples begin at `2021-05-31T01:45:00.000000Z`:
+
+| ts                          | count |
+| --------------------------- | ----- |
+| 2021-05-31T01:45:00.000000Z | 2     |
+| 2021-06-01T01:45:00.000000Z | 4     |
+| 2021-06-02T01:45:00.000000Z | 1     |
+
+#### Local timezone output
+
+The timestamp values output from `SAMPLE BY` queries are in UTC. To convert UTC
+values to a specific time zone, use the
+[to_timezone() function](/docs/reference/function/date-time/#to_timezone).
+
+```questdb-sql
+SELECT to_timezone(ts, 'PST') ts, count
+FROM (SELECT ts, count()
+      FROM sensors SAMPLE BY 2h
+      ALIGN TO CALENDAR TIME ZONE 'PST')
+```
+
+#### Time zone transitions
+
+Calendar dates may contain historical time zone transitions or may vary in the
+total number of hours due to daylight saving time. Consider 31 October 2021 in
+the `Europe/London` time zone, a calendar day which consists of 25 hours:
+
+> - Sunday, 31 October 2021, 02:00:00 clocks are turned backward 1 hour to
+> - Sunday, 31 October 2021, 01:00:00 local standard time
+
+When a `SAMPLE BY` operation crosses time zone transitions in cases such as
+this, the first sampled group which spans a transition will include aggregates
+by full calendar range.
Consider a table `sensors` with one data point per hour +spanning three calendar hours: + +| ts | val | +| --------------------------- | --- | +| 2021-10-31T00:10:00.000000Z | 10 | +| 2021-10-31T01:10:00.000000Z | 20 | +| 2021-10-31T02:10:00.000000Z | 30 | +| 2021-10-31T03:10:00.000000Z | 40 | +| 2021-10-31T04:10:00.000000Z | 50 | + +The following query will sample by hour with the `Europe/London` time zone and +align to calendar ranges: + +```questdb-sql +SELECT ts, count() FROM sensors +SAMPLE BY 1h +ALIGN TO CALENDAR TIME ZONE 'Europe/London' +``` + +The record count for the hour which encounters a time zone transition will +contain two records for both hours at the time zone transition: + +| ts | count | +| --------------------------- | ----- | +| 2021-10-31T00:00:00.000000Z | 2 | +| 2021-10-31T01:00:00.000000Z | 1 | +| 2021-10-31T02:00:00.000000Z | 1 | +| 2021-10-31T03:00:00.000000Z | 1 | + +Similarly, given one data point per hour on this table, running `SAMPLE BY 1d` +will have a count of `25` for this day when aligned to calendar time zone +'Europe/London'. + +### WITH OFFSET + +Aligning sampling calculation can be provided an arbitrary offset in the format +`'+/-HH:mm'`, for example: + +- `'00:30'` plus thirty minutes +- `'+00:30'` plus thirty minutes +- `'-00:15'` minus 15 minutes + +The query uses the default offset '00:00' if the parameter is not set. + +```questdb-sql +SELECT ts, count() FROM sensors +SAMPLE BY 1d +ALIGN TO CALENDAR WITH OFFSET '02:00' +``` + +In this case, the 24 hour samples begin at `2021-05-31T02:00:00.000000Z`: + +| ts | count | +| --------------------------- | ----- | +| 2021-05-31T02:00:00.000000Z | 2 | +| 2021-06-01T02:00:00.000000Z | 4 | +| 2021-06-02T02:00:00.000000Z | 1 | + +## Examples + +Assume the following table `trades`: + +| ts | quantity | price | +| --------------------------- | -------- | ------ | +| 2021-05-31T23:45:10.000000Z | 10 | 100.05 | +| 2021-06-01T00:01:33.000000Z | 5 | 100.05 | +| 2021-06-01T00:15:14.000000Z | 200 | 100.15 | +| 2021-06-01T00:30:40.000000Z | 300 | 100.15 | +| 2021-06-01T00:45:20.000000Z | 10 | 100 | +| 2021-06-01T01:00:50.000000Z | 50 | 100.15 | + +This query will return the number of trades per hour: + +```questdb-sql title="Hourly interval" +SELECT ts, count() FROM trades SAMPLE BY 1h; +``` + +| ts | count | +| --------------------------- | ----- | +| 2021-05-31T23:45:10.000000Z | 3 | +| 2021-06-01T00:45:10.000000Z | 1 | +| 2021-05-31T23:45:10.000000Z | 1 | +| 2021-06-01T00:45:10.000000Z | 1 | + +The following will return the trade volume in 30 minute intervals + +```questdb-sql title="30 minute interval" +SELECT ts, sum(quantity*price) FROM trades SAMPLE BY 30m; +``` + +| ts | sum | +| --------------------------- | ------ | +| 2021-05-31T23:45:10.000000Z | 1000.5 | +| 2021-06-01T00:15:10.000000Z | 16024 | +| 2021-06-01T00:45:10.000000Z | 8000 | +| 2021-06-01T00:15:10.000000Z | 8012 | +| 2021-06-01T00:45:10.000000Z | 8000 | + +The following will return the average trade notional (where notional is = q \* +p) by day: + +```questdb-sql title="Daily interval" +SELECT ts, avg(quantity*price) FROM trades SAMPLE BY 1d; +``` + +| ts | avg | +| --------------------------- | ----------------- | +| 2021-05-31T23:45:10.000000Z | 6839.416666666667 | + +To make this sample align to calendar dates: + +```questdb-sql title="Calendar alignment" +SELECT ts, avg(quantity*price) FROM trades SAMPLE BY 1d ALIGN TO CALENDAR; +``` + +| ts | avg | +| --------------------------- | ------ | +| 2021-05-31T00:00:00.000000Z | 1000.5 | +| 
2021-06-01T00:00:00.000000Z | 8007.2 | + +## See also + +This section includes links to additional information such as tutorials: + +- [SQL Extensions for Time-Series Data in QuestDB](/blog/2022/11/23/sql-extensions-time-series-data-questdb-part-ii/) + +- [Three SQL Keywords for Finding Missing Data](/blog/three-sql-keywords-for-finding-missing-data/) diff --git a/docs/reference/sql/select.md b/docs/reference/sql/select.md new file mode 100644 index 00000000..c310c50b --- /dev/null +++ b/docs/reference/sql/select.md @@ -0,0 +1,280 @@ +--- +title: SELECT keyword +sidebar_label: SELECT +description: SELECT SQL keyword reference documentation. +--- + +`SELECT` allows you to specify a list of columns and expressions to be selected +and evaluated from a table. + +## Syntax + +![Flow chart showing the syntax of the SELECT keyword](/img/docs/diagrams/select.svg) + +:::tip + +The `table` can either be in your database (in which case you would pass the +table's name), or the result of a sub-query. + +::: + +## Simple select + +### All columns + +QuestDB supports `SELECT * FROM tablename`. When selecting all, you can also +omit most of the statement and pass the table name. + +The two examples below are equivalent + +```questdb-sql title="QuestDB dialect" +ratings; +``` + +```questdb-sql title="Traditional SQL equivalent" +SELECT * FROM ratings; +``` + +### Specific columns + +To select specific columns, replace \* by the names of the columns you are +interested in. + +Example: + +```questdb-sql +SELECT movieId, rating FROM ratings; +``` + +### Arithmetic expressions + +`SELECT` is capable of evaluating multiple expressions and functions. You can +mix comma separated lists of expressions with the column names you are +selecting. + +```questdb-sql +SELECT movieId, (100 - rating)*2, rating > 3.5 good +FROM ratings; +``` + +The result of `rating > 3.5` is a boolean. The column will be named good and +take values true or false. + +### Aliases + +Using aliases allow you to give expressions or column names of your choice. You +can assign an alias to a column or an expression by writing the alias name you +want after that expression + +:::note + +Alias names and column names must be unique. + +::: + +```questdb-sql +SELECT movieId alias1, rating alias2 +FROM ratings +``` + +## Aggregation + +Supported aggregation functions are listed on the +[aggregation reference](/docs/reference/function/aggregation/). + +### Aggregation by group + +QuestDB evaluates aggregation functions without need for traditional `GROUP BY`. +Use a mix of column names and aggregation functions in a `SELECT` clause. You +can have any number of discrete value columns and any number of aggregation +functions. + +```questdb-sql title="QuestDB dialect" +SELECT movieId, avg(rating), count() +FROM ratings; +``` + +```questdb-sql title="Traditional SQL equivalent" +SELECT movieId, avg(rating), count() +FROM ratings +GROUP BY movieId; +``` + +### Aggregation arithmetic + +Aggregation functions can be used in arithmetic expressions. The following +computes `mid` of rating values for every movie. + +```questdb-sql +SELECT movieId, (min(rating) + max(rating))/2 mid, count() count +FROM ratings; +``` + +:::tip + +Whenever possible, it is recommended to perform arithmetic `outside` of +aggregation functions as this can have a dramatic impact on performance. For +example, `min(value/2)` is going to execute considerably more slowly than +`min(value)/2`, although both return the same result. 
+ +::: + +## Supported clauses + +QuestDB supports the following standard SQL clauses within SELECT statements. + +### CASE + +Conditional results based on expressions. + +#### Syntax + +![Flow chart showing the syntax of CASE](/img/docs/diagrams/case.svg) + +For more information, please refer to the +[CASE reference](/docs/reference/sql/case/) + +### CAST + +Convert values and expression between types. + +#### Syntax + +![Flow chart showing the syntax of the CAST keyword](/img/docs/diagrams/cast.svg) + +For more information, please refer to the +[CAST reference](/docs/reference/sql/cast/) + +### DISTINCT + +Returns distinct values of the specified column(s). + +#### Syntax + +![Flow chart showing the syntax of the DISTINCT keyword](/img/docs/diagrams/distinct.svg) + +For more information, please refer to the +[DISTINCT reference](/docs/reference/sql/distinct/). + +### FILL + +Defines filling strategy for missing data in aggregation queries. This function +complements [SAMPLE BY](/docs/reference/sql/sample-by/) queries. + +#### Syntax + +![Flow chart showing the syntax of the FILL keyword](/img/docs/diagrams/fill.svg) + +For more information, please refer to the +[FILL reference](/docs/reference/sql/fill/). + +### JOIN + +Join tables based on a key or timestamp. + +#### Syntax + +![Flow chart showing the syntax of the high-level syntax of the JOIN keyword](/img/docs/diagrams/joinOverview.svg) + +For more information, please refer to the +[JOIN reference](/docs/reference/sql/join/) + +### LIMIT + +Specify the number and position of records returned by a query. + +#### Syntax + +![Flow chart showing the syntax of the LIMIT keyword](/img/docs/diagrams/limit.svg) + +For more information, please refer to the +[LIMIT reference](/docs/reference/sql/limit/). + +### ORDER BY + +Orders the results of a query by one or several columns. + +#### Syntax + +![Flow chart showing the syntax of the ORDER BY keyword](/img/docs/diagrams/orderBy.svg) + +For more information, please refer to the +[ORDER BY reference](/docs/reference/sql/order-by) + +### UNION, EXCEPT & INTERSECT + +Combine the results of two or more select statements. Can include or ignore +duplicates. + +#### Syntax + +![Flow chart showing the syntax of the UNION, EXCEPT & INTERSECT keyword](/img/docs/diagrams/unionExceptIntersect.svg) + +For more information, please refer to the +[UNION, EXCEPT & INTERSECT reference](/docs/reference/sql/union-except-intersect/) + +### WHERE + +Filters query results + +#### Syntax + +![Flow chart showing the syntax of the WHERE clause](/img/docs/diagrams/where.svg) + +QuestDB supports complex WHERE clauses along with type-specific searches. For +more information, please refer to the +[WHERE reference](/docs/reference/sql/where/). There are different syntaxes for +[text](/docs/reference/sql/where/#symbol-and-string), +[numeric](/docs/reference/sql/where/#numeric), or +[timestamp](/docs/reference/sql/where/#timestamp-and-date) filters. + +## Additional time-series clauses + +QuestDB augments SQL with the following clauses. + +### LATEST ON + +Retrieves the latest entry by timestamp for a given key or combination of keys +This function requires a +[designated timestamp](/docs/concept/designated-timestamp/). + +#### Syntax + +![Flow chart showing the syntax of the LATEST ON keyword](/img/docs/diagrams/latestOn.svg) + +For more information, please refer to the +[LATEST ON reference](/docs/reference/sql/latest-on/). + +### SAMPLE BY + +Aggregates time-series data into homogeneous time chunks. 
For example daily +average, monthly maximum etc. This function requires a +[designated timestamp](/docs/concept/designated-timestamp/). + +#### Syntax + +![Flow chart showing the syntax of the SAMPLE BY keyword](/img/docs/diagrams/sampleBy.svg) + +For more information, please refer to the +[SAMPLE BY reference](/docs/reference/sql/sample-by/). + +### TIMESTAMP + +Dynamically creates a [designated timestamp](/docs/concept/designated-timestamp/) +on the output of a query. This allows to perform timestamp operations like +[SAMPLE BY](#sample-by) or [LATEST ON](#latest-on) on tables which originally do +not have a designated timestamp. + +:::caution + +The output query must be ordered by time. `TIMESTAMP()` does not check for order +and using timestamp functions on unordered data may produce unexpected results. + +::: + +#### Syntax + +![Flow chart showing the syntax of the timestamp function](/img/docs/diagrams/dynamicTimestamp.svg) + +For more information, refer to the +[TIMESTAMP reference](/docs/reference/function/timestamp/) diff --git a/docs/reference/sql/show.md b/docs/reference/sql/show.md new file mode 100644 index 00000000..816c19b3 --- /dev/null +++ b/docs/reference/sql/show.md @@ -0,0 +1,68 @@ +--- +title: SHOW keyword +sidebar_label: SHOW +description: SHOW SQL keyword reference documentation. +--- + +This keyword provides table, column, and partition information including +metadata. The `SHOW` keyword is useful for checking the +[designated timestamp setting](/docs/concept/designated-timestamp/) column, the +[partition attachment settings](/docs/reference/sql/alter-table-attach-partition/), +and partition storage size on disk. + +## Syntax + +![Flow chart showing the syntax of the SHOW keyword](/img/docs/diagrams/show.svg) + +- `SHOW` returns all the tables. +- `SHOW COLUMNS` returns all the columns and their metadata for the selected + table. +- `SHOW PARTITIONS` returns the partition information for the selected table. + +## Examples + +### Show tables + +```questdb-sql +SHOW TABLES; +``` + +| table | +| -------- | +| weather | +| my_table | +| ... 
| + +### Show columns + +```questdb-sql +SHOW COLUMNS FROM my_table; +``` + +| column | type | indexed | indexBlockCapacity | symbolCached | symbolCapacity | designated | +| ------ | --------- | ------- | ------------------ | ------------ | -------------- | ---------- | +| symb | SYMBOL | true | 1048576 | false | 256 | false | +| price | DOUBLE | false | 0 | false | 0 | false | +| ts | TIMESTAMP | false | 0 | false | 0 | true | +| s | STRING | false | 0 | false | 0 | false | + +### Show partitions + +```questdb-sql +SHOW PARTITIONS FROM my_table; +``` + +| index | partitionBy | name | minTimestamp | maxTimestamp | numRows | diskSize | diskSizeHuman | readOnly | active | attached | detached | attachable | +|-------|-------------|------------|-----------------------|-----------------------| ------- |----------|---------------|----------|--------|----------|----------|------------| +| 0 | WEEK | 2022-W52 | 2023-01-01 00:36:00.0 | 2023-01-01 23:24:00.0 | 39 | 98304 | 96.0 KiB | false | false | true | false | false | +| 1 | WEEK | 2023-W01 | 2023-01-02 00:00:00.0 | 2023-01-08 23:24:00.0 | 280 | 98304 | 96.0 KiB | false | false | true | false | false | +| 2 | WEEK | 2023-W02 | 2023-01-09 00:00:00.0 | 2023-01-15 23:24:00.0 | 280 | 98304 | 96.0 KiB | false | false | true | false | false | +| 3 | WEEK | 2023-W03 | 2023-01-16 00:00:00.0 | 2023-01-18 12:00:00.0 | 101 | 83902464 | 80.0 MiB | false | true | true | false | false | + +## See also + +The following functions allow querying tables with filters and using the results as part of a function: + +- [table_columns()](/docs/reference/function/meta/#table_columns) +- [tables()](/docs/reference/function/meta/#all_tables) +- [table_partitions()](/docs/reference/function/meta/#table_partitions) \ No newline at end of file diff --git a/docs/reference/sql/snapshot.md b/docs/reference/sql/snapshot.md new file mode 100644 index 00000000..0ce8cb63 --- /dev/null +++ b/docs/reference/sql/snapshot.md @@ -0,0 +1,99 @@ +--- +title: SNAPSHOT keyword +sidebar_label: SNAPSHOT +description: SNAPSHOT SQL keyword reference documentation. +--- + +Prepares the database for a filesystem (disk) snapshot. + +:::note + +Snapshot statements are not supported on Windows OS. + +::: + +## Syntax + +![Flow chart showing the syntax of the SNAPSHOT keyword](/img/docs/diagrams/snapshot.svg) + +## Snapshot process + +Snapshot recovery mechanism requires a **snapshot instance ID** to be specified +using the `cairo.snapshot.instance.id` +[configuration key](/docs/reference/configuration/): + +```shell title="server.conf" +cairo.snapshot.instance.id=your_id +``` + +A snapshot instance ID may be an arbitrary string value, such as string +representation of a UUID. + +Collecting a snapshot of the database involves the following steps: + +1. Run `SNAPSHOT PREPARE` statement to acquire reader locks for all database + tables, create table metadata file copies in the `snapshot` directory, and + flush the committed data to disk. +2. Start a filesystem snapshot. Refer to the + [next section](#filesystem-snapshot) to learn how to create a filesystem + snapshot on the most common cloud providers. +3. Run `SNAPSHOT COMPLETE` statement to release the reader locks and delete the + metadata file copies. + +For some cloud vendors, snapshot creation operation is asynchronous, i.e. the +point-in-time snapshot is created immediately, as soon as the operation starts, +but the end snapshot artifact may become available later. 
In such case, the +`SNAPSHOT COMPLETE` statement (step 3) may be run without waiting for the end +artifact, but once the snapshot creation has started. + +:::caution + +No DDL statements, such as `ALTER TABLE my_table DROP COLUMN my_col`, should be +run in parallel with the above steps. Otherwise, the snapshot may contain +corrupted metadata making it unusable. + +::: + +## Filesystem snapshot + +The most common ways to perform cloud-native filesystem snapshots are described +in the following resources, which rely on similar steps but have minor +differences in terminology and services: + +- [AWS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-creating-snapshot.html) - + creating EBS snapshots +- [Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/snapshot-copy-managed-disk?tabs=portal) - + creating snapshots of a virtual hard disk +- [GCP](https://cloud.google.com/compute/docs/disks/create-snapshots) - working + with persistent disk snapshots + +## Snapshot recovery + +To start the database on a filesystem snapshot, you should make sure to +configure a different snapshot instance ID. + +When the database starts, it checks the current instance ID and the ID stored in +the `snapshot` directory, if present. On IDs mismatch, the database runs a +snapshot recovery procedure restoring the metadata files from the snapshot. When +this happens, you should see something like the following in the server logs: + +``` +2022-03-07T08:24:12.348004Z I i.q.g.DatabaseSnapshotAgent starting snapshot recovery [currentId=`id2`, previousId=`id1`] +... +2022-03-07T08:24:12.349922Z I i.q.g.DatabaseSnapshotAgent snapshot recovery finished [metaFilesCount=1, txnFilesCount=1, cvFilesCount=1] +``` + +Snapshot recovery can be disabled using the `cairo.snapshot.recovery.enabled` +configuration key: + +```shell title="server.conf" +cairo.snapshot.recovery.enabled=false +``` + +## Examples + +```questdb-sql +SNAPSHOT PREPARE; +-- Start a filesystem snapshot. +SNAPSHOT COMPLETE; +``` diff --git a/docs/reference/sql/truncate.md b/docs/reference/sql/truncate.md new file mode 100644 index 00000000..07d87258 --- /dev/null +++ b/docs/reference/sql/truncate.md @@ -0,0 +1,29 @@ +--- +title: TRUNCATE TABLE keyword +sidebar_label: TRUNCATE TABLE +description: TRUNCATE SQL keyword reference documentation. +--- + +`TRUNCATE TABLE` permanently deletes the contents of a table without +deleting the table itself. + +## Syntax + +![Flow chart showing the syntax of the TRUNCATE TABLE keyword](/img/docs/diagrams/truncateTable.svg) + +## Notes + +This command irremediably deletes the data in the target table. In doubt, make +sure you have created [backups](/docs/reference/sql/backup/) of your data. + +## Examples + +```questdb-sql +TRUNCATE TABLE ratings; +``` + +## See also + +To delete both the data and the table structure, use +[DROP](/docs/reference/sql/drop/). + diff --git a/docs/reference/sql/union-except-intersect.md b/docs/reference/sql/union-except-intersect.md new file mode 100644 index 00000000..e256bf2b --- /dev/null +++ b/docs/reference/sql/union-except-intersect.md @@ -0,0 +1,263 @@ +--- +title: UNION EXCEPT INTERSECT keywords +sidebar_label: UNION EXCEPT INTERSECT +description: UNION, EXCEPT, and INTERSECT SQL keyword reference documentation. +--- + +## Overview + +`UNION`, `EXCEPT`, and `INTERSECT` perform set operations. + +`UNION` is used to combine the results of two or more queries. + +`EXCEPT` and `INTERSECT` return distinct rows by comparing the results of two +queries. 
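+
+As a quick sketch, assuming two hypothetical tables (or sub-queries) `q1` and
+`q2` that return compatible columns, the three operations can be written as:
+
+```questdb-sql
+-- rows returned by either query, with duplicates removed
+q1 UNION q2;
+
+-- rows returned by q1 that are not returned by q2
+q1 EXCEPT q2;
+
+-- rows returned by both queries
+q1 INTERSECT q2;
+```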
+ +To work properly, all of the following must be true: + +- Each query statement should return the same number of column. +- Each column to be combined should have data types that are either the same, or + supported by `implicit cast`. See [CAST](/docs/reference/sql/cast/) for more + information. +- Columns in each query statement should be in the same order. + +## Syntax + +### UNION + +![Flow chart showing the syntax of the UNION, EXCEPT & INTERSECT keyword](/img/docs/diagrams/unionExceptIntersect.svg) + +- `UNION` returns distinct results. +- `UNION ALL` returns all `UNION` results including duplicates. +- `EXCEPT` returns distinct rows from the left input query that are not returned + by the right input query. +- `EXCEPT ALL` returns all `EXCEPT` results including duplicates. +- `INTERSECT` returns distinct rows that are returned by both input queries. +- `INTERSECT ALL` returns all `INTERSECT` results including duplicates. + +## Examples + +The examples for the set operations use the following tables: + +sensor_1: + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | +| 1 | Honeywell | New York | + +Notice that the last row in the sensor_1 table is a duplicate. + +sensor_2: + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | San Francisco | +| 2 | United Automation | Boston | +| 3 | Eberle | New York | +| 4 | Honeywell | Boston | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | + +### UNION + +```questdb-sql +sensor_1 UNION sensor_2; +``` + +returns + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | +| 1 | Honeywell | San Francisco | +| 2 | United Automation | Boston | +| 3 | Eberle | New York | +| 4 | Honeywell | Boston | + +`UNION` eliminates duplication even when one of the queries returns nothing. + +For instance: + +```questdb-sql +sensor_1 +UNION +sensor_2 WHERE ID > 10; +``` + +returns: + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | + +The duplicate row in `sensor_1` is not returned as a result. + +```questdb-sql +sensor_1 UNION ALL sensor_2; +``` + +returns + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | +| 1 | Honeywell | San Francisco | +| 2 | United Automation | Boston | +| 3 | Eberle | New York | +| 4 | Honeywell | Boston | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | + +### EXCEPT + +```questdb-sql +sensor_1 EXCEPT sensor_2; +``` + +returns + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | + +Notice that `EXCEPT` eliminates duplicates. Let's run `EXCEPT ALL` to change +that. 
+ +```questdb-sql +sensor_1 EXCEPT ALL sensor_2; +``` + +| ID | make | city | +| --- | ----------------- | ------------- | +| 1 | Honeywell | New York | +| 2 | United Automation | Miami | +| 3 | Omron | Miami | +| 4 | Honeywell | San Francisco | +| 1 | Honeywell | New York | + +### INTERSECT + +```questdb-sql +sensor_1 INTERSECT sensor_2; +``` + +returns + +| ID | make | city | +| --- | ------ | ------ | +| 5 | Omron | Boston | +| 6 | RS Pro | Boston | + +In this example we have no duplicates, but if there were any, we could use +`INTERSECT ALL` to have them. + +## Keyword execution priority + +The QuestDB's engine processes the keywords from left to right, unless the +priority is defined by parenthesis. + +For example: + +```questdb-sql +query_1 UNION query_2 EXCEPT query_3; +``` + +is executed as: + +```questdb-sql +(query_1 UNION query_2) EXCEPT query_3; +``` + +Similarly, the following syntax: + +```questdb-sql +query_1 UNION query_2 INTERSECT query_3; +``` + +is executed as: + +```questdb-sql +(query_1 UNION query_2) INTERSECT query_3; +``` + +## Clauses + +The set operations can be used with clauses such as `LIMIT`, `ORDER BY`, and +`WHERE`. However, when the clause keywords are added after the set operations, +the execution order for different clauses varies. + +For `LIMIT` and `ORDER BY`, the clauses are applied after the set operations. + +For example: + +```questdb-sql +query_1 UNION query_2 +LIMIT 3; +``` + +is executed as: + +```questdb-sql +(query_1 UNION query_2) +LIMIT 3; +``` + +For `WHERE`, the clause is applied first to the query immediate prior to it. + +```questdb-sql +query_1 UNION query_2 +WHERE value = 1; +``` + +is executed as: + +```questdb-sql +query_1 UNION (query_2 WHERE value = 1); +``` + +:::note + +- QuestDB applies `GROUP BY` implicitly. See + [GROUP BY reference](/docs/reference/sql/group-by/) for more information. +- Quest does not support the clause `HAVING` yet. + +::: + +## Alias + +When different aliases are used with set operations, the execution follows a +left-right order and the output uses the first alias. + +For example: + +```questdb-sql +SELECT alias_1 FROM table_1 +UNION +SELECT alias_2 FROM table_2; +``` + +The output shows `alias_1`. diff --git a/docs/reference/sql/update.md b/docs/reference/sql/update.md new file mode 100644 index 00000000..3e3e7153 --- /dev/null +++ b/docs/reference/sql/update.md @@ -0,0 +1,58 @@ +--- +title: UPDATE keyword +sidebar_label: UPDATE +description: UPDATE SQL keyword reference documentation. +--- + +Updates data in a database table. + +## Syntax + +![Flow chart showing the syntax of the UPDATE keyword](/img/docs/diagrams/update.svg) + +:::note + +- the same `columnName` cannot be specified multiple times after the SET keyword + as it would be ambiguous +- the designated timestamp column cannot be updated as it would lead to altering + history of the time-series data +- If the target partition is + [attached by a symbolic link](/docs/reference/sql/alter-table-attach-partition/#symbolic-links), + the partition is read-only. `UPDATE` operation on a read-only partition will + fail and generate an error. 
+ +::: + +## Examples + +```questdb-sql title="Update with constant" +UPDATE trades SET price = 125.34 WHERE symbol = 'AAPL'; +``` + +```questdb-sql title="Update with function" +UPDATE book SET mid = (bid + ask)/2 WHERE symbol = 'AAPL'; +``` + +```questdb-sql title="Update with join" +UPDATE spreads s SET s.spread = p.ask - p.bid FROM prices p WHERE s.symbol = p.symbol; +``` + +```questdb-sql title="Update with multiple joins" +UPDATE spreads s +SET s.spread = p.ask - p.bid +FROM prices p +JOIN instruments i ON p.symbol = i.symbol +WHERE s.timestamp = p.timestamp AND i.type = 'BOND'; +``` + +```questdb-sql title="Update with a sub-query" +WITH up AS ( + SELECT symbol, spread, ts + FROM temp_spreads + WHERE timestamp between '2022-01-02' and '2022-01-03' +) +UPDATE spreads s +SET spread = up.spread +FROM up +WHERE up.ts = s.ts AND s.symbol = up.symbol; +``` diff --git a/docs/reference/sql/vacuum-table.md b/docs/reference/sql/vacuum-table.md new file mode 100644 index 00000000..6c40bf3e --- /dev/null +++ b/docs/reference/sql/vacuum-table.md @@ -0,0 +1,41 @@ +--- +title: VACUUM TABLE +sidebar_label: VACUUM TABLE +description: VACUUM TABLE SQL keyword reference documentation +--- + +`VACUUM TABLE` reclaims storage by scanning file systems and deleting duplicate +directories and files. + +## Syntax + +![Flow chart showing Vacuum Table syntax](/img/docs/diagrams/vacuumTable.svg) + +## Description + +This command provides a manual mechanism to reclaim the disk space. The +implementation scans file system to detect duplicate directories and files. +Frequent usage of the command can be relatively expensive. Thus, `VACUUM TABLE` +has to be executed sparingly. + +When a table is appended in an out-of-order manner, the `VACUUM TABLE` command +writes a new partition version to the disk. The old partition version directory +is deleted once it is not read by `SELECT` queries. In the event of file system +errors, physical deletion of old files may be interrupted and an outdated +partition version may be left behind consuming the disk space. + +When an `UPDATE` SQL statement is run, it copies column files of the selected +table. The old column files are automatically deleted but in certain +circumstances, they can be left behind. In this case, `VACUUM TABLE` can be used +to re-trigger the deletion process of the old column files. + +The `VACUUM TABLE` command starts a new scan over table partition directories +and column files. It detects redundant, unused files consuming the disk space +and deletes them. `VACUUM TABLE` executes asynchronously, i.e. it may keep +scanning and deleting files after their response is returned to the SQL client. + +## Example + +```questdb-sql +VACUUM TABLE trades; +``` diff --git a/docs/reference/sql/where.md b/docs/reference/sql/where.md new file mode 100644 index 00000000..f80d8c29 --- /dev/null +++ b/docs/reference/sql/where.md @@ -0,0 +1,432 @@ +--- +title: WHERE keyword +sidebar_label: WHERE +description: WHERE SQL keyword reference documentation. +--- + +`WHERE` clause filters data. Filter expressions are required to return boolean +result. + +QuestDB includes a [JIT compiler](/docs/concept/jit-compiler/) for SQL queries which contain `WHERE` clauses. + +## Syntax + +The general syntax is as follows. Specific filters have distinct syntaxes +detailed thereafter. 
+ +![Flow chart showing the syntax of the WHERE clause](/img/docs/diagrams/where.svg) + +### Logical operators + +QuestDB supports `AND`, `OR`, `NOT` as logical operators and can assemble +conditions using brackets `()`. + +![Flow chart showing the detailed syntax of the WHERE clause](/img/docs/diagrams/whereComplex.svg) + +```questdb-sql title="Example" +SELECT * FROM table +WHERE +a = 1 AND (b = 2 OR c = 3 AND NOT d); +``` + +## Symbol and string + +QuestDB can filter strings and symbols based on equality, inequality, and +regular expression patterns. + +### Exact match + +Evaluates match of a string or symbol. + +![Flow chart showing the syntax of the WHERE clause with a string comparison](/img/docs/diagrams/whereExactString.svg) + +```questdb-sql title="Example" +SELECT * FROM users +WHERE name = 'John'; +``` + +| name | age | +| ---- | --- | +| John | 31 | +| John | 45 | +| ... | ... | + +### Does NOT match + +Evaluates mismatch of a string or symbol. + +![Flow chart showing the syntax of the WHERE clause with a string comparison](/img/docs/diagrams/whereStringNotMatch.svg) + +```questdb-sql title="Example" +SELECT * FROM users +WHERE name != 'John'; +``` + +| name | age | +| ---- | --- | +| Tim | 31 | +| Tom | 45 | +| ... | ... | + +### Regular expression match + +Evaluates match against a regular expression defined using +[java.util.regex](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html) +patterns. + +![Flow chart showing the syntax of the WHERE clause with a regex comparison](/img/docs/diagrams/whereRegexMatch.svg) + +```questdb-sql title="Regex example" +SELECT * FROM users WHERE name ~ 'Jo'; +``` + +| name | age | +| -------- | --- | +| Joe | 31 | +| Jonathan | 45 | +| ... | ... | + +### Regular expression does NOT match + +Evaluates mismatch against a regular expression defined using +[java.util.regex](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html) +patterns. + +![Flow chart showing the syntax of the WHERE clause with a regex comparison](/img/docs/diagrams/whereRegexNotMatch.svg) + +```questdb-sql title="Example" +SELECT * FROM users WHERE name !~ 'Jo'; +``` + +| name | age | +| ---- | --- | +| Tim | 31 | +| Tom | 45 | +| ... | ... | + +### List search + +Evaluates match or mismatch against a list of elements. + +![Flow chart showing the syntax of the WHERE clause with a list comparison](/img/docs/diagrams/whereListIn.svg) + +```questdb-sql title="List match" +SELECT * FROM users WHERE name in('Tim', 'Tom'); +``` + +| name | age | +| ---- | --- | +| Tim | 31 | +| Tom | 45 | +| ... | ... | + +```questdb-sql title="List mismatch" +SELECT * FROM users WHERE NOT name in('Tim', 'Tom'); +``` + +| name | age | +| ------ | --- | +| Aaron | 31 | +| Amelie | 45 | +| ... | ... | + +## Numeric + +QuestDB can filter numeric values based on equality, inequality, comparison, and +proximity. + +:::note + +For timestamp filters, we recommend the +[timestamp search notation](#timestamp-and-date) which is faster and less +verbose. 
+ +::: + +### Equality, inequality and comparison + +![Flow chart showing the syntax of the WHERE clause with a numeric comparison](/img/docs/diagrams/whereNumericValue.svg) + +```questdb-sql title="Superior or equal to 23" +SELECT * FROM users WHERE age >= 23; +``` + +```questdb-sql title="Equal to 23" +SELECT * FROM users WHERE age = 23; +``` + +```questdb-sql title="NOT Equal to 23" +SELECT * FROM users WHERE age != 23; +``` + + + +## Boolean + +![Flow chart showing the syntax of the WHERE clause with a boolean comparison](/img/docs/diagrams/whereBoolean.svg) + +Using the columnName will return `true` values. To return `false` values, +precede the column name with the `NOT` operator. + +```questdb-sql title="Example - true" +SELECT * FROM users WHERE isActive; +``` + +| userId | isActive | +| ------ | -------- | +| 12532 | true | +| 38572 | true | +| ... | ... | + +```questdb-sql title="Example - false" +SELECT * FROM users WHERE NOT isActive; +``` + +| userId | isActive | +| ------ | -------- | +| 876534 | false | +| 43234 | false | +| ... | ... | + +## Timestamp and date + +QuestDB supports both its own timestamp search notation and standard search +based on inequality. This section describes the use of the **timestamp search +notation** which is efficient and fast but requires a +[designated timestamp](/docs/concept/designated-timestamp/). + +If a table does not have a designated timestamp applied during table creation, +one may be applied dynamically +[during a select operation](/docs/reference/function/timestamp/#during-a-select-operation). + +### Native timestamp format + +QuestDB automatically recognizes strings formatted as ISO timestamp as a +`timestamp` type. The following are valid examples of strings parsed as +`timestamp` types: + +| Valid STRING Format | Resulting Timestamp | +| -------------------------------- | --------------------------- | +| 2010-01-12T12:35:26.123456+01:30 | 2010-01-12T11:05:26.123456Z | +| 2010-01-12T12:35:26.123456+01 | 2010-01-12T11:35:26.123456Z | +| 2010-01-12T12:35:26.123456Z | 2010-01-12T12:35:26.123456Z | +| 2010-01-12T12:35:26.12345 | 2010-01-12T12:35:26.123450Z | +| 2010-01-12T12:35:26.1234 | 2010-01-12T12:35:26.123400Z | +| 2010-01-12T12:35:26.123 | 2010-01-12T12:35:26.123000Z | +| 2010-01-12T12:35:26.12 | 2010-01-12T12:35:26.120000Z | +| 2010-01-12T12:35:26.1 | 2010-01-12T12:35:26.100000Z | +| 2010-01-12T12:35:26 | 2010-01-12T12:35:26.000000Z | +| 2010-01-12T12:35 | 2010-01-12T12:35:00.000000Z | +| 2010-01-12T12 | 2010-01-12T12:00:00.000000Z | +| 2010-01-12 | 2010-01-12T00:00:00.000000Z | +| 2010-01 | 2010-01-01T00:00:00.000000Z | +| 2010 | 2010-01-01T00:00:00.000000Z | +| 2010-01-12 12:35:26.123456-02:00 | 2010-01-12T14:35:26.123456Z | +| 2010-01-12 12:35:26.123456Z | 2010-01-12T12:35:26.123456Z | +| 2010-01-12 12:35:26.123 | 2010-01-12T12:35:26.123000Z | +| 2010-01-12 12:35:26.12 | 2010-01-12T12:35:26.120000Z | +| 2010-01-12 12:35:26.1 | 2010-01-12T12:35:26.100000Z | +| 2010-01-12 12:35:26 | 2010-01-12T12:35:26.000000Z | +| 2010-01-12 12:35 | 2010-01-12T12:35:00.000000Z | + +### Exact timestamp + +#### Syntax + +![Flow chart showing the syntax of the WHERE clause with a timestamp comparison](/img/docs/diagrams/whereTimestampExact.svg) + +```questdb-sql title="Timestamp equals date" +SELECT scores WHERE ts = '2010-01-12T00:02:26.000Z'; +``` + +| ts | score | +| ------------------------ | ----- | +| 2010-01-12T00:02:26.000Z | 2.4 | +| 2010-01-12T00:02:26.000Z | 3.1 | +| ... | ... 
| + +```questdb-sql title="Timestamp equals timestamp" +SELECT scores WHERE ts = '2010-01-12T00:02:26.000000Z'; +``` + +| ts | score | +| --------------------------- | ----- | +| 2010-01-12T00:02:26.000000Z | 2.4 | +| 2010-01-12T00:02:26.000000Z | 3.1 | +| ... | ... | + +### Time range + +Returns results within a defined range. + +#### Syntax + +![Flow chart showing the syntax of the WHERE clause with a partial timestamp comparison](/img/docs/diagrams/whereTimestampPartial.svg) + +```questdb-sql title="Results in a given year" +SELECT * FROM scores WHERE ts IN '2018'; +``` + +| ts | score | +| --------------------------- | ----- | +| 2018-01-01T00:0000.000000Z | 123.4 | +| ... | ... | +| 2018-12-31T23:59:59.999999Z | 115.8 | + +```questdb-sql title="Results in a given minute" +SELECT * FROM scores WHERE ts IN '2018-05-23T12:15'; +``` + +| ts | score | +| --------------------------- | ----- | +| 2018-05-23T12:15:00.000000Z | 123.4 | +| ... | ... | +| 2018-05-23T12:15:59.999999Z | 115.8 | + +### Time range with modifier + +You can apply a modifier to further customize the range. The modifier extends +the upper bound of the original timestamp based on the modifier parameter. An +optional interval with occurrence can be set, to apply the search in the given +time range repeatedly, for a set number of times. + +#### Syntax + +![Flow chart showing the syntax of the WHERE clause with a timestamp/modifier comparison](/img/docs/diagrams/whereTimestampIntervalSearch.svg) + +- `timestamp` is the original time range for the query. +- `modifier` is a signed integer modifying the upper bound applying to the +`timestamp`: + + - A `positive` value extends the selected period. + - A `negative` value reduces the selected period. + +- `interval` is an unsigned integer indicating the desired interval period for + the time range. +- `repetition` is an unsigned integer indicating the number of times the + interval should be applied. + +#### Examples + +Modifying the range: + +```questdb-sql title="Results in a given year and the first month of the next year" +SELECT * FROM scores WHERE ts IN '2018;1M'; +``` + +The range is 2018. The modifier extends the upper bound (originally 31 Dec 2018) +by one month. + +| ts | score | +| --------------------------- | ----- | +| 2018-01-01T00:00:00.000000Z | 123.4 | +| ... | ... | +| 2019-01-31T23:59:59.999999Z | 115.8 | + +```questdb-sql title="Results in a given month excluding the last 3 days" +SELECT * FROM scores WHERE ts IN '2018-01;-3d'; +``` + +The range is Jan 2018. The modifier reduces the upper bound (originally 31 +Jan 2018) by 3 days. + +| ts | score | +| --------------------------- | ----- | +| 2018-01-01T00:00:00.000000Z | 123.4 | +| ... | ... | +| 2018-01-28T23:59:59.999999Z | 113.8 | + + +Modifying the interval: + +```questdb-sql title="Results on a given date with an interval" +SELECT * FROM scores WHERE ts IN '2018-01-01;1d;1y;2'; + +``` + +The range is extended by one day from Jan 1 2018, with a one-year interval, +repeated twice. This means that the query searches for results on Jan 1-2 in +2018 and in 2019: + +| ts | score | +| --------------------------- | ----- | +| 2018-01-01T00:00:00.000000Z | 123.4 | +| ... | ... | +| 2018-01-02T23:59:59.999999Z | 110.3 | +| 2019-01-01T00:00:00.000000Z | 128.7 | +| ... | ... | +| 2019-01-02T23:59:59.999999Z | 103.8 | + +### IN with multiple arguments + +#### Syntax + +`IN` with more than 1 argument is treated as standard SQL `IN`. It is a +shorthand of multiple `OR` conditions, i.e. 
the following query:
+
+```questdb-sql title="IN list"
+SELECT * FROM scores
+WHERE ts IN ('2018-01-01', '2018-01-01T12:00', '2018-01-02');
+```
+
+is equivalent to:
+
+```questdb-sql title="IN list equivalent OR"
+SELECT * FROM scores
+WHERE ts = '2018-01-01' OR ts = '2018-01-01T12:00' OR ts = '2018-01-02';
+```
+
+| ts                          | value |
+| --------------------------- | ----- |
+| 2018-01-01T00:00:00.000000Z | 123.4 |
+| 2018-01-01T12:00:00.000000Z | 589.1 |
+| 2018-01-02T00:00:00.000000Z | 131.5 |
+
+### BETWEEN
+
+#### Syntax
+
+For non-standard ranges, users can explicitly specify the target range using the
+`BETWEEN` operator. As with standard SQL, both upper and lower bounds of
+`BETWEEN` are inclusive, and the order of lower and upper bounds is not
+important so that `BETWEEN X AND Y` is equivalent to `BETWEEN Y AND X`.
+
+```questdb-sql title="Explicit range"
+SELECT * FROM scores
+WHERE ts BETWEEN '2018-01-01T00:00:23.000000Z' AND '2018-01-01T00:00:23.500000Z';
+```
+
+| ts                          | value |
+| --------------------------- | ----- |
+| 2018-01-01T00:00:23.000000Z | 123.4 |
+| ...                         | ...   |
+| 2018-01-01T00:00:23.500000Z | 131.5 |
+
+`BETWEEN` can accept non-constant bounds, for example, the following query will
+return all records from the year preceding the current date:
+
+```questdb-sql title="One year before current date"
+SELECT * FROM scores
+WHERE ts BETWEEN to_str(now(), 'yyyy-MM-dd')
+AND dateadd('y', -1, to_str(now(), 'yyyy-MM-dd'));
+```
diff --git a/docs/reference/sql/with.md b/docs/reference/sql/with.md
new file mode 100644
index 00000000..69ac3a3f
--- /dev/null
+++ b/docs/reference/sql/with.md
@@ -0,0 +1,63 @@
+---
+title: WITH keyword
+sidebar_label: WITH
+description: WITH SQL keyword reference documentation.
+---
+
+Supports Common Table Expressions (CTEs), i.e., naming one or several
+sub-queries to be used with a [`SELECT`](/docs/reference/sql/select/),
+[`INSERT`](/docs/reference/sql/insert/), or
+[`UPDATE`](/docs/reference/sql/update/) query.
+
+Using a CTE makes it easy to simplify large or complex statements which involve
+sub-queries, particularly when such sub-queries are used several times.
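+
+For instance, the following sketch (assuming a hypothetical `trades` table with
+`price` and `ts` columns) names a sub-query once and then reuses it both in a
+second CTE and in the final `SELECT`:
+
+```questdb-sql
+WITH
+  daily AS (SELECT * FROM trades WHERE ts IN '2022-01-02'),
+  daily_avg AS (SELECT avg(price) average FROM daily)
+SELECT daily.price, daily_avg.average
+FROM daily CROSS JOIN daily_avg;
+```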
+ +## Syntax + +![Flow chart showing the syntax of the WITH clause](/img/docs/diagrams/with.svg) + +Where: + +- `alias` is the name given to the sub-query for ease of reusing +- `subQuery` is a SQL query (e.g `SELECT * FROM table`) + +## Examples + +```questdb-sql title="Single alias" +WITH first_10_users AS (SELECT * FROM users limit 10) +SELECT user_name FROM first_10_users; +``` + +```questdb-sql title="Using recursively" +WITH first_10_users AS (SELECT * FROM users limit 10), +first_5_users AS (SELECT * FROM first_10_users limit 5) +SELECT user_name FROM first_5_users; +``` + +```questdb-sql title="Flag whether individual trips are longer or shorter than average" +WITH avg_distance AS (SELECT avg(trip_distance) average FROM trips) +SELECT pickup_datetime, trips.trip_distance > avg_distance.average longer_than_average +FROM trips CROSS JOIN avg_distance; +``` + +```questdb-sql title="Update with a sub-query" +WITH up AS ( + SELECT symbol, spread, ts + FROM temp_spreads + WHERE timestamp between '2022-01-02' and '2022-01-03' +) +UPDATE spreads s +SET spread = up.spread +FROM up +WHERE up.ts = s.ts AND s.symbol = up.symbol; +``` + +```questdb-sql title="Insert with a sub-query" +WITH up AS ( + SELECT symbol, spread, ts + FROM temp_spreads + WHERE timestamp between '2022-01-02' and '2022-01-03' +) +INSERT INTO spreads +SELECT * FROM up; +``` diff --git a/docs/third-party-tools.md b/docs/third-party-tools.md new file mode 100644 index 00000000..b382c6ea --- /dev/null +++ b/docs/third-party-tools.md @@ -0,0 +1,38 @@ +--- +title: Third Party Tools +description: + QuestDB integrates well with a number of third party tools. This page lists + some of the most popular integrations. +--- + +QuestDB integrates well with a number of third-party tools. + +The following is a list of our most popular integrations: + +- [Grafana](./third-party-tools/grafana.md): Create stunning dashboards and + interactive graphs for time-series data visualization. +- [Superset](./third-party-tools/superset.md): Create interactive visualizations + and perform ad-hoc data analysis on time-series data. +- [Apache Kafka](./third-party-tools/kafka/overview.md): Ingest, store, and + process high-throughput data streams in real time. +- [Pandas](./third-party-tools/pandas.md): Easily work with and analyze + time-series data in Python. +- [Telegraf](./third-party-tools/telegraf.md): Collect and report metrics from + various sources to QuestDB for storage and analysis. +- [Prometheus](./third-party-tools/prometheus.md): Store and analyze monitoring + metrics efficiently. +- [Redpanda](./third-party-tools/redpanda.md): Manage and process large-scale, + real-time data streams seamlessly. +- [Apache Flink](./third-party-tools/flink.md): Process real-time data streams + efficiently. +- [Apache Spark](./third-party-tools/spark.md): Perform complex data processing + tasks on time-series data at scale. +- [MindsDB](./third-party-tools/mindsdb.md): Perform advanced predictive + analytics and build machine learning models on time-series data. +- [SQLAlchemy](./third-party-tools/sqlalchemy.md): Interact with QuestDB using + Python's ORM capabilities. +- [qStudio](./third-party-tools/qstudio.md): free SQL GUI for running SQL + scripts, easy browsing of tables, charting and exporting results. + +Is there something missing? Please let us know by opening an issue on +[QuestDB Github]({@githubUrl@}/issues/new/choose). 
diff --git a/docs/third-party-tools/cube.md b/docs/third-party-tools/cube.md
new file mode 100644
index 00000000..57d71171
--- /dev/null
+++ b/docs/third-party-tools/cube.md
@@ -0,0 +1,91 @@
+---
+title: "Cube"
+description:
+  Guide for integrating QuestDB with Cube, an API-first semantic layer that
+  connects data sources to data applications.
+---
+
+Cube is middleware that connects your data sources and your data applications.
+Cube provides an API-first semantic layer for consolidating, caching, and
+securing connections. Instead of having independent lines between data stores
+and analytics, business, or AI tools, Cube consolidates the complexity of
+overall data modelling and cross-source data exchange into a cleaner interface.
+
+QuestDB, a high-performance time-series database, and Cube are a strong pair.
+Together, they efficiently bridge your QuestDB data to the many applications
+and libraries which integrate with Cube.
+
+This document is a quick start designed to get both applications running
+together. For a deeper tutorial, see
+[Time Series Data Analytics with QuestDB and Cube](/blog/2022/04/26/time-series-data-analytics-with-questdb-and-cube/).
+
+## Prerequisites
+
+- [Docker](https://docs.docker.com/get-docker/)
+- [QuestDB](/get-questdb)
+- [Cube](https://github.com/cube-js/cube)
+
+## Start Cube & QuestDB
+
+Run QuestDB and Cube through Docker.
+
+First, create and enter an example directory:
+
+```shell
+mkdir questdb-cube && cd $_
+```
+
+### docker-compose.yml
+
+Next, create a `docker-compose.yml` file within the project directory:
+
+```yaml title=docker-compose.yml
+version: "2.2"
+
+services:
+  cube:
+    environment:
+      - CUBEJS_DEV_MODE=true
+    image: "cubejs/cube:latest"
+    ports:
+      - "4000:4000"
+    volumes:
+      - ".:/cube/conf"
+  questdb:
+    container_name: questdb
+    hostname: questdb
+    image: "questdb/questdb:latest"
+    ports:
+      - "9000:9000"
+      - "8812:8812"
+```
+
+Within your project directory, create an `.env` file.
+
+The following variables will allow Cube to connect to your QuestDB deployment.
+
+Remember: default passwords are dangerous! We recommend altering them.
+
+```shell title=.env
+CUBEJS_DB_HOST=questdb
+CUBEJS_DB_PORT=8812
+CUBEJS_DB_NAME=qdb
+CUBEJS_DB_USER=admin
+CUBEJS_DB_PASS=quest
+CUBEJS_DB_TYPE=questdb
+```
+
+Finally, bring it all up with Docker:
+
+```bash title=shell
+docker-compose up -d
+```
+
+## Access QuestDB & Cube
+
+Both applications are now up and ready.
+
+- QuestDB: http://localhost:9000
+- Cube: http://localhost:4000
+
+Not sure what to do next? Check out
+[our tutorial](/blog/2022/04/26/time-series-data-analytics-with-questdb-and-cube/)
+for inspiration.
diff --git a/docs/third-party-tools/flink.md b/docs/third-party-tools/flink.md
new file mode 100644
index 00000000..73284d07
--- /dev/null
+++ b/docs/third-party-tools/flink.md
@@ -0,0 +1,195 @@
+---
+title: QuestDB Flink connector
+sidebar_label: Flink
+description:
+  QuestDB ships a QuestDB Flink connector for ingesting messages from Apache
+  Flink via the InfluxDB Line Protocol.
+---
+
+import InterpolateReleaseData from "../../src/components/InterpolateReleaseData"
+import CodeBlock from "@theme/CodeBlock"
+
+[Apache Flink](https://flink.apache.org/) is a popular framework and
+[stream processing](/glossary/stream-processing) engine. QuestDB ships a
+[QuestDB Flink Sink connector](https://github.com/questdb/flink-questdb-connector)
+for fast ingestion from Apache Flink into QuestDB. The connector implements the
+[Table API and SQL](https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/dev/table/overview/)
+for Flink.
+ +![Apache Flink logo](/img/logos/flink.svg) + +## Quick start + +This section shows the steps to use the QuestDB Flink connector to ingest data +from Flink into QuestDB. The connector uses the SQL interface to interact with +Flink. The overall steps are the followings: + +1. The connector creates a table in Flink backed by QuestDB. +2. The connector inserts data into the table. +3. Finally it queries the data in QuestDB. + +### Prerequisites + +- A local JDK version 11 installation +- Docker for running QuestDB + +### Connector installation + +- Start the QuestDB container image: + + ( + + {`docker run -p 9000:9000 -p 9009:9009 questdb/questdb:${release.name}`} + + )} + /> + +- Download [Apache Flink distribution](https://flink.apache.org/downloads/) and + unpack it. +- [Download](https://repo1.maven.org/maven2/org/questdb/flink-questdb-connector/0.2/flink-questdb-connector-0.2.jar) + the QuestDB Flink connector from Maven Central and place it in the `lib` + directory of your Flink installation. +- Go to the `bin` directory of your Flink installation and run the following to + start a Flink cluster: + + ```shell + ./start-cluster.sh + ``` + +- While still in the `bin` directory, start a Flink SQL console by running: + + ```shell + ./sql-client.sh + ``` + + Then, run the following SQL command in the Flink SQL console: + + ```sql + CREATE TABLE Orders ( + order_number BIGINT, + price BIGINT, + buyer STRING + ) WITH ( + 'connector'='questdb', + 'host'='localhost' + ); + ``` + + Expected output: `[INFO] Execute statement succeed.` + + This command created a Flink table backed by QuestDB. The table is called + `Orders` and has three columns: `order_number`, `price`, and `buyer`. The + `connector` option specifies the QuestDB Flink connector. The `host` option + specifies the host and port where QuestDB is running. The default port is + `9009`. + +- While still in the Flink SQL console execute: + + ```questdb-sql + INSERT INTO Orders values (0, 42, 'IBM'); + ``` + + Expected output: + + ```shell + [INFO] SQL update statement has been successfully submitted to the cluster: + Job ID: + ``` + + This command used Flink SQL to insert a row into the `Orders` table in Flink. + The table is connected to QuestDB, so the row is also into QuestDB. + +- Go to the QuestDB web console `http://localhost:9000/` and execute this query: + + ```questdb-sql + SELECT * FROM Orders; + ``` + + You should see a table with one row. + + ![QuestDB web console screenshot with the query result](/img/guides/flink/flink-questdb-console.png) + +Congratulations! You have successfully used the QuestDB Flink connector to +ingest data from Flink into QuestDB. You can now build Flink data pipelines that +use QuestDB as a sink. + +See the +[QuestDB Flink connector GitHub repository](https://github.com/questdb/flink-questdb-connector/tree/main/samples) +for more examples. + +## Configuration + +The QuestDB Flink connector supports the following configuration options: + +| Name | Type | Example | Default | Meaning | +| ---------------- | --------- | --------------------- | --------------------------- | -------------------------------------------------------------------------- | +| host | `string` | localhost:9009 | N/A | Host and port where QuestDB server is running | +| username | `string` | testUser1 | admin | Username for authentication. The default is used when also `token` is set. 
| +| token | `string` | GwBXoGG5c6NoUTLXnzMxw | admin | Token for authentication | +| table | `string` | my_table | Same as Flink table name | Target table in QuestDB | +| tls | `boolean` | true | false | Whether to use TLS/SSL for connecting to QuestDB server | +| buffer.size.kb | `integer` | 32 | 64 | Size of the QuestDB client send buffer | +| sink.parallelism | `integer` | 2 | Same as upstream processors | QuestDB Sink Parallelism | + +Example configuration for connecting to QuestDB running on localhost: + +```sql +CREATE TABLE Orders ( + order_number BIGINT, + price BIGINT, + buyer STRING + ) WITH ( + 'connector'='questdb', + 'host'='localhost', + 'table' = 'orders' +); +``` + +Example configuration for connecting to QuestDB running in +[QuestDB Cloud](/cloud/): + +```sql +CREATE TABLE Orders ( + order_number BIGINT, + price BIGINT, + buyer STRING + ) WITH ( + 'connector'='questdb', + 'host'='agreeable-brown-297-bee317da.ilp.b04c.questdb.net:31277', + 'username' = 'admin', + 'token' = 'KBeYuNwOHzEuxQ72YnToBCpQN7WVOHDm-oTp5dVNB1o', + 'tls' = 'true', + 'table' = 'orders' +); +``` + +## Connector Distribution + +The connector is distributed as a single jar file. The jar file is available in +the +[Maven Central repository](https://repo1.maven.org/maven2/org/questdb/flink-questdb-connector/) +and it's available under the following coordinates: + +```xml + + org.questdb + flink-questdb-connector + LATEST + +``` + +The latest version is: +[![a badge with the latest connector version in Maven Central](https://maven-badges.herokuapp.com/maven-central/org.questdb/flink-questdb-connector/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.questdb/flink-questdb-connector) + +## FAQ + +Q: Why is QuestDB client not repackaged into a different Java package?
A: The QuestDB client uses native code, which makes repackaging difficult.

Q: I need to use QuestDB as a Flink source, what should I do?
A: This +connector is Sink only. If you want to use QuestDB as a Source then your best +chance is to use +[Flink JDBC source](https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/connectors/table/jdbc/) +and rely on +[QuestDB Postgres compatibility](/docs/develop/query-data/#postgresql-wire-protocol). diff --git a/docs/third-party-tools/grafana.md b/docs/third-party-tools/grafana.md new file mode 100644 index 00000000..8509a74f --- /dev/null +++ b/docs/third-party-tools/grafana.md @@ -0,0 +1,140 @@ +--- +title: Grafana +description: Guide for time-series data visualization with QuestDB and Grafana +--- + +[Grafana](https://grafana.com/) is a popular observability and monitoring +application used to visualize data and help with time-series data analysis. It +has an extensive ecosystem of widgets and plugins. QuestDB supports connecting +to Grafana via the [Postgres](/docs/reference/api/postgres/) endpoint. + +## Prerequisites + +- [Docker](/docs/get-started/docker/) to run both Grafana and QuestDB + +We will use the `--add-host` parameter for both Grafana and QuestDB. + +## Start Grafana + +Start Grafana using `docker run`: + +```shell +docker run --add-host=host.docker.internal:host-gateway \ +-p 3000:3000 --name=grafana \ +-v grafana-storage:/var/lib/grafana \ +grafana/grafana-oss +``` + +Once the Grafana server has started, you can access it via port 3000 +(`http://localhost:3000`). The default login credentials are as follows: + +```shell +user:admin +password:admin +``` + +## Start QuestDB + +The Docker version for QuestDB can be run by exposing the port `8812` for the +PostgreSQL connection and port `9000` for the web and REST interface: + +```shell +docker run --add-host=host.docker.internal:host-gateway \ +-p 9000:9000 -p 9009:9009 -p 8812:8812 -p 9003:9003 \ +-e QDB_PG_READONLY_USER_ENABLED=true \ +questdb/questdb:latest + +``` + +## Add a data source + +1. Open Grafana's UI (by default available at `http://localhost:3000`) +2. Go to the `Configuration` section and click on `Data sources` +3. Click `Add data source` +4. Choose the `PostgreSQL` plugin and configure it with the following settings: + +``` +host:host.docker.internal:8812 +database:qdb +user:user +password:quest +TLS/SSL mode:disable +``` + +5. When adding a panel, use the "text edit mode" by clicking the pencil icon and + adding a query + +## Global variables + +Use +[global variables](https://grafana.com/docs/grafana/latest/variables/variable-types/global-variables/#global-variables) +to simplify queries with dynamic elements such as date range filters. + +### `$__timeFilter(timestamp)` + +This variable allows filtering results by sending a start-time and end-time to +QuestDB. This expression evaluates to: + +```questdb-sql +timestamp BETWEEN + '2018-02-01T00:00:00Z' AND '2018-02-28T23:59:59Z' +``` + +### `$__interval` + +This variable calculates a dynamic interval based on the time range applied to +the dashboard. By using this function, the sampling interval changes +automatically as the user zooms in and out of the panel. + +## Example query + +```questdb-sql +SELECT + pickup_datetime AS time, + avg(trip_distance) AS distance +FROM taxi_trips +WHERE $__timeFilter(pickup_datetime) +SAMPLE BY $__interval; +``` + +## Known issues +### Visual Query Builder not working +The visual query builder does not work with QuestDB. To work around this, use the text edit mode to write your queries. 
+ +import Screenshot from "@theme/Screenshot" + + + +### Alerting with older QuestDB versions +:::note + +This issue affects QuestDB version 7.3.0 and earlier. Starting from version 7.3.1, QuestDB accepts nanosecond +precision timestamps. We are keeping this section for users who are using older versions of QuestDB. + +::: + + +For alert queries generated by certain Grafana versions, the macro +`$__timeFilter(timestamp)` produces timestamps with nanosecond precision, while +the expected precision is millisecond precision. As a result, the alert queries +are not compatible with QuestDB and lead to an `Invalid date` error. To resolve +this, we recommend the following workaround: + +```questdb-sql +SELECT + pickup_datetime AS time, + avg(trip_distance) AS distance +FROM taxi_trips +WHERE pickup_datetime BETWEEN cast($__unixEpochFrom()*1000000L as timestamp) and cast($__unixEpochTo()*1000000L as timestamp) + +``` + +See [Grafana issues](https://github.com/grafana/grafana/issues/51611) for more +information. + +## See also + +- [Build a monitoring dashboard with QuestDB and Grafana](/blog/time-series-monitoring-dashboard-grafana-questdb/) diff --git a/docs/third-party-tools/kafka/jdbc.md b/docs/third-party-tools/kafka/jdbc.md new file mode 100644 index 00000000..b8dc144f --- /dev/null +++ b/docs/third-party-tools/kafka/jdbc.md @@ -0,0 +1,190 @@ +--- +title: JDBC Connector +description: + JDBC driver support in QuestDB allows for ingesting messages from a Kafka + topic via Kafka Connect. +--- + +Support for the JDBC driver means that data can easily be exported from a Kafka +topic and ingested directly to QuestDB by means of Kafka Connect. + +This article assumes that users have successfully set up an installation of +Kafka and are ready to start exporting messages to QuestDB. + +## Prerequisites + +You will need the following: + +- Kafka +- A running QuestDB instance + +## Configure Kafka + +The following binaries must be available to Kafka: + +- Kafka Connect JDBC binary +- PostgreSQL JDBC driver + +To download these files, visit the +[Kafka Connect JDBC](https://www.confluent.io/hub/confluentinc/kafka-connect-jdbc) +page which provides CLI installation and a direct download of the required +`.jar` files. Select the ZIP file for download, unzip the contents of the +archive and copy the required `.jar` files to the Kafka `libs` directory: + +```shell +unzip confluentinc-kafka-connect-jdbc-10.0.1.zip +cd confluentinc-kafka-connect-jdbc-10.0.1 +cp kafka-connect-jdbc-10.0.1.jar /path/to/kafka_2.13-2.6.0/libs +cp postgresql-42.2.10.jar /path/to/kafka_2.13-2.6.0/libs +``` + +A configuration file `/path/to/kafka/config/connect-jdbc.properties` must be +created for Kafka Connect in standalone mode. The Postgres connection URL must +match the destination QuestDB instance and a topic can be specified under the +`topics={mytopic}` key. This example guide uses a topic `example-topic` and the +Postgres server is running on the default port `8812`. + +Create a file `config/connect-jdbc.properties` with the following contents: + +```shell +name=local-jdbc-sink +connector.class=io.confluent.connect.jdbc.JdbcSinkConnector +connection.url=jdbc:postgresql://127.0.0.1:8812/qdb?useSSL=false +connection.user=admin +connection.password=quest + +topics=example-topic +insert.mode=insert +dialect.name=PostgreSqlDatabaseDialect +pk.mode=none +auto.create=true +``` + +## Start Kafka + +The commands listed in this section must be run from the Kafka home directory +and in the order shown below. 
+ +Start the Kafka Zookeeper used to coordinate the server: + +```shell +bin/zookeeper-server-start.sh config/zookeeper.properties +``` + +Start a Kafka server: + +```shell +bin/kafka-server-start.sh config/server.properties +``` + +Start Kafka Connect: + +```shell +bin/connect-standalone.sh config/connect-standalone.properties config/connect-jdbc.properties +``` + +## Publish messages + +Messages can be published via the console producer script: + +```shell +bin/kafka-console-producer.sh --topic example-topic --bootstrap-server localhost:9092 +``` + +A `>` greater-than symbol indicates that a messages can be published to the +example topic from the interactive session. Paste the following minified JSON as +a single line to publish messages and create the table `example-topic` in the +QuestDB instance: + + +```json +{"schema":{"type":"struct","fields":[{"type":"boolean","optional":false,"field":"flag"},{"type":"int8","optional":false,"field":"id8"},{"type":"int16","optional":false,"field":"id16"},{"type":"int32","optional":false,"field":"id32"},{"type":"int64","optional":false,"field":"id64"},{"type":"float","optional":false,"field":"idFloat"},{"type":"double","optional":false,"field":"idDouble"},{"type":"string","optional":true,"field":"msg"}],"optional":false,"name":"msgschema"},"payload":{"flag":false,"id8":222,"id16":222,"id32":222,"id64":222,"idFloat":222,"idDouble":333,"msg":"hi"}} +``` + + +## Verify the integration + +To verify that the data has been ingested into the `example-topic` table, the +following request to QuestDB's `/exp` REST endpoint can be made to export the +table contents via CURL: + +```shell +curl -G \ + --data-urlencode "query=select * from 'example-topic'" \ + http://localhost:9000/exp +``` + +The expected response based on the example JSON message published above will be +the following: + +```csv +"flag","id8","id16","id32","id64","idFloat","idDouble","msg" +false,-34,-34,222,222,222.0000,333.0,"hi" +``` + +## JSON format + +The JSON object sent in the example above has the following structure containing +`schema` and `payload` objects: + +```json +{ + "schema": { + "type": "struct", + "fields": [ + { + "type": "boolean", + "optional": false, + "field": "flag" + }, + { + "type": "int8", + "optional": false, + "field": "id8" + }, + { + "type": "int16", + "optional": false, + "field": "id16" + }, + { + "type": "int32", + "optional": false, + "field": "id32" + }, + { + "type": "int64", + "optional": false, + "field": "id64" + }, + { + "type": "float", + "optional": false, + "field": "idFloat" + }, + { + "type": "double", + "optional": false, + "field": "idDouble" + }, + { + "type": "string", + "optional": true, + "field": "msg" + } + ], + "optional": false, + "name": "msgschema" + }, + "payload": { + "flag": false, + "id8": 222, + "id16": 222, + "id32": 222, + "id64": 222, + "idFloat": 222, + "idDouble": 333, + "msg": "hi" + } +} +``` diff --git a/docs/third-party-tools/kafka/overview.md b/docs/third-party-tools/kafka/overview.md new file mode 100644 index 00000000..b8ef6f30 --- /dev/null +++ b/docs/third-party-tools/kafka/overview.md @@ -0,0 +1,71 @@ +--- +title: Ingestion from Kafka Overview +sidebar_label: Overview +description: Apache Kafka integration overview. +--- + +Ingesting data from Apache Kafka to QuestDB is a common use case. Possible +strategies are as the following: + +1. [QuestDB Kafka connector](/docs/third-party-tools/kafka/questdb-kafka/): The + recommended strategy for connecting to Kafka using InfluxDB Line Protocol and + Kafka Connect. +2. 
[JDBC connector](/docs/third-party-tools/kafka/jdbc/): A generic connector + using Kafka Connect. +3. Write a dedicated program to read data from Kafka and write to QuestDB. +4. Use a [stream processing](/glossary/stream-processing) engine. + +Each strategy has different trade-offs. The rest of this page discusses each +strategy and aims to guide advanced users. + +## QuestDB Kafka connector + +QuestDB has developed a QuestDB Kafka connector for Kafka. The connector is +built on top of the Kafka Connect framework and uses the +[InfluxDB Line Protocol](/docs/develop/insert-data/#influxdb-line-protocol/) for +communication with QuestDB. Kafka Connect handles concerns such as fault +tolerance and serialization. It also provides facilities for message +transformations, filtering, etc. InfluxDB Line Protocol ensures operational +simplicity and excellent performance: it can insert 100,000s rows per second. + +**This is the recommended strategy for most users.** + +## JDBC connector + +Similar to the QuestDB Kafka connector, the JDBC connector also uses the Kafka +Connect framework. However, instead of using a dedicated InfluxDB Line Protocol, +it relies on a [generic JDBC binary](/docs/third-party-tools/kafka/jdbc/) and +QuestDB +[PostgreSQL protocol compatibility](/docs/develop/connect/#postgresql-wire-protocol). +It requires objects in Kafka to have associated schema and overall it is more +complex to set up and run. Compared to the QuestDB Kafka connector, the JDBC +connector has significantly lower performance, but the following advantages: + +- JDBC insertion allows higher consistency guarantees than the fire-and-forget + InfluxDB Line Protocol method used by the QuestDB Kafka connector. +- Various Kafka-as-a-Service providers often have the JDBC connector + pre-packaged. + +This strategy is recommended when the QuestDB Kafka connector cannot be used for +some reason. + +## Dedicated program + +Writing a dedicated program reading from Kafka topics and writing to QuestDB +tables offers great flexibility: The program can do arbitrary data +transformations and filtering, including stateful operations. On the other hand: +It's the most complex strategy to implement. You'll have to deal with different +serialization formats, handle failures, etc. This strategy is recommended for +very advanced use cases only. This is not recommended for most users. + +## Stream processing engine + +Stream processing engine provides a middle ground between writing a dedicated +program and using one of the connectors. Engines such as +[Apache Flink](https://flink.apache.org/) provide rich API for data +transformations, enrichment, and filtering; at the same time, they can help you +with shared concerns such as fault-tolerance and serialization. However, they +often have a non-trivial learning curve. QuestDB offers a +[connector for Apache Flink](/docs/third-party-tools/flink/). It is the +recommended strategy if you are an existing Flink user, and you need to do +complex transformations while inserting entries from Kafka to QuestDB. diff --git a/docs/third-party-tools/kafka/questdb-kafka.md b/docs/third-party-tools/kafka/questdb-kafka.md new file mode 100644 index 00000000..80ce9be7 --- /dev/null +++ b/docs/third-party-tools/kafka/questdb-kafka.md @@ -0,0 +1,469 @@ +--- +title: QuestDB Kafka Connector +description: + QuestDB ships a QuestDB Kafka connector for ingesting messages from Kafka via + the InfluxDB Line Protocol. 
+--- + +QuestDB ships a +[QuestDB Kafka connector](https://github.com/questdb/kafka-questdb-connector) +for fast ingestion from Kafka into QuestDB. This is also useful for processing +[change data capture](/glossary/change-data-capture/) for the dataflow. +The connector is based on the [Kafka Connect framework](https://kafka.apache.org/documentation/#connect) +and acts as a sink for Kafka topics. + +This page has the following main sections: + +- A QuestDB Kafka connector integration guide +- A [configuration manual](#configuration-manual) for the connector +- [FAQ](#faq) + +## Integration guide + +This guide shows the steps to use the QuestDB Kafka connector to read JSON data +from Kafka topics and write them as rows into a QuestDB table. + +For Confluent users, please check the instructions in the +[Confluent Docker images](https://github.com/questdb/kafka-questdb-connector/tree/main/kafka-questdb-connector-samples/confluent-docker-images). + +### Prerequisites + +You will need the following: + +- Kafka +- A running QuestDB instance +- A local + [JDK installation](https://docs.oracle.com/en/java/javase/18/install/overview-jdk-installation.html#GUID-8677A77F-231A-40F7-98B9-1FD0B48C346A) + +### Configure Kafka + +Before starting Kafka, the following steps must be completed: + +1. Download the connector file. + +The Apache Kafka distribution contains the Kafka Connect framework, but the +QuestDB-specific components need to be downloaded from +[the QuestDB Kafka connector GH page](https://github.com/questdb/kafka-questdb-connector/releases/latest), +under the zip archive named `kafka-questdb-connector--bin.zip`. + +The connector is also available from +[the Confluent Hub](https://www.confluent.io/hub/questdb/kafka-questdb-connector). + +2. Copy the file to the Kafka `libs` directory. + +Once downloaded, unzip the contents of the archive and copy the required `.jar` +files to the Kafka `libs` directory: + +```shell +unzip kafka-questdb-connector-*-bin.zip +cd kafka-questdb-connector +cp ./*.jar /path/to/kafka_2.13-2.6.0/libs +``` + +:::tip + +You can automate downloading the latest connector package by running this +command: + +```shell +curl -s https://api.github.com/repos/questdb/kafka-questdb-connector/releases/latest | +jq -r '.assets[]|select(.content_type == "application/zip")|.browser_download_url'| +wget -qi - +``` + +::: + +3. Set the configuration file. + +A configuration file `/path/to/kafka/config/questdb-connector.properties` must +be created for Kafka Connect in the standalone mode. The host and port of the +running QuestDB server must be defined. A topic can be specified under the +`topics={mytopic}` key. + +The example below creates a configuration file. It assumes a running QuestDB +server on the InfluxDB Line Protocol default port, `9009`, creates a reader from +a Kafka topic, `example-topic`, and writes into a QuestDB table, +`example_table`: + +```shell title="Create a configuration file" +name=questdb-sink +connector.class=io.questdb.kafka.QuestDBSinkConnector +host=localhost:9009 +topics=example-topic +table=example_table +include.key=false +value.converter=org.apache.kafka.connect.json.JsonConverter +value.converter.schemas.enable=false +key.converter=org.apache.kafka.connect.storage.StringConverter +``` + +### Start Kafka + +The commands listed in this section must be run from the Kafka home directory +and in the order shown below. + +1. 
Start the Kafka Zookeeper used to coordinate the server: + +```shell +bin/zookeeper-server-start.sh config/zookeeper.properties +``` + +2. Start a Kafka server: + +```shell +bin/kafka-server-start.sh config/server.properties +``` + +3. Start the QuestDB Kafka connector: + +```shell +bin/connect-standalone.sh config/connect-standalone.properties config/questdb-connector.properties +``` + +### Publish messages + +Messages can be published via the console producer script: + +```shell +bin/kafka-console-producer.sh --topic example-topic --bootstrap-server localhost:9092 +``` + +A greater-than symbol, `>`, indicates that a message can be published to the +example topic from the interactive session. Paste the following minified JSON as +a single line to publish the message and create the table `example-topic` in the +QuestDB instance: + + + +```json +{"firstname": "Arthur", "lastname": "Dent", "age": 42} +``` + + + +### Verify the integration + +To verify that the data has been ingested into the `example-topic` table, the +following request to QuestDB's `/exp` REST API endpoint can be made to export +the table contents via the curl command: + +```shell +curl -G \ + --data-urlencode "query=select * from 'example_table'" \ + http://localhost:9000/exp +``` + +The expected response based on the example JSON message published above will be +similar to the following: + +```csv +"firstname","age","lastname","timestamp" +"Arthur",42,"Dent","2022-11-01T13:11:55.558108Z" +``` + +If you can see the expected result then congratulations, you have successfully +created and executed your first Kafka to QuestDB pipeline! 🎉 + +### Additional sample projects + +You can find additional sample projects on the +[QuestDB Kafka connector](https://github.com/questdb/kafka-questdb-connector/tree/main/kafka-questdb-connector-samples) +Github project page. It includes a +[sample integration](https://github.com/questdb/kafka-questdb-connector/tree/main/kafka-questdb-connector-samples/stocks) +with [Debezium](https://debezium.io/) for CDC from PostgreSQL. + +## Configuration manual + +This section lists configuration options as well as further information about +the Kafka Connect connector. 
+ +### Configuration Options + +The connector supports the following configuration options: + +| Name | Type | Example | Default | Meaning | +|-----------------------------------|-----------|-------------------------------------------------------------|--------------------|------------------------------------------------------------| +| topics | `string` | orders,audit | N/A | Topics to read from | +| key.converter | `string` | org.apache.kafka.connect.storage.StringConverter | N/A | Converter for keys stored in Kafka | +| value.converter | `string` | org.apache.kafka.connect.json.JsonConverter | N/A | Converter for values stored in Kafka | +| host | `string` | localhost:9009 | N/A | Host and port where QuestDB server is running | +| table | `string` | my_table | Same as Topic name | Target table in QuestDB | +| key.prefix | `string` | from_key | key | Prefix for key fields | +| value.prefix | `string` | from_value | N/A | Prefix for value fields | +| skip.unsupported.types | `boolean` | false | false | Skip unsupported types | +| timestamp.field.name | `string` | pickup_time | N/A | Designated timestamp field name | +| timestamp.units | `string` | micros | auto | Designated timestamp field units | +| timestamp.kafka.native | `boolean` | true | false | Use Kafka timestamps as designated timestamps | +| timestamp.string.fields| `string` | creation_time,pickup_time | N/A | String fields with textual timestamps | +| timestamp.string.format| `string` | yyyy-MM-dd HH:mm:ss.SSSUUU z | N/A | Timestamp format, used when parsing timestamp string fields| +| include.key | `boolean` | false | true | Include message key in target table | +| symbols | `string` | instrument,stock | N/A | Comma separated list of columns that should be symbol type | +| doubles | `string` | volume,price | N/A | Comma separated list of columns that should be double type | +| username | `string` | user1 | admin | User name for QuestDB. Used only when token is non-empty | +| token | `string` | QgHCOyq35D5HocCMrUGJinEsjEscJlC | N/A | Token for QuestDB authentication | +| tls | `boolean` | true | false | Use TLS for QuestDB connection | +| retry.backoff.ms | `long` | 1000 | 3000 | Connection retry interval in milliseconds | +| max.retries | `long` | 1 | 10 | Maximum number of connection retry attempts | + +### How does the connector work? + +The connector reads data from Kafka topics and writes it to QuestDB tables via +InfluxDB Line Protocol. The connector converts each field in the Kafka message +to a column in the QuestDB table. Structures and maps are flatted into columns. + +Example: Consider the following Kafka message: + +```json +{ + "firstname": "John", + "lastname": "Doe", + "age": 30, + "address": { + "street": "Main Street", + "city": "New York" + } +} +``` + +The connector will create a table with the following columns: + +| firstname string | lastname string | age long | address_street string | address_city string | +| --------------------------- | -------------------------- | ------------------- | -------------------------------- | ------------------------------ | +| John | Doe | 30 | Main Street | New York | + +### Supported serialization formats + +The connector does not deserialize data independently. It relies on Kafka +Connect converters. The connector has been tested predominantly with JSON, but +it should work with any converter, including Avro. 
Converters can be configured +using `key.converter` and `value.converter` options, both are included in the +[Configuration options](#configuration-options) table above. + +### Designated timestamps + +The connector supports +[designated timestamps](https://questdb.io/docs/concept/designated-timestamp/). + +There are three distinct strategies for designated timestamp handling: + +1. QuestDB server assigns a timestamp when it receives data from the connector. (Default) +1. The connector extracts the timestamp from the Kafka message payload. +1. The connector extracts timestamps from [Kafka message metadata.](https://cwiki.apache.org/confluence/display/KAFKA/KIP-32+-+Add+timestamps+to+Kafka+message) + +Kafka messages carry various metadata, one of which is a timestamp. +To use the Kafka message metadata timestamp as a QuestDB designated timestamp, +set `timestamp.kafka.native` to `true`. + +If a message payload contains a timestamp field, the connector can utilize it +as a designated timestamp. The field's name should be configured using the +`timestamp.field.name` option. This field should either be an integer or a timestamp. + +When the field is defined as an integer, the connector will automatically detect its units. This is applicable for timestamps after `04/26/1970, 5:46:40 PM`. + +The units can also be configured explicitly using the `timestamp.units` option, which supports the following values: + +- `nanos` +- `micros` +- `millis` +- `auto` (default) + +Note: These 3 strategies are mutually exclusive. Cannot set both `timestamp.kafka.native=true` and `timestamp.field.name`. + +### Textual timestamps parsing +Kafka messages often contain timestamps in a textual format. The connector can parse these and use them as timestamps. Configure field names as a string with the `timestamp.string.fields` option. Set the timestamp format with the `timestamp.string.format` option, which adheres to the QuestDB timestamp format. + +See the [QuestDB timestamp](/docs/reference/function/date-time/#date-and-timestamp-format) documentation for more details. + +#### Example +Consider the following Kafka message: +```json +{ + "firstname": "John", + "lastname": "Doe", + "born": "1982-01-07 05:42:11.123456 UTC", + "died": "2031-05-01 09:11:42.456123 UTC" +} +``` +To use the `born` field as a designated timestamp and `died` as a regular timestamp +set the following properties in your QuestDB connector configuration: +1. `timestamp.field.name=born` - the field `born` is a designated timestamp. +1. `timestamp.string.fields=died` - set the field name `died` as a textual timestamp. + Notice this option does not contain the field `born`. This field is already set as + a designated timestamp so the connector will attempt to parse it as a timestamp automatically. +1. `timestamp.string.format=yyyy-MM-dd HH:mm:ss.SSSUUU z` - set the timestamp format. Please note the correct format for microseconds is `SSSUUU` (3 digits for milliseconds and 3 digits for microseconds). + + +### Symbol type + +QuestDB supports a special type called +[symbol](https://questdb.io/docs/concept/symbol/). Use the `symbols` +configuration option to specify which columns should be created as the `symbol` +type. + +### Numeric type inference for floating point type + +When a configured Kafka Connect deserializer provides a schema, the connector +uses it to determine column types. If a schema is unavailable, the connector +infers the type from the value. 
This might produce unexpected results for +floating point numbers, which may be interpreted as `long` initially and +generates an error. + +Consider this example: + +```json +{ + "instrument": "BTC-USD", + "volume": 42 +} +``` + +Kafka Connect JSON converter deserializes the `volume` field as a `long` value. +The connector sends it to the QuestDB server as a `long` value. If the target +table does not have a column `volume`, the database creates a `long` column. If +the next message contains a floating point value for the `volume` field, the +connector sends it to QuestDB as a `double` value. This causes an error because +the existing column `volume` is of type `long`. + +To avoid this problem, the connector can be configured to send selected numeric +columns as `double` regardless of the actual initial input value. Use the +`doubles` configuration option to specify which columns should the connector +always send as the `double` type. + +### Target table considerations + +When a target table does not exist in QuestDB, it will be created automatically. +This is the recommended approach for development and testing. + +In production, it's recommended to use the SQL +[CREATE TABLE](https://questdb.io/docs/reference/sql/create-table/) keyword, +because it gives you more control over the table schema, allowing per-table +partitioning, creating indexes, etc. + +## FAQ + +
**Does this connector work with Schema Registry?**

The connector works independently of the serialization strategy used. It relies on Kafka Connect converters to deserialize data. Converters can be configured using the `key.converter` and `value.converter` options; see the configuration section above.
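For example, if your topic values are Avro-encoded and registered in a Schema Registry, the converter lines of the connector configuration could look like the sketch below. The converter class is Confluent's Avro converter and the registry URL is illustrative; adjust both to your environment.

```shell
# Deserialize Avro values using the schemas stored in Schema Registry
value.converter=io.confluent.connect.avro.AvroConverter
value.converter.schema.registry.url=http://localhost:8081
key.converter=org.apache.kafka.connect.storage.StringConverter
```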
**I'm getting this error: "org.apache.kafka.connect.errors.DataException: JsonConverter with schemas.enable requires 'schema' and 'payload' fields and may not contain additional fields. If you are trying to deserialize plain JSON data, set schemas.enable=false in your converter configuration."**

This error means that the connector is trying to deserialize data using a converter that expects a schema. The connector does not require schemas, so you need to configure the converter to not expect a schema. For example, if you are using a JSON converter, you need to set `value.converter.schemas.enable=false` or `key.converter.schemas.enable=false` in the connector configuration.
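Concretely, the JSON converter lines of a standalone connector properties file with schema parsing disabled mirror the configuration shown earlier in this guide:

```shell
value.converter=org.apache.kafka.connect.json.JsonConverter
# Plain JSON messages do not carry a schema envelope
value.converter.schemas.enable=false
key.converter=org.apache.kafka.connect.storage.StringConverter
```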
**Does this connector work with Debezium?**

Yes, it has been tested with Debezium as a source, and a [sample project](https://github.com/questdb/kafka-questdb-connector/tree/main/kafka-questdb-connector-samples/stocks) is available. Bear in mind that QuestDB is meant to be used as an append-only database; hence, updates should be translated into new inserts. The connector supports Debezium's `ExtractNewRecordState` transformation to extract the new state of each record. By default, the transformation drops DELETE events, so there is no need to handle them explicitly.
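As a sketch of how the transformation is wired in, the relevant connector configuration lines might look like the following; the alias `unwrap` is illustrative, while the class name is Debezium's documented single message transform:

```shell
transforms=unwrap
transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
# DELETE events are dropped by the transform's default settings
```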
**QuestDB is a time-series database. How does it fit into Change Data Capture via Debezium?**

QuestDB works very well with Debezium. This is the recommended pattern: transactional applications use a [relational database](/glossary/relational-database/) to store the current state of the data, while QuestDB stores the history of changes. For example, imagine you have a PostgreSQL table with the most recent stock prices. Whenever a stock price changes, an application updates the PostgreSQL table. Debezium captures each UPDATE/INSERT and pushes it as an event to Kafka. The QuestDB Kafka connector reads the events and inserts them into QuestDB. In this way, PostgreSQL has the most recent stock prices and QuestDB has the history of changes. You can use QuestDB to build a dashboard with the most recent stock prices and a chart with the history of changes.
**How can I select which fields to include in the target table?**

Use the `ReplaceField` transformation to remove unwanted fields. For example, if you want to remove the `address` field, you can use the following configuration:

```json
{
  "name": "questdb-sink",
  "config": {
    "connector.class": "io.questdb.kafka.QuestDBSinkConnector",
    "host": "localhost:9009",
    "topics": "Orders",
    "table": "orders_table",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "transforms": "removeAddress",
    "transforms.removeAddress.type": "org.apache.kafka.connect.transforms.ReplaceField$Value",
    "transforms.removeAddress.blacklist": "address"
  }
}
```

See the [ReplaceField documentation](https://docs.confluent.io/platform/current/connect/transforms/replacefield.html#replacefield) for more details.
**I need to run Kafka Connect on Java 8, but the connector says it requires Java 11. What should I do?**

The Kafka Connect-specific part of the connector works with Java 8; the Java 11 requirement comes from the QuestDB client itself. The zip archive contains two JARs: `questdb-kafka-connector-VERSION.jar` and `questdb-VERSION.jar`. You can replace the latter with `questdb-VERSION-jdk8.jar` from [Maven Central](https://mvnrepository.com/artifact/org.questdb/questdb/6.5.4-jdk8). Please note that this setup is not officially supported and you may encounter issues. If you do, please report them to us.
+ + +## See also + +- [Change Data Capture with QuestDB and Debezium](/blog/2023/01/03/change-data-capture-with-questdb-and-debezium) +- [Realtime crypto tracker with QuestDB Kafka Connector](/blog/realtime-crypto-tracker-with-questdb-kafka-connector) diff --git a/docs/third-party-tools/mindsdb.md b/docs/third-party-tools/mindsdb.md new file mode 100644 index 00000000..54e22fcf --- /dev/null +++ b/docs/third-party-tools/mindsdb.md @@ -0,0 +1,457 @@ +--- +title: MindsDB +description: + Guide for getting started in Machine Learning with MindsDB and QuestDB +--- + +[MindsDB](https://mindsdb.com/questdb-machine-learning/) provides Machine +Learning capabilities to enable predictive questions about your data. With +MindsDB: + +- Developers can quickly add AI capabilities to their applications. +- Data scientists can streamline MLOps by deploying ML models as AI Tables. +- Data analysts can easily make forecasts on complex data, such as multivariate + time-series with [high cardinality](/glossary/high-cardinality/), and visualize + these in BI tools like Grafana, and Tableau. + +Combining both MindsDB and QuestDB provides unbound prediction ability **with +SQL**. + +This guide describes how to pre-process data in QuestDB and then access these +data from MindsDB to produce powerful ML models. + +## Prerequisites + +- [docker](https://docs.docker.com/): To create an image and run the container. +- mysql: The client we use to interact with MindsDB + (`mysql -h 127.0.0.1 --port 47335 -u mindsdb -p`). Alternatively, use MindsDB + web console at `http://localhost:47334/` instead. +- [Curl](https://curl.se/download.html): To upload data to QuestDB from a local + CSV file. + +## Instructions + +The following are the overall steps to connect MindsDB and QuestDB: + +1. Build a Docker image and spawn a container to run MindsDB and QuestDB + together. +2. Add QuestDB as a datasource to MindsDB using a SQL Statement. +3. Create a table and add data for a simple ML use case using QuestDB's web + console. +4. Connect to MindsDB using `mysql` as a client and write some SQL. + +We have put together all the files needed in +[GH](https://github.com/questdb/mindsdb-tutorial). + +### Running the Docker container + +Clone the +[repository for this tutorial](https://github.com/questdb/mindsdb-tutorial). The +[Dockerfile](https://github.com/questdb/mindsdb-tutorial/blob/main/Dockerfile) +allows us to build an image with the following command: + +```shell +docker build -t questdb/mindsdb:latest . +``` + +Then, start the service container `qmdb` with the following command: + +```shell +docker run --rm \ + -p 8812:8812 \ + -p 9009:9009 \ + -p 9000:9000 \ + -p 8888:8888 \ + -p 47334:47334 \ + -p 47335:47335 \ + -d \ + --name qmdb \ + questdb/mindsdb:latest +``` + +The container is run as user `quest`. It takes about 10 seconds to become +responsive, logs can be followed in the terminal: + +```shell +docker logs -f qmdb +... +http API: starting... +mysql API: starting... +mongodb API: starting... +... +mongodb API: started on 47336 +mysql API: started on 47335 +http API: started on 47334 +``` + +The container has these mount points: + +- **/home/quest**: User home directory. +- **~/questdb/**: QuestDB's root directory. +- **~/questdb/db/**: QuestDB's data root directory. +- **~/backups/**: Directory for backups. +- **~/csv/**: Directory for the `COPY` operation. +- **~/mindsdb/storage/**: MindsDB's data root directory. 
+ +The container is running `Debian GNU/Linux 11 (bullseye)` and exposes these +ports: + +- 9000: QuestDB Web Console +- 8812: QuestDB pg-wire +- 9009: QuestDB InfluxDB Line Protocol ingress line protocol +- 47334: MindsDB WebConsole +- 47335: MindsDB mysql API +- 47336: MindsDB mongodb API + +### Adding data to QuestDB + +There are different ways to +[insert data to QuestDB](https://questdb.io/docs/develop/insert-data/). + +#### SQL + +We can access QuestDB's web console at `http://localhost:9000`. + +Run the following SQL query to create a simple table: + +```questdb-sql +CREATE TABLE IF NOT EXISTS house_rentals_data ( + number_of_rooms INT, + number_of_bathrooms INT, + sqft INT, + location SYMBOL, + days_on_market INT, + initial_price FLOAT, + neighborhood SYMBOL, + rental_price FLOAT, + ts TIMESTAMP +) TIMESTAMP(ts) PARTITION BY YEAR; +``` + +We could populate table house_rentals_data with random data: + +```questdb-sql +INSERT INTO house_rentals_data SELECT * FROM ( + SELECT + rnd_int(1,6,0), + rnd_int(1,3,0), + rnd_int(180,2000,0), + rnd_symbol('great', 'good', 'poor'), + rnd_int(1,20,0), + rnd_float(0) * 1000, + rnd_symbol('alcatraz_ave', 'berkeley_hills', 'downtown', 'south_side', 'thowsand_oaks', 'westbrae'), + rnd_float(0) * 1000 + 500, + timestamp_sequence( + to_timestamp('2021-01-01', 'yyyy-MM-dd'), + 14400000000L + ) + FROM long_sequence(100) +); +``` + +#### CURL command + +The +[data CSV file](https://github.com/questdb/mindsdb-tutorial/blob/main/sample_house_rentals_data.csv) +can be downloaded to a local folder and uploaded to QuestDB using the following +command: + +```shell +curl -F data=@sample_house_rentals_data.csv "http://localhost:9000/imp?forceHeader=true&name=house_rentals_data" +``` + +Either way, this gives us 100 data points, one every 4 hours, from +`2021-01-16T12:00:00.000000Z` (QuestDB's timestamps are UTC with microsecond +precision). + +### Connect to MindsDB + +We can connect to MindsDB with a standard mysql-wire-protocol compliant client +(no password, hit ENTER): + +```shell +mysql -h 127.0.0.1 --port 47335 -u mindsdb -p +``` + +Alternatively, we can use MindsDB web console at `http://localhost:47334`: + +From the terminal or the MindsDB web console, run the following command to check +the available databases: + +```sql +SHOW DATABASES; +``` + +QuestDB is not shown in the result: + +```shell ++--------------------+ +| Database | ++--------------------+ +| mindsdb | +| files | +| information_schema | ++--------------------+ +``` + +To see QuestDB as a database we need to add it to MindsDB: + +```sql +CREATE DATABASE questdb + WITH ENGINE = "questdb", + PARAMETERS = { + "user": "admin", + "password": "quest", + "host": "questdb", + "port": "8812", + "database": "questdb" + }; +``` + +Then, run `SHOW DATABASES;` should display both MindsDB and QuestDB: + +```shell ++--------------------+ +| Database | ++--------------------+ +| mindsdb | +| files | +| questdb | +| information_schema | ++--------------------+ +``` + +#### `questdb` + +This is a read-only view on our QuestDB instance. We can query it leveraging the +full power of QuestDB's unique SQL syntax because statements are sent from +MindsDB to QuestDB without interpreting them. 
It only works for SELECT +statements: + +```sql +SELECT * FROM questdb( + SELECT + ts, neighborhood, + sum(days_on_market) DaysLive, + min(rental_price) MinRent, + max(rental_price) MaxRent, + avg(rental_price) AvgRent + FROM house_rentals_data + WHERE ts BETWEEN '2021-01-08' AND '2021-01-10' + SAMPLE BY 1d FILL (0, 0, 0, 0) +); +``` + +The result should be something like this: + +```shell ++--------------+----------------+----------+----------+----------+--------------------+ +| ts | neighborhood | DaysLive | MinRent | MaxRent | AvgRent | ++--------------+----------------+----------+----------+----------+--------------------+ +| 1610064000.0 | south_side | 19 | 1285.338 | 1285.338 | 1285.338134765625 | +| 1610064000.0 | downtown | 7 | 1047.14 | 1047.14 | 1047.1396484375 | +| 1610064000.0 | berkeley_hills | 17 | 727.52 | 727.52 | 727.5198974609375 | +| 1610064000.0 | westbrae | 36 | 1038.358 | 1047.342 | 1042.85009765625 | +| 1610064000.0 | thowsand_oaks | 5 | 1067.319 | 1067.319 | 1067.318603515625 | +| 1610064000.0 | alcatraz_ave | 0 | 0.0 | 0.0 | 0.0 | +| 1610150400.0 | south_side | 10 | 694.403 | 694.403 | 694.4031982421875 | +| 1610150400.0 | downtown | 16 | 546.798 | 643.204 | 595.0011291503906 | +| 1610150400.0 | berkeley_hills | 4 | 1256.49 | 1256.49 | 1256.4903564453125 | +| 1610150400.0 | westbrae | 0 | 0.0 | 0.0 | 0.0 | +| 1610150400.0 | thowsand_oaks | 0 | 0.0 | 0.0 | 0.0 | +| 1610150400.0 | alcatraz_ave | 14 | 653.924 | 1250.477 | 952.2005004882812 | +| 1610236800.0 | south_side | 0 | 0.0 | 0.0 | 0.0 | +| 1610236800.0 | downtown | 9 | 1357.916 | 1357.916 | 1357.9158935546875 | +| 1610236800.0 | berkeley_hills | 0 | 0.0 | 0.0 | 0.0 | +| 1610236800.0 | westbrae | 0 | 0.0 | 0.0 | 0.0 | +| 1610236800.0 | thowsand_oaks | 0 | 0.0 | 0.0 | 0.0 | +| 1610236800.0 | alcatraz_ave | 0 | 0.0 | 0.0 | 0.0 | ++--------------+----------------+----------+----------+----------+--------------------+ +``` + +Beyond SELECT statements, for instance when we need to save the results of a +query into a new table, we need to use QuestDB's web console available at +`http://localhost:9000/`: + +```questdb-sql +CREATE TABLE sample_query_results AS ( + SELECT + ts, + neighborhood, + sum(days_on_market) DaysLive, + min(rental_price) MinRent, + max(rental_price) MaxRent, + avg(rental_price) AvgRent + FROM house_rentals_data + WHERE ts BETWEEN '2021-01-08' AND '2021-01-10' + SAMPLE BY 1d FILL (0, 0, 0, 0) +) TIMESTAMP(ts) PARTITION BY MONTH; +``` + +#### `mindsdb` + +Contains the metadata tables necessary to create ML models: + +```sql +USE mindsdb; +SHOW TABLES; +``` + +```shell ++-------------------+ +| Tables_in_mindsdb | ++-------------------+ +| models | +| models_versions | ++-------------------+ +``` + +### Create a predictor model + +We can create a predictor model `mindsdb.home_rentals_model_ts` to predict the +`rental_price` for a neighborhood considering the past 20 days, and no +additional features: + +```sql +CREATE PREDICTOR mindsdb.home_rentals_model_ts FROM questdb ( + SELECT + neighborhood, + rental_price, + ts + FROM house_rentals_data +) +PREDICT rental_price ORDER BY ts GROUP BY neighborhood +WINDOW 20 HORIZON 1; +``` + +This triggers MindsDB to create/train the model based on the full data available +from QuestDB's table `house_rentals_data` (100 rows) as a time series on the +column `ts`. + +When status is complete, the model is ready for use; otherwise, we simply wait +while we observe MindsDB's logs. Creating/training a model will take time +proportional to the number of features, i.e. 
cardinality of the source table as +defined in the inner SELECT of the CREATE MODEL statement, and the size of the +corpus, i.e. number of rows. The model is a table in MindsDB: + +```sql +SHOW TABLES; +``` + +The new table is displayed: + +```shell ++-----------------------+ +| Tables_in_mindsdb | ++-----------------------+ +| models | +| models_versions | +| home_rentals_model_ts | ++-----------------------+ +``` + +### Describe the predictor model + +We can get more information about the trained model, how was the accuracy +calculated or which columns are important for the model by executing the +`DESCRIBE MODEL` statement: + +```sql +DESCRIBE MODEL mindsdb.home_rentals_model_ts; +``` + +```shell +*************************** 1. row *************************** + accuracies: {'complementary_smape_array_accuracy':0.859} + outputs: ['rental_price'] + inputs: ['neighborhood', 'ts', '__mdb_ts_previous_rental_price'] + datasource: home_rentals_model_ts + model: encoders --> dtype_dict --> dependency_dict --> model --> problem_definition --> identifiers --> imputers --> accuracy_functions +``` + +Or, to see how the model encoded the data prior to training we can execute: + +```sql +DESCRIBE MODEL mindsdb.home_rentals_model_ts.features; +``` + +```shell ++--------------+-------------+------------------+---------+ +| column | type | encoder | role | ++--------------+-------------+------------------+---------+ +| neighborhood | categorical | OneHotEncoder | feature | +| rental_price | float | TsNumericEncoder | target | +| ts | datetime | ArrayEncoder | feature | ++--------------+-------------+------------------+---------+ +``` + +Additional information about the models and how they can be customized can be +found on the [Lightwood docs](https://lightwood.io/). 
+ +### Query MindsDB for predictions + +The latest `rental_price` value per neighborhood in table +`questdb.house_rentals_data` can be obtained directly from QuestDB executing +query: + +```sql +SELECT * FROM questdb ( + SELECT + neighborhood, + rental_price, + ts + FROM house_rentals_data + LATEST BY neighborhood +); +``` + +```shell ++----------------+--------------+--------------+ +| neighborhood | rental_price | ts | ++----------------+--------------+--------------+ +| thowsand_oaks | 1150.427 | 1610712000.0 | (2021-01-15 12:00:00.0) +| south_side | 726.953 | 1610784000.0 | (2021-01-16 08:00:00.0) +| downtown | 568.73 | 1610798400.0 | (2021-01-16 12:00:00.0) +| westbrae | 543.83 | 1610841600.0 | (2021-01-17 00:00:00.0) +| berkeley_hills | 559.928 | 1610870400.0 | (2021-01-17 08:00:00.0) +| alcatraz_ave | 1268.529 | 1610884800.0 | (2021-01-17 12:00:00.0) ++----------------+--------------+--------------+ +``` + +To predict the next value: + +```sql +SELECT + tb.ts, + tb.neighborhood, + tb.rental_price as predicted_rental_price, + tb.rental_price_explain as explanation +FROM questdb.house_rentals_data AS ta +JOIN mindsdb.home_rentals_model_ts AS tb +WHERE ta.ts > LATEST; +``` + +```shell ++---------------------+----------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ts | neighborhood | predicted_rental_price | explanation | ++---------------------+----------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 2021-01-17 00:00:00 | downtown | 877.3007391233444 | {"predicted_value": 877.3007391233444, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 379.43294697022424, "confidence_upper_bound": 1375.1685312764646} | +| 2021-01-19 08:00:00 | westbrae | 923.1387395936794 | {"predicted_value": 923.1387395936794, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 385.8327438509463, "confidence_upper_bound": 1460.4447353364124} | +| 2021-01-15 16:00:00 | thowsand_oaks | 1418.678199780345 | {"predicted_value": 1418.678199780345, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 1335.4600013965369, "confidence_upper_bound": 1501.8963981641532} | +| 2021-01-17 12:00:00 | berkeley_hills | 646.5979284300436 | {"predicted_value": 646.5979284300436, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 303.253838410034, "confidence_upper_bound": 989.9420184500532} | +| 2021-01-18 12:00:00 | south_side | 1422.69481363723 | {"predicted_value": 1422.69481363723, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 129.97617491441304, "confidence_upper_bound": 2715.413452360047} | +| 2021-01-18 04:00:00 | alcatraz_ave | 1305.009073065412 | {"predicted_value": 1305.009073065412, "confidence": 0.9991, "anomaly": null, "truth": null, "confidence_lower_bound": 879.0232742685288, "confidence_upper_bound": 1730.994871862295} | ++---------------------+----------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +### Stop the container + +To terminate the 
container, run: + +```shell +docker stop qmdb +``` + +- [MindsDB GitHub](https://github.com/mindsdb/mindsdb) +- [MindsDB Documentation](https://docs.mindsdb.com/) +- [This tutorial's artefacts](https://github.com/questdb/mindsdb-tutorial/) diff --git a/docs/third-party-tools/pandas.md b/docs/third-party-tools/pandas.md new file mode 100644 index 00000000..5cb9fc0f --- /dev/null +++ b/docs/third-party-tools/pandas.md @@ -0,0 +1,70 @@ +--- +title: Pandas +description: + Guide for using Pandas with QuestDB via the official QuestDB Python client + library +--- + +[Pandas](https://pandas.pydata.org/) is a fast, powerful, flexible, and +easy-to-use open-source data analysis and manipulation tool, built on top of the +Python programming language. The +[QuestDB Python client](https://py-questdb-client.readthedocs.io/en/latest/index.html) +provides native support for ingesting Pandas dataframes via the InfluxDB Line Protocol. + +## Prerequisites + +- QuestDB must be running and accessible. You can do so from + [Docker](/docs/get-started/docker/), the + [binaries](/docs/get-started/binaries/), or + [Homebrew](/docs/get-started/homebrew/) for macOS users. +- Python 3.8 or later +- [Pandas](https://pandas.pydata.org/) +- [pyarrow](https://pypi.org/project/pyarrow/) +- [NumPy](https://numpy.org/) + +## Overview + +The QuestDB Python client implements the `dataframe()` method to transform Pandas DataFrames into QuestDB-flavored InfluxDB Line Protocol messages. + + +The following example shows how to insert data from a Pandas DataFrame to the `trades` table: + +```python +from questdb.ingress import Sender, IngressError + +import sys +import pandas as pd + + +def example(host: str = 'localhost', port: int = 9009): + df = pd.DataFrame({ + 'pair': ['USDGBP', 'EURJPY'], + 'traded_price': [0.83, 142.62], + 'qty': [100, 400], + 'limit_price': [0.84, None], + 'timestamp': [ + pd.Timestamp('2022-08-06 07:35:23.189062', tz='UTC'), + pd.Timestamp('2022-08-06 07:35:23.189062', tz='UTC')]}) + try: + with Sender(host, port) as sender: + sender.dataframe( + df, + table_name='trades', # Table name to insert into. + symbols=['pair'], # Columns to be inserted as SYMBOL types. + at='timestamp') # Column containing the designated timestamps. + + except IngressError as e: + sys.stderr.write(f'Got error: {e}\n') + + +if __name__ == '__main__': + example() +``` + +## See also + +For detailed documentation, please see: + +- [`Sender.dataframe()`](https://py-questdb-client.readthedocs.io/en/latest/api.html#questdb.ingress.Sender.dataframe) +- [`Buffer.dataframe()`](https://py-questdb-client.readthedocs.io/en/latest/api.html#questdb.ingress.Buffer.dataframe) +- [Examples using `dataframe()`](https://py-questdb-client.readthedocs.io/en/latest/examples.html#data-frames) \ No newline at end of file diff --git a/docs/third-party-tools/prometheus.md b/docs/third-party-tools/prometheus.md new file mode 100644 index 00000000..02f3fbfb --- /dev/null +++ b/docs/third-party-tools/prometheus.md @@ -0,0 +1,247 @@ +--- +title: Prometheus monitoring and alerting +sidebar_label: Prometheus +description: + This document describes how to monitor QuestDB metrics such as memory + consumption using the Prometheus metrics endpoint, and how to log alerts to + Prometheus Alertmanager. +--- + +import InterpolateReleaseData from "../../src/components/InterpolateReleaseData" +import CodeBlock from "@theme/CodeBlock" + +Prometheus is an open-source systems monitoring and alerting toolkit. Prometheus +collects and stores metrics as time-series data, i.e. 
metrics information is +stored with the timestamp at which it was recorded, alongside optional key-value +pairs called labels. + +Users can measure the internal status of a QuestDB instance via an HTTP endpoint +exposed by QuestDB at port `9003`. This document describes how to enable metrics +via this endpoint, how to configure Prometheus to scrape metrics from a QuestDB +instance, and how to enable alerting from QuestDB to Prometheus Alertmanager. + +## Prerequisites + +- **QuestDB** must be running and accessible. You can do so from + [Docker](/docs/get-started/docker/), the + [binaries](/docs/get-started/binaries/), or + [Homebrew](/docs/get-started/homebrew/) for macOS users. + +- **Prometheus** can be installed using + [homebrew](https://formulae.brew.sh/formula/prometheus), + [Docker](https://hub.docker.com/u/prom), or directly as a binary. For more + details, refer to the official Prometheus + [installation instructions](https://prometheus.io/download/). + +- **Alertmanager** can be run using + [Docker](https://hub.docker.com/r/prom/alertmanager/) or + [Quay](https://quay.io/repository/prometheus/alertmanager), or can be built + from source by following the + [build instructions on GitHub](https://github.com/prometheus/alertmanager#compiling-the-binary). + +## Scraping Prometheus metrics from QuestDB + +QuestDB has a `/metrics` HTTP endpoint on port `9003` to expose Prometheus +metrics. Before being able to query metrics, they must be enabled via the +`metrics.enabled` key in server configuration: + +```ini title="/path/to/server.conf" +metrics.enabled=true +``` + +When running QuestDB via Docker, port `9003` must be exposed and the metrics +configuration can be enabled via the `QDB_METRICS_ENABLED` environment variable: + + ( + + {`docker run \\ + -e QDB_METRICS_ENABLED=TRUE \\ + -p 8812:8812 -p 9000:9000 -p 9003:9003 -p 9009:9009 \\ + -v "$(pwd):/var/lib/questdb" \\ + questdb/questdb:${release.name}`} + + )} +/> + +To verify that metrics are being exposed correctly by QuestDB, navigate to +`http://:9003/metrics` in a browser, where `` is the IP +address of an instance, or execute a basic curl like the following example: + +```bash title="Given QuestDB running at 127.0.0.1" +curl http://127.0.0.1:9003/metrics +# TYPE questdb_json_queries_total counter +questdb_json_queries_total 0 + +# TYPE questdb_memory_tag_MMAP_DEFAULT gauge +questdb_memory_tag_MMAP_DEFAULT 77872 + +# TYPE questdb_memory_malloc_count gauge +questdb_memory_malloc_count 659 + +# ... +``` + +To configure Prometheus to scrape these metrics, provide the QuestDB instance IP +and port `9003` as a target. 
The following example configuration file +`questdb.yml` assumes there is a running QuestDB instance on localhost +(127.0.0.1) with port `9003` available: + +```shell title="questdb.yml" +global: + scrape_interval: 5s + external_labels: + monitor: 'questdb' + +scrape_configs: + - job_name: 'questdb' + scrape_interval: 5s + static_configs: + - targets: ['127.0.0.1:9003'] +``` + +Start Prometheus and pass this configuration on launch: + +```bash +prometheus --config.file=questdb.yml +``` + +Prometheus should be available on `0.0.0.0:9090` and navigating to +`http://0.0.0.0:9090/targets` should show that QuestDB is being scraped +successfully: + +import Screenshot from "@theme/Screenshot" + + + +In the graphing tab of Prometheus (`http://0.0.0.0:9090/graph`), autocomplete +can be used to graph QuestDB-specific metrics which are all prefixed with +`questdb_`: + + + +The following metrics are available: + +| Metric | Type | Description | +| :--------------------------------------- | :------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `questdb_commits_total` | counter | Number of total commits of all types (in-order and out-of-order) executed on the database tables. | +| `questdb_o3_commits_total` | counter | Number of total out-of-order (O3) commits executed on the database tables. | +| `questdb_committed_rows_total` | counter | Number of total rows committed to the database tables. | +| `questdb_physically_written_rows_total` | counter | Number of total rows physically written to disk. Greater than `committed_rows` with [out-of-order ingestion. Write amplification is `questdb_physically_written_rows_total / questdb_committed_rows_total`. | +| `questdb_rollbacks_total` | counter | Number of total rollbacks executed on the database tables. | +| `questdb_json_queries_total` | counter | Number of total REST API queries, including retries. | +| `questdb_json_queries_completed` | counter | Number of successfully executed REST API queries. | +| `questdb_unhandled_errors_total` | counter | Number of total unhandled errors occurred in the database. Such errors usually mean a critical service degradation in one of the database subsystems. | +| `questdb_jvm_major_gc_count` | counter | Number of times major JVM garbage collection was triggered. | +| `questdb_jvm_major_gc_time` | counter | Total time spent on major JVM garbage collection in milliseconds. | +| `questdb_jvm_minor_gc_count` | counter | Number of times minor JVM garbage collection pause was triggered. | +| `questdb_jvm_minor_gc_time` | counter | Total time spent on minor JVM garbage collection pauses in milliseconds. | +| `questdb_jvm_unknown_gc_count` | counter | Number of times JVM garbage collection of unknown type was triggered. Non-zero values of this metric may be observed only on some, non-mainstream JVM implementations. | +| `questdb_jvm_unknown_gc_time` | counter | Total time spent on JVM garbage collection of unknown type in milliseconds. Non-zero values of this metric may be observed only on some, non-mainstream JVM implementations. | +| `questdb_memory_tag_MMAP_DEFAULT` | gauge | Amount of memory allocated for mmaped files. | +| `questdb_memory_tag_NATIVE_DEFAULT` | gauge | Amount of allocated untagged native memory. | +| `questdb_memory_tag_MMAP_O3` | gauge | Amount of memory allocated for O3 mmapped files. 
| +| `questdb_memory_tag_NATIVE_O3` | gauge | Amount of memory allocated for O3. | +| `questdb_memory_tag_NATIVE_RECORD_CHAIN` | gauge | Amount of memory allocated for SQL record chains. | +| `questdb_memory_tag_MMAP_TABLE_WRITER` | gauge | Amount of memory allocated for table writer mmapped files. | +| `questdb_memory_tag_NATIVE_TREE_CHAIN` | gauge | Amount of memory allocated for SQL tree chains. | +| `questdb_memory_tag_MMAP_TABLE_READER` | gauge | Amount of memory allocated for table reader mmapped files. | +| `questdb_memory_tag_NATIVE_COMPACT_MAP` | gauge | Amount of memory allocated for SQL compact maps. | +| `questdb_memory_tag_NATIVE_FAST_MAP` | gauge | Amount of memory allocated for SQL fast maps. | +| `questdb_memory_tag_NATIVE_LONG_LIST` | gauge | Amount of memory allocated for long lists. | +| `questdb_memory_tag_NATIVE_HTTP_CONN` | gauge | Amount of memory allocated for HTTP connections. | +| `questdb_memory_tag_NATIVE_PGW_CONN` | gauge | Amount of memory allocated for PGWire connections. | +| `questdb_memory_tag_MMAP_INDEX_READER` | gauge | Amount of memory allocated for index reader mmapped files. | +| `questdb_memory_tag_MMAP_INDEX_WRITER` | gauge | Amount of memory allocated for index writer mmapped files. | +| `questdb_memory_tag_MMAP_INDEX_SLIDER` | gauge | Amount of memory allocated for indexed column view mmapped files. | +| `questdb_memory_tag_NATIVE_REPL` | gauge | Amount of memory mapped for replication tasks. | +| `questdb_memory_free_count` | gauge | Number of times native memory was freed. | +| `questdb_memory_mem_used` | gauge | Current amount of allocated native memory. | +| `questdb_memory_malloc_count` | gauge | Number of times native memory was allocated. | +| `questdb_memory_realloc_count` | gauge | Number of times native memory was reallocated. | +| `questdb_memory_rss` | gauge | Resident Set Size (Linux/Unix) / Working Set Size (Windows). | +| `questdb_memory_jvm_free` | gauge | Current amount of free Java memory heap in bytes. | +| `questdb_memory_jvm_total` | gauge | Current size of Java memory heap in bytes. | +| `questdb_memory_jvm_max` | gauge | Maximum amount of Java heap memory that can be allocated in bytes. | +| `questdb_json_queries_cached` | gauge | Number of current cached REST API queries. | +| `questdb_pg_wire_select_queries_cached` | gauge | Number of current cached PGWire `SELECT` queries. | +| `questdb_pg_wire_update_queries_cached` | gauge | Number of current cached PGWire `UPDATE` queries. | + +All of the above metrics are volatile, i.e. they're collected since the current +database start. + +## Configuring Prometheus Alertmanager + +QuestDB includes a log writer that sends any message logged at critical level +(by default) to Prometheus +[Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) over a +TCP/IP socket connection. To configure this writer, add it to the `writers` +config alongside other log writers. Details on logging configuration can be +found on the +[server configuration documentation](/docs/reference/configuration#configuration-file). 
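+
+Once Alertmanager is running (the Docker command to start it follows below), it
+can be useful to confirm that it is reachable from the QuestDB host before
+enabling the alert writer. The following is a minimal sketch using only the
+Python standard library; the `127.0.0.1:9093` address and the `/-/ready`
+readiness endpoint are assumptions based on the default local Alertmanager
+setup used in this guide:
+
+```python
+# Minimal reachability check for a local Alertmanager instance.
+# Assumes Alertmanager listens on 127.0.0.1:9093, as in the rest of this guide.
+import urllib.request
+
+with urllib.request.urlopen("http://127.0.0.1:9093/-/ready", timeout=5) as resp:
+    # A 200 response means Alertmanager is up and ready to receive alerts.
+    print(resp.status, resp.read().decode().strip())
+```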
+
+Alertmanager may be started via Docker with the following command:
+
+```
+docker run -p 127.0.0.1:9093:9093 --name alertmanager quay.io/prometheus/alertmanager
+```
+
+To discover the IP address of this container, run the following command, which
+specifies `alertmanager` as the container name:
+
+```bash
+docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' alertmanager
+```
+
+To run QuestDB and point it towards Alertmanager for alerting, first create a
+file `./conf/log.conf` with the following contents. `172.17.0.2` in this case is
+the IP address of the Alertmanager Docker container, discovered by running the
+`docker inspect` command above.
+
+```ini title="./conf/log.conf"
+# Which writers to enable
+writers=stdout,alert
+
+# stdout
+w.stdout.class=io.questdb.log.LogConsoleWriter
+w.stdout.level=INFO
+
+# Prometheus Alerting
+w.alert.class=io.questdb.log.LogAlertSocketWriter
+w.alert.level=CRITICAL
+w.alert.alertTargets=172.17.0.2:9093
+```
+
+Start up QuestDB in Docker using the following command:
+
+```bash title="Mounting a volume with the log.conf file"
+docker run \
+  -p 9000:9000 -p 8812:8812 -p 9009:9009 -p 9003:9003 \
+  -v "$(pwd):/var/lib/questdb" \
+  questdb/questdb:6.1.3
+```
+
+When alerts are successfully triggered, QuestDB logs will indicate the sent and
+received status:
+
+```txt
+2021-12-14T18:42:54.222967Z I i.q.l.LogAlertSocketWriter Sending: 2021-12-14T18:42:54.122874Z I i.q.l.LogAlertSocketWriter Sending: 2021-12-14T18:42:54.073978Z I i.q.l.LogAlertSocketWriter Received [0] 172.17.0.2:9093: {"status":"success"}
+2021-12-14T18:42:54.223377Z I i.q.l.LogAlertSocketWriter Received [0] 172.17.0.2:9093: {"status":"success"}
+```
+
+## See also
+
+The template used by QuestDB for alerts is user-configurable and is described in
+more detail in the
+[server configuration](/docs/reference/configuration/#prometheus-alertmanager)
+documentation.
diff --git a/docs/third-party-tools/qstudio.md b/docs/third-party-tools/qstudio.md
new file mode 100644
index 00000000..d0638bad
--- /dev/null
+++ b/docs/third-party-tools/qstudio.md
@@ -0,0 +1,55 @@
+---
+title: qStudio
+description: Guide for querying QuestDB using qStudio
+---
+
+import Screenshot from "@theme/Screenshot"
+
+[qStudio](https://www.timestored.com/qstudio/) is a free SQL GUI. It allows you
+to run SQL scripts, browse tables, chart, and export results.
+
+qStudio includes charting functionality, including time-series charting, which
+is particularly useful with QuestDB. It works on every operating system and
+with every database, including QuestDB via the PostgreSQL driver.
+
+## Prerequisites
+
+- A running QuestDB instance (see [Getting Started](/docs/#getting-started))
+
+## Configure QuestDB connection
+
+1. [Download qStudio](https://www.timestored.com/qstudio/download) for your OS
+2. Launch qStudio
+3. Go to `Server` -> `Add Server`
+4. Click `Add data source`
+5. Choose the `PostgreSQL` plugin and configure it with the following settings:
+
+   ```
+   host:localhost
+   port:8812
+   database:qdb
+   user:admin
+   password:quest
+   ```
+
+## Sending Queries
+
+Run queries with:
+
+- Ctrl+Enter to run the current line, or
+- Ctrl+E to run the highlighted code.
+ +export const screenshotTitle = + "Screenshot of the qStudio UI running QuestDB query" + + + +## See also + +- [QuestDB Postgres wire protocol](/docs/reference/api/postgres/) diff --git a/docs/third-party-tools/redpanda.md b/docs/third-party-tools/redpanda.md new file mode 100644 index 00000000..bc0d7d42 --- /dev/null +++ b/docs/third-party-tools/redpanda.md @@ -0,0 +1,240 @@ +--- +title: Redpanda +description: + Guide for using Redpanda with QuestDB via the QuestDB Kafka connector +--- + +[Redpanda](https://redpanda.com/) is an open-source, Kafka-compatible streaming +platform that uses C++ and Raft to replace Java and Zookeeper. Since it is Kafka +compatible, it can be used with the +[QuestDB Kafka connector](/docs/third-party-tools/kafka/questdb-kafka/), +providing an alternative data [streaming](/glossary/stream-processing) option. + +## Prerequisites + +- Docker +- A local JDK installation +- A running QuestDB instance + +## Configure and start Redpanda + +The Redpanda +[Quick start guide](https://docs.redpanda.com/docs/get-started/quick-start/quick-start-docker/#start-redpanda) +provides a `docker-compose.yaml` file that can be used. Copy and paste the +content into into a file named `docker-compose.yml` on your local filesystem: + +```yaml title="docker-compose.yml" +--- +version: "3.7" +name: redpanda-quickstart +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null +services: + redpanda-0: + command: + - redpanda + - start + - --kafka-addr + - internal://0.0.0.0:9092,external://0.0.0.0:19092 + # use the internal addresses to connect to the Redpanda brokers' + # from inside the same Docker network. + # + # use the external addresses to connect to the Redpanda brokers' + # from outside the Docker network. + # + # address the broker advertises to clients that connect to the Kafka API. + - --advertise-kafka-addr + - internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr + - internal://0.0.0.0:8082,external://0.0.0.0:18082 + # address the broker advertises to clients that connect to PandaProxy. + - --advertise-pandaproxy-addr + - internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr + - internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with eachother internally. + - --rpc-addr + - redpanda-0:33145 + - --advertise-rpc-addr + - redpanda-0:33145 + # tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + # the amount of memory to make available to Redpanda. + - --memory 1G + # the amount of memory that's left for the Seastar subsystem. + # For development purposes this is set to 0. + - --reserve-memory 0M + # Redpanda won't assume it has all of the provisioned CPU + # (to accommodate Docker resource limitations). + - --overprovisioned + # enable logs for debugging. 
+      - --default-log-level=debug
+    image: docker.redpanda.com/vectorized/redpanda:v22.3.11
+    container_name: redpanda-0
+    volumes:
+      - redpanda-0:/var/lib/redpanda/data
+    networks:
+      - redpanda_network
+    ports:
+      - 18081:18081
+      - 18082:18082
+      - 19092:19092
+      - 19644:9644
+  console:
+    container_name: redpanda-console
+    image: docker.redpanda.com/vectorized/console:v2.1.1
+    networks:
+      - redpanda_network
+    entrypoint: /bin/sh
+    command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml; /app/console'
+    environment:
+      CONFIG_FILEPATH: /tmp/config.yml
+      CONSOLE_CONFIG_FILE: |
+        kafka:
+          brokers: ["redpanda-0:9092"]
+          schemaRegistry:
+            enabled: true
+            urls: ["http://redpanda-0:8081"]
+        redpanda:
+          adminApi:
+            enabled: true
+            urls: ["http://redpanda-0:9644"]
+    ports:
+      - 8080:8080
+    depends_on:
+      - redpanda-0
+```
+
+Once the file is saved, run the following command to start a single Redpanda
+broker inside Docker and expose Redpanda to your host machine:
+
+```shell
+docker compose up
+```
+
+It also starts the
+[Redpanda web UI](https://docs.redpanda.com/docs/get-started/quick-start/quick-start-docker/#explore-your-topic-in-redpanda-console).
+
+## Download Apache Kafka
+
+Download
+[Apache Kafka](https://downloads.apache.org/kafka/3.4.1/kafka_2.12-3.4.1.tgz)
+and unzip the file.
+
+This step is required as Redpanda does not have its own Kafka Connect
+equivalent.
+
+## Download the QuestDB Kafka connector
+
+Download
+[the QuestDB Kafka connector](https://github.com/questdb/kafka-questdb-connector/releases/latest),
+packaged as the zip archive named `kafka-questdb-connector-<version>-bin.zip`.
+
+:::tip
+
+You can automate downloading the latest connector package by running this
+command:
+
+```shell
+curl -s https://api.github.com/repos/questdb/kafka-questdb-connector/releases/latest |
+jq -r '.assets[]|select(.content_type == "application/zip")|.browser_download_url'|
+wget -qi -
+```
+
+:::
+
+Unzip the connector - it contains a directory with 2 JARs. Copy these JARs into
+`/path/to/kafka/libs`:
+
+```shell
+unzip kafka-questdb-connector-*-bin.zip
+cd kafka-questdb-connector
+cp ./*.jar /path/to/kafka/libs
+```
+
+There should already be a lot of other JAR files there - that's how you can
+tell you are in the right directory.
+
+## Configure properties
+
+Go to `/path/to/kafka/config` - there should already be quite a few
+`*.properties` files. Create a new file, `questdb-connector.properties`, with
+the following lines:
+
+```ini title="questdb-connector.properties"
+name=questdb-sink
+connector.class=io.questdb.kafka.QuestDBSinkConnector
+host=localhost:9009
+topics=example-topic
+table=example_table
+include.key=false
+value.converter=org.apache.kafka.connect.json.JsonConverter
+value.converter.schemas.enable=false
+key.converter=org.apache.kafka.connect.storage.StringConverter
+```
+
+In addition, open `connect-standalone.properties` and replace:
+
+```ini
+bootstrap.servers=localhost:9092
+```
+
+with the Redpanda broker URL:
+
+```ini
+bootstrap.servers=127.0.0.1:19092
+```
+
+## Start Kafka Connect
+
+Navigate to the Kafka folder and then run:
+
+```shell
+./bin/connect-standalone.sh config/connect-standalone.properties config/questdb-connector.properties
+```
+
+Kafka Connect is now running.
+
+## Send a message
+
+Open the Redpanda Console topics page at `http://127.0.0.1:8080/topics`. It
+should display `example-topic`:
+
+![Screenshot of the Redpanda UI showing the example-topic](/img/docs/guide/redpanda/redpanda-topic.png)
+
+If the topic is not there, refresh the page a few times.
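+
+If you prefer to publish the test message from code instead of the Console, the
+short sketch below does the same thing. It is an illustration only: it assumes
+the third-party `kafka-python` package is installed (`pip install kafka-python`)
+and uses the external listener on `localhost:19092` advertised in the Docker
+Compose file above. Any Kafka-compatible client works equally well.
+
+```python
+# Publish the example JSON message to the topic consumed by the QuestDB sink.
+# Assumes: `pip install kafka-python` and Redpanda's external listener on localhost:19092.
+import json
+
+from kafka import KafkaProducer
+
+producer = KafkaProducer(
+    bootstrap_servers="localhost:19092",
+    # Serialize Python dicts to UTF-8 encoded JSON, matching the JsonConverter settings above.
+    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
+)
+producer.send("example-topic", {"firstname": "Arthur", "lastname": "Dent", "age": 42})
+producer.flush()
+producer.close()
+```
+
+Otherwise, continue in the Redpanda Console: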
+ +Select `example-topic` to expand more details and click `Actions` --> +`Publish Message`: + +![Screenshot of the Redpanda UI highlighting the Actions button](/img/docs/guide/redpanda/redpanda-actions.png) + +Paste the following message into the message box: + +```json +{ "firstname": "Arthur", "lastname": "Dent", "age": 42 } +``` + +![Screenshot of the Redpanda UI add message page](/img/docs/guide/redpanda/redpanda-add-messsage.png) + +Then, click 'Publish'. + +## See result from QuestDB + +Go to QuestDB web console at `http://localhost:9000/`. Run a `SELECT` query: + +```questdb-sql +SELECT * FROM example_table; +``` + +The message is delivered to QuestDB: + +![QuestDB web console result showing the Redpanda message](/img/docs/guide/redpanda/questdb-select.png) + +## See also + +- [QuestDB Kafka Connector reference manual](/docs/third-party-tools/kafka/questdb-kafka/#configuration-manual) +- [How to build a real-time crypto tracker with Redpanda and QuestDB](https://redpanda.com/blog/real-time-crypto-tracker-questdb-redpanda) diff --git a/docs/third-party-tools/spark.md b/docs/third-party-tools/spark.md new file mode 100644 index 00000000..33380af9 --- /dev/null +++ b/docs/third-party-tools/spark.md @@ -0,0 +1,220 @@ +--- +title: Apache Spark and Time-Series Analytics +sidebar_label: Spark +description: + This document describes how to load data from QuestDB to Apache Spark for + large-scale data engineering. +--- + +import InterpolateReleaseData from "../../src/components/InterpolateReleaseData" + +High-level instructions for loading data from QuestDB to Spark and back. + +## What is Spark? + +[Apache Spark](https://spark.apache.org/) is an analytics engine for large-scale +data engineering and [stream processing](/glossary/stream-processing), well-known in the big data landscape. It is suitable for +executing data engineering, data science, and machine learning on single-node +machines or clusters. + +## QuestDB Spark integration + +A typical Spark application processes data in the following steps: + +1. Loading data from different sources +1. Transforming and analyzing the data +1. Saving the result to a data storage + +Our example demonstrates these steps using QuestDB as the data source and +storage. It loads data from QuestDB into a Spark Dataframe; then the data is +enriched with new features, and eventually, it is written back into QuestDB. + +## Prerequisites + +- **Package manager**: This depends on your choice of OS. The below instructions + are for macOS using [Homebrew](/docs/get-started/homebrew/). + +- **QuestDB**: An instance must be running and accessible. You can do so from + [Docker](/docs/get-started/docker/), the + [binaries](/docs/get-started/binaries/), or + [Homebrew](/docs/get-started/homebrew/). + +## Installing Apache Spark + +Spark can be installed and set up in many ways, depending on requirements. +Typically, it is part of a Big Data stack, installed on multiple nodes with an +external cluster manager, such as +[Yarn](https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/YARN.html) +or [Apache Mesos](https://mesos.apache.org/). In this tutorial, we will work +with a single-node standalone Spark installation. + +Spark has a multi-language environment. It is written in Scala, runs on the Java +Virtual Machine, and also integrates with R and Python. Our example is written +using Python. 
By running the below commands Spark will be installed with all +required dependencies: + +```shell +brew install openjdk@11 +brew install python@3.10 +brew install scala +brew install apache-spark +``` + +The exact versions used for this example: + +```shell +openjdk@11 11.0.12 +python@3.10 3.10.10_1 +scala 3.2.2 +apache-spark 3.3.2 +``` + +## Installing the JDBC driver + +Spark communicates with QuestDB via JDBC, connecting to its Postgres Wire +Protocol endpoint. This requires the Postgres JDBC driver to be present. + +- Create a working directory: + +```shell +mkdir sparktest +cd sparktest +``` + +- Download the JDBC driver from [here](https://jdbc.postgresql.org/download/) + into the working directory. The exact version used for this example: + +```shell +postgresql-42.5.1.jar +``` + +## Setting up database tables + +First, start QuestDB. If you are using Docker run the following command: + + ( + + { + `docker run -p 9000:9000 -p 8812:8812 questdb/questdb:${release.name}` + } + + )} +/> + +The port mappings allow us to connect to QuestDB's REST and PGWire endpoints. +The former is required for opening the Web Console, and the latter is used by +Spark to connect to the database. + +Open the Web Console in your browser at `http://localhost:9000`. + +Run the following SQL commands using the console: + +```questdb-sql + +CREATE TABLE trades ( + symbol SYMBOL, + side SYMBOL, + price DOUBLE, + amount DOUBLE, + timestamp TIMESTAMP +) timestamp (timestamp) PARTITION BY DAY; + +CREATE TABLE trades_enriched ( + symbol SYMBOL, + volume DOUBLE, + mid DOUBLE, + ts TIMESTAMP, + ma10 DOUBLE, + std DOUBLE +) timestamp (ts) PARTITION BY DAY; + +INSERT INTO trades SELECT * FROM ( + SELECT 'BTC-USD' symbol, + rnd_symbol('buy', 'sell') side, + rnd_double() * 10000 price, + rnd_double() amount, + timestamp_sequence(1677628800000000, 10000000) ts + FROM long_sequence(25920) +) timestamp (ts); +``` + +The `INSERT` command generates 3 days' worth of test data, and stores it in the +`trades` table. 
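+
+As a quick sanity check, 25,920 rows spaced 10 seconds apart cover exactly
+three days (25,920 * 10 s = 259,200 s). If you want to verify the generated
+data before moving on, the following is a minimal sketch using `psycopg2`
+(an assumption - any PostgreSQL client works) against the PGWire endpoint
+exposed on port 8812:
+
+```python
+# Minimal sanity check of the generated test data over PGWire.
+# Assumes: `pip install psycopg2-binary` and QuestDB running locally.
+import psycopg2
+
+conn = psycopg2.connect(
+    host="localhost", port=8812, user="admin", password="quest", dbname="qdb"
+)
+cur = conn.cursor()
+cur.execute("SELECT count(), min(timestamp), max(timestamp) FROM trades")
+# Expect 25920 rows spanning 2023-03-01T00:00:00 to 2023-03-03T23:59:50 (UTC).
+print(cur.fetchone())
+cur.close()
+conn.close()
+```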
+ +## Feature engineering examples + +Save the below Python code into a file called `sparktest.py` inside the working +directory: + +```python +from pyspark.sql import SparkSession +from pyspark.sql.window import Window +from pyspark.sql.functions import avg, stddev, when + +# create Spark session +spark = SparkSession.builder.appName("questdb_test").getOrCreate() + +# load 1-minute aggregated trade data into the dataframe +df = spark.read.format("jdbc") \ + .option("url", "jdbc:postgresql://localhost:8812/questdb") \ + .option("driver", "org.postgresql.Driver") \ + .option("user", "admin").option("password", "quest") \ + .option("dbtable", "(SELECT symbol, sum(amount) as volume, " + "round((max(price)+min(price))/2, 2) as mid, " + "timestamp as ts " + "FROM trades WHERE symbol = 'BTC-USD' " + "SAMPLE BY 1m ALIGN to CALENDAR) AS mid_prices") \ + .option("partitionColumn", "ts") \ + .option("numPartitions", "3") \ + .option("lowerBound", "2023-03-01T00:00:00.000000Z") \ + .option("upperBound", "2023-03-04T00:00:00.000000Z") \ + .load() + +# extract new features, clean data +window_10 = Window.partitionBy(df.symbol).rowsBetween(-10, Window.currentRow) +df = df.withColumn("ma10", avg(df.mid).over(window_10)) +df = df.withColumn("std", stddev(df.mid).over(window_10)) +df = df.withColumn("std", when(df.std.isNull(), 0.0).otherwise(df.std)) + +# save the data as 'trades_enriched', overwrite if already exists +df.write.format("jdbc") \ + .option("url", "jdbc:postgresql://localhost:8812/questdb") \ + .option("driver", "org.postgresql.Driver") \ + .option("user", "admin").option("password", "quest") \ + .option("dbtable", "trades_enriched") \ + .option("truncate", True) \ + .option("createTableColumnTypes", "volume DOUBLE, mid DOUBLE, ma10 DOUBLE, std DOUBLE") \ + .save(mode="overwrite") +``` + +This Spark application loads aggregated data from the `trades` table into a +Dataframe, then adds two new features, a 10-minute moving average and the +standard deviation. Finally, it writes the enriched data back into QuestDB and +saves it to the `trades_enriched` table. + +## Run the example + +Submit the application to Spark for execution using `spark-submit`: + +```shell +spark-submit --jars postgresql-42.5.1.jar sparktest.py +``` + +The example requires the JDBC driver at runtime. This dependency is submitted to +Spark using the `--jars` option. + +After the execution is completed, we can check the content of the +`trades_enriched` table: + +```questdb-sql +SELECT * FROM trades_enriched; +``` + +The enriched data should be displayed in the Web Console. + +## See also + +For a more detailed explanation of the QuestDB Spark integration, please also +see our tutorial +[Integrate Apache Spark and QuestDB for Time-Series Analytics](/blog/integrate-apache-spark-questdb-time-series-analytics/#loading-data-to-spark/). diff --git a/docs/third-party-tools/sqlalchemy.md b/docs/third-party-tools/sqlalchemy.md new file mode 100644 index 00000000..0a565a51 --- /dev/null +++ b/docs/third-party-tools/sqlalchemy.md @@ -0,0 +1,79 @@ +--- +title: SQLAlchemy +description: Guide for using SQLAlchemy with QuestDB +--- + +[SQLAlchemy](https://www.sqlalchemy.org/) is an open-source SQL toolkit and ORM +library for Python. It provides a high-level API for communicating with +[relational databases](/glossary/relational-database/), including schema +creation and modification, an SQL expression language, and database connection +management. 
The ORM layer abstracts away the complexities of the database, +allowing developers to work with Python objects instead of raw SQL statements. + +QuestDB implements a dialect for SQLAlchemy using the +[QuestDB Connect](https://github.com/questdb/questdb-connect) Python package. + +Please note that the SQLAlchemy ORM and metadata operations are only partially +supported. + +## Prerequisites + +- Python from 3.9 to 3.11 +- Psycopg2 +- SQLAlchemy <=1.4.47 +- A QuestDB instance + +## Installation + +You can install this package using `pip`: + +```shell +pip install questdb-connect +``` + +## Example usage + +```python +import sqlalchemy +from sqlalchemy import create_engine +from sqlalchemy import text +from sqlalchemy import MetaData +from sqlalchemy import Table +from pprint import pprint + +engine = create_engine("questdb://admin:quest@localhost:8812/qdb") +with engine.connect() as conn: + # SQL statements with no parameters + conn.execute(text("CREATE TABLE IF NOT EXISTS some_table (x int, y int)")) + result=conn.execute(text("SHOW TABLES")) + print(result.all()) + # results can be iterated in many ways. Check SQLAlchemy docs for details + + # passing parameters to your statements + conn.execute( + text("INSERT INTO some_table (x, y) VALUES (:x, :y)"), + [{"x": 11, "y": 12}, {"x": 13, "y": 14}], + ) + + # basic select, no parameters + result = conn.execute(text("select * from some_table")) + print(result.all()) + + # select with parameters + result = conn.execute(text("SELECT x, y FROM some_table WHERE y > :y"), {"y": 2}) + print(result.all()) + + # partial support for metadata + metadata_obj = MetaData() + some_table = Table("some_table", metadata_obj, autoload_with=engine) + pprint(some_table) + + # cleaning up + conn.execute(text("DROP TABLE some_table")) +``` + +## See also + +- The + [SQLAlchemy tutorial](https://docs.sqlalchemy.org/en/14/tutorial/index.html) +- The [QuestDB Connect](https://pypi.org/project/questdb-connect/) GitHub diff --git a/docs/third-party-tools/superset.md b/docs/third-party-tools/superset.md new file mode 100644 index 00000000..b05b8509 --- /dev/null +++ b/docs/third-party-tools/superset.md @@ -0,0 +1,95 @@ +--- +title: Superset +description: Guide for using Superset with QuestDB +--- + +[Apache Superset](https://superset.apache.org/) is a popular open-source +business intelligence web application that enables users to visualize and +explore data through customizable dashboards and reports. + +QuestDB provides the +[QuestDB Connect](https://pypi.org/project/questdb-connect/) python package that +implements the SQLAlchemy dialect and Superset engine specification, to +integrate Apache Superset with QuestDB. + +## Installing Apache Superset via Docker (recommended) + +We recommend the Docker-based Apache Superset installation. You will need to +install the following requirements: + +- Docker +- QuestDB 7.1.2 or later + +Then, following the steps below: + +1. Clone the [Superset repo](https://github.com/apache/superset): + +```bash +git clone https://github.com/apache/superset.git +``` + +2. Change your directory: + + ```bash + cd superset + ``` + +3. Create a file `docker/requirements-local.txt` with the requirement to + `questdb-connect`: + +```bash +touch ./docker/requirements-local.txt +echo "questdb-connect==1.0.6" > docker/requirements-local.txt +``` +4. 
Run Apache Superset: + + ```bash + docker-compose -f docker-compose-non-dev.yml pull + docker-compose -f docker-compose-non-dev.yml up + ``` + + This step will initialize your Apache Superset installation, creating a + default admin, users, and several other settings. The first time you start + Apache Superset it can take a few minutes until it is completely initialized. + Please keep an eye on the console output to see when Apache Superset is ready + to be used. + +## Installing Superset via QuestDB Connect + +If you have a stand-alone installation of Apache Superset and are using Apache +Superset without Docker, you need to install the following requirements : + +- Python from 3.9 to 3.11 +- [Superset](https://superset.apache.org/docs/installation/installing-superset-from-scratch/) +- QuestDB 7.1.2 or later + +Install QuestDB Connect using `pip`: + +```bash +pip install 'questdb-connect==1.0.6' +``` + +## Connecting QuestDB to Superset + +Once installed and initialized, Apache Superset is accessible via +`localhost:8088`. + +1. Sign in with the following details: + - Username: admin + - Password: admin +2. From Superset UI, select Setting > Database Connections +3. Select `+Database` to add the following parameters: + - SUPPORTED DATABASES: Other + - DISPLAY NAME: QuestDB + - SQLALCHEMY URI: `questdb://admin:quest@host.docker.internal:8812/qdb` +4. For the `SQLALCHEMY URI` field, use `host.docker.internal` when running + Apache Superset from Docker and `localhost` for outside of Docker. +5. Once connected, tables in QuestDB will be visible for creating Datasets in + Apache Superset. + +## See also + +- [QuestDB Connect at GitHub](https://github.com/questdb/questdb-connect/) +- [QuestDB Connect Python module](https://pypi.org/project/questdb-connect/) +- [Apache Superset install](https://superset.apache.org/docs/installation/installing-superset-from-scratch/) +- [Blog post with Superset dashboard example](/blog/time-series-data-visualization-apache-superset-and-questdb/) diff --git a/docs/third-party-tools/telegraf.md b/docs/third-party-tools/telegraf.md new file mode 100644 index 00000000..d1cadcc0 --- /dev/null +++ b/docs/third-party-tools/telegraf.md @@ -0,0 +1,174 @@ +--- +title: Telegraf +description: + Learn how to use Telegraf to collect system metrics and send this data to + QuestDB. +--- + +[Telegraf](https://docs.influxdata.com/telegraf/v1.17/) is a client for +collecting metrics from many inputs and has support for sending it on to various +outputs. It is plugin-driven for the collection and delivery of data, so it is +easily configurable and customizable. Telegraf is compiled as a standalone +binary, which means there are no external dependencies required to manage. + +QuestDB supports ingesting from Telegraf over TCP. This page provides examples +for collecting CPU and memory usage metrics using Telegraf and sends these metrics +to a locally-running QuestDB instance for querying and visualization. + +## Prerequisites + +- **QuestDB** must be running and accessible. You can do so from + [Docker](/docs/get-started/docker/), the + [binaries](/docs/get-started/binaries/), or + [Homebrew](/docs/get-started/homebrew/) for macOS users. + +- **Telegraf** can be installed using + [homebrew](https://formulae.brew.sh/formula/telegraf), + [docker](https://hub.docker.com/_/telegraf), or directly as a binary. For more + details, refer to the official Telegraf + [installation instructions](https://docs.influxdata.com/telegraf/v1.17/introduction/installation/). 
+ +## Configuring Telegraf + +As Telegraf is a plugin-driven agent, the configuration file provided when +Telegraf is launched will determine which metrics to collect, if and how +processing of the metrics should be performed, and the destination outputs. + +The default location that Telegraf can pick up configuration files is +`/usr/local/etc/` on macOS and `/etc/telegraf/` on Linux. After installation, +default configuration files are in the following locations: + +- Homebrew install: `/usr/local/etc/telegraf.conf` +- Linux, Deb and RPM: `/etc/telegraf/telegraf.conf` + +Full configuration files for writing over TCP are provided below and can +be placed in these directories and picked up by Telegraf. To view a +comprehensive configuration file with example inputs and outputs, the following +command can generate an example: + +``` +telegraf -sample-config > example.conf +``` + +### Example Inputs + +The examples on this page will use input plugins that read CPU and memory usage +statistics of the host machine and send this to the outputs specified in the +configuration file. The following snippet includes code comments which describe +the inputs in more detail: + +```shell title="Example inputs sending host data to QuestDB" +... +# -- INPUT PLUGINS -- # +[[inputs.cpu]] + # Read metrics about cpu usage + ## Whether to report per-cpu stats or not + percpu = true + ## Whether to report total system cpu stats or not + totalcpu = true + ## If true, collect raw CPU time metrics + collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states + report_active = false + +# Read metrics about memory usage +[[inputs.mem]] + # no customisation +``` + +## Writing to QuestDB over TCP + +QuestDB expects influx line protocol messages over TCP on port `9009`. To change +the default port, see the +[InfluxDB line protocol (TCP)](/docs/reference/configuration#influxdb-line-protocol-tcp) +section of the server configuration page. + +Create a new file named `questdb_tcp.conf` in one of the locations Telegraf can +load configuration files from and paste the following example: + +```shell title="/path/to/telegraf/config/questdb_tcp.conf" +# Configuration for Telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "5s" + hostname = "qdb" + +# -- OUTPUT PLUGINS -- # +[[outputs.socket_writer]] + # Write metrics to a local QuestDB instance over TCP + address = "tcp://127.0.0.1:9009" + +# -- INPUT PLUGINS -- # +[[inputs.cpu]] + percpu = true + totalcpu = true + collect_cpu_time = false + report_active = false +[[inputs.mem]] + # no customisation +``` + +Run Telegraf and specify this config file with TCP writer settings: + +```shell +telegraf --config questdb_tcp.conf +``` + +Telegraf should report the following if configured correctly: + +```bash +2021-01-29T12:11:32Z I! Loaded inputs: cpu mem +2021-01-29T12:11:32Z I! Loaded aggregators: +2021-01-29T12:11:32Z I! Loaded processors: +2021-01-29T12:11:32Z I! Loaded outputs: socket_writer +... +``` + +## Verifying the integration + +1. Navigate to the QuestDB Web Console at + `http://127.0.0.1:9000/`. The Schema Navigator in the + top left should display two new tables: + +- `cpu` generated from `inputs.cpu` +- `mem` generated from `inputs.mem` + +2. 
Type `cpu` in the query editor and click **RUN**.
+
+The `cpu` table will have a column for each metric collected by the Telegraf
+plugin for monitoring CPU:
+
+import Screenshot from "@theme/Screenshot"
+
+
+
+### Graphing system CPU
+
+To create a graph that visualizes CPU usage over time, run the following example
+query:
+
+```
+SELECT
+avg(usage_system) cpu_average,
+max(usage_system) cpu_max,
+timestamp
+FROM cpu SAMPLE BY 1m;
+```
+
+Select the **Chart** tab and set the following values:
+
+- Chart type **line**
+- Labels **timestamp**
+- Series **cpu_average** and **cpu_max**
+
+
diff --git a/docs/troubleshooting/faq.md b/docs/troubleshooting/faq.md
new file mode 100644
index 00000000..91b9cdd8
--- /dev/null
+++ b/docs/troubleshooting/faq.md
@@ -0,0 +1,124 @@
+---
+title: FAQ
+description: FAQ for QuestDB troubleshooting.
+---
+
+The following document contains common hardware and software configuration
+issues encountered when running QuestDB, as well as solutions to them. If you
+cannot find the answer to your question, please join our
+[Slack Community]({@slackUrl@}) and post your questions there.
+
+## Where do I find the log and how do I filter the log messages?
+
+Log files are stored in the `log` folder under the
+[root_directory](/docs/concept/root-directory-structure/). The log has several
+levels to assist filtering.
+
+Check the [log](/docs/troubleshooting/log/) page for the available log levels.
+
+## How do I delete a row?
+
+See our guide on [modifying data](/docs/guides/modifying-data/).
+
+## How do I convert a `STRING` column to a `SYMBOL` or vice versa?
+
+The SQL `UPDATE` keyword can be used to change the data type of a column. The
+same approach can also be used to increase the
+[capacity of a `SYMBOL` column](/docs/concept/symbol/#symbol-columns) that is
+undersized.
+
+The steps are as follows:
+
+1. Add a new column to the table and define the desired data type.
+1. Stop data ingestion and increase
+   [SQL query timeout](/docs/reference/configuration/#cairo-engine/),
+   `query.timeout.sec`, as `UPDATE` may take a while to complete. Depending on
+   the size of the column, we recommend increasing the value significantly: the
+   default is 60 seconds and it may be reasonable to increase it to one hour.
+   Restart the instance after changing the configuration to activate the
+   change.
+1. Use `UPDATE` to copy the existing column content to the new column. Now, the
+   column has the correct content with the new data type.
+1. Delete the old column.
+1. Rename the new column accordingly. For example, to change `old_col` from
+   `STRING` to `SYMBOL` for table `my_table`:
+
+```questdb-sql
+ALTER TABLE my_table ADD COLUMN new_col SYMBOL;
+UPDATE my_table SET new_col = old_col;
+ALTER TABLE my_table DROP COLUMN old_col;
+ALTER TABLE my_table RENAME COLUMN new_col TO old_col;
+```
+
+## Why do I get `table busy` error messages when inserting data over PostgreSQL wire protocol?
+
+You may get `table busy [reason=insert]` or similar errors when running `INSERT`
+statements concurrently on the same table. This means that the table is locked
+by inserts issued from another SQL connection or other client protocols for data
+import, like InfluxDB Line Protocol over TCP or CSV over HTTP.
+
+To avoid this error, we recommend using [WAL](/docs/concept/write-ahead-log/)
+tables to allow concurrent ingestion across all interfaces.
+
+## Why do I see `could not open read-write` messages when creating a table or inserting rows?
+ +Log messages may appear like the following: + +``` +2022-02-01T13:40:11.336011Z I i.q.c.l.t.LineTcpMeasurementScheduler could not create table [tableName=cpu, ex=could not open read-write +... +io.questdb.cairo.CairoException: [24] could not open read-only [file=/root/.questdb/db/cpu/service.k] +``` + +The machine may have insufficient limits for the maximum number of open files. +Try checking the `ulimit` value on your machine. Refer to +[capacity planning](/docs/deployment/capacity-planning/#maximum-open-files) page +for more details. + +## Why do I see `errno=12` mmap messages in the server logs? + +Log messages may appear like the following: + +``` +2022-02-01T13:40:10.636014Z E i.q.c.l.t.LineTcpConnectionContext [8655] could not process line data [table=test_table, msg=could not mmap [size=248, offset=0, fd=1766, memUsed=314809894008, fileLen=8192], errno=12] +``` + +The machine may have insufficient limits of memory map areas a process may have. +Try checking and increasing the `vm.max_map_count` value on your machine. Refer +to +[capacity planning](/docs/deployment/capacity-planning/#max-virtual-memory-areas-limit) +page for more details. + +## Why do I see `async command/event queue buffer overflow` messages when dropping partitions? + +It could be the case that there are a lot of partitions to be dropped by the +DROP PARTITION [statement](/docs/reference/sql/alter-table-drop-partition) +you're trying to run, so the internal queue used by the server cannot fit them. +Try to increase `cairo.writer.command.queue.slot.size` value. Its default value +is `2K`, i.e. 2KB, so you may need to set it to a larger value, e.g. `32K`. + +## How do I avoid duplicate rows with identical fields? + +We have an open +[feature request to optionally de-duplicate rows](https://github.com/questdb/roadmap/issues/3) +inserted with identical fields. Until then, you need to +[modify the data](/docs/guides/modifying-data/) after it's inserted and use a +`GROUP BY` query to identify duplicates. + +## Can I query by time? + +Yes! When using the `WHERE` statement to define the time range for a query, the +[`IN`](/docs/reference/sql/where/#time-range-with-modifier) keyword allows +modifying the range and interval of the search. The range can be tuned to a +second resolution. + +For example, the following query search for daily records between 9:15 to 16:00 +inclusively from Jan 1 2000 for 365 days. The original timestamp, +2000-01-01T09:15, is extended for 405 minutes to cover the range. This range is +repeated every day for 365 times: + +```questdb-sql +SELECT timestamp, col1 +FROM 'table1' +WHERE timestamp IN '2000-01-01T09:15;405m;1d;365'; +``` diff --git a/docs/troubleshooting/log.md b/docs/troubleshooting/log.md new file mode 100644 index 00000000..0bd21059 --- /dev/null +++ b/docs/troubleshooting/log.md @@ -0,0 +1,119 @@ +--- +title: Log +description: Details about QuestDB log +--- + +QuestDB log provides database information. This page presents log level, their +details, and where to configure and find the log. 
+ +## Log location + +### QuestDB open source + +QuestDB creates the following file structure in its +[root_directory](/docs/concept/root-directory-structure/): + +```filestructure +questdb +├── conf +├── db +├── log +├── public +└── snapshot (optional) +``` + +Log files are stored in the `log` folder: + +```filestructure +├── log +│   ├── stdout-2020-04-15T11-59-59.txt +│   └── stdout-2020-04-12T13-31-22.txt +``` + + + +## Log levels + +QuestDB log provides the following types of log information: + +| Type | Marker | Details | Default | +| -------- | ------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | +| Advisory | A | Startup information such as hosts, listening ports, etc. Rarely used after startup | Enabled | +| Critical | C | Internal database errors. Serious issues. Things that should not happen in general operation. | Enabled | +| Error | E | An error, usually (but not always) caused by a user action such as inserting a `symbol` into a `timestamp` column. For context on how this error happened, check for Info-level messages logged before the error. | Enabled | +| Info | I | Logs for activities. Info-level messages often provide context for an error if one is logged later. | Enabled | +| Debug | D | Finer details on what is happening. Useful to debug issues. | Disabled | + +For more information, see the +[source code](https://github.com/questdb/questdb/blob/master/core/src/main/java/io/questdb/log/LogLevel.java) +on GH. + +### Log examples + +The below is some examples of log messages by type. + +Advisory: + +``` +2023-02-24T14:59:45.076113Z A server-main Config: +2023-02-24T14:59:45.076130Z A server-main - http.enabled : true +2023-02-24T14:59:45.076144Z A server-main - tcp.enabled : true +2023-02-24T14:59:45.076159Z A server-main - pg.enabled : true +``` + +Critical: + +``` + +2022-08-08T11:15:13.040767Z C i.q.c.p.WriterPool could not open [table=`sys.text_import_log`, thread=1, ex=could not open read-write [file=/opt/homebrew/var/questdb/db/sys.text_import_log/_todo_], errno=13] +... +2022-08-23T07:55:15.490045Z C i.q.c.h.p.JsonQueryProcessorState [77] internal error [q=`REINDEX TABLE 'weather' COLUMN timestamp LOCK EXCLUSIVE;`, ex= +... +2022-11-17T10:28:00.464140Z C i.q.c.h.p.JsonQueryProcessorState [7] Uh-oh. Error! +``` + +Error: + +``` +2023-02-24T14:59:45.059012Z I i.q.c.t.t.InputFormatConfiguration loading input format config [resource=/text_loader.json] +... +2023-03-20T08:38:17.076744Z E i.q.c.l.u.AbstractLineProtoUdpReceiver could not set receive buffer size [fd=140, size=8388608, errno=55] +... 
+2022-08-03T11:13:41.947760Z E server-main [errno=48] could not bind socket [who=http-server, bindTo=0.0.0.0:9000] +``` + +Info: + +``` +2020-04-15T16:42:32.879970Z I i.q.c.TableReader new transaction [txn=2, transientRowCount=1, fixedRowCount=1, maxTimestamp=1585755801000000, attempts=0] +2020-04-15T16:42:32.880051Z I i.q.g.FunctionParser call to_timestamp('2020-05-01:15:43:21','yyyy-MM-dd:HH:mm:ss') -> to_timestamp(Ss) +2020-04-15T16:42:32.880657Z I i.q.c.p.WriterPool >> [table=`table_1`, thread=12] +2020-04-15T16:42:32.881330Z I i.q.c.AppendMemory truncated and closed [fd=32] +2020-04-15T16:42:32.881448Z I i.q.c.AppendMemory open /usr/local/var/questdb/db/table_1/2020-05/timestamp.d [fd=32, pageSize=16777216] +2020-04-15T16:42:32.881708Z I i.q.c.AppendMemory truncated and closed [fd=33] +``` + +Debug: + +``` +2023-03-31T11:47:05.723715Z D i.q.g.FunctionParser call cast(investmentMill,INT) -> cast(Li) +2023-03-31T11:47:05.723729Z D i.q.g.FunctionParser call rnd_symbol(4,4,4,2) -> rnd_symbol(iiii) +2023-03-31T11:47:05.723749Z D i.q.g.FunctionParser call investmentMill * 950399 -> *(LL) +2023-03-31T11:47:05.723794Z D i.q.g.FunctionParser call cast(1672531200000000L,TIMESTAMP) -> cast(Ln) +2023-03-31T11:47:05.723803Z D i.q.g.FunctionParser call cast(1672531200000000L,TIMESTAMP) + investmentMill * 950399 -> +(NL) +``` + +## Log configuration + +QuestDB logging can be configured globally to `DEBUG` via either providing +the java option `-Debug` or setting the environment variable `QDB_DEBUG=true`. See [reference manual](/docs/reference/configuration/#logging) for details on logging configuration. diff --git a/docs/troubleshooting/os-error-codes.md b/docs/troubleshooting/os-error-codes.md new file mode 100644 index 00000000..6f9a53c8 --- /dev/null +++ b/docs/troubleshooting/os-error-codes.md @@ -0,0 +1,322 @@ +--- +title: List of OS error codes +description: + List of OS error codes that may be reported by QuestDB running on Linux and + Windows. +--- + +The following document contains a partial list of Operating System (OS) error +codes that can be reported when running QuestDB and brief descriptions for them. + +## Where to find error codes + +QuestDB includes OS error codes into the `[]` part of the exception +message written to the error logs: + +``` +io.questdb.cairo.CairoException: [24] could not open read-only [file=/root/.questdb/db/cpu/service.k] +``` + +The above message reports error code 24 which is "Too many open files" on Linux. + +Some error log messages may also include `errno=` key/value pair: + +``` +2022-02-01T13:40:10.636014Z E i.q.c.l.t.LineTcpConnectionContext [8655] could not process line data [table=test_table, msg=could not mmap [size=248, offset=0, fd=1766, memUsed=314809894008, fileLen=8192], errno=12] +``` + +The above message reports error code 12 which is "Out of memory" on Linux. + +## Linux error codes + +| Error number | Error name | Description | +| ------------ | --------------- | ------------------------------------------------ | +| 1 | EPERM | Operation not permitted. | +| 2 | ENOENT | No such file or directory. | +| 3 | ESRCH | No such process. | +| 4 | EINTR | Interrupted system call. | +| 5 | EIO | I/O error. | +| 6 | ENXIO | No such device or address. | +| 7 | E2BIG | Argument list too long. | +| 8 | ENOEXEC | Exec format error. | +| 9 | EBADF | Bad file number. | +| 10 | ECHILD | No child processes. | +| 11 | EAGAIN | Try again. | +| 12 | ENOMEM | Out of memory. | +| 13 | EACCES | Permission denied. | +| 14 | EFAULT | Bad address. 
| +| 15 | ENOTBLK | Block device required. | +| 16 | EBUSY | Device or resource busy. | +| 17 | EEXIST | File exists. | +| 18 | EXDEV | Cross-device link. | +| 19 | ENODEV | No such device. | +| 20 | ENOTDIR | Not a directory. | +| 21 | EISDIR | Is a directory. | +| 22 | EINVAL | Invalid argument. | +| 23 | ENFILE | File table overflow. | +| 24 | EMFILE | Too many open files. | +| 25 | ENOTTY | Not a typewriter. | +| 26 | ETXTBSY | Text file busy. | +| 27 | EFBIG | File too large. | +| 28 | ENOSPC | No space left on device. | +| 29 | ESPIPE | Illegal seek. | +| 30 | EROFS | Read-only file system. | +| 31 | EMLINK | Too many links. | +| 32 | EPIPE | Broken pipe. | +| 33 | EDOM | Math argument out of domain of func. | +| 34 | ERANGE | Math result not representable. | +| 35 | EDEADLK | Resource deadlock would occur. | +| 36 | ENAMETOOLONG | File name too long. | +| 37 | ENOLCK | No record locks available. | +| 38 | ENOSYS | Function not implemented. | +| 39 | ENOTEMPTY | Directory not empty. | +| 40 | ELOOP | Too many symbolic links encountered. | +| 42 | ENOMSG | No message of desired type. | +| 43 | EIDRM | Identifier removed. | +| 44 | ECHRNG | Channel number out of range. | +| 45 | EL2NSYNC | Level 2 not synchronized. | +| 46 | EL3HLT | Level 3 halted. | +| 47 | EL3RST | Level 3 reset. | +| 48 | ELNRNG | Link number out of range. | +| 49 | EUNATCH | Protocol driver not attached. | +| 50 | ENOCSI | No CSI structure available. | +| 51 | EL2HLT | Level 2 halted. | +| 52 | EBADE | Invalid exchange. | +| 53 | EBADR | Invalid request descriptor. | +| 54 | EXFULL | Exchange full. | +| 55 | ENOANO | No anode. | +| 56 | EBADRQC | Invalid request code. | +| 57 | EBADSLT | Invalid slot. | +| 59 | EBFONT | Bad font file format. | +| 60 | ENOSTR | Device not a stream. | +| 61 | ENODATA | No data available. | +| 62 | ETIME | Timer expired. | +| 63 | ENOSR | Out of streams resources. | +| 64 | ENONET | Machine is not on the network. | +| 65 | ENOPKG | Package not installed. | +| 66 | EREMOTE | Object is remote. | +| 67 | ENOLINK | Link has been severed. | +| 68 | EADV | Advertise error. | +| 69 | ESRMNT | Srmount error. | +| 70 | ECOMM | Communication error on send. | +| 71 | EPROTO | Protocol error. | +| 72 | EMULTIHOP | Multihop attempted. | +| 73 | EDOTDOT | RFS specific error. | +| 74 | EBADMSG | Not a data message. | +| 75 | EOVERFLOW | Value too large for defined data type. | +| 76 | ENOTUNIQ | Name not unique on network. | +| 77 | EBADFD | File descriptor in bad state. | +| 78 | EREMCHG | Remote address changed. | +| 79 | ELIBACC | Can not access a needed shared library. | +| 80 | ELIBBAD | Accessing a corrupted shared library. | +| 81 | ELIBSCN | .lib section in a.out corrupted. | +| 82 | ELIBMAX | Attempting to link in too many shared libraries. | +| 83 | ELIBEXEC | Cannot exec a shared library directly. | +| 84 | EILSEQ | Illegal byte sequence. | +| 85 | ERESTART | Interrupted system call should be restarted. | +| 86 | ESTRPIPE | Streams pipe error. | +| 87 | EUSERS | Too many users. | +| 88 | ENOTSOCK | Socket operation on non-socket. | +| 89 | EDESTADDRREQ | Destination address required. | +| 90 | EMSGSIZE | Message too long. | +| 91 | EPROTOTYPE | Protocol wrong type for socket. | +| 92 | ENOPROTOOPT | Protocol not available. | +| 93 | EPROTONOSUPPORT | Protocol not supported. | +| 94 | ESOCKTNOSUPPORT | Socket type not supported. | +| 95 | EOPNOTSUPP | Operation not supported on transport endpoint. | +| 96 | EPFNOSUPPORT | Protocol family not supported. 
| +| 97 | EAFNOSUPPORT | Address family not supported by protocol. | +| 98 | EADDRINUSE | Address already in use. | +| 99 | EADDRNOTAVAIL | Cannot assign requested address. | +| 100 | ENETDOWN | Network is down. | +| 101 | ENETUNREACH | Network is unreachable. | +| 102 | ENETRESET | Network dropped connection because of reset. | +| 103 | ECONNABORTED | Software caused connection abort. | +| 104 | ECONNRESET | Connection reset by peer. | +| 105 | ENOBUFS | No buffer space available. | +| 106 | EISCONN | Transport endpoint is already connected. | +| 107 | ENOTCONN | Transport endpoint is not connected. | +| 108 | ESHUTDOWN | Cannot send after transport endpoint shutdown. | +| 109 | ETOOMANYREFS | Too many references: cannot splice. | +| 110 | ETIMEDOUT | Connection timed out. | +| 111 | ECONNREFUSED | Connection refused. | +| 112 | EHOSTDOWN | Host is down. | +| 113 | EHOSTUNREACH | No route to host. | +| 114 | EALREADY | Operation already in progress. | +| 115 | EINPROGRESS | Operation now in progress. | +| 116 | ESTALE | Stale NFS file handle. | +| 117 | EUCLEAN | Structure needs cleaning. | +| 118 | ENOTNAM | Not a XENIX named type file. | +| 119 | ENAVAIL | No XENIX semaphores available. | +| 120 | EISNAM | Is a named type file. | +| 121 | EREMOTEIO | Remote I/O error. | +| 122 | EDQUOT | Quota exceeded. | +| 123 | ENOMEDIUM | No medium found. | +| 124 | EMEDIUMTYPE | Wrong medium type. | +| 125 | ECANCELED | Operation Canceled. | +| 126 | ENOKEY | Required key not available. | +| 127 | EKEYEXPIRED | Key has expired. | +| 128 | EKEYREVOKED | Key has been revoked. | +| 129 | EKEYREJECTED | Key was rejected by service. | +| 130 | EOWNERDEAD | Owner died. | +| 131 | ENOTRECOVERABLE | State not recoverable. | + +## Windows error codes + +A complete list of Windows error codes may be found [here](https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes). + +| Error number | Error name | Description | +| ------------ | ----------------------------- | -------------------------------------------------------------------------------------------- | +| 1 | ERROR_INVALID_FUNCTION | Incorrect function. | +| 2 | ERROR_FILE_NOT_FOUND | The system cannot find the file specified. | +| 3 | ERROR_PATH_NOT_FOUND | The system cannot find the path specified. | +| 4 | ERROR_TOO_MANY_OPEN_FILES | The system cannot open the file. | +| 5 | ERROR_ACCESS_DENIED | Access is denied. | +| 6 | ERROR_INVALID_HANDLE | The handle is invalid. | +| 7 | ERROR_ARENA_TRASHED | The storage control blocks were destroyed. | +| 8 | ERROR_NOT_ENOUGH_MEMORY | Not enough memory is available to process this command. | +| 9 | ERROR_INVALID_BLOCK | The storage control block address is invalid. | +| 10 | ERROR_BAD_ENVIRONMENT | The environment is incorrect. | +| 11 | ERROR_BAD_FORMAT | An attempt was made to load a program with an incorrect format. | +| 12 | ERROR_INVALID_ACCESS | The access code is invalid. | +| 13 | ERROR_INVALID_DATA | The data is invalid. | +| 14 | ERROR_OUTOFMEMORY | Not enough storage is available to complete this operation. | +| 15 | ERROR_INVALID_DRIVE | The system cannot find the drive specified. | +| 16 | ERROR_CURRENT_DIRECTORY | The directory cannot be removed. | +| 17 | ERROR_NOT_SAME_DEVICE | The system cannot move the file to a different disk drive. | +| 18 | ERROR_NO_MORE_FILES | There are no more files. | +| 19 | ERROR_WRITE_PROTECT | The media is write protected. | +| 20 | ERROR_BAD_UNIT | The system cannot find the device specified. | +| 21 | ERROR_NOT_READY | The device is not ready. 
|
+| 22 | ERROR_BAD_COMMAND | The device does not recognize the command. |
+| 23 | ERROR_CRC | Data error (cyclic redundancy check). |
+| 24 | ERROR_BAD_LENGTH | The program issued a command but the command length is incorrect. |
+| 25 | ERROR_SEEK | The drive cannot locate a specific area or track on the disk. |
+| 26 | ERROR_NOT_DOS_DISK | The specified disk or diskette cannot be accessed. |
+| 27 | ERROR_SECTOR_NOT_FOUND | The drive cannot find the sector requested. |
+| 28 | ERROR_OUT_OF_PAPER | The printer is out of paper. |
+| 29 | ERROR_WRITE_FAULT | The system cannot write to the specified device. |
+| 30 | ERROR_READ_FAULT | The system cannot read from the specified device. |
+| 31 | ERROR_GEN_FAILURE | A device attached to the system is not functioning. |
+| 32 | ERROR_SHARING_VIOLATION | The process cannot access the file because it is being used by another process. |
+| 33 | ERROR_LOCK_VIOLATION | The process cannot access the file because another process has locked a portion of the file. |
+| 34 | ERROR_WRONG_DISK | The wrong diskette is in the drive. Insert %2 (Volume Serial Number: %3) into drive %1. |
+| 36 | ERROR_SHARING_BUFFER_EXCEEDED | Too many files opened for sharing. |
+| 38 | ERROR_HANDLE_EOF | Reached the end of the file. |
+| 39 | ERROR_HANDLE_DISK_FULL | The disk is full. |
+| 87 | ERROR_INVALID_PARAMETER | The parameter is incorrect. |
+| 112 | ERROR_DISK_FULL | The disk is full. |
+| 123 | ERROR_INVALID_NAME | The file name, directory name, or volume label syntax is incorrect. |
+| 1450 | ERROR_NO_SYSTEM_RESOURCES | Insufficient system resources exist to complete the requested service. |
+
+## MacOS error codes
+
+| Error number | Error name | Description |
+| ------------ | ----------------- | ------------------------------------------------ |
+| 0 | Base | Undefined error: 0 |
+| 1 | EPERM | Operation not permitted |
+| 2 | ENOENT | No such file or directory |
+| 3 | ESRCH | No such process |
+| 4 | EINTR | Interrupted system call |
+| 5 | EIO | Input/output error |
+| 6 | ENXIO | Device not configured |
+| 7 | E2BIG | Argument list too long |
+| 8 | ENOEXEC | Exec format error |
+| 9 | EBADF | Bad file descriptor |
+| 10 | ECHILD | No child processes |
+| 11 | EDEADLK | Resource deadlock avoided |
+| 12 | ENOMEM | Cannot allocate memory |
+| 13 | EACCES | Permission denied |
+| 14 | EFAULT | Bad address |
+| 15 | ENOTBLK | Block device required |
+| 16 | EBUSY | Device busy |
+| 17 | EEXIST | File exists |
+| 18 | EXDEV | Cross-device link |
+| 19 | ENODEV | Operation not supported by device |
+| 20 | ENOTDIR | Not a directory |
+| 21 | EISDIR | Is a directory |
+| 22 | EINVAL | Invalid argument |
+| 23 | ENFILE | Too many open files in system |
+| 24 | EMFILE | Too many open files |
+| 25 | ENOTTY | Inappropriate ioctl for device |
+| 26 | ETXTBSY | Text file busy |
+| 27 | EFBIG | File too large |
+| 28 | ENOSPC | No space left on device |
+| 29 | ESPIPE | Illegal seek |
+| 30 | EROFS | Read-only file system |
+| 31 | EMLINK | Too many links |
+| 32 | EPIPE | Broken pipe |
+| 33 | EDOM | Numerical argument out of domain |
+| 34 | ERANGE | Result too large |
+| 35 | EAGAIN | Resource temporarily unavailable |
+| 36 | EINPROGRESS | Operation now in progress |
+| 37 | EALREADY | Operation already in progress |
+| 38 | ENOTSOCK | Socket operation on non-socket |
+| 39 | EDESTADDRREQ | Destination address required |
+| 40 | EMSGSIZE | Message too long |
+| 41 | EPROTOTYPE | Protocol wrong type for socket |
+| 42 | ENOPROTOOPT | Protocol not available |
+| 43 | EPROTONOSUPPORT | Protocol not supported |
+| 44 | ESOCKTNOSUPPORT | Socket type not supported |
+| 45 | ENOTSUP | Operation not supported |
+| 46 | EPFNOSUPPORT | Protocol family not supported |
+| 47 | EAFNOSUPPORT | Address family not supported by protocol family |
+| 48 | EADDRINUSE | Address already in use |
+| 49 | EADDRNOTAVAIL | Can’t assign requested address |
+| 50 | ENETDOWN | Network is down |
+| 51 | ENETUNREACH | Network is unreachable |
+| 52 | ENETRESET | Network dropped connection on reset |
+| 53 | ECONNABORTED | Software caused connection abort |
+| 54 | ECONNRESET | Connection reset by peer |
+| 55 | ENOBUFS | No buffer space available |
+| 56 | EISCONN | Socket is already connected |
+| 57 | ENOTCONN | Socket is not connected |
+| 58 | ESHUTDOWN | Can’t send after socket shutdown |
+| 59 | ETOOMANYREFS | Too many references: can’t splice |
+| 60 | ETIMEDOUT | Operation timed out |
+| 61 | ECONNREFUSED | Connection refused |
+| 62 | ELOOP | Too many levels of symbolic links |
+| 63 | ENAMETOOLONG | File name too long |
+| 64 | EHOSTDOWN | Host is down |
+| 65 | EHOSTUNREACH | No route to host |
+| 66 | ENOTEMPTY | Directory not empty |
+| 67 | EPROCLIM | Too many processes |
+| 68 | EUSERS | Too many users |
+| 69 | EDQUOT | Disc quota exceeded |
+| 70 | ESTALE | Stale NFS file handle |
+| 71 | EREMOTE | Too many levels of remote in path |
+| 72 | EBADRPC | RPC struct is bad |
+| 73 | ERPCMISMATCH | RPC version wrong |
+| 74 | EPROGUNAVAIL | RPC prog. not avail |
+| 75 | EPROGMISMATCH | Program version wrong |
+| 76 | EPROCUNAVAIL | Bad procedure for program |
+| 77 | ENOLCK | No locks available |
+| 78 | ENOSYS | Function not implemented |
+| 79 | EFTYPE | Inappropriate file type or format |
+| 80 | EAUTH | Authentication error |
+| 81 | ENEEDAUTH | Need authenticator |
+| 82 | EPWROFF | Device power is off |
+| 83 | EDEVERR | Device error |
+| 84 | EOVERFLOW | Value too large to be stored in data type |
+| 85 | EBADEXEC | Bad executable |
+| 86 | EBADARCH | Bad CPU type in executable |
+| 87 | ESHLIBVERS | Shared library version mismatch |
+| 88 | EBADMACHO | Malformed Macho file |
+| 89 | ECANCELED | Operation canceled |
+| 90 | EIDRM | Identifier removed |
+| 91 | ENOMSG | No message of desired type |
+| 92 | EILSEQ | Illegal byte sequence |
+| 93 | ENOATTR | Attribute not found |
+| 94 | EBADMSG | Bad message |
+| 95 | EMULTIHOP | EMULTIHOP (Reserved) |
+| 96 | ENODATA | No message available on STREAM |
+| 97 | ENOLINK | ENOLINK (Reserved) |
+| 98 | ENOSR | No STREAM resources |
+| 99 | ENOSTR | Not a STREAM |
+| 100 | EPROTO | Protocol error |
+| 101 | ETIME | STREAM ioctl timeout |
+| 102 | EOPNOTSUPP | Operation not supported on socket |
+| 103 | ENOPOLICY | Policy not found |
+| 104 | ENOTRECOVERABLE | State not recoverable |
+| 105 | EOWNERDEAD | Previous owner died |
+| 106 | EQFULL | Interface output queue is full |
diff --git a/sidebars.js b/sidebars.js
new file mode 100644
index 00000000..9af223ef
--- /dev/null
+++ b/sidebars.js
@@ -0,0 +1,301 @@
+let guidelines
+
+if (process.env.NODE_ENV === "development") {
+  guidelines = {
+    label: "Guidelines (DEV ONLY)",
+    type: "category",
+    items: [
+      {
+        type: "category",
+        label: "Templates",
+        items: [
+          "__guidelines/template/guide",
+          "__guidelines/template/function",
+          "__guidelines/template/sql",
+        ],
+      },
+      "__guidelines/naming-convention",
+      "__guidelines/content-hierarchy",
+      "__guidelines/lexicon",
+      "__guidelines/markdown",
+      "__guidelines/sql-code-blocks",
+      "__guidelines/influences",
+    ],
+  }
+}
+
+module.exports = {
+  docs: [
+    {
+      id: "introduction",
+      type: "doc",
+    },
+    {
+      label: "Get Started",
+      type: "category",
+      items: [
+        {
+          id: "get-started/docker",
+          type: "doc",
+          customProps: { tag: "Popular" },
+        },
+        "get-started/binaries",
+        "get-started/homebrew",
+        "get-started/first-database",
+        "get-started/learn-more",
+      ],
+    },
+    {
+      label: "Develop",
+      type: "category",
+      items: [
+        "develop/connect",
+        "develop/insert-data",
+        "develop/query-data",
+        "develop/update-data",
+        "develop/web-console",
+      ],
+    },
+    {
+      label: "Guides",
+      type: "category",
+      items: [
+        {
+          id: "guides/importing-data",
+          type: "doc",
+          customProps: { tag: "COPY SQL" },
+        },
+        {
+          id: "guides/importing-data-rest",
+          type: "doc",
+          customProps: { tag: "REST API" },
+        },
+        "guides/influxdb-migration",
+        "guides/modifying-data",
+        "guides/working-with-timestamps-timezones",
+        {
+          label: "More tutorials",
+          type: "link",
+          href: "/blog/tags/tutorial/",
+        },
+      ],
+    },
+    {
+      label: "Deployment",
+      type: "category",
+      items: [
+        "deployment/capacity-planning",
+        "deployment/aws-official-ami",
+        "deployment/kubernetes",
+        "deployment/google-cloud-platform",
+        "deployment/digitalocean",
+      ],
+    },
+    {
+      label: "Operations",
+      type: "category",
+      items: [
+        "operations/design-for-performance",
+        "operations/data-retention",
+        "operations/health-monitoring",
+        "operations/backup",
+        "operations/updating-data",
+      ],
+    },
+    {
+      label: "Third-party Tools",
+      type: "category",
+      items: [
+        {
+          type: "doc",
+          id: "third-party-tools",
+          customProps: {
+            hidden: true, // hidden from sidebar but accessible via link
+          },
+        },
+        "third-party-tools/grafana",
+        {
+          label: "Kafka",
+          type: "category",
+          items: [
+            "third-party-tools/kafka/overview",
+            "third-party-tools/kafka/questdb-kafka",
+            "third-party-tools/kafka/jdbc",
+          ],
+        },
+        "third-party-tools/superset",
+        "third-party-tools/cube",
+        "third-party-tools/pandas",
+        "third-party-tools/telegraf",
+        "third-party-tools/prometheus",
+        "third-party-tools/redpanda",
+        "third-party-tools/flink",
+        "third-party-tools/spark",
+        "third-party-tools/mindsdb",
+        "third-party-tools/sqlalchemy",
+        "third-party-tools/qstudio",
+      ],
+    },
+    {
+      label: "Concepts",
+      type: "category",
+      items: [
+        "concept/write-ahead-log",
+        "concept/storage-model",
+        "concept/designated-timestamp",
+        "concept/deduplication",
+        "concept/sql-extensions",
+        "concept/jit-compiler",
+        "concept/partitions",
+        "concept/symbol",
+        "concept/indexes",
+        "concept/geohashes",
+        "concept/root-directory-structure",
+      ],
+    },
+    {
+      label: "Reference",
+      type: "category",
+      items: [
+        {
+          type: "category",
+          label: "API",
+          items: [
+            "reference/api/rest",
+            "reference/api/postgres",
+            {
+              type: "category",
+              label: "InfluxDB Line Protocol",
+              items: [
+                "reference/api/ilp/overview",
+                "reference/api/ilp/columnset-types",
+                "reference/api/ilp/tcp-receiver",
+                "reference/api/ilp/authenticate",
+              ],
+            },
+            "reference/api/java-embedded",
+          ],
+        },
+        "reference/command-line-options",
+        "reference/configuration",
+        {
+          type: "category",
+          label: "ILP client libraries",
+          items: ["reference/clients/overview", "reference/clients/java_ilp"],
+        },
+        "reference/sql/datatypes",
+        {
+          type: "category",
+          label: "Functions",
+          items: [
+            "reference/function/aggregation",
+            "reference/function/analytic",
+            "reference/function/binary",
+            "reference/function/boolean",
+            "reference/function/conditional",
+            "reference/function/date-time",
+            "reference/function/ipv4",
+            "reference/function/meta",
+            "reference/function/numeric",
+            "reference/function/random-value-generator",
+            "reference/function/row-generator",
+            "reference/function/spatial",
+            "reference/function/text",
+            "reference/function/timestamp-generator",
+            "reference/function/timestamp",
+            "reference/function/touch",
+            "reference/function/trigonometric",
+          ],
+        },
+        "reference/logging",
+        {
+          type: "category",
+          label: "Operators",
+          items: [
+            "reference/operators/bitwise",
+            "reference/operators/comparison",
+            "reference/operators/ipv4",
+            "reference/operators/pattern-matching",
+            "reference/operators/spatial",
+          ],
+        },
+        {
+          type: "category",
+          label: "SQL",
+          items: [
+            "concept/sql-execution-order",
+            {
+              type: "category",
+              label: "ALTER TABLE",
+              items: [
+                "reference/sql/alter-table-add-column",
+                "reference/sql/alter-table-attach-partition",
+                "reference/sql/alter-table-detach-partition",
+                "reference/sql/alter-table-drop-column",
+                "reference/sql/alter-table-drop-partition",
+                "reference/sql/alter-table-rename-column",
+                "reference/sql/alter-table-resume-wal",
+                "reference/sql/alter-table-set-param",
+                "reference/sql/alter-table-set-type",
+                "reference/sql/alter-table-squash-partitions",
+                "reference/sql/alter-table-enable-deduplication",
+                "reference/sql/alter-table-disable-deduplication",
+              ],
+            },
+            {
+              type: "category",
+              label: "ALTER COLUMN",
+              items: [
+                "reference/sql/alter-table-alter-column-add-index",
+                "reference/sql/alter-table-alter-column-cache",
+                "reference/sql/alter-table-alter-column-drop-index",
+              ],
+            },
+            "reference/sql/backup",
+            "reference/sql/case",
+            "reference/sql/cast",
+            "reference/sql/copy",
+            "reference/sql/create-table",
+            "reference/sql/distinct",
+            "reference/sql/explain",
+            "reference/sql/drop",
+            "reference/sql/fill",
+            "reference/sql/group-by",
+            "reference/sql/insert",
+            "reference/sql/join",
+            "reference/sql/latest-on",
+            "reference/sql/limit",
+            "reference/sql/order-by",
+            "reference/sql/reindex",
+            "reference/sql/rename",
+            "reference/sql/sample-by",
+            "reference/sql/select",
+            "reference/sql/show",
+            "reference/sql/snapshot",
+            "reference/sql/truncate",
+            "reference/sql/union-except-intersect",
+            "reference/sql/update",
+            "reference/sql/vacuum-table",
+            "reference/sql/where",
+            "reference/sql/with",
+          ],
+        },
+      ],
+    },
+    {
+      label: "Troubleshooting",
+      type: "category",
+      items: [
+        "troubleshooting/faq",
+        "troubleshooting/log",
+        "troubleshooting/os-error-codes",
+      ],
+    },
+    {
+      label: "Tutorials",
+      type: "link",
+      href: "/blog/tags/tutorial",
+    },
+  ].filter(Boolean),
+}