From 288a8b1790dbc64057ad51196b4736ffadb7c8f5 Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Thu, 11 Jul 2024 07:05:44 -0700 Subject: [PATCH 01/23] removes freebsd references (#12) --- concept/root-directory-structure.md | 2 +- quick-start.mdx | 45 +++++++++++++++-------------- reference/command-line-options.md | 10 +++---- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/concept/root-directory-structure.md b/concept/root-directory-structure.md index 1be0099a..d97c5c6f 100644 --- a/concept/root-directory-structure.md +++ b/concept/root-directory-structure.md @@ -23,7 +23,7 @@ By default, QuestDB's root directory will be the following: diff --git a/quick-start.mdx b/quick-start.mdx index 22f2843b..48a2b1aa 100644 --- a/quick-start.mdx +++ b/quick-start.mdx @@ -5,29 +5,27 @@ description: homebrew, our binaries, and more. --- -import Screenshot from "@theme/Screenshot" +import Screenshot from "@theme/Screenshot"; -import Button from "@theme/Button" +import Button from "@theme/Button"; -import InterpolateReleaseData from "../src/components/InterpolateReleaseData" +import InterpolateReleaseData from "../src/components/InterpolateReleaseData"; -import NoJrePrerequisite from -"./quick-start-utils/\_no-jre-prerequisites.partial.mdx" +import NoJrePrerequisite from "./quick-start-utils/_no-jre-prerequisites.partial.mdx"; -import CodeBlock from "@theme/CodeBlock" +import CodeBlock from "@theme/CodeBlock"; -import { TabsPlatforms } from "../src/modules/TabsPlatforms" +import { TabsPlatforms } from "../src/modules/TabsPlatforms"; -import RunWindows from "./quick-start-utils/\_run-windows.partial.mdx" +import RunWindows from "./quick-start-utils/_run-windows.partial.mdx"; -import OptionsNotWindows from -"./quick-start-utils/\_options-not-windows.partial.mdx" +import OptionsNotWindows from "./quick-start-utils/_options-not-windows.partial.mdx"; -import OptionsWindows from "./quick-start-utils/\_options-windows.partial.mdx" +import OptionsWindows from "./quick-start-utils/_options-windows.partial.mdx"; -import Tabs from "@theme/Tabs" +import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem" +import TabItem from "@theme/TabItem"; This guide will get your first QuestDB instance running. @@ -55,10 +53,13 @@ platform on the [official documentation](https://docs.docker.com/get-docker/). Once Docker is installed, you will need to pull QuestDB's image from [Docker Hub](https://hub.docker.com/r/questdb/questdb) and create a container: - ( - -{`docker run \\ -p 9000:9000 -p 9009:9009 -p 8812:8812 -p 9003:9003 \\ questdb/questdb:${release.name}`} - )} /> + ( + + {`docker run \\ -p 9000:9000 -p 9009:9009 -p 8812:8812 -p 9003:9003 \\ questdb/questdb:${release.name}`} + + )} +/> For deeper instructions, see the [Docker deployment guide](/docs/deployment/docker/). @@ -80,9 +81,11 @@ On macOS, the location of the root directory of QuestDB and ### Binaries -export const platforms = [ { label: "Linux", value: "linux" }, { label: -"Windows", value: "windows" }, { label: "FreeBSD", value: "bsd" }, { label: -"Any (no JVM)", value: "noJre" }, ] +export const platforms = [ + { label: "Linux", value: "linux" }, + { label: "Windows", value: "windows" }, + { label: "Any (no JVM)", value: "noJre" }, +]; Download and run QuestDB via binaries. 
@@ -149,7 +152,7 @@ Select your platform of choice: ## Run QuestDB - diff --git a/reference/command-line-options.md b/reference/command-line-options.md index b6f7f8db..f6322f8d 100644 --- a/reference/command-line-options.md +++ b/reference/command-line-options.md @@ -19,7 +19,7 @@ import TabItem from "@theme/TabItem" @@ -86,7 +86,7 @@ questdb.exe [start|stop|status|install|remove] \ @@ -127,7 +127,7 @@ will be the following: @@ -179,7 +179,7 @@ C:\Windows\System32\qdbroot @@ -223,7 +223,7 @@ questdb.exe stop From 0b5318ecdbefedf442837491a861b7f09a88dace Mon Sep 17 00:00:00 2001 From: Nick Woolmer <29717167+nwoolmer@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:56:47 +0100 Subject: [PATCH 02/23] Add note on compression choices for ZFS (#13) * Add note on compression choices for ZFS * Apply suggestions from code review Thanks Andrei! Co-authored-by: Andrei Pechkurov <37772591+puzpuzpuz@users.noreply.github.com> * Update guides/compression-zfs.md --------- Co-authored-by: Andrei Pechkurov <37772591+puzpuzpuz@users.noreply.github.com> Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com> --- guides/compression-zfs.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/guides/compression-zfs.md b/guides/compression-zfs.md index 3af0b80a..5633ae82 100644 --- a/guides/compression-zfs.md +++ b/guides/compression-zfs.md @@ -26,3 +26,19 @@ The exact commands depend on which version of ZFS you are running. Use the to customize your ZFS to meet your requirements. Once created, ZFS provides system-level compression. + +## Compression choices, LZ4 and zstd + +ZFS offers a number of compression choices when constructing the volume. + +[LZ4](https://github.com/lz4/lz4) offers a good balance of compression ratio versus increased CPU usage, and slowed performance. For general usage, we recommend using LZ4. + +[zstd](https://github.com/facebook/zstd) is another strong option. This will provide higher compression ratios, but take longer to decompress. We recommend this when storage size is an absolute priority, or for embedded-style deployments (i.e. Raspberry Pi, home IoT setups). + +As always, it is best to benchmark your choice to ensure that the performance matches your use case. + +:::note + +We regularly run tests using *ZFS* with *LZ4* compression. If you encounter issues with other compression algorithms, please let us know. + +::: From e53854de321a3df58dbe2ae71ceefe2e2f250e88 Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Mon, 15 Jul 2024 07:59:17 -0700 Subject: [PATCH 03/23] copy jaro pr (#8) https://github.com/questdb/questdb.io/pull/2352/ --- configuration-utils/_http.config.json | 8 ++ reference/api/ilp/overview.md | 128 +++++--------------------- reference/api/rest.md | 61 ++++++++---- 3 files changed, 70 insertions(+), 127 deletions(-) diff --git a/configuration-utils/_http.config.json b/configuration-utils/_http.config.json index 0006f8f1..1498cf81 100644 --- a/configuration-utils/_http.config.json +++ b/configuration-utils/_http.config.json @@ -7,6 +7,14 @@ "default": "0.0.0.0:9000", "description": "IP address and port of HTTP server. A value of `0` means that the HTTP server will bind to all network interfaces. You can specify IP address of any individual network interface on your system." }, + "http.user": { + "default": "N/A", + "description": "Username for HTTP Basic Authentication in QuestDB Open Source. 
QuestDB Enterprise Edition supports more advanced authentication mechanisms: RBAC" + }, + "http.password": { + "default": "N/A", + "description": "Password for HTTP Basic Authentication in QuestDB Open Source. QuestDB Enterprise Edition supports more advanced authentication mechanisms: RBAC" + }, "http.net.connection.limit": { "default": "64", "description": "The maximum number permitted for simultaneous TCP connection to the HTTP server. The rationale of the value is to control server memory consumption." diff --git a/reference/api/ilp/overview.md b/reference/api/ilp/overview.md index 08960a72..d2372a3a 100644 --- a/reference/api/ilp/overview.md +++ b/reference/api/ilp/overview.md @@ -21,7 +21,7 @@ The InfluxDB Line Protocol is for **data ingestion only**. For building queries, see the [Query & SQL Overview](/docs/reference/sql/overview/). -Each ILP client library also has its own language-specific documentation set. +Each ILP client library also has its own language-specific documentation set. This supporting document thus provides an overview to aid in client selection and initial configuration: @@ -157,112 +157,6 @@ Exposing these values may expose your database to bad actors. - Defaults to 10 seconds. - Not all errors are retriable. -#### TCP Parameters - -- **username**: Username for TCP authentication. -- **token** (SENSITIVE): TCP Authentication `d` parameter. - - **token_x** (SENSITIVE): TCP Authentication `x` parameter. - - Used in C/C++/Rust/Python clients. - - **token_y** (SENSITIVE): TCP Authentication `y` parameter. - - Used in C/C++/Rust/Python clients. -- **auth_timeout**: Timeout for TCP authentication with QuestDB server, in - milliseconds. - - Default 15 seconds. - -##### TCP token authentication setup - -Create `d`, `x` & `y` tokens for client usage. - -##### Prerequisites - -- `jose`: C-language implementation of Javascript Object Signing and Encryption. - Generates tokens. -- `jq`: For pretty JSON output. - - - - - -```bash -brew install jose -brew install jq -``` - - - - - -```bash -yum install jose -yum install jq -``` - - - - - -```bash -apt install jose -apt install jq -``` - - - - - -##### Server configuration - -Next, create an authentication file. - -Only elliptic curve (P-256) are supported (key type `ec-p-256-sha256`): - -```bash -testUser1 ec-p-256-sha256 fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac -# [key/user id] [key type] {keyX keyY} -``` - -Generate an authentication file using the `jose` utility: - -```bash -jose jwk gen -i '{"alg":"ES256", "kid": "testUser1"}' -o /var/lib/questdb/conf/full_auth.json - -KID=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.kid') -X=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.x') -Y=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.y') - -echo "$KID ec-p-256-sha256 $X $Y" | tee /var/lib/questdb/conf/auth.txt -``` - -Once created, reference it in the server [configuration](/docs/configuration/): - -```ini title='/path/to/server.conf' -line.tcp.auth.db.path=conf/auth.txt -``` - -##### Client keys - -For the server configuration above, the corresponding JSON Web Key must be -stored on the clients' side. - -When sending a fully-composed JWK, it will have the following keys: - -```json -{ - "kty": "EC", - "d": "5UjEMuA0Pj5pjK8a-fa24dyIf-Es5mYny3oE_Wmus48", - "crv": "P-256", - "kid": "testUser1", - "x": "fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU", - "y": "Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac" -} -``` - -The `d`, `x` and `y` parameters generate the public key. 
- -For example, the Python client would be configured as outlined in the -[Python docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#tcp-auth). - #### Auto-flushing behavior - **auto_flush**: Enable or disable automatic flushing (`on`/`off`). @@ -319,6 +213,26 @@ _Optional._ - Defaults to `127`. - Related to length limits for filenames on the user's host OS. +#### TCP Parameters + +:::note + +These parameters are only useful when using ILP over TCP with authentication +enabled. Most users should use ILP over HTTP. These parameters are listed for +completeness and for users who have specific requirements. + +::: + +- **username**: Username for TCP authentication. +- **token** (SENSITIVE): TCP Authentication `d` parameter. + - **token_x** (SENSITIVE): TCP Authentication `x` parameter. + - Used in C/C++/Rust/Python clients. + - **token_y** (SENSITIVE): TCP Authentication `y` parameter. + - Used in C/C++/Rust/Python clients. +- **auth_timeout**: Timeout for TCP authentication with QuestDB server, in + milliseconds. + - Default 15 seconds. + ## Transactionality caveat As of writing, the HTTP endpoint does not provide full transactionality in all diff --git a/reference/api/rest.md b/reference/api/rest.md index 03881774..659bb8e8 100644 --- a/reference/api/rest.md +++ b/reference/api/rest.md @@ -6,14 +6,14 @@ description: REST API reference documentation. import Tabs from "@theme/Tabs" import TabItem from "@theme/TabItem" -import GoImpPartial from "../../partials/_go.imp.insert.partial.mdx" -import CurlImpPartial from "../../partials/_curl.imp.insert.partial.mdx" -import NodejsImpPartial from "../../partials/_nodejs.imp.insert.partial.mdx" -import PythonImpPartial from "../../partials/_python.imp.insert.partial.mdx" -import CurlExecPartial from "../../partials/_curl.exec.insert.partial.mdx" -import GoExecPartial from "../../partials/_go.exec.insert.partial.mdx" -import NodejsExecPartial from "../../partials/_nodejs.exec.insert.partial.mdx" -import PythonExecPartial from "../../partials/_python.exec.insert.partial.mdx" +import GoImpPartial from "../../partials/\_go.imp.insert.partial.mdx" +import CurlImpPartial from "../../partials/\_curl.imp.insert.partial.mdx" +import NodejsImpPartial from "../../partials/\_nodejs.imp.insert.partial.mdx" +import PythonImpPartial from "../../partials/\_python.imp.insert.partial.mdx" +import CurlExecPartial from "../../partials/\_curl.exec.insert.partial.mdx" +import GoExecPartial from "../../partials/\_go.exec.insert.partial.mdx" +import NodejsExecPartial from "../../partials/\_nodejs.exec.insert.partial.mdx" +import PythonExecPartial from "../../partials/\_python.exec.insert.partial.mdx" The QuestDB REST API is based on standard HTTP features and is understood by off-the-shelf HTTP clients. It provides a simple way to interact with QuestDB @@ -48,8 +48,8 @@ For details such as content type, query parameters and more, refer to the Let's assume you want to upload the following data via the `/imp` entrypoint: @@ -83,10 +83,10 @@ query argument to obtain a response in JSON. You can also specify the schema explicitly. See the second example in Python for these features. @@ -117,10 +117,10 @@ parameterized queries that are necessary to avoid SQL injection issues. Prefer inserts. @@ -585,8 +585,9 @@ returned in a tabular form to be saved and reused as opposed to JSON. 
| Parameter | Required | Description |
| :-------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `limit` | No | Paging opp parameter. For example, `limit=10,20` will return row numbers 10 through to 20 inclusive and `limit=20` will return first 20 rows, which is equivalent to `limit=0,20`. `limit=-20` will return the last 20 rows. |
 | `query` | Yes | URL encoded query text. It can be multi-line. |
+| `limit` | No | Paging parameter. For example, `limit=10,20` will return row numbers 10 through to 20 inclusive and `limit=20` will return the first 20 rows, which is equivalent to `limit=0,20`. `limit=-20` will return the last 20 rows. |
+| `nm` | No | `true` or `false`. Skips the metadata section of the response when set to `true`. |
 
 The parameters must be URL encoded.
 
@@ -653,7 +654,7 @@ A HTTP status code of `400` is returned with the following response body:
 
 :::note
 
 Role-based Access Control (RBAC) is available in
-[QuestDB Enterprise](/enterprise/).
+[QuestDB Enterprise](/enterprise/). See the next section for authentication in QuestDB open source.
 
 :::
@@ -684,3 +685,23 @@ curl -G --data-urlencode "query=SELECT 1;" \
 
 Refer to the [user management](/docs/operations/rbac/#user-management) page to
 learn more on how to generate a REST API token.
+
+## Authentication in QuestDB open source
+
+QuestDB open source supports HTTP basic authentication. To enable it, set the configuration
+options `http.user` and `http.password` in `server.conf`.
+
+The following example shows how to enable HTTP basic authentication in QuestDB open source:
+
+```shell
+http.user=my_user
+http.password=my_password
+```
+
+Then this `curl` command executes a `SELECT 1;` query:
+
+```bash
+curl -G --data-urlencode "query=SELECT 1;" \
+  -u "my_user:my_password" \
+  http://localhost:9000/exec
+```

From 3f2bdc2cd15ac8cefc7912fa60a00269c13e1a8b Mon Sep 17 00:00:00 2001
From: Steven Sklar
Date: Mon, 15 Jul 2024 19:09:36 +0200
Subject: [PATCH 04/23] Update influxdb-migration.md (#17)

---
 guides/influxdb-migration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/guides/influxdb-migration.md b/guides/influxdb-migration.md
index 93ad4e0a..401a6d56 100644
--- a/guides/influxdb-migration.md
+++ b/guides/influxdb-migration.md
@@ -29,7 +29,7 @@ While InfluxDB clients are supported, we recommend using the QuestDB clients.
 
 If QuestDB is not up and running, see the [quick start](/docs/quick-start/).
 
-Tables are created during insert.
+Tables are automatically created during insert. There is no need for an upfront schema! 
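+
+For example, here is a minimal sketch using the QuestDB C++ client. The
+`trades` table and its columns are illustrative, and every official client
+behaves the same way: the first flush creates the table on the fly.
+
+```cpp
+#include <questdb/ingress/line_sender.hpp>
+
+int main()
+{
+    auto sender = questdb::ingress::line_sender::from_conf(
+        "http::addr=localhost:9000;");
+
+    // No CREATE TABLE beforehand: the "trades" table, its symbol columns
+    // and its double columns are created automatically on first insert.
+    questdb::ingress::line_sender_buffer buffer;
+    buffer
+        .table("trades")
+        .symbol("symbol", "ETH-USD")
+        .symbol("side", "sell")
+        .column("price", 2615.54)
+        .column("amount", 0.00044)
+        .at(questdb::ingress::timestamp_nanos::now());
+
+    sender.flush(buffer);
+    return 0;
+}
+```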
From fbb678b5272332a52ea620a6de0b34f4d21d4142 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 16 Jul 2024 18:13:46 +0100 Subject: [PATCH 05/23] Documenting the `json_extract` SQL function (#15) * json_extract * tweaks * yet some more tweaks * more tweaks * wording correction * Improvements and PR feedback * minor tweaks * tweak * PR feedback * Tweaks as per Nick's feedback * Different JSONPath link --- reference/function/json.md | 224 +++++++++++++++++++++++++++++++++++++ sidebars.js | 1 + 2 files changed, 225 insertions(+) create mode 100644 reference/function/json.md diff --git a/reference/function/json.md b/reference/function/json.md new file mode 100644 index 00000000..3f49fc1b --- /dev/null +++ b/reference/function/json.md @@ -0,0 +1,224 @@ +--- +title: JSON functions +sidebar_label: JSON +description: JSON functions reference documentation. +--- + +This page describes functions to handle JSON data. + +## json_extract + +Extracts fields from a JSON document stored in VARCHAR columns. + +`json_extract(doc, json_path)::datatype` + +Here [`datatype`](#type-conversions) can be any type supported by QuestDB. + +### Usage + +This is an example query that extracts fields from a `trade_details` `VARCHAR` column +containing JSON documents: + +```questdb-sql title="json_extract example" +SELECT + json_extract(trade_details, '$.quantity')::long quantity, + json_extract(trade_details, '$.price')::double price, + json_extract(trade_details, '$.executions[0].timestamp')::timestamp first_ex_ts +FROM + trades +WHERE + json_extract(trade_details, '$.exchange') == 'NASDAQ' +``` + +| quantity | price | first_ex_ts | +| -------- | ------ | --------------------------- | +| 1000 | 145.09 | 2023-07-12T10:00:00.000000Z | + +The query above: + * Filters rows, keeping only trades made on NASDAQ. + * Obtains the price and quantity fields. + * Extracts the timestamp of the first execution for the trade. + +The above query can run against this inserted JSON document: + +```json +{ + "trade_id": "123456", + "instrument_id": "AAPL", + "trade_type": "buy", + "quantity": 1000, + "price": 145.09, + "vwap": { + "start_timestamp": "2023-07-12T09:30:00Z", + "end_timestamp": "2023-07-12T16:00:00Z", + "executed_volume": 1000, + "executed_value": 145000 + }, + "execution_time": "2023-07-12T15:59:59Z", + "exchange": "NASDAQ", + "strategy": "VWAP", + "executions": [ + { + "timestamp": "2023-07-12T10:00:00Z", + "price": 144.50, + "quantity": 200 + }, + { + "timestamp": "2023-07-12T15:15:00Z", + "price": 145.50, + "quantity": 250 + } + ] +} +``` + +### JSON path syntax + +We support a subset of the [JSONPath](https://en.wikipedia.org/wiki/JSONPath) syntax. +* `$` denotes the root of the document. Its use is optional and provided for + compatibility with the JSON path standard and other databases. Note that + all search operations always start from the root. +* `.field` accesses a JSON object key. +* `[n]` accesses a JSON array index (where `n` is a number). + +The path cannot be constructed dynamically, such as via string concatenation. + +### Type conversions + +You can specify any +[datatype supported by QuestDB](/docs/reference/sql/datatypes) as the return +type. Here are some examples: + +```questdb-sql title="Extracting JSON to various datatypes" +-- Extracts the string, or the raw JSON token for non-string JSON types. 
+json_extract('{"name": "Lisa"}', '$.name')::varchar -- Lisa +json_extract('[0.25, 0.5, 1.0]', '$.name')::varchar -- [0.25, 0.5, 1.0] + +-- Extracts the number as a long, returning NULL if the field is not a number +-- or is out of range. Floating point numbers are truncated. +-- Numbers can be enclosed in JSON strings. +json_extract('{"qty": 10000}', '$.qty')::long -- 10000 +json_extract('{ "qty": '9999999' }', '$.qty')::long -- 9999999 +json_extract('1.75', '$')::long -- 1 + +-- Extracts the number as a double, returning NULL if the field is not a number +-- or is out of range. +json_extract('{"price": 100.25}', '$.price')::double -- 100.25 +json_extract('10000', '$')::double -- 10000.0 +json_extract('{"price": null}', '$.price')::double -- NULL + +-- JSON `true` is extracted as the boolean `true`. Everything else is `false`. +json_extract('[true]', '$[0]')::boolean -- true +json_extract('["true"]', '$[0]')::boolean -- false + +-- SHORT numbers can't represent NULL values, so return 0 instead. +json_extract('{"qty": 10000}', '$.qty')::short -- 10000 +json_extract('{"qty": null}', '$.qty')::short -- 0 +json_extract('{"qty": 1000000}', '$.qty')::short -- 0 (out of range) +``` + +Calling `json_extract` without immediately casting to a datatype will always +return a `VARCHAR`. + +```questdb-sql title="Extracting a path as VARCHAR" +json_extract('{"name": "Lisa"}', '$.name') -- Lisa +``` + +As a quirk, for PostgreSQL compatibility, suffix-casting to `::float` in QuestDB +produces a `DOUBLE` datatype. If you need a `FLOAT`, use the `cast` function +instead as so: + +```questdb-sql title="Extract a float from a JSON array" +SELECT + cast(json_extract('[0.25, 0.5, 1.0]', '$[0]') as float) a +FROM + long_sequence(1) +``` + +#### Table of type conversions + +The following table summarizes the type conversions. +* **Horizontal**: the source JSON field type +* **Vertical**: the target datatype + +| | null | boolean | string | number | array & object | +|---------------|-------|------------|--------|----------|----------------| +| **BOOLEAN** | false | ✓ | false | false | false | +| **SHORT** | 0 | 0 or 1 | ✓ (i) | ✓ (i) | 0 | +| **INT** | NULL | 0 or 1 | ✓ (i) | ✓ (i) | NULL | +| **LONG** | NULL | 0 or 1 | ✓ (i) | ✓ (i) | NULL | +| **FLOAT** | NULL | 0.0 or 1.0 | ✓ (ii) | ✓ (ii) | NULL | +| **DOUBLE** | NULL | 0.0 or 1.0 | ✓ (ii) | ✓ (ii) | NULL | +| **VARCHAR** | NULL | ✓ (iii) | ✓ | ✓ (iii) | ✓ (iii) | +| **DATE** | NULL | NULL | ✓ (iv) | ✓ (iv) | NULL | +| **TIMESTAMP** | NULL | NULL | ✓ (v) | ✓ (vi) | NULL | +| **IPV4** | NULL | NULL | ✓ | ✓ | NULL | + +All other types are supported through the `VARCHAR` type. In other words, +`json_extract(..)::UUID` is effectively equivalent to +`json_extract(..)::VARCHAR::UUID`. + +* **✓**: Supported conversion. +* **(i)**: Floating point numbers are truncated. Out of range numbers evaluate to `NULL` or `0` (for `SHORT`). +* **(ii)**: Out of range numbers evaluate to `NULL`. Non-IEEE754 numbers are rounded to the nearest representable value. The `FLOAT` type can incur further precision loss. +* **(iii)**: JSON booleans, numbers, arrays and objects are returned as their raw JSON string representation. +* **(iv)**: Dates are expected in ISO8601 format as strings. If the date is not in this format, the result is `NULL`. Numeric values are parsed as milliseconds since the Unix epoch. Floating point precision is ignored. +* **(v)**: Timestamps are expected in ISO8601 format as strings. If the timestamp is not in this format, the result is `NULL`. 
+* **(vi)**: Numeric values are parsed as microseconds since the Unix epoch. Floating point precision is ignored. + + + +### Error handling + +Any errors will return `NULL` data when extracting to any datatype except +boolean and short, where these will return `false` and `0` respectively. + +```questdb-sql title="Error examples" +-- If either the document or the path is NULL, the result is NULL. +json_extract(NULL, NULL) -- NULL + +-- If the document is malformed, the result is NULL. +json_extract('{"name": "Lisa"', '$.name') -- NULL +-- ^___ note the missing closing brace +``` + +### Performance + +Extracting fields from JSON documents provides flexibility, but comes at a +performance cost compared to storing fields directly in columns. + +As a ballpark estimate, you should expect extracting a field from a JSON +document to be around one order of magnitude slower than extracting the same +data directly from a dedicated database column. As such, we recommend using JSON +only when the requirement of handling multiple data fields flexibly outweighs +the performance penalty. + +### Migrating JSON fields to columns + +JSON offers an opportunity to capture a wide range of details early +in a solution's design process. During early stages, it may not be clear which +fields will provide the most value. Once known, you can then modify the database +schema to extract these fields into first-class columns. + +Extending the previous example, we can add `price` and `quantity` columns to +the pre-existing `trades` table as so: + +```questdb-sql title="Extracting JSON to a new column" +-- Add two columns for caching. +ALTER TABLE trades ADD COLUMN quantity long; +ALTER TABLE trades ADD COLUMN price double; + +-- Populate the columns from the existing JSON document. 
+UPDATE trades SET quantity = json_extract(trade_details, '$.quantity')::long; +UPDATE trades SET price = json_extract(trade_details, '$.price')::double; +``` + +Alternatively, you can insert the extracted fields into a separate table: + +```questdb-sql title="Extracting JSON fields to a separate table" +INSERT INTO trades_summary SELECT + json_extract(trade_details, '$.quantity')::long as quantity, + json_extract(trade_details, '$.price')::double as price, + timestamp +FROM trades; +``` diff --git a/sidebars.js b/sidebars.js index c5885c4e..74092a9a 100644 --- a/sidebars.js +++ b/sidebars.js @@ -289,6 +289,7 @@ module.exports = { "reference/function/row-generator", "reference/function/spatial", "reference/function/text", + "reference/function/json", "reference/function/timestamp-generator", "reference/function/timestamp", "reference/function/touch", From 3577909f4663262d4c0f6ba22b753ebb38abf1bc Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Tue, 16 Jul 2024 10:53:17 -0700 Subject: [PATCH 06/23] second nick pr (#10) https://github.com/questdb/questdb.io/pull/2355 --- reference/function/date-time.md | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/reference/function/date-time.md b/reference/function/date-time.md index d54b2a01..e37c02f6 100644 --- a/reference/function/date-time.md +++ b/reference/function/date-time.md @@ -879,6 +879,54 @@ SELECT | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | :-------------------------- | | 2016-02-10T16:18:22.862145Z | 2016-02-10T16:18:22.862000Z | 2016-02-10T16:18:22.000000Z | 2016-02-10T16:18:00.000000Z | 2016-02-10T16:00:00.000000Z | 2016-02-10T00:00:00.000000Z | 2016-02-01T00:00:00.000000Z | 2016-01-01T00:00:00.000000Z | +#### timestamp_floor with offset + +When timestamps are floored by `timestamp_floor(unit, timestamp)`, they are based on a root timestamp of `0`. This means that some floorings with a stride can be confusing, since they are based on a modulo from `1970-01-01`. + +For example: + +```questdb-sql +SELECT timestamp_floor('5d', '2018-01-01') +``` + +Gives: + +| timestamp_floor | +| --------------------------- | +| 2017-12-30T00:00:00.000000Z | + +If you wish to calculate bins from an offset other than `1970-01-01`, you can add a third parameter: `timestamp_floor(unit, timestamp, offset)`. The offset acts as a baseline from which +further values are calculated. + +```questdb-sql +SELECT timestamp_floor('5d', '2018-01-01', '2018-01-01') +``` + +Gives: + +| timestamp_floor | +| --------------------------- | +| 2018-01-01T00:00:00.000000Z | + +You can test this on the QuestDB Demo: + +```questdb-sql +SELECT timestamp_floor('5d', pickup_datetime, '2018') t, count +FROM trips +WHERE pickup_datetime in '2018' +ORDER BY 1; +``` + +Gives: + +| t | count | +| --------------------------- | ------- | +| 2018-01-01T00:00:00.000000Z | 1226531 | +| 2018-01-06T00:00:00.000000Z | 1468302 | +| 2018-01-11T00:00:00.000000Z | 1604016 | +| 2018-01-16T00:00:00.000000Z | 1677303 | +| ... | ... 
| + ## timestamp_shuffle `timestamp_shuffle(timestamp_1, timestamp_2)` - generates a random timestamp From aeca50a3f8b59c2880da9015d99fef628a45148c Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:02:06 -0700 Subject: [PATCH 07/23] Adds Embeddable to 3-party tools (#19) * add doc, edited with a question * polish as per the Emeddable team & light edit * add to sidebar * also add missing doc while here * add to overview * adds flavour to overview, merge ready --- sidebars.js | 36 ++++--------- third-party-tools/embeddable.md | 94 +++++++++++++++++++++++++++++++++ third-party-tools/overview.md | 30 ++++++----- 3 files changed, 119 insertions(+), 41 deletions(-) create mode 100644 third-party-tools/embeddable.md diff --git a/sidebars.js b/sidebars.js index 74092a9a..24a37d2b 100644 --- a/sidebars.js +++ b/sidebars.js @@ -1,23 +1,3 @@ -let guidelines - -if (process.env.NODE_ENV === "development") { - guidelines = { - label: "Guidelines (DEV ONLY)", - type: "category", - items: [ - { - type: "category", - label: "Templates", - items: [ - "__guidelines/template/guide", - "__guidelines/template/function", - "__guidelines/template/sql", - ], - }, - ], - } -} - module.exports = { docs: [ { @@ -302,17 +282,18 @@ module.exports = { type: "category", label: "Operators", items: [ - "reference/operators/precedence", - "reference/operators/misc", - "reference/operators/numeric", "reference/operators/bitwise", - "reference/operators/logical", - "reference/operators/ipv4", + "reference/operators/comparison", "reference/operators/date-time", + "reference/operators/ipv4", + "reference/operators/logical", + "reference/operators/misc", + "reference/operators/numeric", + "reference/operators/precedence", "reference/operators/spatial", - "reference/operators/text" + "reference/operators/text", ], - } + }, ], }, { @@ -437,6 +418,7 @@ module.exports = { id: "third-party-tools/overview", }, "third-party-tools/cube", + "third-party-tools/embeddable", "third-party-tools/flink", "third-party-tools/grafana", { diff --git a/third-party-tools/embeddable.md b/third-party-tools/embeddable.md new file mode 100644 index 00000000..1903dccf --- /dev/null +++ b/third-party-tools/embeddable.md @@ -0,0 +1,94 @@ +--- +title: Embeddable +description: + Embeddable is a developer toolkit for building fast, interactive, fully-custom + analytics experiences directly into your app. +--- + +Embeddable is a developer toolkit for building fast, interactive customer-facing +analytics. It works well with a high performance time-series database like +QuestDB. + +In [Embeddable](https://embeddable.com/) define +[Data Models](https://trevorio.notion.site/Data-modeling-35637bbbc01046a1bc47715456bfa1d8) +and +[Components](https://trevorio.notion.site/Using-components-761f52ac2d0743b488371088a1024e49) +in code stored in your own code repository, then use the **SDK** to make these +available for your team in the powerful Embeddable **no-code builder.** The end +result is the ability to deliver fast, interactive **customer-facing analytics** +directly into your product. + +Built-in **row-level security** means that every user only ever sees **exactly** +the data they’re allowed to see. And two levels of fully-configurable +**caching** mean you can deliver fast, realtime analytics at scale. + +## Prerequisites + +- A running QuestDB instance + - Not running yet? See the [quick start](/docs/quick-start/) + +## Getting started with Embeddable + +Add a database connection using Embeddable API. 
This connection connects to your +QuestDB instance. To add a connection, use the following API call: + +```javascript +// for security reasons, this must *never* be called from your client-side +fetch("https://api.embeddable.com/api/v1/connections", { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + Authorization: `Bearer ${apiKey}` /* keep your API Key secure */, + }, + body: JSON.stringify({ + name: "my-questdb-db", + type: "questdb", + credentials: { + host: "my.questdb.host", + port: "8812", + user: "admin", + password: "quest", + }, + }), +}) +``` + +In response you will receive: + +```json +Status 201 { errorMessage: null } +``` + +The above represents a `CREATE` action, but all `CRUD` operations are available. + +The `apiKey` can be found by clicking “**Publish**” on one of your Embeddable +dashboards. + +The `name` is a unique name to identify this **connection**. + +- By default your **data models** will look for a **connection** called + “default”, but you can supply models with different + [**data_source**](https://cube.dev/docs/reference/data-model/cube#data_source) + names to support connecting different **data models** to different + **connections**. To do so , specify the + **[data_source](https://cube.dev/docs/reference/data-model/cube#data_source)** + name in the model. + +The `type` tells Embeddable which driver to use, in this case `questdb`. You can +also connect multiple datasources like `postgres`, `bigquery` or `mongodb`. For +a full list, see +[the documentaiton](https://cube.dev/docs/product/configuration/data-sources). + +The `credentials` is a javascript object containing the credentials expected by +the driver: + +- Credentials are securely encrypted and only used to retrieve exactly the data + described in the data models. +- Emeddable strongly encourages you to create a **read-only** database user for + each connection. Embeddable will only ever read from your database, not write. + +To support connecting to different databases for prod, qa, test, etc, or to +support different databases for different customers, you can assign each +**connection** to an **environment**. For more information, see +[Environments API](https://www.notion.so/Environments-API-497169036b5148b38f7936aa75e62949?pvs=21). diff --git a/third-party-tools/overview.md b/third-party-tools/overview.md index e8cda6d4..4a69e8e8 100644 --- a/third-party-tools/overview.md +++ b/third-party-tools/overview.md @@ -15,25 +15,25 @@ more. Interact with and visualize your QuestDB data using these powerful visualization platforms: -- **[Grafana](/third-party-tools/grafana.md):** Create stunning dashboards and +- **[Grafana](/third-party-tools/grafana/):** Create stunning dashboards and interactive graphs for time-series data visualization. -- [Superset](/third-party-tools/superset.md): Build interactive visualizations - and perform ad-hoc data analysis. +- [Superset](/third-party-tools/superset/): Build interactive visualizations and + perform ad-hoc data analysis. ## Data Ingestion and Streaming Ingest, store, and process high-throughput and real-time data streams with these integrations: -- **[Apache Kafka](/third-party-tools/kafka/overview.md):** A distributed event +- **[Apache Kafka](/third-party-tools/kafka/overview/):** A distributed event streaming platform for high-throughput data pipelines. 
-- [Telegraf](/third-party-tools/telegraf.md): Collect and report metrics from +- [Telegraf](/third-party-tools/telegraf/): Collect and report metrics from various sources. -- **[Redpanda](/third-party-tools/redpanda.md): A Kafka-compatible streaming +- **[Redpanda](/third-party-tools/redpanda/): A Kafka-compatible streaming platform for simplified data pipelines.** -- [Apache Flink](/third-party-tools/flink.md): Process real-time data streams +- [Apache Flink](/third-party-tools/flink/): Process real-time data streams efficiently. -- [Apache Spark](/third-party-tools/spark.md): Handle complex data processing +- [Apache Spark](/third-party-tools/spark/): Handle complex data processing tasks at scale. ## Analytics and Processing @@ -41,20 +41,22 @@ integrations: Enhance your data analysis and processing capabilities with QuestDB through these tools: -- [Pandas](/third-party-tools/pandas.md): Analyze time-series data in Python - with powerful data structures. -- [MindsDB](/third-party-tools/mindsdb.md): Build machine learning models for +- [Pandas](/third-party-tools/pandas/): Analyze time-series data in Python with + powerful data structures. +- [MindsDB](/third-party-tools/mindsdb/): Build machine learning models for predictive analytics on time-series data. +- [Embeddable](/third-party-tools/embeddable/): Developer toolkit for building + fast, interactive customer-facing analytics. ## Tooling and Interfaces Improve your interactions with QuestDB using these tools and interfaces: -- **[Prometheus](/third-party-tools/prometheus.md):** Efficiently store and +- **[Prometheus](/third-party-tools/prometheus/):** Efficiently store and analyze monitoring metrics. -- [SQLAlchemy](/third-party-tools/sqlalchemy.md): Utilize Python's ORM +- [SQLAlchemy](/third-party-tools/sqlalchemy/): Utilize Python's ORM capabilities for database interactions. -- [qStudio](/third-party-tools/qstudio.md): A free SQL GUI for query execution, +- [qStudio](/third-party-tools/qstudio/): A free SQL GUI for query execution, table browsing, and result charting. Is there an integration you'd like to see that's not listed? Let us know by From 858ff960efe26c27b979a14448d56a1c63269d98 Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:10:23 -0700 Subject: [PATCH 08/23] full overview links --- third-party-tools/overview.md | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/third-party-tools/overview.md b/third-party-tools/overview.md index 4a69e8e8..90f6981c 100644 --- a/third-party-tools/overview.md +++ b/third-party-tools/overview.md @@ -15,25 +15,25 @@ more. Interact with and visualize your QuestDB data using these powerful visualization platforms: -- **[Grafana](/third-party-tools/grafana/):** Create stunning dashboards and - interactive graphs for time-series data visualization. -- [Superset](/third-party-tools/superset/): Build interactive visualizations and - perform ad-hoc data analysis. +- **[Grafana](/docs/third-party-tools/grafana/):** Create stunning dashboards + and interactive graphs for time-series data visualization. +- [Superset](/docs/third-party-tools/superset/): Build interactive + visualizations and perform ad-hoc data analysis. ## Data Ingestion and Streaming Ingest, store, and process high-throughput and real-time data streams with these integrations: -- **[Apache Kafka](/third-party-tools/kafka/overview/):** A distributed event - streaming platform for high-throughput data pipelines. 
-- [Telegraf](/third-party-tools/telegraf/): Collect and report metrics from +- **[Apache Kafka](/docs/third-party-tools/kafka/overview/):** A distributed + event streaming platform for high-throughput data pipelines. +- [Telegraf](/docs/third-party-tools/telegraf/): Collect and report metrics from various sources. -- **[Redpanda](/third-party-tools/redpanda/): A Kafka-compatible streaming +- **[Redpanda](/docs/third-party-tools/redpanda/): A Kafka-compatible streaming platform for simplified data pipelines.** -- [Apache Flink](/third-party-tools/flink/): Process real-time data streams +- [Apache Flink](/docs/third-party-tools/flink/): Process real-time data streams efficiently. -- [Apache Spark](/third-party-tools/spark/): Handle complex data processing +- [Apache Spark](/docs/third-party-tools/spark/): Handle complex data processing tasks at scale. ## Analytics and Processing @@ -41,23 +41,23 @@ integrations: Enhance your data analysis and processing capabilities with QuestDB through these tools: -- [Pandas](/third-party-tools/pandas/): Analyze time-series data in Python with - powerful data structures. -- [MindsDB](/third-party-tools/mindsdb/): Build machine learning models for +- [Pandas](/docs/third-party-tools/pandas/): Analyze time-series data in Python + with powerful data structures. +- [MindsDB](/docs/third-party-tools/mindsdb/): Build machine learning models for predictive analytics on time-series data. -- [Embeddable](/third-party-tools/embeddable/): Developer toolkit for building - fast, interactive customer-facing analytics. +- [Embeddable](/docs/third-party-tools/embeddable/): Developer toolkit for + building fast, interactive customer-facing analytics. ## Tooling and Interfaces Improve your interactions with QuestDB using these tools and interfaces: -- **[Prometheus](/third-party-tools/prometheus/):** Efficiently store and +- **[Prometheus](/docs/third-party-tools/prometheus/):** Efficiently store and analyze monitoring metrics. -- [SQLAlchemy](/third-party-tools/sqlalchemy/): Utilize Python's ORM +- [SQLAlchemy](/docs/third-party-tools/sqlalchemy/): Utilize Python's ORM capabilities for database interactions. -- [qStudio](/third-party-tools/qstudio/): A free SQL GUI for query execution, - table browsing, and result charting. +- [qStudio](/docs/third-party-tools/qstudio/): A free SQL GUI for query + execution, table browsing, and result charting. Is there an integration you'd like to see that's not listed? Let us know by opening an issue on [QuestDB Github]({@githubUrl@}/issues/new/choose). From e5349e340bcb2ce132d8bcb8c6163e71b5c82be1 Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Wed, 17 Jul 2024 12:52:11 -0700 Subject: [PATCH 09/23] Updates intro query to a demoable query (#20) First usage of: https://github.com/questdb/questdb.io/pull/2386. --- introduction.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/introduction.md b/introduction.md index 887d9bf1..1727b44c 100644 --- a/introduction.md +++ b/introduction.md @@ -89,10 +89,17 @@ efficiency and value. 
Writing blazing-fast queries
syntax and creating real-time [Grafana](/docs/third-party-tools/grafana/) is
done via familiar SQL:

-```sql title="Navigate time with SQL"
-SELECT timestamp, sensorName, tempC
-FROM sensors LATEST ON timestamp
-PARTITION BY sensorName;
+```questdb-sql title='Navigate time with SQL' demo
+SELECT
+  timestamp, symbol,
+  first(price) AS open,
+  last(price) AS close,
+  min(price),
+  max(price),
+  sum(amount) AS volume
+FROM trades
+WHERE timestamp > dateadd('d', -1, now())
+SAMPLE BY 15m;
 ```

 Intrigued? The best way to see whether QuestDB is right for you is to try it

From ab9d0bec15b0cfa59bd9550c5d48786e214928d9 Mon Sep 17 00:00:00 2001
From: goodroot <9484709+goodroot@users.noreply.github.com>
Date: Wed, 17 Jul 2024 13:38:53 -0700
Subject: [PATCH 10/23] nix bad typo

---
 introduction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/introduction.md b/introduction.md
index 1727b44c..fc2f7c2d 100644
--- a/introduction.md
+++ b/introduction.md
@@ -13,7 +13,7 @@ import CodeBlock from "@theme/CodeBlock"
 QuestDB is an Apache 2.0 open source columnar database that specializes in time
 series. It offers category-leading ingestion throughput and fast SQL queries
 with operational simplicity. QuestDB reduces operational costs and overcomes
-ingestion bottlenecks, offering greatly simplify overall ingress infrastructure.
+ingestion bottlenecks, offering greatly simplified overall ingress infrastructure.
 
 This introduction provides a brief overview on:
 
From 282bb3bc9a0e768a30b86de23e8cd433ff8650a1 Mon Sep 17 00:00:00 2001
From: Andrei Pechkurov <37772591+puzpuzpuz@users.noreply.github.com>
Date: Thu, 18 Jul 2024 18:05:59 +0300
Subject: [PATCH 11/23] fix: fix Cube quick start page (#22)

---
 third-party-tools/cube.md    | 14 +++++++++++---
 third-party-tools/grafana.md |  2 +-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/third-party-tools/cube.md b/third-party-tools/cube.md
index fe63366a..b67183cc 100644
--- a/third-party-tools/cube.md
+++ b/third-party-tools/cube.md
@@ -1,6 +1,7 @@
 ---
 title: "Cube"
-description: Yaa
+description:
+  Guide for QuestDB and Cube integration.
 ---
 
 Cube is middleware that connects your data sources and your data applications.
@@ -47,6 +48,7 @@ services:
     image: "cubejs/cube:latest"
     ports:
       - "4000:4000"
+    env_file: "cube.env"
     volumes:
       - ".:/cube/conf"
   questdb:
@@ -58,13 +60,13 @@ services:
     - "8812:8812"
 ```
 
-Within your project directory, create an `.env` file.
+Within your project directory, create a `cube.env` file.
 
 These variables will allow Cube to connect to your QuestDB deployment.
 
 Remember: default passwords are dangerous! We recommend altering them.
 
-```shell title=.env
+```bash title=cube.env
 CUBEJS_DB_HOST=questdb
 CUBEJS_DB_PORT=8812
 CUBEJS_DB_NAME=qdb
@@ -73,6 +75,12 @@ CUBEJS_DB_PASS=quest
 CUBEJS_DB_TYPE=questdb
 ```
 
+Create a `model` directory to be used by Cube:
+
+```bash
+mkdir model
+```
+
 Finally, bring it all up with Docker:
 
 ```bash title=shell
diff --git a/third-party-tools/grafana.md b/third-party-tools/grafana.md
index 2003c20b..c0d30d11 100644
--- a/third-party-tools/grafana.md
+++ b/third-party-tools/grafana.md
@@ -2,7 +2,7 @@ title: Grafana
 description:
   Guide for fastest, high performance time-series data visualizations with
-  QuestDB and Grafana
+  QuestDB and Grafana. 
---
 
 import Screenshot from "@theme/Screenshot"
 
From 20a493de090a4a52faf334d25558b7d52840771f Mon Sep 17 00:00:00 2001
From: Evgeny Oshchepkov
Date: Thu, 18 Jul 2024 17:15:16 +0200
Subject: [PATCH 12/23] Update embeddable.md docs (#21)

Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com>
---
 third-party-tools/embeddable.md | 40 +++++++++++----------------------
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/third-party-tools/embeddable.md b/third-party-tools/embeddable.md
index 1903dccf..a058a762 100644
--- a/third-party-tools/embeddable.md
+++ b/third-party-tools/embeddable.md
@@ -65,30 +65,16 @@ The above represents a `CREATE` action, but all `CRUD` operations are available.
 The `apiKey` can be found by clicking “**Publish**” on one of your Embeddable
 dashboards.
 
-The `name` is a unique name to identify this **connection**.
-
-- By default your **data models** will look for a **connection** called
-  “default”, but you can supply models with different
-  [**data_source**](https://cube.dev/docs/reference/data-model/cube#data_source)
-  names to support connecting different **data models** to different
-  **connections**. To do so , specify the
-  **[data_source](https://cube.dev/docs/reference/data-model/cube#data_source)**
-  name in the model.
-
-The `type` tells Embeddable which driver to use, in this case `questdb`. You can
-also connect multiple datasources like `postgres`, `bigquery` or `mongodb`. For
-a full list, see
-[the documentaiton](https://cube.dev/docs/product/configuration/data-sources).
-
-The `credentials` is a javascript object containing the credentials expected by
-the driver:
-
-- Credentials are securely encrypted and only used to retrieve exactly the data
-  described in the data models.
-- Emeddable strongly encourages you to create a **read-only** database user for
-  each connection. Embeddable will only ever read from your database, not write.
-
-To support connecting to different databases for prod, qa, test, etc, or to
-support different databases for different customers, you can assign each
-**connection** to an **environment**. For more information, see
-[Environments API](https://www.notion.so/Environments-API-497169036b5148b38f7936aa75e62949?pvs=21).
+The `name` is a unique name to identify this connection.
+
+- By default your data models will look for a connection called “default”, but you can supply your models with different `data_source` names to support connecting different data models to different connections (simply specify the data_source name in the model)
+
+The `type` tells Embeddable which driver to use
+
+- Here you'll want to use `questdb`, but you can connect multiple different datasources to one Embeddable workspace so you may use others such as: `postgres`, `bigquery`, `mongodb`, etc.
+
+The `credentials` is a javascript object containing the necessary credentials expected by the driver
+- These are securely encrypted and only used to retrieve exactly the data you have described in your data models.
+- Embeddable strongly encourages you to create a read-only database user for each connection (Embeddable will only ever read from your database, not write).
+
+In order to support connecting to different databases for prod, qa, test, etc (or to support different databases for different customers), you can assign each connection to an environment (see [Environments API](https://www.notion.so/Environments-API-497169036b5148b38f7936aa75e62949?pvs=21)). 
From a45303b90b2fa4535eb5664e83bc1ff5a820d362 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?javier=20ram=C3=ADrez?=
Date: Thu, 18 Jul 2024 23:03:02 +0200
Subject: [PATCH 13/23] Jv/ingestion examples (#14)

Changed examples and some improvements to the client docs for Python, Go,
and Java clients

---------

Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com>
---
 clients/ingest-c-and-cpp.md   | 450 ++++++++++++++++++++++++++++------
 clients/ingest-dotnet.md      | 328 ++++++++++---------------
 clients/ingest-go.md          | 125 ++++++----
 clients/ingest-node.md        | 129 ++++++++--
 clients/ingest-python.md      | 165 +++++++------
 clients/ingest-rust.md        | 196 ++++++++-------
 clients/java_ilp.md           | 177 +++++--------
 ingestion-overview.md         |   7 +-
 reference/api/ilp/overview.md | 394 +++++++++++++++++++++++++------
 9 files changed, 1251 insertions(+), 720 deletions(-)

diff --git a/clients/ingest-c-and-cpp.md b/clients/ingest-c-and-cpp.md
index 09b4d96f..78dfa00b 100644
--- a/clients/ingest-c-and-cpp.md
+++ b/clients/ingest-c-and-cpp.md
@@ -23,38 +23,39 @@ Key features of the QuestDB C & C++ client include:
   health monitoring
 - **Automatic write retries**: Reuse connections and retry after interruptions
 
-This guide aims to help you swiftly set up and begin using the QuestDB C++
-client.
-
-## C++
-
-
-
-Explore the full capabilities of the C++ client via the
-[C++ README](https://github.com/questdb/c-questdb-client/blob/main/doc/CPP.md).

### Requirements

-- Requires a C++ compiler and standard libraries.
+- Requires a C/C++ compiler and standard libraries.
- Assumes QuestDB is running. If it's not, refer to
  [the general quick start](/docs/quick-start/).

### Client Installation

-Clone the GitHub repository and compile the source code:
+You need to add the client as a dependency to your project. Depending on your environment,
+you can do this in different ways. Please check the documentation at the
+[client's repository](https://github.com/questdb/c-questdb-client/blob/main/doc/DEPENDENCY.md).

-```bash
-git clone https://github.com/questdb/c-questdb-client.git
-cd c-questdb-client
-make
-```

-This will compile the client library, which can then be linked to your C++
-projects.

+## C++

-### Connection
+:::note

-The QuestDB C client supports basic connection and authentication
+This section is for the QuestDB C++ client.
+
+For the QuestDB C client, see the section below.
+
+:::
+
+
+
+Explore the full capabilities of the C++ client via the
+[C++ README](https://github.com/questdb/c-questdb-client/blob/main/doc/CPP.md).


## Authentication

+The QuestDB C++ client supports basic connection and authentication
 configurations. Here is an example of how to configure and use the client for
 data ingestion:

```cpp
auto sender = questdb::ingress::line_sender::from_conf(
    "http::addr=localhost:9000;username=admin;password=quest;"
    "auto_flush_rows=100;auto_flush_interval=1000;");
```

+You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable:
+
+```bash
+export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;"
+```
+
+Then you use it like this:
+
+```cpp
+auto sender = questdb::ingress::line_sender::from_env();
+```
+
+When using QuestDB Enterprise, authentication can also be done via REST token.
+Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. 
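+
+As a minimal sketch, a token-based configuration string can look like the
+following. The `token` parameter is assumed here to carry the REST bearer
+token, and the value is a placeholder; check the client repository for the
+exact parameters supported by your version:
+
+```cpp
+// Placeholder token; generate a real one as described in the RBAC docs.
+auto sender = questdb::ingress::line_sender::from_conf(
+    "http::addr=localhost:9000;token=<your_rest_token>;");
+```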
+ + ### Basic data insertion +Basic insertion (no-auth): + ```c -questdb::ingress::line_sender_buffer buffer; -buffer - .table("cpp_cars") - .symbol("id", "d6e5fe92-d19f-482a-a97a-c105f547f721") - .column("x", 30.5) - .at(timestamp_nanos::now()); +// main.cpp +#include + +int main() +{ + auto sender = questdb::ingress::line_sender::from_conf( + "http::addr=localhost:9000;"); + + questdb::ingress::line_sender_buffer buffer; + buffer + .table("trades") + .symbol("symbol","ETH-USD") + .symbol("side","sell") + .column("price", 2615.54) + .column("amount", 0.00044) + .at(questdb::ingress::timestamp_nanos::now()); -// To insert more records, call `buffer.table(..)...` again. + // To insert more records, call `buffer.table(..)...` again. -sender.flush(buffer); + sender.flush(buffer); + return 0; +} ``` -## C +These are the main steps it takes: - +- Use `questdb::ingress::line_sender::from_conf` to get the `sender` object +- Populate a `Buffer` with one or more rows of data +- Send the buffer using `sender.flush()`(`Sender::flush`) -Explore the full capabilities of the C client via the -[C README](https://github.com/questdb/c-questdb-client/blob/main/doc/C.md). +In this case, the designated timestamp will be the one at execution time. -### Requirements +Let's see now an example with timestamps, custom timeout, basic auth, and error control. -- Requires a C compiler and standard libraries. -- Assumes QuestDB is running. If it's not, refer to - [the general quick start](/docs/quick-start/). +```cpp +#include +#include +#include + +int main() +{ + try + { + // Create a sender using HTTP protocol + auto sender = questdb::ingress::line_sender::from_conf( + "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;"); + + // Get the current time as a timestamp + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + auto nanos = std::chrono::duration_cast(duration).count(); + + // Add rows to the buffer of the sender with the same timestamp + questdb::ingress::line_sender_buffer buffer; + buffer + .table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .column("price", 2615.54) + .column("amount", 0.00044) + .at(questdb::ingress::timestamp_nanos(nanos)); + + buffer + .table("trades") + .symbol("symbol", "BTC-USD") + .symbol("side", "sell") + .column("price", 39269.98) + .column("amount", 0.001) + .at(questdb::ingress::timestamp_nanos(nanos)); + + // Transactionality check + if (!buffer.transactional()) { + std::cerr << "Buffer is not transactional" << std::endl; + sender.close(); + return 1; + } + + // Flush the buffer of the sender, sending the data to QuestDB + sender.flush(buffer); + + // Close the connection after all rows ingested + sender.close(); + return 0; + } + catch (const questdb::ingress::line_sender_error& err) + { + std::cerr << "Error running example: " << err.what() << std::endl; + return 1; + } +} +``` -### Client Installation +As you can see, both events now are using the same timestamp. We recommended using the original event timestamps when +ingesting data into QuestDB. Using the current timestamp will hinder the ability to deduplicate rows which is +[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). -Clone the GitHub repository and compile the source code: -```bash -git clone https://github.com/questdb/c-questdb-client.git -cd c-questdb-client -make -``` +## C + +:::note + +This sectioni s for the QuestDB C client. 
+ +Skip to the bottom of this page for information relating to both the C and C++ clients. + +::: + + +Explore the full capabilities of the C client via the +[C README](https://github.com/questdb/c-questdb-client/blob/main/doc/C.md). -This will compile the client library, which can then be linked to your C -projects. ### Connection @@ -124,68 +217,263 @@ data ingestion: line_sender_utf8 conf = QDB_UTF8_LITERAL( "http::addr=localhost:9000;"); -line_sender_error* err = NULL; -line_sender* sender = sender = line_sender_from_conf(&err); +line_sender_error *error = NULL; +line_sender *sender = line_sender_from_conf( + line_sender_utf8, &error); if (!sender) { /* ... handle error ... */ } ``` -### Basic data insertion +You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable: +```bash +export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" +``` + +Then you use it like this: ```c -line_sender_table_name table_name = QDB_TABLE_NAME_LITERAL("c_cars"); -line_sender_column_name id_name = QDB_COLUMN_NAME_LITERAL("id"); -line_sender_column_name x_name = QDB_COLUMN_NAME_LITERAL("x"); +#include +... +line_sender *sender = line_sender_from_env(&error); -line_sender_buffer* buffer = line_sender_buffer_new(); +``` -if (!line_sender_buffer_table(buffer, table_name, &err)) - goto on_error; +### Basic data insertion -line_sender_utf8 id_value = QDB_UTF8_LITERAL( - "d6e5fe92-d19f-482a-a97a-c105f547f721"); -if (!line_sender_buffer_symbol(buffer, id_name, id_value, &err)) - goto on_error; +```c +// line_sender_trades_example.c +#include +#include +#include + +int main() { + // Initialize line sender + line_sender_error *error = NULL; + line_sender *sender = line_sender_from_conf( + QDB_UTF8_LITERAL("http::addr=localhost:9000;username=admin;password=quest;"), &error); + + if (error != NULL) { + size_t len; + const char *msg = line_sender_error_msg(error, &len); + fprintf(stderr, "Failed to create line sender: %.*s\n", (int)len, msg); + line_sender_error_free(error); + return 1; + } + + // Print success message + printf("Line sender created successfully\n"); + + // Initialize line sender buffer + line_sender_buffer *buffer = line_sender_buffer_new(); + if (buffer == NULL) { + fprintf(stderr, "Failed to create line sender buffer\n"); + line_sender_close(sender); + return 1; + } + + // Add data to buffer for ETH-USD trade + if (!line_sender_buffer_table(buffer, QDB_TABLE_NAME_LITERAL("trades"), &error)) goto error; + if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("symbol"), QDB_UTF8_LITERAL("ETH-USD"), &error)) goto error; + if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) goto error; + if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("price"), 2615.54, &error)) goto error; + if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("amount"), 0.00044, &error)) goto error; + if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &error)) goto error; + + + // Flush the buffer to QuestDB + if (!line_sender_flush(sender, buffer, &error)) { + size_t len; + const char *msg = line_sender_error_msg(error, &len); + fprintf(stderr, "Failed to flush data: %.*s\n", (int)len, msg); + line_sender_error_free(error); + line_sender_buffer_free(buffer); + line_sender_close(sender); + return 1; + } + + // Print success message + printf("Data flushed successfully\n"); + + // Free resources + line_sender_buffer_free(buffer); + line_sender_close(sender); + + return 
+
+error:
+    {
+        size_t len;
+        const char *msg = line_sender_error_msg(error, &len);
+        fprintf(stderr, "Error: %.*s\n", (int)len, msg);
+        line_sender_error_free(error);
+        line_sender_buffer_free(buffer);
+        line_sender_close(sender);
+        return 1;
+    }
+}

-if (!line_sender_buffer_column_f64(buffer, x_name, 30.5, &err))
-    goto on_error;
+```

-if (!line_sender_buffer_at_nanos(buffer, line_sender_now_nanos(), &err))
-    goto on_error;
+In this case, the designated timestamp will be the one at execution time.

-// To insert more records, call `line_sender_buffer_table(..)...` again.
+Let's now look at an example with explicit timestamps, a custom timeout, basic auth, error control,
+and transactional awareness.

-if (!line_sender_flush(sender, buffer, &err))
-    goto on_error;
-line_sender_close(sender);
+```c
+// line_sender_trades_example.c
+#include <questdb/ingress/line_sender.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+
+int main() {
+    // Initialize line sender
+    line_sender_error *error = NULL;
+    line_sender *sender = line_sender_from_conf(
+        QDB_UTF8_LITERAL(
+            "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;"
+        ), &error);
+
+    if (error != NULL) {
+        size_t len;
+        const char *msg = line_sender_error_msg(error, &len);
+        fprintf(stderr, "Failed to create line sender: %.*s\n", (int)len, msg);
+        line_sender_error_free(error);
+        return 1;
+    }
+
+    // Print success message
+    printf("Line sender created successfully\n");
+
+    // Initialize line sender buffer
+    line_sender_buffer *buffer = line_sender_buffer_new();
+    if (buffer == NULL) {
+        fprintf(stderr, "Failed to create line sender buffer\n");
+        line_sender_close(sender);
+        return 1;
+    }
+
+    // Get current time in nanoseconds
+    int64_t nanos = line_sender_now_nanos();
+
+    // Add data to buffer for ETH-USD trade
+    if (!line_sender_buffer_table(buffer, QDB_TABLE_NAME_LITERAL("trades"), &error)) goto error;
+    if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("symbol"), QDB_UTF8_LITERAL("ETH-USD"), &error)) goto error;
+    if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) goto error;
+    if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("price"), 2615.54, &error)) goto error;
+    if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("amount"), 0.00044, &error)) goto error;
+    if (!line_sender_buffer_at_nanos(buffer, nanos, &error)) goto error;
+
+    // Add data to buffer for BTC-USD trade
+    if (!line_sender_buffer_table(buffer, QDB_TABLE_NAME_LITERAL("trades"), &error)) goto error;
+    if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("symbol"), QDB_UTF8_LITERAL("BTC-USD"), &error)) goto error;
+    if (!line_sender_buffer_symbol(buffer, QDB_COLUMN_NAME_LITERAL("side"), QDB_UTF8_LITERAL("sell"), &error)) goto error;
+    if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("price"), 39269.98, &error)) goto error;
+    if (!line_sender_buffer_column_f64(buffer, QDB_COLUMN_NAME_LITERAL("amount"), 0.001, &error)) goto error;
+    if (!line_sender_buffer_at_nanos(buffer, nanos, &error)) goto error;
+
+    // If we detect multiple tables within the same buffer, we abort to avoid potential
+    // inconsistency issues. See the transaction notes later on this page.
+    if (!line_sender_buffer_transactional(buffer)) {
+        fprintf(stderr, "Buffer is not transactional\n");
+        line_sender_buffer_free(buffer);
+        line_sender_close(sender);
+        return 1;
+    }
+
+    // Flush the buffer to QuestDB
+    if (!line_sender_flush(sender, buffer, &error)) {
+        size_t len;
+        const char *msg = line_sender_error_msg(error, &len);
+        fprintf(stderr, "Failed to flush data: %.*s\n", (int)len, msg);
+        line_sender_error_free(error);
+        line_sender_buffer_free(buffer);
+        line_sender_close(sender);
+        return 1;
+    }
+
+    // Print success message
+    printf("Data flushed successfully\n");
+
+    // Free resources
+    line_sender_buffer_free(buffer);
+    line_sender_close(sender);
+
+    return 0;
+
+error:
+    {
+        size_t len;
+        const char *msg = line_sender_error_msg(error, &len);
+        fprintf(stderr, "Error: %.*s\n", (int)len, msg);
+        line_sender_error_free(error);
+        line_sender_buffer_free(buffer);
+        line_sender_close(sender);
+        return 1;
+    }
+}
+
```

-## Health check
+As you can see, both events use the same timestamp. We recommend using the original event timestamps when
+ingesting data into QuestDB. Using the current timestamp hinders the ability to deduplicate rows, which is
+[important for exactly-once processing](/docs/clients/java_ilp/#exactly-once-delivery-vs-at-least-once-delivery).

-To monitor your active connection, there is a `ping` endpoint:
-```shell
-curl -I http://localhost:9000/ping
-```
+## Other Considerations for both C and C++
+
+### Configuration options

-Returns (pong!):
+The easiest way to configure the line sender is the configuration string. The
+general structure is:

-```shell
-HTTP/1.1 204 OK
-Server: questDB/1.0
-Date: Fri, 2 Feb 2024 17:09:38 GMT
-Transfer-Encoding: chunked
-Content-Type: text/plain; charset=utf-8
-X-Influxdb-Version: v2.7.4
+```plain
+<transport>::addr=host:port;param1=val1;param2=val2;...
```

-Determine whether an instance is active and confirm the version of InfluxDB Line
-Protocol with which you are interacting.
+`transport` can be `http`, `https`, `tcp`, or `tcps`. The C/C++ and Rust clients share
+the same codebase. Please refer to the
+[Rust client's documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the
+full details on configuration.
+
+### Don't forget to flush
+
+The sender and buffer objects are entirely decoupled. This means that the sender
+won't get access to the data in the buffer until you explicitly call
+`sender.flush` or `line_sender_flush`.
+This may lead to a pitfall where you drop a buffer that still has some data in it,
+resulting in permanent data loss.
+
+Unlike other official QuestDB clients, the C, C++, and Rust clients do not support
+auto-flushing via configuration.
+
+A common technique is to flush periodically on a timer and/or once the buffer
+exceeds a certain size. You can check the buffer's size by calling
+`buffer.size()` or `line_sender_buffer_size(..)`.
+
+The default `flush()` method clears the buffer after sending its data. If you
+want to preserve its contents (for example, to send the same data to multiple
+QuestDB instances), call `sender.flush_and_keep(buffer)` in C++ or
+`line_sender_flush_and_keep(..)` in C instead.
+
+### Transactional flush
+
+As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics),
+the HTTP transport has some support for transactions.
+
+To ensure in advance that a flush will not affect more than one table, call
+`buffer.transactional()` or `line_sender_buffer_transactional(buffer)`, as demonstrated
+in the examples in this document. This call will return false if the flush wouldn't be
+data-transactional.
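+
+Combining the two sections above, a periodic flush guard might look like this in C (a
+sketch; the 64 KiB threshold is illustrative, not a recommendation):
+
+```c
+// Flush once the buffer exceeds a size threshold, but only when the flush
+// would be data-transactional (i.e. the buffer targets a single table).
+if (line_sender_buffer_size(buffer) > 64 * 1024
+    && line_sender_buffer_transactional(buffer)) {
+    if (!line_sender_flush(sender, buffer, &error))
+        goto error;
+}
+```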

## Next Steps

+Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details
+about transactions, error control, delivery guarantees, health check, or table and
+column auto-creation.
+
With data flowing into QuestDB, now it's time for analysis.

To learn _The Way_ of QuestDB SQL, see the

diff --git a/clients/ingest-dotnet.md b/clients/ingest-dotnet.md
index 71d5c577..0093029d 100644
--- a/clients/ingest-dotnet.md
+++ b/clients/ingest-dotnet.md
@@ -11,6 +11,16 @@ import { ILPClientsTable } from "@theme/ILPClientsTable"
QuestDB supports the .NET ecosystem with its dedicated .NET client, engineered
for high-throughput data ingestion, focusing on insert-only operations.

+Apart from blazing fast ingestion, our clients provide these key benefits:
+
+- **Automatic table creation**: No need to define your schema upfront.
+- **Concurrent schema changes**: Seamlessly handle multiple data streams with
+  on-the-fly schema modifications
+- **Optimized batching**: Use strong defaults or curate the size of your batches
+- **Health checks and feedback**: Ensure your system's integrity with built-in
+  health monitoring
+- **Automatic write retries**: Reuse connections and retry after interruptions
+
This quick start guide aims to familiarize you with the fundamental features of
the .NET client, including how to establish a connection, authenticate, and
perform basic insert operations.
@@ -23,7 +33,7 @@ perform basic insert operations.
- QuestDB must be running. If not, see
  [the general quick start guide](/docs/quick-start/).

-## Quickstart
+## Client installation

The latest version of the library is
[2.0.0](https://www.nuget.org/packages/net-questdb-client/)

The NuGet package can be installed using the dotnet CLI:

```shell
dotnet add package net-questdb-client
```

-The .NET ILP client streams data to QuestDB using the ILP format.
-
-The format is a text protocol with the following form:
-
-`table,symbol=value column1=value1 column2=value2 nano_timestamp`
-
-The client provides a convenient API to manage the construction and sending of
-ILP rows.
-
:::note

`Sender` is single-threaded, and uses a single connection to the database.

tasking.

:::

-### Basic usage
+## Authentication
+
+### HTTP
+
+The HTTP protocol supports authentication via
+[Basic Authentication](https://datatracker.ietf.org/doc/html/rfc7617), and
+[Token Authentication](https://datatracker.ietf.org/doc/html/rfc6750).
+
+**Basic Authentication**
+
+Configure Basic Authentication with the `username` and `password` parameters:

```csharp
-using var sender = Sender.New("http::addr=localhost:9000;");
-await sender.Table("metric_name")
-    .Symbol("Symbol", "value")
-    .Column("number", 10)
-    .Column("double", 12.23)
-    .Column("string", "born to shine")
-    .AtAsync(new DateTime(2021, 11, 25, 0, 46, 26));
+using QuestDB;
+ ...
+using var sender = Sender.New("http::addr=localhost:9000;username=admin;password=quest;");
+ ...
+```
+
+**Token Authentication**
+
+_QuestDB Enterprise Only_
+
+Configure Token Authentication with the `username` and `token` parameters:
+
+```csharp
+using var sender = Sender.New("http::addr=localhost:9000;username=admin;token=<token>");
+```
+
+### TCP
+
+TCP authentication can be configured using JWK tokens:
+
+```csharp
+using var sender = Sender.New("tcp::addr=localhost:9000;username=admin;token=<token>");
+```
+
+The connection string can also be built programmatically.
See [Configuration](#configuration) for details. + +## Basic insert + +Basic insertion (no-auth): + +```csharp +using System; +using QuestDB; + +using var sender = Sender.New("http::addr=localhost:9000;"); +await sender.Table("trades") + .Symbol("symbol", "ETH-USD") + .Symbol("side", "sell") + .Column("price", 2615.54) + .Column("amount", 0.00044) + .AtNowAsync(); +await sender.Table("trades") + .Symbol("symbol", "BTC-USD") + .Symbol("side", "sell") + .Column("price", 39269.98) + .Column("amount", 0.001) + .AtNowAsync(); await sender.SendAsync(); ``` -### Multi-line send (sync) +In this case, the designated timestamp will be the one at execution time. Let's see now an example with timestamps, custom auto-flushing, basic auth, and error reporting. ```csharp -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=off;"); -for(int i = 0; i < 100; i++) +using QuestDB; +using System; +using System.Threading.Tasks; + +class Program { - sender.Table("metric_name") - .Column("counter", i) - .At(DateTime.UtcNow); + static async Task Main(string[] args) + { + using var sender = Sender.New("http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;"); + + var now = DateTime.UtcNow; + try + { + await sender.Table("trades") + .Symbol("symbol", "ETH-USD") + .Symbol("side", "sell") + .Column("price", 2615.54) + .Column("amount", 0.00044) + .AtAsync(now); + + await sender.Table("trades") + .Symbol("symbol", "BTC-USD") + .Symbol("side", "sell") + .Column("price", 39269.98) + .Column("amount", 0.001) + .AtAsync(now); + + await sender.SendAsync(); + + Console.WriteLine("Data flushed successfully."); + } + catch (Exception ex) + { + Console.Error.WriteLine($"Error: {ex.Message}"); + } + } } -sender.Send(); ``` -## Initialisation +As you can see, both events use the same timestamp. We recommended using the original event timestamps when +ingesting data into QuestDB. Using the current timestamp hinder the ability to deduplicate rows which is +[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). + + +## Configuration Construct new Senders via the `Sender` factory. @@ -87,12 +172,12 @@ It is mandatory to provide the `addr` config, as this defines the transport protocol and the server location. By default, the HTTP protocol uses `9000`, the same as the other HTTP endpoints. -Optionally, TCP uses `9009'. +Optionally, TCP uses `9009`. ### With a configuration string It is recommended, where possible, to initialise the sender using a -[configuration string](https://questdb.io/docs/reference/api/ilp/overview/#configuration-strings). +[configuration string](https://questdb.io/docs/reference/api/ilp/overview/#client-side-configuration). Configuration strings provide a convenient shorthand for defining client properties, and are validated during construction of the `Sender`. @@ -135,27 +220,6 @@ var options = new ConfigurationBuilder() .Get(); ``` -### Choosing a protocol - -The client currently supports streaming ILP data over HTTP and TCP transports. - -The sender performs some validation, but it is still possible that errors are -present and the server will reject the data. - -With the TCP protocol, this will lead to a dropped connection and an error -server-side. - -With the HTTP transport, errors will be returned via standard HTTP responses and -propagated to the user via `IngressError`. - -HTTP transport also provides better guarantees around transactionality for -submitted data. 
- -In general, it is recommended to use the HTTP transport. If the absolute highest -performance is required, then in some cases, the TCP transport will be faster. -However, it is important to use deduplication keys judiciously in your table -schemas, as this will help guard against duplication of data in the error case. - ## Preparing Data Senders use an internal buffer to convert input values into an ILP-compatible @@ -246,25 +310,6 @@ QuestDB's deduplication feature, and should be avoided where possible. ::: -#### Designated timestamp - -QuestDB clusters the table around a -[designated timestamp](/docs/concept/designated-timestamp/). - -The timestamp provided in the `At*` calls will be used as the designated -timestamp. - -Choosing the right timestamp is critical for performance! - -#### Table creation - -If the table corresponding to the ILP submission does not exist, it will be -automatically created, with a 'best guess' schema. This may not be optimal for -your use case, but this functionality does provide flexibility in what the -database will accept. - -It is recommended, when possible, to create your tables ahead of time using a -thought-out schema. This can be done via APIs other than the ILP ingestion. ## Flushing @@ -365,9 +410,15 @@ Server-side transactions are only for a single table. Therefore, a request containing multiple tables will be split into a single transaction per table. If a transaction fails for one table, other transactions may still complete. -For true transactionality, one can use the transaction feature to enforce a +For data transactionality, one can use the transaction feature to enforce a batch only for a single table. +:::caution + +As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the HTTP transport has some limitations for transactions when adding new columns. + +::: + Transactions follow this flow: ```mermaid @@ -483,6 +534,8 @@ sender.Clear(); // empties the internal buffer ## Security +_QuestDB Enterprise offers native TLS support_ + ### TLS Enable TLS via the `https` or `tcps` protocol, along with other associated @@ -497,160 +550,33 @@ For development purposes, the verification of TLS certificates can be disabled: using var sender = Sender.New("https::addr=localhost:9000;tls_verify=unsafe_off;"); ``` -### Authentication - -The client supports both TLS encryption, and authentication. - -The authentication credentials can be set up by following the -[RBAC](https://questdb.io/docs/operations/rbac/) documentation. - -#### HTTP - -The HTTP protocol supports authentication via -[Basic Authentication](https://datatracker.ietf.org/doc/html/rfc7617), and -[Token Authentication](https://datatracker.ietf.org/doc/html/rfc6750). 
- -**Basic Authentication** - -Configure Basic Authentication with the `username` and `password` parameters: - -```csharp -using var sender = Sender.New("http::addr=localhost:9000;username=admin;password=quest;"); -``` - -**Token Authentication** - -Configure Token Authentication with the `username` and `token` parameters: - -```csharp -using var sender = Sender.New("http::addr=localhost:9000;username=admin;token="); -``` - -#### TCP - -TCP authentication can be configured using JWK tokens: - -```csharp -using var sender = Sender.New("tcp::addr=localhost:9000;username=admin;token="); -``` - -## Examples - -### Basic Usage - -```csharp -using System; -using QuestDB; - -using var sender = Sender.New("http::addr=localhost:9000;"); -await sender.Table("trades") - .Symbol("pair", "USDGBP") - .Symbol("type", "buy") - .Column("traded_price", 0.83) - .Column("limit_price", 0.84) - .Column("qty", 100) - .Column("traded_ts", new DateTime(2022, 8, 6, 7, 35, 23, 189, DateTimeKind.Utc)) - .AtAsync(DateTime.UtcNow); -await sender.Table("trades") - .Symbol("pair", "GBPJPY") - .Column("traded_price", 135.97) - .Column("qty", 400) - .AtAsync(DateTime.UtcNow); -await sender.SendAsync(); -``` - -### Streaming data - -```csharp -using System.Diagnostics; -using QuestDB; - -var rowsToSend = 1e6; - -using var sender = Sender.New("http::addr=localhost:9000;auto_flush=on;auto_flush_rows=75000;auto_flush_interval=off;"); - -var timer = new Stopwatch(); -timer.Start(); - -for (var i = 0; i < rowsToSend; i++) -{ - await sender.Table("trades") - .Symbol("pair", "USDGBP") - .Symbol("type", "buy") - .Column("traded_price", 0.83) - .Column("limit_price", 0.84) - .Column("qty", 100) - .Column("traded_ts", new DateTime( - 2022, 8, 6, 7, 35, 23, 189, DateTimeKind.Utc)) - .AtAsync(DateTime.UtcNow); -} - -// Ensure no pending rows. -await sender.SendAsync(); - -timer.Stop(); - -Console.WriteLine( - $"Wrote {rowsToSend} rows in {timer.Elapsed.TotalSeconds} seconds at a rate of {rowsToSend / timer.Elapsed.TotalSeconds} rows/second."); -``` - ### HTTP TLS with Basic Authentication ```csharp -using QuestDB; - // Runs against QuestDB Enterprise, demonstrating HTTPS and Basic Authentication support. 
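// Note: tls_verify=unsafe_off disables certificate verification; it is meant
// for local development only and should never be used in production.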
using var sender = Sender.New("https::addr=localhost:9000;tls_verify=unsafe_off;username=admin;password=quest;");
-await sender.Table("trades")
-    .Symbol("pair", "USDGBP")
-    .Symbol("type", "buy")
-    .Column("traded_price", 0.83)
-    .Column("limit_price", 0.84)
-    .Column("qty", 100)
-    .Column("traded_ts", new DateTime(
-        2022, 8, 6, 7, 35, 23, 189, DateTimeKind.Utc))
-    .AtAsync(DateTime.UtcNow);
-await sender.Table("trades")
-    .Symbol("pair", "GBPJPY")
-    .Column("traded_price", 135.97)
-    .Column("qty", 400)
-    .AtAsync(DateTime.UtcNow);
-await sender.SendAsync();
```

### TCP TLS with JWK Authentication

```csharp
-using System;
-using QuestDB;
-
// Demonstrates TCPS connection against QuestDB Enterprise
using var sender = Sender.New(
    "tcps::addr=localhost:9009;tls_verify=unsafe_off;username=admin;token=NgdiOWDoQNUP18WOnb1xkkEG5TzPYMda5SiUOvT1K0U=;");
// See: https://questdb.io/docs/reference/api/ilp/authenticate
-await sender.Table("trades")
-    .Symbol("pair", "USDGBP")
-    .Symbol("type", "buy")
-    .Column("traded_price", 0.83)
-    .Column("limit_price", 0.84)
-    .Column("qty", 100)
-    .Column("traded_ts", new DateTime(
-        2022, 8, 6, 7, 35, 23, 189, DateTimeKind.Utc))
-    .AtAsync(DateTime.UtcNow);
-await sender.Table("trades")
-    .Symbol("pair", "GBPJPY")
-    .Column("traded_price", 135.97)
-    .Column("qty", 400)
-    .AtAsync(DateTime.UtcNow);
-await sender.SendAsync();
+
```

## Next Steps

+Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details
+about transactions, error control, delivery guarantees, health check, or table and
+column auto-creation.
+
Dive deeper into the .NET client capabilities by exploring more examples
provided in the
[GitHub repository](https://github.com/questdb/dotnet-questdb-client).

diff --git a/clients/ingest-go.md b/clients/ingest-go.md
index 2b17870f..f2ee0dba 100644
--- a/clients/ingest-go.md
+++ b/clients/ingest-go.md
@@ -71,18 +71,39 @@ Or, set the QDB_CLIENT_CONF environment variable and call

1. Export the configuration string as an environment variable:
   ```bash
-   export QDB_CLIENT_CONF="addr=localhost:9000;username=admin;password=quest;"
+   export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;"
   ```
2. Then in your Go code:
   ```Go
   client, err := questdb.LineSenderFromEnv(context.TODO())
   ```
+Alternatively, you can use the built-in Go API to specify the connection options.
+
+```go
+package main
+
+import (
+    "context"
+
+    qdb "github.com/questdb/go-questdb-client/v3"
+)
+
+func main() {
+    ctx := context.TODO()
+
+    client, err := qdb.NewLineSender(ctx, qdb.WithHttp(), qdb.WithAddress("localhost:9000"), qdb.WithBasicAuth("admin", "quest"))
+    if err != nil {
+        panic("Failed to create client")
+    }
+    defer client.Close(ctx)
+    // ... build rows, then flush ...
+}
+```
+
+When using QuestDB Enterprise, authentication can also be done via REST token.
+Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info.
+
## Basic Insert

-Example: inserting data from a temperature sensor.
+Example: inserting executed trades for cryptocurrencies.

-Without authentication:
+Without authentication and using the current timestamp:

```Go
package main
@@ -90,7 +111,6 @@ package main
import (
    "context"
    "github.com/questdb/go-questdb-client/v3"
-   "time"
)

func main() {
@@ -101,12 +121,12 @@ func main() {
        panic("Failed to create client")
    }

-   timestamp := time.Now()
-   err = client.Table("sensors").
-       Symbol("id", "toronto1").
-       Float64Column("temperature", 20.0).
-       Float64Column("humidity", 0.5).
-       At(ctx, timestamp)
+   err = client.Table("trades").
+       Symbol("symbol", "ETH-USD").
+       Symbol("side", "sell").
+       Float64Column("price", 2615.54).
+       Float64Column("amount", 0.00044).
+       AtNow(ctx)

    if err != nil {
        panic("Failed to insert data")
@@ -119,57 +139,74 @@ func main() {
}
```

-## Limitations
+In this case, the designated timestamp will be the one at execution time. Let's now look at an example with an explicit timestamp, custom auto-flushing, and basic auth.
+
+```Go
+package main
+
+import (
+    "context"
+    "github.com/questdb/go-questdb-client/v3"
+    "time"
+)

-### Transactionality
+func main() {
+    ctx := context.TODO()

-The Go client does not support full transactionality:
+    client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;")
+    if err != nil {
+        panic("Failed to create client")
+    }

-- Data for the first table in an HTTP request will be committed even if the
-  second table's commit fails.
-- An implicit commit occurs each time a new column is added to a table. This
-  action cannot be rolled back if the request is aborted or encounters parse
-  errors.
+    timestamp := time.Now()
+    err = client.Table("trades").
+        Symbol("symbol", "ETH-USD").
+        Symbol("side", "sell").
+        Float64Column("price", 2615.54).
+        Float64Column("amount", 0.00044).
+        At(ctx, timestamp)

-### Timestamp column
+    if err != nil {
+        panic("Failed to insert data")
+    }

-QuestDB's underlying InfluxDB Line Protocol (ILP) does not name timestamps,
-leading to an automatic column name of timestamp. To use a custom name,
-pre-create the table with the desired timestamp column name:
+    err = client.Flush(ctx)
+    // You can flush manually at any point.
+    // If you don't flush manually, the client will flush automatically
+    // when a row is added and either:
+    //  * The buffer contains 75000 rows (if HTTP) or 600 rows (if TCP)
+    //  * The last flush was more than 1000ms ago.
+    // Auto-flushing can be customized via the `auto_flush_..` params.

-```sql
-CREATE TABLE temperatures (
-    ts timestamp,
-    sensorID symbol,
-    sensorLocation symbol,
-    reading double
-) timestamp(my_ts);
+    if err != nil {
+        panic("Failed to flush data")
+    }
+}
```

+We recommend using user-assigned timestamps when ingesting data into QuestDB.
+Using the current timestamp hinders the ability to deduplicate rows, which is
+[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery).

-## Health check
+## Configuration options

-To monitor your active connection, there is a `ping` endpoint:
+The minimal configuration string needs to have the protocol, host, and port, as in:

-```shell
-curl -I http://localhost:9000/ping
+```
+http::addr=localhost:9000;
```

-Returns (pong!):
+In the Go client, you can set the configuration options via the standard config string,
+which is the same across all clients, or using [the built-in API](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderOption).

-```shell
-HTTP/1.1 204 OK
-Server: questDB/1.0
-Date: Fri, 2 Feb 2024 17:09:38 GMT
-Transfer-Encoding: chunked
-Content-Type: text/plain; charset=utf-8
-X-Influxdb-Version: v2.7.4
-```
+For all the extra options you can use, please check [the client docs](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderFromConf).

-Determine whether an instance is active and confirm the version of InfluxDB Line
-Protocol with which you are interacting.
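+For instance, a more fully specified configuration string might combine several of
+these options (values are illustrative, not recommendations):
+
+```
+http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=5000;auto_flush_interval=1000;retry_timeout=10000;
+```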
## Next Steps +Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details +about transactions, error control, delivery guarantees, health check, or table and +column auto-creation. + Explore the full capabilities of the Go client via [Go.dev](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3). diff --git a/clients/ingest-node.md b/clients/ingest-node.md index 2aca8f8b..180e0a90 100644 --- a/clients/ingest-node.md +++ b/clients/ingest-node.md @@ -41,10 +41,41 @@ Install the QuestDB Node.js client via npm: npm i -s @questdb/nodejs-client ``` -## Basic Usage +## Authentication + +Passing in a configuration string with basic auth: + +```javascript +const { Sender } = require("@questdb/nodejs-client"); + +const conf = "http::addr=localhost:9000;username=admin;password=quest;" +const sender = Sender.fromConfig(conf); + ... +``` + +Passing via the `QDB_CLIENT_CONF` env var: + +```bash +export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" +``` + +```javascript +const { Sender } = require("@questdb/nodejs-client"); + + +const sender = Sender.fromEnv(); + ... +``` + +When using QuestDB Enterprise, authentication can also be done via REST token. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. + +## Basic insert + +Example: inserting executed trades for cryptocurrencies. + +Without authentication and using the current timestamp. -A simple example to connect to QuestDB, insert some data into a table, and flush -the data: ```javascript const { Sender } = require("@questdb/nodejs-client") @@ -55,42 +86,96 @@ async function run() { // add rows to the buffer of the sender await sender - .table("prices") - .symbol("instrument", "EURUSD") - .floatColumn("bid", 1.0195) - .floatColumn("ask", 1.0221) - .at(Date.now(), "ms") - await sender - .table("prices") - .symbol("instrument", "GBPUSD") - .floatColumn("bid", 1.2076) - .floatColumn("ask", 1.2082) - .at(Date.now(), "ms") + .table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .floatColumn("price", 2615.54) + .floatColumn("amount", 0.00044) + .atNow() // flush the buffer of the sender, sending the data to QuestDB // the buffer is cleared after the data is sent, and the sender is ready to accept new data await sender.flush() - // add rows to the buffer again, and send it to the server + // close the connection after all rows ingested + // unflushed data will be lost + await sender.close() +} + +run().then(console.log).catch(console.error) +``` + +In this case, the designated timestamp will be the one at execution time. Let's see now an example with an explicit +timestamp, custom auto-flushing, and basic auth. + + +```javascript +const { Sender } = require("@questdb/nodejs-client") + +async function run() { + // create a sender using HTTP protocol + const sender = Sender.fromConfig( + "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;" + ) + + // Calculate the current timestamp. You could also parse a date from your source data. 
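+  // e.g. (illustrative): const timestamp = new Date("2022-03-08T18:03:57.609Z").getTime()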
+ const timestamp = Date.now(); + + // add rows to the buffer of the sender + await sender + .table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .floatColumn("price", 2615.54) + .floatColumn("amount", 0.00044) + .at(timestamp, "ms") + + // add rows to the buffer of the sender await sender - .table("prices") - .symbol("instrument", "EURUSD") - .floatColumn("bid", 1.0197) - .floatColumn("ask", 1.0224) - .at(Date.now(), "ms") + .table("trades") + .symbol("symbol", "BTC-USD") + .symbol("side", "sell") + .floatColumn("price", 39269.98) + .floatColumn("amount", 0.001) + .at(timestamp, "ms") + + + // flush the buffer of the sender, sending the data to QuestDB + // the buffer is cleared after the data is sent, and the sender is ready to accept new data await sender.flush() + // close the connection after all rows ingested + // unflushed data will be lost await sender.close() } run().then(console.log).catch(console.error) ``` +As you can see, both events now are using the same timestamp. We recommended to use the original event timestamps when +ingesting data into QuestDB. Using the current timestamp hinder the ability to deduplicate rows which is +[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). + + +## Configuration options + +The minimal configuration string needs to have the protocol, host, and port, as in: + +``` +http::addr=localhost:9000; +``` + +For all the extra options you can use, please check [the client docs](https://questdb.github.io/nodejs-questdb-client/SenderOptions.html) + + ## Next Steps -Dive deeper into the Node.js client capabilities by exploring more examples -provided in the +Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details +about transactions, error control, delivery guarantees, health check, or table and +column auto-creation. + +Dive deeper into the Node.js client capabilities, including TypeScript and Worker Threads examples, by exploring the [GitHub repository](https://github.com/questdb/nodejs-questdb-client). To learn _The Way_ of QuestDB SQL, see the diff --git a/clients/ingest-python.md b/clients/ingest-python.md index 9a635f52..7d048a76 100644 --- a/clients/ingest-python.md +++ b/clients/ingest-python.md @@ -84,13 +84,27 @@ with Sender.from_conf(conf) as sender: Passing via the `QDB_CLIENT_CONF` env var: -```python +```bash export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" ``` -## Basic insert +```python +from questdb.ingress import Sender + +with Sender.from_env() as sender: + ... +``` + +```python +from questdb.ingress import Sender, Protocol + +with Sender(Protocol.Http, 'localhost', 9000, username='admin', password='quest') as sender: +``` -Consider something such as a temperature sensor. +When using QuestDB Enterprise, authentication can also be done via REST token. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. 
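+With QuestDB Enterprise, a REST token is passed through the same configuration
+string (a sketch; the token value is a placeholder):
+
+```python
+from questdb.ingress import Sender
+
+# Placeholder token - obtain a real REST token as described in the RBAC docs.
+conf = 'https::addr=localhost:9000;token=YOUR_REST_TOKEN;'
+with Sender.from_conf(conf) as sender:
+    ...
+```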
+ +## Basic insert Basic insertion (no-auth): @@ -100,33 +114,19 @@ from questdb.ingress import Sender, TimestampNanos conf = f'http::addr=localhost:9000;' with Sender.from_conf(conf) as sender: sender.row( - 'sensors', - symbols={'id': 'toronto1'}, - columns={'temperature': 20.0, 'humidity': 0.5}, + 'trades', + symbols={'symbol': 'ETH-USD', 'side': 'sell'}, + columns={'price': 2615.54, 'amount': 0.00044}, + at=TimestampNanos.now()) + sender.row( + 'trades', + symbols={'symbol': 'BTC-USD', 'side': 'sell'}, + columns={'price': 39269.98, 'amount': 0.001}, at=TimestampNanos.now()) sender.flush() ``` -The same temperature senesor, but via a Pandas dataframe: - -```python -import pandas as pd -from questdb.ingress import Sender - -df = pd.DataFrame({ - 'id': pd.Categorical(['toronto1', 'paris3']), - 'temperature': [20.0, 21.0], - 'humidity': [0.5, 0.6], - 'timestamp': pd.to_datetime(['2021-01-01', '2021-01-02'])}) - -conf = f'http::addr=localhost:9000;' -with Sender.from_conf(conf) as sender: - sender.dataframe(df, table_name='sensors', at='timestamp') -``` - -What about market data? - -A "full" example, with timestamps and auto-flushing: +In this case, the designated timestamp will be the one at execution time. Let's see now an example with timestamps, custom auto-flushing, basic auth, and error reporting. ```python from questdb.ingress import Sender, IngressError, TimestampNanos @@ -136,25 +136,25 @@ import datetime def example(): try: - conf = f'http::addr=localhost:9000;' + conf = f'http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;' with Sender.from_conf(conf) as sender: # Record with provided designated timestamp (using the 'at' param) # Notice the designated timestamp is expected in Nanoseconds, # but timestamps in other columns are expected in Microseconds. - # The API provides convenient functions + # You can use the TimestampNanos or TimestampMicros classes, + # or you can just pass a datetime object sender.row( 'trades', symbols={ - 'pair': 'USDGBP', - 'type': 'buy'}, + 'symbol': 'ETH-USD', + 'side': 'sell'}, columns={ - 'traded_price': 0.83, - 'limit_price': 0.84, - 'qty': 100, - 'traded_ts': datetime.datetime( - 2022, 8, 6, 7, 35, 23, 189062, - tzinfo=datetime.timezone.utc)}, - at=TimestampNanos.now()) + 'price': 2615.54, + 'amount': 0.00044, + }, + at=datetime.datetime( + 2022, 3, 8, 18, 53, 57, 609765, + tzinfo=datetime.timezone.utc)) # You can call `sender.row` multiple times inside the same `with` # block. The client will buffer the rows and send them in batches. @@ -178,70 +178,77 @@ if __name__ == '__main__': example() ``` -The above generates rows of InfluxDB Line Protocol (ILP) flavoured data: +We recommended `User`-assigned timestamps when ingesting data into QuestDB. +Using `Server`-assigned timestamps hinders the ability to deduplicate rows which is +[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). 
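+For instance, converting a timezone-aware source datetime into the nanosecond
+precision expected for the designated timestamp (a sketch; it assumes the client's
+`TimestampNanos.from_datetime` helper):
+
+```python
+import datetime
+from questdb.ingress import TimestampNanos
+
+# An event time taken from your source data (illustrative value)
+event_time = datetime.datetime(
+    2022, 3, 8, 18, 53, 57, 609765, tzinfo=datetime.timezone.utc)
+at = TimestampNanos.from_datetime(event_time)  # pass this as the `at` argument
+```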
+
+
+The same `trades` insert, but via a Pandas dataframe:

```python
-trades,pair=USDGBP,type=sell traded_price=0.82,limit_price=0.81,qty=150,traded_ts=1659784523190000000\n
-trades,pair=EURUSD,type=buy traded_price=1.18,limit_price=1.19,qty=200,traded_ts=1659784523191000000\n
-trades,pair=USDJPY,type=sell traded_price=110.5,limit_price=110.4,qty=80,traded_ts=1659784523192000000\n
-```
+import pandas as pd
+from questdb.ingress import Sender, TimestampNanos

-## Limitations
+df = pd.DataFrame({
+    'symbol': pd.Categorical(['ETH-USD', 'BTC-USD']),
+    'side': pd.Categorical(['sell', 'sell']),
+    'price': [2615.54, 39269.98],
+    'amount': [0.00044, 0.001],
+    'timestamp': pd.to_datetime(['2022-03-08T18:03:57.609765Z', '2022-03-08T18:03:57.710419Z'])})

-### Transactionality
+conf = f'http::addr=localhost:9000;'
+with Sender.from_conf(conf) as sender:
+    sender.dataframe(df, table_name='trades', at=TimestampNanos.now())
+```

-The client does not provide full transactionality in all cases:
+Note that you can also add a column of your dataframe with your timestamps and
+reference that column in the `at` parameter:

-- Data for the first table in an HTTP request will be committed even if the
-  second table's commit fails.
-- An implicit commit occurs each time a new column is added to a table. This
-  action cannot be rolled back if the request is aborted or encounters parse
-  errors.
+```python
+import pandas as pd
+from questdb.ingress import Sender

-### Timestamp column
+df = pd.DataFrame({
+    'symbol': pd.Categorical(['ETH-USD', 'BTC-USD']),
+    'side': pd.Categorical(['sell', 'sell']),
+    'price': [2615.54, 39269.98],
+    'amount': [0.00044, 0.001],
+    'timestamp': pd.to_datetime(['2022-03-08T18:03:57.609765Z', '2022-03-08T18:03:57.710419Z'])})

-The underlying ILP protocol sends timestamps to QuestDB without a name.
+conf = f'http::addr=localhost:9000;'
+with Sender.from_conf(conf) as sender:
+    sender.dataframe(df, table_name='trades', at='timestamp')
+```

-Therefore, if you provide it one, say `my_ts`, you will find that the timestamp
-column is named `timestamp`.
+## Configuration options

-To address this, issue a CREATE TABLE statement to create the table in advance:
+The minimal configuration string needs to have the protocol, host, and port, as in:

-```questdb-sql title="Creating a timestamp named my_ts"
-CREATE TABLE temperatures (
-    ts timestamp,
-    sensorID symbol,
-    sensorLocation symbol,
-    reading double
-) timestamp(my_ts);
+```
+http::addr=localhost:9000;
```

-Now, when you can send data to the specified column.
+In the Python client, you can set the configuration options via the standard config string,
+which is the same across all clients, or using [the built-in API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#sender-programmatic-construction).

+For all the extra options you can use, please check [the client docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#sender-conf).

-## Health check

-To monitor your active connection, there is a `ping` endpoint:

-```shell
-curl -I http://localhost:9000/ping
-```

-Returns (pong!):

-```shell
-HTTP/1.1 204 OK
-Server: questDB/1.0
-Date: Fri, 2 Feb 2024 17:09:38 GMT
-Transfer-Encoding: chunked
-Content-Type: text/plain; charset=utf-8
-X-Influxdb-Version: v2.7.4
-```
+## Transactional flush

+As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics),
+the HTTP transport has some support for transactions.
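+A minimal sketch of what this can look like (assuming the client's
+`Sender.transaction()` context manager documented in the API link below):
+
+```python
+from questdb.ingress import Sender, TimestampNanos
+
+conf = 'http::addr=localhost:9000;'
+with Sender.from_conf(conf) as sender:
+    # A transaction may only target a single table.
+    with sender.transaction('trades') as txn:
+        txn.row(
+            symbols={'symbol': 'ETH-USD', 'side': 'sell'},
+            columns={'price': 2615.54, 'amount': 0.00044},
+            at=TimestampNanos.now())
+```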
-Determine whether an instance is active and confirm the version of InfluxDB Line -Protocol with which you are interacting. +The python client exposes [an API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#http-transactions) +to make working with transactions more convenient ## Next steps +Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for general details +about transactions, error control, delivery guarantees, health check, or table and +column auto-creation. The [Python client docs](https://py-questdb-client.readthedocs.io/en/latest/sender.html) explain how to apply those concepts using the built-in API. + For full docs, checkout [ReadTheDocs](https://py-questdb-client.readthedocs.io/en). diff --git a/clients/ingest-rust.md b/clients/ingest-rust.md index 86ddcc85..f1313b33 100644 --- a/clients/ingest-rust.md +++ b/clients/ingest-rust.md @@ -33,10 +33,35 @@ Add the QuestDB client to your project using the command line: cargo add questdb-rs ``` -## Quick example +## Authentication -This snippet connects to QuestDB running locally, creates the table `sensors`, -and adds one row to it: +This is how you'd set up the client to authenticate using the HTTP Basic +authentication: + +```rust +let mut sender = Sender::from_conf( + "https::addr=localhost:9000;username=admin;password=quest;" +)?; +``` + +You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable: + +```bash +export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" +``` + +Then you use it like this: + +```rust +let mut sender = Sender::from_env()?; +``` + +When using QuestDB Enterprise, authentication can also be done via REST token. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. + +## Basic insert + +Basic insertion (no-auth): ```rust use questdb::{ @@ -50,10 +75,11 @@ fn main() -> Result<()> { let mut sender = Sender::from_conf("http::addr=localhost:9000;")?; let mut buffer = Buffer::new(); buffer - .table("sensors")? - .symbol("id", "toronto1")? - .column_f64("temperature", 20.0)? - .column_i64("humidity", 50)? + .table("trades")? + .symbol("symbol", "ETH-USD")? + .symbol("side", "sell")? + .column_f64("price", 2615.54)? + .column_f64("amount", 0.00044)? .at(TimestampNanos::now())?; sender.flush(&mut buffer)?; Ok(()) @@ -62,11 +88,56 @@ fn main() -> Result<()> { These are the main steps it takes: -- Use `Sender::from_conf()` to get the `Sender` object +- Use `Sender::from_conf()` to get the `sender` object - Populate a `Buffer` with one or more rows of data - Send the buffer using `sender.flush()`(`Sender::flush`) -## Configuration string +In this case, the designated timestamp will be the one at execution time. + +Let's see now an example with timestamps using Chrono, custom timeout, and basic auth. + +You need to enable the `chrono_timestamp` feature to the QuestDB crate and add the Chrono crate. + +```bash +cargo add questdb-rs --features chrono_timestamp +cargo add chrono +``` + +```rust +use questdb::{ + Result, + ingress::{ + Sender, + Buffer, + TimestampNanos + }, +}; +use chrono::Utc; + +fn main() -> Result<()> { + let mut sender = Sender::from_conf( + "http::addr=localhost:9000;username=admin;password=quest;retry_timeout=20000;" + )?; + let mut buffer = Buffer::new(); + let current_datetime = Utc::now(); + + buffer + .table("trades")? + .symbol("symbol", "ETH-USD")? + .symbol("side", "sell")? + .column_f64("price", 2615.54)? + .column_f64("amount", 0.00044)? 
+        .at(TimestampNanos::from_datetime(current_datetime)?)?;
+
+    sender.flush(&mut buffer)?;
+    Ok(())
+}
+```
+
+Using the current timestamp hinders the ability to deduplicate rows, which is
+[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery).
+
+## Configuration options

The easiest way to configure the line sender is the configuration string. The
general structure is:

@@ -86,14 +157,28 @@ won't get access to the data in the buffer until you explicitly call
`sender.flush(&mut buffer)` or a variant. This may lead to a pitfall where you
drop a buffer that still has some data in it, resulting in permanent data loss.

+Unlike other official QuestDB clients, the Rust client does not support auto-flushing
+via configuration.
+
A common technique is to flush periodically on a timer and/or once the buffer
-exceeds a certain size. You can check the buffer's size by the calling
+exceeds a certain size. You can check the buffer's size by calling
`buffer.len()`.

The default `flush()` method clears the buffer after sending its data. If you
want to preserve its contents (for example, to send the same data to multiple
QuestDB instances), call `sender.flush_and_keep(&mut buffer)` instead.

+
+## Transactional flush
+
+As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics),
+the HTTP transport has some support for transactions.
+
+In order to ensure in advance that a flush will not affect more than one table, call
+[`sender.flush_and_keep_with_flags(&mut buffer, true)`](Sender::flush_and_keep_with_flags).
+This call will refuse to flush a buffer if the flush wouldn't be data-transactional.
+
+
## Error handling

The two supported transport modes, HTTP and TCP, handle errors very differently.

@@ -121,33 +206,9 @@ on the reason. When this has happened, the sender transitions into an error
state, and it is permanently unusable. You must drop it and create a new sender.
You can inspect the sender's error state by calling `sender.must_close()`.

-## Authentication example: HTTP Basic
-
-This is how you'd set up the client to authenticate using the HTTP Basic
-authentication:
-
-```no_run
-let mut sender = Sender::from_conf(
-    "https::addr=localhost:9000;username=testUser1;password=Yfym3fgMv0B9;"
-)?;
-```
-
-Go to [the docs](https://docs.rs/questdb-rs/latest/questdb/ingress) for the
-other available options.
+For more details about the HTTP and TCP transports, please refer to the
+[ILP overview](/docs/reference/api/ilp/overview#transport-selection).

-## Configure using the environment variable
-
-You can set the `QDB_CLIENT_CONF` environment variable:
-
-```bash
-export QDB_CLIENT_CONF="https::addr=localhost:9000;username=admin;password=quest;"
-```
-
-Then you use it like this:
-
-```rust
-let mut sender = Sender::from_env()?;
-```

## Crate features

@@ -171,67 +232,12 @@ These features are opt-in:

- `insecure-skip-verify`: Allows skipping server certificate validation in TLS
  (this compromises security).

-## Usage considerations
-
-### Transactional flush
-
-When using HTTP, you can arrange that each `flush()` call happens within its own
-transaction. For this to work, your buffer must contain data that targets only
-one table. This is because QuestDB doesn't support multi-table transactions.
-
-In order to ensure in advance that a flush will be transactional, call
-[`sender.flush_and_keep_with_flags(&mut buffer, true)`](Sender::flush_and_keep_with_flags).
-This call will refuse to flush a buffer if the flush wouldn't be transactional. - -### When to choose the TCP transport? - -The TCP transport mode is raw and simplistic: it doesn't report any errors to -the caller (the server just disconnects), has no automatic retries, requires -manual handling of connection failures, and doesn't support transactional -flushing. - -However, TCP has a lower overhead than HTTP and it's worthwhile to try out as an -alternative in a scenario where you have a constantly high data rate and/or deal -with a high-latency network connection. - -### Timestamp column name - -InfluxDB Line Protocol (ILP) does not give a name to the designated timestamp, -so if you let this client auto-create the table, it will have the default name. -To use a custom name, create the table using a DDL statement: - -```sql -CREATE TABLE sensors ( - my_ts timestamp, - id symbol, - temperature double, - humidity double, -) timestamp(my_ts); -``` - -## Health check - -The QuestDB server has a "ping" endpoint you can access to see if it's alive, -and confirm the version of InfluxDB Line Protocol with which you are -interacting: - -```shell -curl -I http://localhost:9000/ping -``` - -Example of the expected response: - -```shell -HTTP/1.1 204 OK -Server: questDB/1.0 -Date: Fri, 2 Feb 2024 17:09:38 GMT -Transfer-Encoding: chunked -Content-Type: text/plain; charset=utf-8 -X-Influxdb-Version: v2.7.4 -``` - ## Next steps +Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details +about transactions, error control, delivery guarantees, health check, or table and +column auto-creation. + Explore the full capabilities of the Rust client via the [Crate API page](https://docs.rs/questdb-rs/latest/questdb/). diff --git a/clients/java_ilp.md b/clients/java_ilp.md index d730bb9a..b09dcda8 100644 --- a/clients/java_ilp.md +++ b/clients/java_ilp.md @@ -11,9 +11,16 @@ import CodeBlock from "@theme/CodeBlock" import InterpolateReleaseData from "../../src/components/InterpolateReleaseData" import { RemoteRepoExample } from "@theme/RemoteRepoExample" -The QuestDB Java client is baked right into the QuestDB binary. -It requires no additional configuration steps. +:::note + +This is the reference for the QuestDB Java Client when QuestDB is used as a server. + +For embedded QuestDB, please check our [Java Embedded Guide](/docs/reference/api/java-embedded/). + +::: + +The QuestDB Java client is baked right into the QuestDB binary. The client provides the following benefits: @@ -94,6 +101,9 @@ This sample configures a client to use HTTP transport with TLS enabled for a connection to a QuestDB server. It also instructs the client to authenticate using HTTP Basic Authentication. +When using QuestDB Enterprise, authentication can also be done via REST token. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. + ## Client instantiation @@ -155,21 +165,6 @@ There are three ways to create a client instance: 7. Go to the step no. 2 to start a new row. 8. Use `close()` to dispose the Sender after you no longer need it. -## Transport selection - -Client supports the following transport options: - -- HTTP (default port 9000) -- TCP (default port 9009) - -The HTTP transport is recommended for most use cases. It provides feedback on -errors, automatically retries failed requests, and is easier to configure. The -TCP transport is kept for compatibility with older QuestDB versions. 
It has
-limited error feedback, no automatic retries, and requires manual handling of
-connection failures. However, while HTTP is recommended, TCP has a lower
-overhead than HTTP and may be useful in high-throughput scenarios in
-high-latency networks.
-
## Flushing

Client accumulates data into an internal buffer. Flushing the buffer sends the
@@ -183,17 +178,18 @@ An explicit flush can be done by calling the `flush()` method.

```java
try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;")) {
-    sender.table("weather_sensor")
-        .symbol("id", "toronto1")
-        .doubleColumn("temperature", 23.5)
-        .doubleColumn("humidity", 0.49)
-        .atNow();
-    sender.flush();
-    sender.table("weather_sensor")
-        .symbol("id", "dubai2")
-        .doubleColumn("temperature", 41.2)
-        .doubleColumn("humidity", 0.34)
-        .atNow();
+    sender.table("trades")
+        .symbol("symbol", "ETH-USD")
+        .symbol("side", "sell")
+        .doubleColumn("price", 2615.54)
+        .doubleColumn("amount", 0.00044)
+        .atNow();
+    sender.table("trades")
+        .symbol("symbol", "BTC-USD")
+        .symbol("side", "sell")
+        .doubleColumn("price", 39269.98)
+        .doubleColumn("amount", 0.001)
+        .atNow();
    sender.flush();
}
```
@@ -257,27 +253,6 @@ client receives no additional error information from the server. This
limitation significantly contributes to the preference for HTTP transport over
TCP transport.

-### Exactly-once delivery vs at-least-once delivery
-
-The retrying behavior of the HTTP transport can lead to some data being sent to
-the server more than once.
-
-**Example**: Client sends a batch to the server, the server receives the batch,
-processes it, but fails to send a response back to the client due to a network
-error. The client will retry sending the batch to the server. This means the
-server will receive the batch again and process it again. This can lead to
-duplicated rows in the server.
-
-The are two ways to mitigate this issue:
-
-- Use [QuestDB deduplication feature](/docs/concept/deduplication/) to remove
-  duplicated rows. QuestDB server can detect and remove duplicated rows
-  automatically, resulting in exactly-once processing. This is recommended when
-  using the HTTP transport with retrying enabled.
-- Disable retrying by setting `retry_timeout` to 0. This will make the client
-  send the batch only once, failed requests will not be retried and the client
-  will receive an error. This effectively turns the client into an at-most-once
-  delivery.

## Designated timestamp considerations

@@ -290,11 +265,12 @@ There are two ways to assign a designated timestamp to a row:

   ```java
   java.time.Instant timestamp = Instant.now(); // or any other timestamp
-  sender.table("weather_sensor")
-      .symbol("id", "toronto1")
-      .doubleColumn("temperature", 23.5)
-      .doubleColumn("humidity", 0.49)
-      .at(timestamp);
+  sender.table("trades")
+      .symbol("symbol", "ETH-USD")
+      .symbol("side", "sell")
+      .doubleColumn("price", 2615.54)
+      .doubleColumn("amount", 0.00044)
+      .at(timestamp);
   ```

   The `Instant` class is part of the `java.time` package and is used to
@@ -308,16 +284,17 @@ There are two ways to assign a designated timestamp to a row:
2. Server-assigned timestamp: The server automatically assigns a timestamp to
   the row based on the server's wall-clock time.
Example: ```java - sender.table("weather_sensor") - .symbol("id", "toronto1") - .doubleColumn("temperature", 23.5) - .doubleColumn("humidity", 0.49) - .atNow(); + sender.table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .doubleColumn("price", 2615.54) + .doubleColumn("amount", 0.00044) + .atNow(); ``` We recommended to use User-assigned timestamps when ingesting data into QuestDB. Using Server-assigned hinder the ability to deduplicate rows which is -[important for exactly-once processing](#exactly-once-delivery-vs-at-least-once-delivery). +[important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). :::note @@ -326,31 +303,6 @@ rows with older timestamps are ingested before rows with newer timestamps. ::: -## Table and column auto-creation - -When sending data to a table that does not exist, the server will create the -table automatically. This also applies to columns that do not exist. The server -will use the first row of data to determine the column types. - -If the table already exists, the server will validate that the columns match the -existing table. If the columns do not match, the server will return a -non-recoverable error which is propagated to the client as a -`LineSenderException`. - -If you're using QuestDB Enterprise, you must grant further permissions to the -authenticated user: - -```sql -CREATE SERVICE ACCOUNT ingest_user; -- creates a service account to be used by a client -GRANT ilp, create table TO ingest_user; -- grants permissions to ingest data and create tables -GRANT add column, insert ON all tables TO ingest_user; -- grants permissions to add columns and insert data to all tables --- OR -GRANT add column, insert ON table1, table2 TO ingest_user; -- grants permissions to add columns and insert data to specific tables -``` - -Read more setup details in the -[Enterprise quickstart](/docs/guides/enterprise-quick-start/#4-ingest-data-influxdb-line-protocol) -and the [role-based access control](/docs/operations/rbac/) guides. ## Configuration options @@ -373,20 +325,6 @@ When using the configuration string, the following options are available: - `username`: Username for TCP authentication. - `token`: Token for TCP authentication. -### TLS encryption - -TLS in enabled by selecting the `https` or `tcps` protocol. The following -options are available: - -- `tls_roots` : Path to a Java keystore file containing trusted root - certificates. Defaults to the system default trust store. -- `tls_roots_password` : Password for the keystore file. It's always required - when `tls_roots` is set. -- `tls_verify` : Whether to verify the server's certificate. This should only be - used for testing as a last resort and never used in production as it makes the - connection vulnerable to man-in-the-middle attacks. Options are `on` or - `unsafe_off`. Defaults to `on`. - ### Auto-flushing - `auto_flush` : Global switch for the auto-flushing behavior. Options are `on` @@ -424,8 +362,27 @@ controls the auto-flushing behavior of the TCP transport. `request_timeout`. This is useful for large requests. You can set this value to `0` to disable this logic. +### TLS encryption + +To enable TLS, select the `https` or `tcps` protocol. + +The following options are available: + +- `tls_roots` : Path to a Java keystore file containing trusted root + certificates. Defaults to the system default trust store. +- `tls_roots_password` : Password for the keystore file. It's always required + when `tls_roots` is set. 
+- `tls_verify` : Whether to verify the server's certificate. This should only be + used for testing as a last resort and never used in production as it makes the + connection vulnerable to man-in-the-middle attacks. Options are `on` or + `unsafe_off`. Defaults to `on`. + + ## Other considerations +- Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details +about transactions, error control, delivery guarantees, health check, or table and +column auto-creation. - The Sender is not thread-safe. For multiple threads to send data to QuestDB, each thread should have its own Sender instance. An object pool can also be used to re-use Sender instances. @@ -435,25 +392,3 @@ controls the auto-flushing behavior of the TCP transport. pattern can be used to ensure that the Sender is closed. - The method `flush()` can be called to force sending the internal buffer to a server, even when the buffer is not full yet. - -### Health check - -To monitor your active connection, there is a `ping` endpoint: - -```shell -curl -I http://localhost:9000/ping -``` - -Returns (pong!): - -```shell -HTTP/1.1 204 OK -Server: questDB/1.0 -Date: Fri, 2 Feb 2024 17:09:38 GMT -Transfer-Encoding: chunked -Content-Type: text/plain; charset=utf-8 -X-Influxdb-Version: v2.7.4 -``` - -Determine whether an instance is active and confirm the version of InfluxDB Line -Protocol with which you are interacting. diff --git a/ingestion-overview.md b/ingestion-overview.md index 70c7a7bf..a503967c 100644 --- a/ingestion-overview.md +++ b/ingestion-overview.md @@ -47,10 +47,9 @@ higher throughput. It also provides some key benefits: An example of "data-in" - via the line - appears as: ```shell -# temperature sensor example -readings,city=London temperature=23.2 1465839830100400000\n -readings,city=London temperature=23.6 1465839830100700000\n -readings,make=Honeywell temperature=23.2,humidity=0.443 1465839830100800000\n +trades,symbol=ETH-USD,side=sell price=2615.54,amount=0.00044 1646762637609765000\n +trades,symbol=BTC-USD,side=sell price=39269.98,amount=0.001 1646762637710419000\n +trades,symbol=ETH-USD,side=buy price=2615.4,amount=0.002 1646762637764098000\n ``` Once inside of QuestDB, it's yours to manipulate and query via extended SQL. diff --git a/reference/api/ilp/overview.md b/reference/api/ilp/overview.md index d2372a3a..59d3aa32 100644 --- a/reference/api/ilp/overview.md +++ b/reference/api/ilp/overview.md @@ -27,9 +27,16 @@ This supporting document thus provides an overview to aid in client selection and initial configuration: 1. [Client libraries](/docs/reference/api/ilp/overview/#client-libraries) -2. [Configuration](/docs/reference/api/ilp/overview/#configuration) -3. [Authentication](/docs/reference/api/ilp/overview/#authentication) -4. [Transactionality caveat](/docs/reference/api/ilp/overview/#transactionality-caveat) +2. [Server-Side Configuration](/docs/reference/api/ilp/overview/#server-side-configuration) +3. [Transport Selection](/docs/reference/api/ilp/overview/#transport-selection) +4. [Client-Side Configuration](/docs/reference/api/ilp/overview/#client-side-configuration) +5. [Error handling](/docs/reference/api/ilp/overview/#error-handling) +6. [Authentication](/docs/reference/api/ilp/overview/#authentication) +7. [Table and Column Auto-creation](/docs/reference/api/ilp/overview/#table-and-column-auto-creation) +8. [Timestamp Column Name](/docs/reference/api/ilp/overview/#timestamp-column-name) +9. 
[HTTP Transaction semantics](/docs/reference/api/ilp/overview/#http-transaction-semantics)
+10. [Exactly-once delivery](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery)
+11. [Health Check](/docs/reference/api/ilp/overview/#health-check)

## Client libraries

@@ -58,7 +65,7 @@ following is set in `server.conf`:

line.http.enabled=true
```

-## Configuration
+## Server-Side Configuration

The HTTP receiver configuration can be completely customized using
[QuestDB configuration keys for ILP](/docs/configuration/#influxdb-line-protocol-ilp).

@@ -69,32 +76,27 @@ port, load balancing, and more.

For more guidance in how to tune QuestDB, see
[capacity planning](/docs/deployment/capacity-planning/).

-## Authentication
-
-:::note
+## Transport selection

-Using [QuestDB Enterprise](/enterprise/)?
+The ILP protocol in QuestDB supports the following transport options:

-Skip to [advanced security features](/docs/operations/rbac/) instead, which
-provides holistic security out-of-the-box.
+- HTTP (default port 9000)
+- TCP (default port 9009)

-:::
-
-InfluxDB Line Protocol supports authentication.
-
-A similar pattern is used across all client libraries.
-
-This document will break down and demonstrate the configuration keys and core
-configuration options.
+On QuestDB Enterprise, HTTPS and TCPS are also available.

-Once a client has been selected and configured, resume from your language client
-documentation.
+The HTTP(s) transport is recommended for most use cases. It provides feedback on
+errors, automatically retries failed requests, and is easier to configure. The
+TCP(s) transport is kept for compatibility with older QuestDB versions. It has
+limited error feedback, no automatic retries, and requires manual handling of
+connection failures. That said, TCP has slightly lower overhead than HTTP and
+may still be useful in high-throughput scenarios over high-latency networks.

-### Configuration strings
-Configuration strings combine a set of key/value pairs.
+## Client-Side Configuration

-Assembling a string connects an ILP client to a QuestDB ILP server.
+Clients connect to QuestDB over ILP using a configuration string. Configuration strings combine a set of key/value pairs.

The standard configuration string pattern is:

@@ -107,7 +109,7 @@ schema::key1=value1;key2=value2;key3=value3;

It is made up of the following parts:

- **Schema**: One of the specified schemas in the
-  [base values](/docs/reference/api/ilp/overview/#base-parameters) section below
+  [core parameters](/docs/reference/api/ilp/overview/#core-parameters) section below
- **Key=Value**: Each key-value pair sets a specific parameter for the client
- **Terminating semicolon**: A semicolon must follow the last key-value pair

@@ -118,10 +120,8 @@ Below is a list of common parameters that ILP clients will accept. These params
facilitate connection to QuestDB's ILP server and define client-specific
behaviors.

-Some are shared across all clients, while some are client specific.
-
-See the [Usage section](/docs/reference/api/ilp/overview/#usage) for write
-examples that use these schemas.
+Some are shared across all clients, while some are client specific. Refer to
+each client's documentation for details.

:::warning

Exposing these values may expose your database to bad actors.

:::

#### Core parameters

- **schema**: Specifies the transport method, with support for: `http`,
  `https`, `tcp` & `tcps`
-- **addr**: The address and port of the QuestDB server.
+- **addr**: The address and port of the QuestDB server, as in `localhost:9000`.

#### HTTP Parameters

-- **username**: Username for HTTP authentication.
-- **password** (SENSITIVE): Password for HTTP Basic authentication.
-- **token** (SENSITIVE): Bearer token for HTTP Token authentication.
-  - Open source HTTP users are unable to generate tokens. For TCP token auth,
-    see the below section.
+- **password** (SENSITIVE): Password for HTTP Basic Authentication.
- **request_min_throughput**: Expected throughput for network send to the
  database server, in bytes.
  - Defaults to 100 KiB/s
@@ -156,6 +152,33 @@ Exposing these values may expose your database to bad actors.
  milliseconds.
  - Defaults to 10 seconds.
  - Not all errors are retriable.
+- **token** (SENSITIVE): Bearer token for HTTP Token authentication.
+  - Open source HTTP users are unable to generate tokens. For TCP token auth,
+    see the below section.
+- **username**: Username for HTTP Basic Authentication.
+
+#### TCP Parameters
+
+:::note
+
+These parameters are only useful when using ILP over TCP with authentication
+enabled. Most users should use ILP over HTTP. These parameters are listed for
+completeness and for users who have specific requirements.
+
+:::
+
+_See the [Authentication](/docs/reference/api/ilp/overview/#authentication) section below for configuration._
+
+- **auth_timeout**: Timeout for TCP authentication with QuestDB server, in
+  milliseconds.
+  - Default 15 seconds.
+- **token** (SENSITIVE): TCP Authentication `d` parameter.
+  - **token_x** (SENSITIVE): TCP Authentication `x` parameter.
+    - Used in C/C++/Rust/Python clients.
+  - **token_y** (SENSITIVE): TCP Authentication `y` parameter.
+    - Used in C/C++/Rust/Python clients.
+- **username**: Username for TCP authentication.
+

#### Auto-flushing behavior

@@ -164,10 +187,8 @@ Exposing these values may expose your database to bad actors.

  - Default is "on" for clients that support auto-flushing (all except C, C++ &
    Rust).

-- **auto_flush_rows**: Auto-flushing is triggered above this row count.
-
-  - Defaults to `75,000` for HTTP, and `600` for TCP.
-  - If set, this implies "auto_flush=on".
+- **auto_flush_bytes**: Auto-flushing is triggered above this buffer size.
+  - Disabled by default.

- **auto_flush_interval**: Auto-flushing is triggered after this time period
  has elapsed since the last flush, in milliseconds.

  - Defaults to 1 second for HTTP, and 1 second for TCP.
  - This is not a periodic timer - it will only be checked on the next row
    creation.

-- **auto_flush_bytes** Auto-flushing is triggered above this buffer size.
-  - Disabled by default.
+- **auto_flush_rows**: Auto-flushing is triggered above this row count.

-#### Network configuration
+  - Defaults to `75,000` for HTTP, and `600` for TCP.
+  - If set, this implies "auto_flush=on".

-_Optional._
+#### Buffer configuration

-- **bind_interface**: Specify the local network interface for outbound
-  connections.
-  - Not to be confused with the QuestDB port in the `addr` param.
+- **init_buf_size**: Set the initial (but growable) size of the buffer in bytes.
+  - Defaults to `64 KiB`.
+- **max_buf_size**: Sets the growth limit of the buffer in bytes.
+  - Defaults to `100 MiB`.
+  - Clients will error if this is exceeded.
+- **max_name_len**: The maximum allowable number of UTF-8 bytes in table or
+  column names.
+  - Defaults to `127`.
+  - Related to length limits for filenames on the user's host OS.
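+Taken together, a complete configuration string might look like the following.
+This is a sketch with illustrative values only (the keys are documented above;
+adjust them to your client and deployment):
+
+```
+http::addr=localhost:9000;auto_flush_rows=75000;retry_timeout=10000;init_buf_size=65536;max_buf_size=104857600;
+```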
#### TLS configuration

+_QuestDB Enterprise only._
+
- **tls_verify**: Toggle verification of TLS certificates. Default is `on`.
- **tls_roots**: Specify the source of bundled TLS certificates.
  - The defaults and possible param values are client-specific.
@@ -201,39 +230,201 @@ _Optional._
  clients.
  - Java for instance would apply `tls_roots=/path/to/Java/key/store`

-#### Buffer configuration
+#### Network configuration

-- **init_buf_size**: Set the initial (but growable) size of the buffer in bytes.
-  - Defaults to `64 KiB`.
-- **max_buf_size**: Sets the growth limit of the buffer in bytes.
-  - Defaults to `100 MiB`.
-  - Clients will error if this is exceeded.
-- **max_name_len**: The maximum alloable number of UTF-8 bytes in the table or
-  column names.
-  - Defaults to `127`.
-  - Related to length limits for filenames on the user's host OS.
+- **bind_interface**: Optionally, specify the local network interface for outbound
+  connections. Useful if you have multiple interfaces or an accelerated network interface (e.g. Solarflare).
+  - Not to be confused with the QuestDB port in the `addr` param.

-#### TCP Parameters
+## Error handling
+
+The HTTP transport supports automatic retries for failed requests deemed
+recoverable. Recoverable errors include network errors, some server errors, and
+timeouts, while non-recoverable errors encompass invalid data, authentication
+errors, and other client-side errors.
+
+Retrying is particularly beneficial during network issues or when the server is
+temporarily unavailable. The retrying behavior can be configured through the
+`retry_timeout` configuration option or, in some clients, via their API.
+The client continues to retry recoverable errors until they either succeed or the specified timeout is
+reached.
+
+The TCP transport lacks support for error propagation from the server. In such
+cases, the server merely closes the connection upon encountering an error. Consequently, the
+client receives no additional error information from the server. This limitation
+significantly contributes to the preference for HTTP transport over TCP
+transport.
+
+## Authentication

:::note

-These parameters are only useful when using ILP over TCP with authentication
-enabled. Most users should use ILP over HTTP. These parameters are listed for
-completeness and for users who have specific requirements.
+Using [QuestDB Enterprise](/enterprise/)?
+
+Skip to [advanced security features](/docs/operations/rbac/) instead, which
+provides holistic security out-of-the-box.

:::

-- **username**: Username for TCP authentication.
-- **token** (SENSITIVE): TCP Authentication `d` parameter.
-  - **token_x** (SENSITIVE): TCP Authentication `x` parameter.
-    - Used in C/C++/Rust/Python clients.
-  - **token_y** (SENSITIVE): TCP Authentication `y` parameter.
-    - Used in C/C++/Rust/Python clients.
-- **auth_timeout**: Timeout for TCP authentication with QuestDB server, in
-  milliseconds.
-  - Default 15 seconds.
+InfluxDB Line Protocol supports authentication via HTTP Basic Authentication, configured through
+[the HTTP Parameters](/docs/reference/api/ilp/overview/#http-parameters), or via token when using
+the TCP transport, configured through [the TCP Parameters](/docs/reference/api/ilp/overview/#tcp-parameters).
+
+A similar pattern is used across all client libraries. If you want to use a TCP token, you need to
+configure your QuestDB server. This document will break down and demonstrate the configuration keys and core
+configuration options.
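+In practice, authentication is driven by the same configuration string as the
+rest of the client setup. As a sketch (the `admin`/`quest` values are QuestDB's
+illustrative open source defaults, and the TCP token placeholders must come from
+your own key material, generated as shown below):
+
+```
+http::addr=localhost:9000;username=admin;password=quest;
+tcp::addr=localhost:9009;username=testUser1;token=<d>;token_x=<x>;token_y=<y>;
+```
+
+Note that `token_x` and `token_y` are only required by some clients
+(C/C++/Rust/Python), as listed in the TCP parameters above.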
+
+Once a client has been selected and configured, resume from your language client
+documentation.
+
+##### TCP token authentication setup
+
+Create `d`, `x` & `y` tokens for client usage.
+
+##### Prerequisites
+
+- `jose`: C-language implementation of JavaScript Object Signing and Encryption.
+  Generates tokens.
+- `jq`: For pretty JSON output.
+
+
+
+```bash
+brew install jose
+brew install jq
+```
+
+
+
+```bash
+yum install jose
+yum install jq
+```
+
+
+
+```bash
+apt install jose
+apt install jq
+```
+
+
+
+##### Server configuration
+
+Next, create an authentication file.
+
+Only elliptic curve (P-256) keys are supported (key type `ec-p-256-sha256`):
+
+```bash
+testUser1 ec-p-256-sha256 fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac
+# [key/user id] [key type] {keyX keyY}
+```
+
+Generate an authentication file using the `jose` utility:
+
+```bash
+jose jwk gen -i '{"alg":"ES256", "kid": "testUser1"}' -o /var/lib/questdb/conf/full_auth.json
+
+KID=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.kid')
+X=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.x')
+Y=$(cat /var/lib/questdb/conf/full_auth.json | jq -r '.y')
+
+echo "$KID ec-p-256-sha256 $X $Y" | tee /var/lib/questdb/conf/auth.txt
+```
+
+Once created, reference it in the server [configuration](/docs/configuration/):
+
+```ini title='/path/to/server.conf'
+line.tcp.auth.db.path=conf/auth.txt
+```
+
+##### Client keys
+
+For the server configuration above, the corresponding JSON Web Key must be
+stored on the clients' side.
+
+When sending a fully-composed JWK, it will have the following keys:
+
+```json
+{
+  "kty": "EC",
+  "d": "5UjEMuA0Pj5pjK8a-fa24dyIf-Es5mYny3oE_Wmus48",
+  "crv": "P-256",
+  "kid": "testUser1",
+  "x": "fLKYEaoEb9lrn3nkwLDA-M_xnuFOdSt9y0Z7_vWSHLU",
+  "y": "Dt5tbS1dEDMSYfym3fgMv0B99szno-dFc1rYF9t0aac"
+}
+```
+
+The `x` and `y` parameters form the public key, while `d` is the private key parameter.
+
+For example, the Python client would be configured as outlined in the
+[Python docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#tcp-auth).
+
+## Table and column auto-creation
+
+When sending data to a table that does not exist, the server will create the
+table automatically. This also applies to columns that do not exist. The server
+will use the first row of data to determine the column types.
+
+If the table already exists, the server will validate that the columns match the
+existing table. If the columns do not match, the server will return a
+non-recoverable error which, when using the HTTP/HTTPS transport, is propagated to the client.
+
+You can avoid table and/or column auto-creation by setting the `line.auto.create.new.columns` and
+`line.auto.create.new.tables` configuration parameters to `false`.
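+As a sketch, the corresponding `server.conf` entries would be:
+
+```ini title='server.conf'
+line.auto.create.new.columns=false
+line.auto.create.new.tables=false
+```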
+
+If you're using QuestDB Enterprise, you must grant further permissions to the
+authenticated user:
+
+```sql
+CREATE SERVICE ACCOUNT ingest_user; -- creates a service account to be used by a client
+GRANT ilp, create table TO ingest_user; -- grants permissions to ingest data and create tables
+GRANT add column, insert ON all tables TO ingest_user; -- grants permissions to add columns and insert data to all tables
+-- OR
+GRANT add column, insert ON table1, table2 TO ingest_user; -- grants permissions to add columns and insert data to specific tables
+```
+
+Read more setup details in the
+[Enterprise quickstart](/docs/guides/enterprise-quick-start/#4-ingest-data-influxdb-line-protocol)
+and the [role-based access control](/docs/operations/rbac/) guides.
+
+## Timestamp Column Name
+
+QuestDB's underlying ILP protocol sends timestamps to QuestDB without a name.
+
+If your table has been created beforehand, the designated timestamp will be correctly
+assigned based on the payload sent by the client. But if your table does not
+exist, it will be automatically created and the timestamp column will be named
+`timestamp`. To use a custom name, say `my_ts`, pre-create the table with the desired
+timestamp column name.
+
+To do so, issue a `CREATE TABLE` statement to create the table in advance:
+
+```questdb-sql title="Creating a timestamp named my_ts"
+CREATE TABLE IF NOT EXISTS 'trades' (
+  symbol SYMBOL capacity 256 CACHE,
+  side SYMBOL capacity 256 CACHE,
+  price DOUBLE,
+  amount DOUBLE,
+  my_ts TIMESTAMP
+) timestamp (my_ts) PARTITION BY DAY WAL;
+```
+
+You can use the `CREATE TABLE IF NOT EXISTS` construct to make sure the table is
+created, but without raising an error if the table already exists.
+
+## HTTP transaction semantics
+
+The TCP endpoint does not support transactions. The HTTP ILP endpoint treats every request as an individual transaction, so long as it contains rows for a single table.

As of writing, the HTTP endpoint does not provide full transactionality in all cases.

Specifically:

- If an HTTP request contains data for two tables and the final commit fails
  for the second table, the data for the first table will still be committed.
  This is a deviation from full transactionality, where a failure in any part of
-  the transaction would result in the entire transaction being rolled back.
+  the transaction would result in the entire transaction being rolled back. If data
+  transactionality is important for you, the best practice is to make sure you
+  flush data to the server in batches that contain rows for a single table.
+
+- Even when you are sending data to a single table, when dynamically adding new columns to
+  a table, an implicit commit occurs each time a new column is added. If the request
+  is aborted or has parse errors, no data will be inserted into the corresponding
+  table, but the new column will be added and will not be rolled back.
+
+- Some clients have built-in support for controlling transactions. These APIs help to comply with the single-table-per-request pre-requisite for HTTP transactions, but they don't control whether new columns
+  are being added.
+
+- As of writing, if you want to make sure you have data transactionality and
+  schema/metadata transactionality, you should disable `line.auto.create.new.columns` and
+  `line.auto.create.new.tables` in your configuration.
Be aware that if you do this,
+  you will not have dynamic schema capabilities and you will need to create each table
+  and column before you try to ingest data, via [`CREATE TABLE`](/docs/reference/sql/create-table/) and/or [`ALTER TABLE ADD COLUMN`](/docs/reference/sql/alter-table-add-column/) SQL statements.
+
+
+## Exactly-once delivery vs at-least-once delivery
+
+The retrying behavior of the HTTP transport can lead to some data being sent to
+the server more than once.
+
+**Example**: Client sends a batch to the server, the server receives the batch,
+processes it, but fails to send a response back to the client due to a network
+error. The client will retry sending the batch to the server. This means the
+server will receive the batch again and process it again. This can lead to
+duplicated rows in the server.
+
+There are two ways to mitigate this issue:
+
+- Use [QuestDB deduplication feature](/docs/concept/deduplication/) to remove
+  duplicated rows. QuestDB server can detect and remove duplicated rows
+  automatically, resulting in exactly-once processing. This is recommended when
+  using the HTTP transport with retrying enabled.
+- Disable retrying by setting `retry_timeout` to 0. This will make the client
+  send the batch only once; failed requests will not be retried, and the client
+  will receive an error. This effectively gives the client at-most-once
+  delivery.
+
+## Health Check
+
+To monitor your active connection, there is a `ping` endpoint:
+
+```shell
+curl -I http://localhost:9000/ping
+```
+
+Returns (pong!):
+
+```shell
+HTTP/1.1 204 OK
+Server: questDB/1.0
+Date: Fri, 2 Feb 2024 17:09:38 GMT
+Transfer-Encoding: chunked
+Content-Type: text/plain; charset=utf-8
+X-Influxdb-Version: v2.7.4
+```
-
-- When adding new columns to a table, an implicit commit occurs each time a new
-  column is added. If the request is aborted or has parse errors, this commit
-  cannot be rolled back.
+Determine whether an instance is active and confirm the version of InfluxDB Line
+Protocol with which you are interacting.

From 066e1b5a8fe78ffd9dc4d6f2934881ec9614ecd8 Mon Sep 17 00:00:00 2001
From: Evgeny Oshchepkov
Date: Fri, 19 Jul 2024 18:53:04 +0200
Subject: [PATCH 14/23] Update embeddable.md (#23)

Small text corrections
---
 third-party-tools/embeddable.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/third-party-tools/embeddable.md b/third-party-tools/embeddable.md
index a058a762..c1d7ee68 100644
--- a/third-party-tools/embeddable.md
+++ b/third-party-tools/embeddable.md
@@ -9,11 +9,11 @@ Embeddable is a developer toolkit for building fast, interactive
customer-facing analytics. It works well with a high-performance time-series
database like QuestDB.

-In [Embeddable](https://embeddable.com/) define
+In [Embeddable](https://embeddable.com/) you define
[Data Models](https://trevorio.notion.site/Data-modeling-35637bbbc01046a1bc47715456bfa1d8)
and
[Components](https://trevorio.notion.site/Using-components-761f52ac2d0743b488371088a1024e49)
-in code stored in your own code repository, then use the **SDK** to make these
+in code, which are stored in your own code repository, then use the **SDK** to make these
available for your team in the powerful Embeddable **no-code builder.**

The end result is the ability to deliver fast, interactive
**customer-facing analytics** directly into your product.
From 68d0a145b3fab0782f5337b79d2a7b3a85d90eaa Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:17:55 -0700 Subject: [PATCH 15/23] Update broken client links (#25) That won't work! Updating because linter will soon fail on broken links again. --- clients/ingest-rust.md | 2 +- introduction.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clients/ingest-rust.md b/clients/ingest-rust.md index f1313b33..2a2c1611 100644 --- a/clients/ingest-rust.md +++ b/clients/ingest-rust.md @@ -175,7 +175,7 @@ As described at the [ILP overview](/docs/reference/api/ilp/overview#http-transac the HTTP transport has some support for transactions. In order to ensure in advance that a flush will not affect more than one table, call -[`sender.flush_and_keep_with_flags(&mut buffer, true)`](Sender::flush_and_keep_with_flags). +`sender.flush_and_keep_with_flags(&mut buffer, true)`. This call will refuse to flush a buffer if the flush wouldn't be data-transactional. diff --git a/introduction.md b/introduction.md index fc2f7c2d..aec0c1d0 100644 --- a/introduction.md +++ b/introduction.md @@ -80,6 +80,10 @@ infrastructure sprawl. title="Benchmark results for QuestDB 7.3.10, InfluxDB 2.7.4 and Timescale 2.14.2" /> +```sql title='yo' demo='https://example.com' +SQL +``` + With a specialized [time-series database](/glossary/time-series-database/), you don't need to worry about out-of-order data, duplicates, exactly one semantics, frequency of ingestion, or the many other details you will find in real-time From 1aacc628c083898d5f2b5b9a04ef5b6dd65f9969 Mon Sep 17 00:00:00 2001 From: Alex Pelagenko <2159629+ideoma@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:55:20 +0100 Subject: [PATCH 16/23] feat(doc): parquet_read func doc (#28) Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com> --- reference/function/parquet.md | 77 +++++++++++++++++++ reference/sql/overview.md | 140 ++++++++++++++++++++++------------ sidebars.js | 3 +- 3 files changed, 172 insertions(+), 48 deletions(-) create mode 100644 reference/function/parquet.md diff --git a/reference/function/parquet.md b/reference/function/parquet.md new file mode 100644 index 00000000..6478bedd --- /dev/null +++ b/reference/function/parquet.md @@ -0,0 +1,77 @@ +--- +title: Parquet functions +sidebar_label: Parquet +description: QuestDB Apache Parquet functions reference documentation. +--- + +This page introduces the [Apache Parquet](/glossary/apache-parquet/) read function. + +:::info + +Apache Parquet support is in **beta**. + +It may not be fit for production use. + +Please let us know if you run into issues. + +Either: + +1. Email us at [support@questdb.io](mailto:support@questdb.io) +2. Join our [public Slack](https://slack.questdb.io/) +3. Post on our [Discourse community](https://community.questdb.io/) + +::: + +## parquet_read + +Reads a parquet file as a table. + +`parquet_read(parquet_file_path)` + +### Usage + +With this function, query a Parquet file located at the QuestDB copy root directory. Both relative and absolute file paths are supported. + +```questdb-sql title="parquet_read example" +SELECT + * +FROM + parquet_read('trades.parquet') +WHERE + exchange == 'NASDAQ' +``` + +| quantity | price | exchange | timestamp | +| -------- | ------ | -------- | ------------------------- | +| 1000 | 145.09 | NASDAQ | 2023-07-12T09:30:00.0000Z | + +The query above: + +- Reads all columns from the file `trades.parquet` located at the server copy root directory. 
+- Filters rows, keeping only trades made on NASDAQ.
+
+### Configuration
+
+For security reasons, reading is only allowed if the copy root directory is configured. To configure the copy root directory:
+
+- `cairo.sql.copy.root` must be defined using one of the following settings:
+  - The environment variable `QDB_CAIRO_SQL_COPY_ROOT`.
+  - The `cairo.sql.copy.root` key in `server.conf`.
+
+### Limitations
+
+The Parquet format supports a rich set of data types, including structural types. QuestDB can only read data types that match its own data types:
+
+- Varchar
+- Int
+- Long
+- Short
+- Byte
+- Boolean
+- UUID
+- Double
+- Float
+- Timestamp
+- Binary
+
+Parquet columns with unsupported data types are ignored.
diff --git a/reference/sql/overview.md b/reference/sql/overview.md
index 6e4d3235..40ec9b1c 100644
--- a/reference/sql/overview.md
+++ b/reference/sql/overview.md
@@ -9,18 +9,18 @@ description:

import Screenshot from "@theme/Screenshot"
import Tabs from "@theme/Tabs"
import TabItem from "@theme/TabItem"
-import CQueryPartial from "../../partials/_c.sql.query.partial.mdx"
-import CsharpQueryPartial from "../../partials/_csharp.sql.query.partial.mdx"
-import GoQueryPartial from "../../partials/_go.sql.query.partial.mdx"
-import JavaQueryPartial from "../../partials/_java.sql.query.partial.mdx"
-import NodeQueryPartial from "../../partials/_nodejs.sql.query.partial.mdx"
-import RubyQueryPartial from "../../partials/_ruby.sql.query.partial.mdx"
-import PHPQueryPartial from "../../partials/_php.sql.query.partial.mdx"
-import PythonQueryPartial from "../../partials/_python.sql.query.partial.mdx"
-import CurlExecQueryPartial from "../../partials/_curl.exec.query.partial.mdx"
-import GoExecQueryPartial from "../../partials/_go.exec.query.partial.mdx"
-import NodejsExecQueryPartial from "../../partials/_nodejs.exec.query.partial.mdx"
-import PythonExecQueryPartial from "../../partials/_python.exec.query.partial.mdx"
+import CQueryPartial from "../../partials/\_c.sql.query.partial.mdx"
+import CsharpQueryPartial from "../../partials/\_csharp.sql.query.partial.mdx"
+import GoQueryPartial from "../../partials/\_go.sql.query.partial.mdx"
+import JavaQueryPartial from "../../partials/\_java.sql.query.partial.mdx"
+import NodeQueryPartial from "../../partials/\_nodejs.sql.query.partial.mdx"
+import RubyQueryPartial from "../../partials/\_ruby.sql.query.partial.mdx"
+import PHPQueryPartial from "../../partials/\_php.sql.query.partial.mdx"
+import PythonQueryPartial from "../../partials/\_python.sql.query.partial.mdx"
+import CurlExecQueryPartial from "../../partials/\_curl.exec.query.partial.mdx"
+import GoExecQueryPartial from "../../partials/\_go.exec.query.partial.mdx"
+import NodejsExecQueryPartial from "../../partials/\_nodejs.exec.query.partial.mdx"
+import PythonExecQueryPartial from "../../partials/\_python.exec.query.partial.mdx"

Querying - as a base action - is performed in four primary ways:

1. Query via the
   [QuestDB Web Console](/docs/reference/sql/overview/#questdb-web-console)
2. Query via [PostgreSQL](/docs/reference/sql/overview/#postgresql)
3. Query via [REST HTTP API](/docs/reference/sql/overview/#rest-http-api)
+4. Query via [Apache Parquet](/docs/reference/sql/overview/#apache-parquet)

QuestDB provides SQL with enhanced time series extensions.

@@ -65,14 +66,14 @@ Query QuestDB using the PostgreSQL endpoint via the default port `8812`.

Examples in multiple languages are shown below.
@@ -163,8 +164,8 @@ obtaining the results as CSV. For obtaining results in JSON, use `/exec` instead, documented next. @@ -218,10 +219,10 @@ This is similar to the `/exp` entry point which returns results as CSV. ##### Querying Data @@ -246,9 +247,9 @@ Alternatively, the `/exec` endpoint can be used to create a table and the `INSERT` statement can be used to populate it with values: @@ -277,56 +278,56 @@ curl -G \ The `node-fetch` package can be installed using `npm i node-fetch`. ```javascript -const fetch = require("node-fetch") +const fetch = require("node-fetch"); -const HOST = "http://localhost:9000" +const HOST = "http://localhost:9000"; async function createTable() { try { - const query = "CREATE TABLE IF NOT EXISTS trades (name VARCHAR, value INT)" + const query = "CREATE TABLE IF NOT EXISTS trades (name VARCHAR, value INT)"; const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ) - const json = await response.json() + ); + const json = await response.json(); - console.log(json) + console.log(json); } catch (error) { - console.log(error) + console.log(error); } } async function insertData() { try { - const query = "INSERT INTO trades VALUES('abc', 123456)" + const query = "INSERT INTO trades VALUES('abc', 123456)"; const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ) - const json = await response.json() + ); + const json = await response.json(); - console.log(json) + console.log(json); } catch (error) { - console.log(error) + console.log(error); } } async function updateData() { try { - const query = "UPDATE trades SET value = 9876 WHERE name = 'abc'" + const query = "UPDATE trades SET value = 9876 WHERE name = 'abc'"; const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ) - const json = await response.json() + ); + const json = await response.json(); - console.log(json) + console.log(json); } catch (error) { - console.log(error) + console.log(error); } } -createTable().then(insertData).then(updateData) +createTable().then(insertData).then(updateData); ``` @@ -360,6 +361,51 @@ run_query("UPDATE trades SET value = 9876 WHERE name = 'abc'") +## Apache Parquet + +:::info + +Apache Parquet support is in **beta**. + +It may not be fit for production use. + +Please let us know if you run into issues. + +Either: + +1. Email us at [support@questdb.io](mailto:support@questdb.io) +2. Join our [public Slack](https://slack.questdb.io/) +3. Post on our [Discourse community](https://community.questdb.io/) + +::: + +Parquet files can be read and thus queried by QuestDB. + +To do so, first set a directory where the Parquet file lives. + +This can be done one of two ways: + +1. Set the environment variable `QDB_CAIRO_SQL_COPY_ROOT` +2. Set the `cairo.sql.copy.root` key in `server.conf` + +After that, apply the function like so: + +`parquet_read(parquet_file_path)` + +In context: + +```questdb-sql title="parquet_read example" +SELECT + * +FROM + parquet_read('trades.parquet') +WHERE + exchange == 'NASDAQ' +``` + +For more information, see the +[Parquet documentation](/docs/reference/function/parquet/). + ## What's next? Now... SQL! It's query time. 
diff --git a/sidebars.js b/sidebars.js
index 24a37d2b..2ba05e8c 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -264,6 +264,7 @@ module.exports = {
        "reference/function/finance",
        "reference/function/meta",
        "reference/function/numeric",
+        "reference/function/parquet",
        "reference/function/pattern-matching",
        "reference/function/random-value-generator",
        "reference/function/row-generator",
@@ -450,4 +451,4 @@ module.exports = {
    ],
  },
].filter(Boolean),
-}
+};

From af23f883fe199faa9cfa471b3120f055e3989ae4 Mon Sep 17 00:00:00 2001
From: Nick Woolmer <29717167+nwoolmer@users.noreply.github.com>
Date: Thu, 25 Jul 2024 12:36:06 +0100
Subject: [PATCH 17/23] Add systemd changes to file descriptor limit docs (#29)

When running the service under systemd, you also need to set LimitNOFILE in
the service config.
---
 deployment/capacity-planning.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/deployment/capacity-planning.md b/deployment/capacity-planning.md
index 5d15774a..ac2de541 100644
--- a/deployment/capacity-planning.md
+++ b/deployment/capacity-planning.md
@@ -352,6 +352,16 @@ sysctl -p
sysctl fs.file-max
```

+#### Extra steps for systemd
+
+If you are running QuestDB using `systemd`, you will also need to set the `LimitNOFILE` property in your service file.
+
+If you have followed the [setup guide](https://questdb.io/docs/deployment/systemd/), then the file should be called `questdb.service` and be located at `~/.config/systemd/user/questdb.service`.
+
+Add this property to the `[Service]` section, setting it to at least `1048576`, or higher if you have set higher OS-wide limits.
+
+Then restart the service. If you have configured these settings correctly, any warnings in the web console should now be cleared.
+
#### Setting system-wide open file limit on MacOS:

On MacOS, the system-wide limit can be modified by using `launchctl`:

From 245f05ed00ca73a256a4e6c0a2b76532a8a79a14 Mon Sep 17 00:00:00 2001
From: Jaromir Hamala
Date: Thu, 25 Jul 2024 16:04:47 +0000
Subject: [PATCH 18/23] snapshot/backup docs improvement (#26)

TODO:
- [x] figure out how to link it with reference doc at
  https://questdb.io/docs/reference/sql/snapshot/ - this should be improved too
- [ ] editorial changes

---------

Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com>
Co-authored-by: Nick Woolmer <29717167+nwoolmer@users.noreply.github.com>
---
 operations/backup.md      | 160 ++++++++++++++++++++++++++++++------
 reference/sql/backup.md   | 167 --------------------------------------
 reference/sql/snapshot.md |  62 ++++----------
 sidebars.js               |   1 -
 4 files changed, 152 insertions(+), 238 deletions(-)
 delete mode 100644 reference/sql/backup.md

diff --git a/operations/backup.md b/operations/backup.md
index 518894b1..85875e83 100644
--- a/operations/backup.md
+++ b/operations/backup.md
@@ -6,55 +6,163 @@ description:
  to prevent data loss.
---

-The recommended QuestDB backup method is to create a
-[SNAPSHOT](/docs/reference/sql/snapshot/).
+QuestDB provides a snapshot feature that allows users to create backups of their
+databases. This feature is essential for preventing data loss and ensuring that
+data can be restored in the event of a failure.
A snapshot:

-- Supports both full backup and incremental snapshots
-- All OSes except Windows
-- Provides both a full data backup as well as filesystem snapshot
+- Supports both full backups and incremental snapshots
+- Is available on all operating systems except Windows
+- Can be created while the database is running

-It is an easy and reliable way to back up your database.
+:::caution
+QuestDB currently does not support creating snapshots on Windows.
+If you are a Windows user and require backup functionality, please let us know
+by [commenting on this issue](https://github.com/questdb/questdb/issues/4811).
+:::

-If you see "backup" indicated, assume we are referencing SNAPSHOT and not BACKUP
-unless clearly indicated.

-Alternatively, such as for windows users, there is a a more limited - and
-deprecated - [BACKUP](/docs/reference/sql/backup/) operation.

-- Supports full database or table backup only
-- Windows OS only, deprecated on other OSes such as Linux

----

+## Overview

+The snapshot feature instructs QuestDB to record the state of the database
+at a specific point in time. This state includes all data, metadata, and indexes
+required to restore the database to the condition it was in when the snapshot was taken.

+### Terminology

+This guide uses the word "snapshot" with two different meanings:

+- **Database snapshot**: Instructs QuestDB to record the state of the database at a specific point in time. This is done
+  via the `SNAPSHOT PREPARE` SQL command.
+- **Filesystem and volume snapshot**: A point-in-time copy of the filesystem that can be used to create a backup. This is done
+  using filesystem-specific tools or commands.

+Database backup involves creating a database snapshot and then using a filesystem snapshot or file copying to create a
+backup.

+## Creating a database snapshot

QuestDB database files, including snapshots, are stored inside the server root
directory provided at startup. The root directory contains the following subdirectories:

+- `db`: Contains database files
+- `log`: Contains log files
+- `conf`: Contains configuration files
+- `public`: Contains static files for the web console
+- `snapshot`: Contains snapshot files, if any

+:::tip
+The default location of the server root directory varies by operating system:

+- MacOS: When using Homebrew, the server root directory is located at /opt/homebrew/var/questdb/.
+- Linux: The default location is ~/.questdb.

+If you are unsure of the server root directory's location, you can determine it by
+inspecting the QuestDB logs. Look for a line that
+reads, `QuestDB configuration files are in /opt/homebrew/var/questdb/conf`. The server root directory is one level up
+from the conf directory indicated in this log entry.
+See the [root directory structure](/docs/concept/root-directory-structure/) for more information.
:::

+Typically, the `db` directory is the largest and contains the most critical data.
+As you ingest data, the `db` directory will grow in size. To create a backup,
+you cannot simply copy the `db` directory, as it may be in an inconsistent state.
+Instead, you have to instruct QuestDB to create a database snapshot.
+
+To create a database snapshot, execute the following SQL command:
+
+```sql
+SNAPSHOT PREPARE;
+```
+
+This command creates a snapshot of the database inside the `db` directory and records
+additional metadata in the `snapshot` directory.
+
+When the `SNAPSHOT PREPARE` command finishes, you can copy all directories inside
+the server root directory to a backup location.
+
+### Data backup
+
+After issuing the `SNAPSHOT PREPARE` command, it's your responsibility to back up the database files.
+You can use any backup method that suits your infrastructure, such as filesystem snapshots or file-based backups.
+
+In cloud environments, you can use the volume snapshot functionality provided by your cloud provider.
+See guides for creating volume snapshots on the following cloud platforms:
+
+- [AWS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-creating-snapshot.html) -
+  creating EBS snapshots
+- [Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/snapshot-copy-managed-disk?tabs=portal) -
+  creating snapshots of a virtual hard disk
+- [GCP](https://cloud.google.com/compute/docs/disks/create-snapshots) - working
+  with persistent disk snapshots
+
+Even if you are not in a cloud environment, volume snapshots can be taken using either the
+filesystem ([ZFS](https://ubuntu.com/tutorials/using-zfs-snapshots-clones#1-overview)) or a volume
+manager ([LVM](https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/8/html/configuring_and_managing_logical_volumes/snapshot-of-logical-volumes_configuring-and-managing-logical-volumes#snapshot-of-logical-volumes_configuring-and-managing-logical-volumes)).
+
+If filesystem or volume snapshots are not available, you can use file-based backups to back up the QuestDB server root directory.
+We recommend using a backup tool that supports incremental backups to reduce the amount of data transferred during each
+backup. [rsync](https://linux.die.net/man/1/rsync) is a popular tool for this purpose. Make sure to back up
+the entire server root directory, including the `db`, `snapshot`, and all other directories.
+
+Once the backup is complete, you must issue the following command to clean up the database snapshot:
+
+```sql
+SNAPSHOT COMPLETE;
+```
+
+This command informs QuestDB that the database snapshot is no longer needed,
+allowing it to clean up any temporary files created during the snapshot process.
+
+:::note
+For some cloud vendors, the volume snapshot creation operation is asynchronous:
+the point-in-time snapshot is created immediately, as soon as the operation
+starts, but the end snapshot artifact may become available later. In such cases,
+the `SNAPSHOT COMPLETE` statement may be run without waiting for the
+end artifact, but once the snapshot creation has started.
+:::
+
+Failing to issue the `SNAPSHOT COMPLETE` command will result in the snapshot files
+being retained indefinitely, potentially leading to disk space exhaustion.
+
+## Restoring from a snapshot
+
+To restore a database from a snapshot, follow these steps:
+
+1. Stop the QuestDB server.
+2. Remove everything inside the server root directory.
+3. Copy the backup directories to the server root directory. If you are using a filesystem snapshot, restore the
+   snapshot instead.
+4. Create an empty file named `_restore` in the server root directory. You can use a simple `touch` command to create this
+   file (see the sketch below).
+   This file serves as a signal to QuestDB that it should restore the database from the snapshot.
+5. Start the QuestDB server.
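+For illustration, steps 2-4 might look like this on a Linux host. This is a
+sketch only: `/var/lib/questdb` and `/backup/questdb` are placeholder paths for
+the server root and the backup location.
+
+```bash
+# stop QuestDB before running these commands (step 1)
+rm -rf /var/lib/questdb/*                  # step 2: clear the server root
+cp -r /backup/questdb/* /var/lib/questdb/  # step 3: restore the backup files
+touch /var/lib/questdb/_restore            # step 4: signal a snapshot restore
+# then start QuestDB again (step 5)
+```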
+
+Make sure the `_restore` file is present in the server root directory before starting the server,
+otherwise QuestDB will start normally without restoring the database.
+
+After starting the server, QuestDB will restore the database to the state it was in when the snapshot was taken.
+If a snapshot recovery cannot be completed, for example, if the snapshot files are missing or corrupted,
+QuestDB will log an error message and abort startup. In this case, you should investigate the cause of the error
+and attempt to restore the database again.

## Supported filesystems

-QuestDB open source supports the following filesystems:
+QuestDB supports the following filesystems:

- APFS
- EXT4
- NTFS
- OVERLAYFS (used by Docker)
- XFS
+- ZFS

-Other file systems supporting
-[mmap](https://man7.org/linux/man-pages/man2/mmap.2.html) feature are untested
-but may work with QuestDB.
+Other filesystems are untested and, while they may work,
+they are not officially supported. See
+the [filesystem compatibility](/docs/deployment/capacity-planning/#supported-filesystems) section for more information.

-They should not be used in production.
+## Further reading

-QuestDB Enterprise is required if you wish to use [ZFS](https://en.wikipedia.org/wiki/ZFS).
+- [Snapshot API](/docs/reference/sql/snapshot/) - Reference documentation for the SQL commands used to create and manage
+  snapshots.
\ No newline at end of file
diff --git a/reference/sql/backup.md b/reference/sql/backup.md
deleted file mode 100644
index 7a2fbcdc..00000000
--- a/reference/sql/backup.md
+++ /dev/null
@@ -1,167 +0,0 @@
----
-title: BACKUP keyword
-sidebar_label: BACKUP
-description: BACKUP SQL keyword reference documentation.
----
-
-Creates a backup for one, several, or all database tables.
-
----
-
-:::caution
-
-**The BACKUP statement is deprecated since QuestDB version 7.3.3 on all
-operating systems except Windows.** We recommend the
-[SNAPSHOT](/docs/reference/sql/snapshot/) statements instead.
-
-:::
-
----
-
-## Syntax
-
-![Flow chart showing the syntax of the BACKUP keyword](/img/docs/diagrams/backup.svg)
-
-## Backup directory
-
-Backing up a database or tables requires a **backup directory** which is set
-using the `cairo.sql.backup.root` [configuration key](/docs/configuration/) in a
-[server.conf](/docs/concept/root-directory-structure/#serverconf) file:
-
-```shell title="server.conf"
-cairo.sql.backup.root=/Users/UserName/Desktop
-```
-
-The **backup directory** can be on a disk local to the server, a remote disk or
-a remote filesystem. QuestDB will enforce that the backup is only written in a
-location relative to the `backup directory`. This is a security feature to
-disallow random file access by QuestDB.
-
-The tables will be written in a directory with today's date with the default
-format `yyyy-MM-dd` (e.g., `2020-04-20`). A custom date format can be specified
-using the `cairo.sql.backup.dir.datetime.format`
-[configuration key](/docs/configuration/):
-
-```shell title="server.conf"
-cairo.sql.backup.dir.datetime.format=yyyy-dd-MM
-```
-
-Given a `BACKUP` query run on `2021-02-25`, the data and metadata files will be
-written following the
-[db directory structure](/docs/concept/root-directory-structure/#db)
-
-```filestructure title="/path/to/backup_directory"
-├── 2021-02-25
-│   ├── table1
-│   │   ├── ...
-│   ├── table2
-│   │   ├── ...
-│   ├── table3
-│   ...
-```
-
-If a user performs several backups on the same date, each backup will be written
-a new directory.
Subsequent backups on the same date will look as follows: - -```filestructure title="/path/to/backup_directory" -├── 2021-02-22 'first' -├── 2021-02-22.1 'second' -├── 2021-02-22.2 'third' -├── 2021-02-24 'first new date' -├── 2021-02-24.1 'first new date' -│   ... -``` - -## Creating a full backup - -When creating a backup in QuestDB, you can specify that the whole database or -specific tables should be backed up. This process will create a backup in the -`backup directory`. - -A backup can then be triggered via [SQL command](/docs/reference/sql/backup/) -and the backup is complete as soon as the SQL query has finished executing: - -```questdb-sql --- backup whole database -BACKUP DATABASE; --- backup a specific table -BACKUP TABLE my_table; -``` - -Note that calling `BACKUP TABLE ` will only copy table data and -metadata to the destination folder. This form of backup will not copy entire -database configuration files required to perform a complete database restore. - -Alternatively, the [REST API](/docs/reference/api/rest/#exec---execute-queries) -can be used to execute the SQL for a database backup: - -```bash title="Backing up a database via curl" -curl -G --data-urlencode "query=BACKUP DATABASE;" \ - http://localhost:9000/exec -``` - -## Restoring from a backup - -In order to restore a backup, the QuestDB executable must be provided with the -directory location of an existing backup as the **root directory**. This can -done via the `-d` flag as `-d /path/to/backup` when starting up QuestDB. - -```bash -java -p /path/to/questdb-.jar \ - -m io.questdb/io.questdb.ServerMain \ - -d /path/to/backup_directory -``` - -Users who are starting QuestDB via `systemd` or the official AWS AMI may refer -to the -[systemd file](https://github.com/questdb/questdb/blob/master/pkg/ami/marketplace/assets/systemd.service#L21) -for reference. To verify that database information has been successfully -imported, check logs via `journalctl -u questdb` which will contain a list -existing tables. - -Docker instances may have a backup directory mounted to the root directory as -follows: - -```bash -docker run \ - -p 9000:9000 -p 9009:9009 \ - -p 8812:8812 -p 9003:9003 \ - -v "/path/to/backup_directory:/root/.questdb/" questdb/questdb -``` - -## Examples - -```questdb-sql title="Single table" -BACKUP TABLE table1; -``` - -```questdb-sql title="Multiple tables" -BACKUP TABLE table1, table2, table3; -``` - -```questdb-sql title="All tables" -BACKUP DATABASE; -``` - -The following example sets up a cronjob which triggers a daily backup via REST -API: - -```bash -# this will add crontab record that will run trigger at backup every-day at 01:00 AM -# copy paste this into server terminal -crontab -l | { cat; echo "0 1 * * * /usr/bin/curl --silent -G --data-urlencode 'query=BACKUP DATABASE;' http://localhost:9000/exec &>/dev/null"; } | crontab - -``` - -This example shows how to compress a backup using the `tar` utility. An archive -file `questdb_backup.tar.gz` will be created in the directory that the command -is run: - -```bash -tar -zcvf questdb_backup.tar.gz /path/to/backup -``` - -The backup file can be expanded using the same utility: - -```bash -tar -xf questdb_backup.tar.gz -``` diff --git a/reference/sql/snapshot.md b/reference/sql/snapshot.md index 742c8b70..8cac25dd 100644 --- a/reference/sql/snapshot.md +++ b/reference/sql/snapshot.md @@ -4,26 +4,23 @@ sidebar_label: SNAPSHOT description: SNAPSHOT SQL keyword reference documentation. 
---

-Prepares the database for a full backup or a filesystem (disk) snapshot.
+Prepare the database for a full backup or a filesystem (disk) snapshot.

-**Snapshot statements are not supported on Windows OS.**
+_Are you looking for a detailed guide on how to create backups and restore them? Check out our [Backup and Restore](/docs/operations/backup/) guide!_

## Syntax

![Flow chart showing the syntax of the SNAPSHOT keyword](/img/docs/diagrams/snapshot.svg)

+:::caution
+
+QuestDB currently does not support creating snapshots on Windows.
+
+If you are a Windows user and require backup functionality, please [comment on this issue](https://github.com/questdb/questdb/issues/4811).
+
+:::
+
## Snapshot process

-Snapshot recovery mechanism requires a **snapshot instance ID** to be specified
-using the `cairo.snapshot.instance.id`
-[configuration key](/docs/configuration/):
-
-```shell title="server.conf"
-cairo.snapshot.instance.id=your_id
-```
-
-A snapshot instance ID may be an arbitrary string value, such as string
-representation of a UUID.
-
Database snapshots may be used in combination with filesystem snapshots or
together with file copying for a full data backup. Collecting a snapshot
involves the following steps:

   flush the committed data to disk.
2. Start a filesystem snapshot or copy the
   [root directory](/docs/concept/root-directory-structure/) to the backup
-   location on the disk. Refer to the [next section](#filesystem-snapshot) to
-   learn how to create a filesystem snapshot on the most common cloud providers.
+   location on the disk.
3. Run the `SNAPSHOT COMPLETE` statement to release the reader locks and delete the
   metadata file copies.

-For some cloud vendors, filesystem snapshot creation operation is asynchronous,
-i.e. the point-in-time snapshot is created immediately, as soon as the operation
-starts, but the end snapshot artifact may become available later. In such case,
-the `SNAPSHOT COMPLETE` statement (step 3) may be run without waiting for the
-end artifact, but once the snapshot creation has started.
-
-In case you prefer full backups over filesystem snapshots, you should keep in
-mind that the database will retain older partition and column file files on disk
-until `SNAPSHOT COMPLETE`. This means that you may run out of disk space if your
-disk doesn't have enough free space at the time you call `SNAPSHOT PREPARE`.
-
-## Filesystem snapshot
-
-The most common ways to perform cloud-native filesystem snapshots are described
-in the following resources, which rely on similar steps but have minor
-differences in terminology and services:
-
-- [AWS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-creating-snapshot.html)
-  - creating EBS snapshots
-- [Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/snapshot-copy-managed-disk?tabs=portal)
-  - creating snapshots of a virtual hard disk
-- [GCP](https://cloud.google.com/compute/docs/disks/create-snapshots) - working
-  with persistent disk snapshots
-
## Snapshot recovery

In case of a full backup, you should also delete the old root directory and
copy the files from your backup to the same location or, alternatively, you can
point the database at the new root directory.

-To start the database on a filesystem snapshot, you should make sure to
-configure a different snapshot instance ID.
-
-When the database starts, it checks the current instance ID and the ID stored in
-the `snapshot` directory, if present. On IDs mismatch, the database runs a
-snapshot recovery procedure restoring the metadata files from the snapshot. When
-this happens, you should see something like the following in the server logs:
+When the database starts, it checks for the presence of a file named `_restore` in the root directory.
If the file is present, the database runs a
+snapshot recovery procedure restoring the metadata files from the snapshot.
+
+When this happens, you should see the following in the server logs:

```
-2022-03-07T08:24:12.348004Z I i.q.g.DatabaseSnapshotAgent starting snapshot recovery [currentId=`id2`, previousId=`id1`]
-...
+2022-03-07T08:24:12.348004Z I i.q.g.DatabaseSnapshotAgent starting snapshot recovery [trigger=file]
+[...]
2022-03-07T08:24:12.349922Z I i.q.g.DatabaseSnapshotAgent snapshot recovery finished [metaFilesCount=1, txnFilesCount=1, cvFilesCount=1]
```

@@ -104,3 +74,7 @@ SNAPSHOT PREPARE;
-- $ cp -r /root/dir/path /backup/dir/path
SNAPSHOT COMPLETE;
```
+
+## Further reading
+
+- [Backup and Restore](/docs/operations/backup/) - Detailed guide on how to create backups and restore them.
diff --git a/sidebars.js b/sidebars.js
index 2ba05e8c..fca4e2dc 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -162,7 +162,6 @@ module.exports = {
          type: "doc",
          customProps: { tag: "Enterprise" },
        },
-        "reference/sql/backup",
        "reference/sql/cancel-query",
        "reference/sql/case",
        "reference/sql/cast",

From b1f9d198f0f9558a7e371cadf3de7169bcfa0d04 Mon Sep 17 00:00:00 2001
From: Nick Woolmer <29717167+nwoolmer@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:14:04 +0100
Subject: [PATCH 19/23] Changes for SAMPLE BY FROM-TO (#27)

WIP Docs for https://github.com/questdb/questdb/pull/4733

---------

Co-authored-by: goodroot <9484709+goodroot@users.noreply.github.com>
---
 reference/sql/sample-by.md | 67 +++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 4 deletions(-)

diff --git a/reference/sql/sample-by.md b/reference/sql/sample-by.md
index 9d067153..d15072b0 100644
--- a/reference/sql/sample-by.md
+++ b/reference/sql/sample-by.md
@@ -56,6 +56,66 @@ trades per hour:
SELECT ts, count() FROM trades SAMPLE BY 1h
```

+## FROM-TO
+
+:::note
+
+Versions prior to QuestDB 8.1.0 do not have access to this extension.
+
+Please see the new blog for more information.
+
+:::
+
+When using `SAMPLE BY` with `FILL`, you can fill missing rows within the result set with pre-determined values.
+
+However, this method will only fill rows between existing data in the data set; it cannot fill
+rows outside of this range.
+
+To fill outside the bounds of the existing data, you can specify a fill range using a `FROM-TO` clause.
+
+#### Syntax
+
+Specify the shape of the query using `FROM` and `TO`:
+
+```questdb-sql title='Pre-filling trip data' demo
+SELECT pickup_datetime as t, count
+FROM trips
+SAMPLE BY 1d FROM '2008-12-28' TO '2009-01-05' FILL(NULL)
+```
+
+Since no rows existed before 2009, QuestDB automatically fills in these rows.
+
+This is distinct from the `WHERE` clause. A simple rule of thumb:
+`WHERE` controls what data flows in, while `FROM-TO` controls what data flows out.
+
+`FROM` and `TO` can each be used in isolation to pre-fill or post-fill data, as shown in the sketch below. If `FROM` is not provided, then the lower bound is the start of the dataset, aligned to calendar. The opposite is true when omitting `TO`.
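+For example, to post-fill only, you might omit `FROM`. This is a sketch based on
+the same demo `trips` table; the end date is illustrative:
+
+```questdb-sql title='Post-filling trip data' demo
+SELECT pickup_datetime as t, count
+FROM trips
+SAMPLE BY 1d TO '2009-01-12' FILL(NULL)
+```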
#### `WHERE` clause optimisation

+If the user does not provide a `WHERE` clause, or the `WHERE` clause does not consider the designated timestamp,
+QuestDB will add one for you, matching the `FROM-TO` interval.
+
+This means that the query will run optimally and avoid touching data not relevant to the result.
+
+Therefore, we compile the prior query into something similar to this:
+
+```questdb-sql title='Pre-filling trip data with WHERE optimisation' demo
+SELECT pickup_datetime as t, count
+FROM trips
+WHERE pickup_datetime >= '2008-12-28'
+  AND pickup_datetime < '2009-01-05'
+SAMPLE BY 1d FROM '2008-12-28' TO '2009-01-05' FILL(NULL)
+```
+
+#### Limitations
+
+These are the current limitations of this feature:
+
+- This syntax is not compatible with `FILL(PREV)` or `FILL(LINEAR)`.
+- This syntax is for `ALIGN TO CALENDAR` only (default alignment).
+- It does not consider any specified `OFFSET`.
+- This syntax is for non-keyed `SAMPLE BY`, i.e. only designated timestamp and aggregate columns.
+
## Fill options

The `FILL` keyword is optional and expects one or more `fillOption` strategies
@@ -192,10 +252,10 @@ below.

:::note

-Since QuestDB v7.4.0, the default behaviour for `ALIGN TO` has been changed. If you do not specify
+Since QuestDB v7.4.0, the default behaviour for `ALIGN TO` has changed. If you do not specify
an explicit alignment, `SAMPLE BY` expressions will use `ALIGN TO CALENDAR` behaviour.

-The prior default behaviour can be retained by specifying `ALIGN TO FIRST OBSERVATION` on a `SAMPLE BY` query.
+The prior default behaviour can be retained by specifying `ALIGN TO FIRST OBSERVATION` on a `SAMPLE BY` query.

Alternatively, one can set the `cairo.sql.sampleby.default.alignment.calendar` option to `false` in `server.conf`.

@@ -211,7 +271,7 @@ ts TIMESTAMP,
val INT
) TIMESTAMP(ts) PARTITION BY DAY WAL

-INSERT INTO sensors (ts, val) VALUES
+INSERT INTO sensors (ts, val) VALUES
  ('2021-05-31T23:10:00.000000Z', 10),
  ('2021-06-01T01:10:00.000000Z', 80),
  ('2021-06-01T07:20:00.000000Z', 15),
@@ -261,7 +321,6 @@ ALIGN TO CALENDAR

Gives the following result:

-
| ts | count |
| --------------------------- | ----- |
| 2021-05-31T00:00:00.000000Z | 1 |

From f69ceee95aa4a5a5187ae16ee7b78857712701a5 Mon Sep 17 00:00:00 2001
From: goodroot <9484709+goodroot@users.noreply.github.com>
Date: Thu, 25 Jul 2024 10:05:58 -0700
Subject: [PATCH 20/23] Add additional limitations to Parquet function (#30)

---
 reference/function/parquet.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/reference/function/parquet.md b/reference/function/parquet.md
index 6478bedd..07ac5d86 100644
--- a/reference/function/parquet.md
+++ b/reference/function/parquet.md
@@ -75,3 +75,7 @@ The Parquet format supports a rich set of data types, including structural types.
- Binary

Parquet columns with unsupported data types are ignored.
+
+Only a single file is supported; multiple files are not.
+
+Nested data and/or arrays are not supported.

From 18b24791d2aa94e93b7eabb0d08a951015902f7b Mon Sep 17 00:00:00 2001
From: goodroot <9484709+goodroot@users.noreply.github.com>
Date: Tue, 30 Jul 2024 16:03:25 -0700
Subject: [PATCH 21/23] fix typo

---
 introduction.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/introduction.md b/introduction.md
index aec0c1d0..7f52dcb5 100644
--- a/introduction.md
+++ b/introduction.md
@@ -49,6 +49,11 @@ No obscure domain-specific languages required. Use extended SQL.

[High data cardinality](/glossary/high-cardinality/) will not lead to
performance degradation.
+#### Hardware efficiency + +Strong, cost-saving performance on very mninimal hardware, +including sensors and Raspberry Pi. + #### Time series SQL extensions Fast, SIMD-optimized SQL extensions to cruise through querying and analysis. @@ -80,10 +85,6 @@ infrastructure sprawl. title="Benchmark results for QuestDB 7.3.10, InfluxDB 2.7.4 and Timescale 2.14.2" /> -```sql title='yo' demo='https://example.com' -SQL -``` - With a specialized [time-series database](/glossary/time-series-database/), you don't need to worry about out-of-order data, duplicates, exactly one semantics, frequency of ingestion, or the many other details you will find in real-time From c18a28730ec591cf382e451e3a7ff018ea6cb1f5 Mon Sep 17 00:00:00 2001 From: Andrei Pechkurov <37772591+puzpuzpuz@users.noreply.github.com> Date: Wed, 31 Jul 2024 18:00:26 +0300 Subject: [PATCH 22/23] chore(docs): remove unsupported show server_conf statement (#32) --- reference/operators/spatial.md | 2 +- reference/sql/show.md | 56 ++++++---------------------------- 2 files changed, 11 insertions(+), 47 deletions(-) diff --git a/reference/operators/spatial.md b/reference/operators/spatial.md index 66f48de1..f4501176 100644 --- a/reference/operators/spatial.md +++ b/reference/operators/spatial.md @@ -12,7 +12,7 @@ calculations. For more information on this type of data, see the ### within `within(geohash, ...)` - evaluates if a comma-separated list of geohashes are -equal to are within another geohash: +equal to or within another geohash: - The `within` operator can only be used in `LATEST ON` queries and all symbol columns within the query **must be indexed**. diff --git a/reference/sql/show.md b/reference/sql/show.md index bcc28cd4..40732ced 100644 --- a/reference/sql/show.md +++ b/reference/sql/show.md @@ -32,8 +32,6 @@ and partition storage size on disk. - `SHOW SERVICE ACCOUNT` displays details of a service account (enterprise-only) - `SHOW PERMISSIONS` displays permissions of user, group or service account (enterprise-only) -- `SHOW SERVER_CONF` shows the content of QuestDB's server.conf configuration - file. 
(enterprise-only) ## Examples @@ -156,17 +154,13 @@ SHOW GROUPS john; ### SHOW SERVICE ACCOUNT ```questdb-sql - SHOW SERVICE ACCOUNT; - ``` or ```questdb-sql - SHOW SERVICE ACCOUNT ilp_ingestion; - ``` | auth_type | enabled | @@ -178,41 +172,28 @@ SHOW SERVICE ACCOUNT ilp_ingestion; ### SHOW SERVICE ACCOUNTS ```questdb-sql - SHOW SERVICE ACCOUNTS; - ``` -| name | - -|-------------| - +| name | +| ---------- | | management | - | svc1_admin | ```questdb-sql - SHOW SERVICE ACCOUNTS john; - ``` -| name | - -|-------------| - +| name | +| ---------- | | svc1_admin | ```questdb-sql - SHOW SERVICE ACCOUNTS admin_group; - ``` -| name | - -|-------------| - +| name | +| ---------- | | svc1_admin | ### SHOW PERMISSIONS FOR CURRENT USER @@ -252,20 +233,14 @@ SHOW PERMISSIONS admin_group; #### For a service account ```questdb-sql - SHOW PERMISSIONS ilp_ingestion; - ``` | permission | table_name | column_name | grant_option | origin | - -|------------|------------|-------------|--------------|--------| - -| SELECT | | | t | G | - -| INSERT | | | f | G | - -| UPDATE | | | f | G | +| ---------- | ---------- | ----------- | ------------ | ------ | +| SELECT | | | t | G | +| INSERT | | | f | G | +| UPDATE | | | f | G | ### SHOW SERVER_VERSION @@ -279,17 +254,6 @@ SHOW SERVER_VERSION; | -------------- | | 12.3 (questdb) | -### SHOW SERVER_CONF - -```questdb-sql -SHOW SERVER_CONF; -``` - -| name | value | -| ------------------------ | ----- | -| config.validation.strict | true | -| query.timeout.sec | 60 | - ## See also The following functions allow querying tables with filters and using the results From c2956f2d19173500a2ade52e2ce1693945ac5440 Mon Sep 17 00:00:00 2001 From: goodroot <9484709+goodroot@users.noreply.github.com> Date: Thu, 1 Aug 2024 11:01:58 -0700 Subject: [PATCH 23/23] Updates config string, snapshot, & adds intro/overivew polish (#34) --- clients/ingest-c-and-cpp.md | 83 +++++++++++----------- clients/ingest-dotnet.md | 24 ++++--- clients/ingest-go.md | 43 ++++++----- clients/ingest-node.md | 39 +++++----- clients/ingest-python.md | 41 +++++++---- clients/ingest-rust.md | 37 +++++----- clients/java_ilp.md | 118 +++++++----------------------- configuration-string.md | 91 ++++++++++++++++++++++++ introduction.md | 75 +++++++++++++------- reference/api/ilp/overview.md | 104 +++++++++++++++------------ reference/sql/overview.md | 130 +++++++++++++++++++--------------- reference/sql/snapshot.md | 41 +++++++++-- sidebars.js | 7 +- 13 files changed, 489 insertions(+), 344 deletions(-) create mode 100644 configuration-string.md diff --git a/clients/ingest-c-and-cpp.md b/clients/ingest-c-and-cpp.md index 78dfa00b..04206544 100644 --- a/clients/ingest-c-and-cpp.md +++ b/clients/ingest-c-and-cpp.md @@ -23,7 +23,6 @@ Key features of the QuestDB C & C++ client include: health monitoring - **Automatic write retries**: Reuse connections and retry after interruptions - ### Requirements - Requires a C/C++ compiler and standard libraries. @@ -32,16 +31,16 @@ Key features of the QuestDB C & C++ client include: ### Client Installation -You need to add the client as a dependency to your project. Depending on your environment, -you can do this in different ways. Please check the documentation at the +You need to add the client as a dependency to your project. Depending on your +environment, you can do this in different ways. Please check the documentation +at the [client's repository](https://github.com/questdb/c-questdb-client/blob/main/doc/DEPENDENCY.md). 
- ## C++ :::note -This section is for the QuestDB C++ client. +This section is for the QuestDB C++ client. For the QuestDB C Client, see the below seciton. @@ -52,7 +51,6 @@ For the QuestDB C Client, see the below seciton. Explore the full capabilities of the C++ client via the [C++ README](https://github.com/questdb/c-questdb-client/blob/main/doc/CPP.md). - ## Authentication The QuestDB C++ client supports basic connection and authentication @@ -70,7 +68,8 @@ auto sender = questdb::ingress::line_sender::from_conf( ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable: +You can also pass the connection configuration via the `QDB_CLIENT_CONF` +environment variable: ```bash export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" @@ -83,8 +82,8 @@ auto sender = questdb::ingress::line_sender::from_env(); ``` When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. - +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. ### Basic data insertion @@ -123,7 +122,8 @@ These are the main steps it takes: In this case, the designated timestamp will be the one at execution time. -Let's see now an example with timestamps, custom timeout, basic auth, and error control. +Let's see now an example with timestamps, custom timeout, basic auth, and error +control. ```cpp #include @@ -183,26 +183,25 @@ int main() } ``` -As you can see, both events now are using the same timestamp. We recommended using the original event timestamps when -ingesting data into QuestDB. Using the current timestamp will hinder the ability to deduplicate rows which is +As you can see, both events now are using the same timestamp. We recommended +using the original event timestamps when ingesting data into QuestDB. Using the +current timestamp will hinder the ability to deduplicate rows which is [important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). - ## C :::note -This sectioni s for the QuestDB C client. +This sectioni s for the QuestDB C client. -Skip to the bottom of this page for information relating to both the C and C++ clients. +Skip to the bottom of this page for information relating to both the C and C++ +clients. -::: - +::: Explore the full capabilities of the C client via the [C README](https://github.com/questdb/c-questdb-client/blob/main/doc/C.md). - ### Connection The QuestDB C client supports basic connection and authentication @@ -225,13 +224,15 @@ if (!sender) { } ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable: +You can also pass the connection configuration via the `QDB_CLIENT_CONF` +environment variable: ```bash export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" ``` Then you use it like this: + ```c #include ... @@ -317,9 +318,8 @@ error: In this case, the designated timestamp will be the one at execution time. -Let's see now an example with timestamps, custom timeout, basic auth, error control, and transactional -awareness. - +Let's see now an example with timestamps, custom timeout, basic auth, error +control, and transactional awareness. ```c // line_sender_trades_example.c @@ -417,11 +417,11 @@ error: ``` -As you can see, both events use the same timestamp. We recommended using the original event timestamps when -ingesting data into QuestDB. 
Using the current timestamp hinder the ability to deduplicate rows which is +As you can see, both events use the same timestamp. We recommended using the +original event timestamps when ingesting data into QuestDB. Using the current +timestamp hinder the ability to deduplicate rows which is [important for exactly-once processing](#/docs/clients/java_ilp/#exactly-once-delivery-vs-at-least-once-delivery). - ## Other Considerations for both C and C++ ### Configuration options @@ -433,21 +433,23 @@ general structure is: ::addr=host:port;param1=val1;param2=val2;... ``` -`transport` can be `http`, `https`, `tcp`, or `tcps`. The C/C++ and Rust clients share -the same codebase. Please refer to the -[Rust client's documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the -full details on configuration. +`transport` can be `http`, `https`, `tcp`, or `tcps`. The C/C++ and Rust clients +share the same codebase. Please refer to the +[Rust client's documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) +for the full details on configuration. + +Alternatively, for a breakdown of Configuration string options available across +all clients, see the [Configuration string](/docs/configuration-string/) page. ### Don't forget to flush The sender and buffer objects are entirely decoupled. This means that the sender won't get access to the data in the buffer until you explicitly call -`sender.flush` or `line_sender_flush`. -This may lead to a pitfall where you drop a buffer that still has some data in it, -resulting in permanent data loss. +`sender.flush` or `line_sender_flush`. This may lead to a pitfall where you drop +a buffer that still has some data in it, resulting in permanent data loss. -Unlike other official QuestDB clients, the Rust client does not supports auto-flushing -via configuration. +Unlike other official QuestDB clients, the Rust client does not supports +auto-flushing via configuration. A common technique is to flush periodically on a timer and/or once the buffer exceeds a certain size. You can check the buffer's size by calling @@ -459,20 +461,21 @@ QuestDB instances), call `sender.flush_and_keep(&mut buffer)` instead. ### Transactional flush -As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), -the HTTP transport has some support for transactions. +As described in the +[ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the +HTTP transport has some support for transactions. To ensure in advance that a flush will not affect more than one table, call -`buffer.transactional()` or `line_sender_buffer_transactional(buffer)` as we demonstrated on -the examples in this document. +`buffer.transactional()` or `line_sender_buffer_transactional(buffer)` as we +demonstrated on the examples in this document. This call will return false if the flush wouldn't be data-transactional. ## Next Steps Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +about transactions, error control, delivery guarantees, health check, or table +and column auto-creation. With data flowing into QuestDB, now it's time to for analysis. 
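+
+For instance, once rows like the ones above have been flushed, a first
+aggregation might look like the following sketch. It assumes the same `trades`
+table and columns used in the examples on this page:
+
+```questdb-sql
+-- Assumes the trades table populated by the ingestion examples above
+SELECT symbol, side, sum(amount) AS volume, avg(price) AS avg_price
+FROM trades
+WHERE timestamp > dateadd('d', -1, now())
+ORDER BY volume DESC;
+```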
diff --git a/clients/ingest-dotnet.md b/clients/ingest-dotnet.md index 0093029d..590c6855 100644 --- a/clients/ingest-dotnet.md +++ b/clients/ingest-dotnet.md @@ -91,7 +91,8 @@ TCP authentication can be configured using JWK tokens: using var sender = Sender.New("tcp::addr=localhost:9000;username=admin;token="); ``` -The connection string can also be built programatically. See [Configuration](#configuration) for details. +The connection string can also be built programatically. See +[Configuration](#configuration) for details. ## Basic insert @@ -117,7 +118,9 @@ await sender.Table("trades") await sender.SendAsync(); ``` -In this case, the designated timestamp will be the one at execution time. Let's see now an example with timestamps, custom auto-flushing, basic auth, and error reporting. +In this case, the designated timestamp will be the one at execution time. Let's +see now an example with timestamps, custom auto-flushing, basic auth, and error +reporting. ```csharp using QuestDB; @@ -159,11 +162,11 @@ class Program } ``` -As you can see, both events use the same timestamp. We recommended using the original event timestamps when -ingesting data into QuestDB. Using the current timestamp hinder the ability to deduplicate rows which is +As you can see, both events use the same timestamp. We recommended using the +original event timestamps when ingesting data into QuestDB. Using the current +timestamp hinder the ability to deduplicate rows which is [important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). - ## Configuration Construct new Senders via the `Sender` factory. @@ -177,7 +180,7 @@ Optionally, TCP uses `9009`. ### With a configuration string It is recommended, where possible, to initialise the sender using a -[configuration string](https://questdb.io/docs/reference/api/ilp/overview/#client-side-configuration). +[configuration string](/docs/configuration-string/). Configuration strings provide a convenient shorthand for defining client properties, and are validated during construction of the `Sender`. @@ -310,7 +313,6 @@ QuestDB's deduplication feature, and should be avoided where possible. ::: - ## Flushing Once the buffer is filled with data ready to be sent, it can be flushed to the @@ -415,7 +417,9 @@ batch only for a single table. :::caution -As described in the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the HTTP transport has some limitations for transactions when adding new columns. +As described in the +[ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the +HTTP transport has some limitations for transactions when adding new columns. ::: @@ -574,8 +578,8 @@ using var sender = ## Next Steps Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +about transactions, error control, delivery guarantees, health check, or table +and column auto-creation. Dive deeper into the .NET client capabilities by exploring more examples provided in the diff --git a/clients/ingest-go.md b/clients/ingest-go.md index f2ee0dba..07ae9ebc 100644 --- a/clients/ingest-go.md +++ b/clients/ingest-go.md @@ -78,26 +78,27 @@ Or, set the QDB_CLIENT_CONF environment variable and call client, err := questdb.LineSenderFromEnv(context.TODO()) ``` -Alternatively, you can use the built-in Go API to specify the connection options. 
+Alternatively, you can use the built-in Go API to specify the connection +options. - ```go - package main +```go +package main import ( - "context" - qdb "github.com/questdb/go-questdb-client/v3" + "context" + qdb "github.com/questdb/go-questdb-client/v3" ) func main() { - ctx := context.TODO() + ctx := context.TODO() - client, err := qdb.NewLineSender(context.TODO(), qdb.WithHttp(), qdb.WithAddress("localhost:9000"), qdb.WithBasicAuth("admin", "quest")) + client, err := qdb.NewLineSender(context.TODO(), qdb.WithHttp(), qdb.WithAddress("localhost:9000"), qdb.WithBasicAuth("admin", "quest")) ``` - When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. ## Basic Insert @@ -139,7 +140,9 @@ func main() { } ``` -In this case, the designated timestamp will be the one at execution time. Let's see now an example with an explicit timestamp, custom auto-flushing, and basic auth. +In this case, the designated timestamp will be the one at execution time. Let's +see now an example with an explicit timestamp, custom auto-flushing, and basic +auth. ```Go package main @@ -183,29 +186,35 @@ func main() { } } ``` + We recommended to use User-assigned timestamps when ingesting data into QuestDB. - Using the current timestamp hinder the ability to deduplicate rows which is +Using the current timestamp hinder the ability to deduplicate rows which is [important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). ## Configuration options -The minimal configuration string needs to have the protocol, host, and port, as in: +The minimal configuration string needs to have the protocol, host, and port, as +in: ``` http::addr=localhost:9000; ``` -In the Go client, you can set the configuration options via the standard config string, -which is the same across all clients, or using [the built-in API](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderOption). +In the Go client, you can set the configuration options via the standard config +string, which is the same across all clients, or using +[the built-in API](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderOption). -For all the extra options you can use, please check [the client docs](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderFromConf) +For all the extra options you can use, please check +[the client docs](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3#LineSenderFromConf) +Alternatively, for a breakdown of Configuration string options available across +all clients, see the [Configuration string](/docs/configuration-string/) page. ## Next Steps Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +about transactions, error control, delivery guarantees, health check, or table +and column auto-creation. Explore the full capabilities of the Go client via [Go.dev](https://pkg.go.dev/github.com/questdb/go-questdb-client/v3). diff --git a/clients/ingest-node.md b/clients/ingest-node.md index 180e0a90..aee48a2b 100644 --- a/clients/ingest-node.md +++ b/clients/ingest-node.md @@ -68,7 +68,8 @@ const sender = Sender.fromEnv(); ``` When using QuestDB Enterprise, authentication can also be done via REST token. 
-Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. ## Basic insert @@ -76,7 +77,6 @@ Example: inserting executed trades for cryptocurrencies. Without authentication and using the current timestamp. - ```javascript const { Sender } = require("@questdb/nodejs-client") @@ -105,9 +105,9 @@ async function run() { run().then(console.log).catch(console.error) ``` -In this case, the designated timestamp will be the one at execution time. Let's see now an example with an explicit -timestamp, custom auto-flushing, and basic auth. - +In this case, the designated timestamp will be the one at execution time. Let's +see now an example with an explicit timestamp, custom auto-flushing, and basic +auth. ```javascript const { Sender } = require("@questdb/nodejs-client") @@ -115,11 +115,11 @@ const { Sender } = require("@questdb/nodejs-client") async function run() { // create a sender using HTTP protocol const sender = Sender.fromConfig( - "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;" - ) + "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;", + ) // Calculate the current timestamp. You could also parse a date from your source data. - const timestamp = Date.now(); + const timestamp = Date.now() // add rows to the buffer of the sender await sender @@ -139,12 +139,10 @@ async function run() { .floatColumn("amount", 0.001) .at(timestamp, "ms") - // flush the buffer of the sender, sending the data to QuestDB // the buffer is cleared after the data is sent, and the sender is ready to accept new data await sender.flush() - // close the connection after all rows ingested // unflushed data will be lost await sender.close() @@ -153,29 +151,34 @@ async function run() { run().then(console.log).catch(console.error) ``` -As you can see, both events now are using the same timestamp. We recommended to use the original event timestamps when -ingesting data into QuestDB. Using the current timestamp hinder the ability to deduplicate rows which is +As you can see, both events now are using the same timestamp. We recommended to +use the original event timestamps when ingesting data into QuestDB. Using the +current timestamp hinder the ability to deduplicate rows which is [important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). - ## Configuration options -The minimal configuration string needs to have the protocol, host, and port, as in: +The minimal configuration string needs to have the protocol, host, and port, as +in: ``` http::addr=localhost:9000; ``` -For all the extra options you can use, please check [the client docs](https://questdb.github.io/nodejs-questdb-client/SenderOptions.html) +For all the extra options you can use, please check +[the client docs](https://questdb.github.io/nodejs-questdb-client/SenderOptions.html) +Alternatively, for a breakdown of Configuration string options available across +all clients, see the [Configuration string](/docs/configuration-string/) page. ## Next Steps Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +about transactions, error control, delivery guarantees, health check, or table +and column auto-creation. 
-Dive deeper into the Node.js client capabilities, including TypeScript and Worker Threads examples, by exploring the +Dive deeper into the Node.js client capabilities, including TypeScript and +Worker Threads examples, by exploring the [GitHub repository](https://github.com/questdb/nodejs-questdb-client). To learn _The Way_ of QuestDB SQL, see the diff --git a/clients/ingest-python.md b/clients/ingest-python.md index 7d048a76..86e652bb 100644 --- a/clients/ingest-python.md +++ b/clients/ingest-python.md @@ -102,7 +102,8 @@ with Sender(Protocol.Http, 'localhost', 9000, username='admin', password='quest' ``` When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. ## Basic insert @@ -126,7 +127,9 @@ with Sender.from_conf(conf) as sender: sender.flush() ``` -In this case, the designated timestamp will be the one at execution time. Let's see now an example with timestamps, custom auto-flushing, basic auth, and error reporting. +In this case, the designated timestamp will be the one at execution time. Let's +see now an example with timestamps, custom auto-flushing, basic auth, and error +reporting. ```python from questdb.ingress import Sender, IngressError, TimestampNanos @@ -179,10 +182,10 @@ if __name__ == '__main__': ``` We recommended `User`-assigned timestamps when ingesting data into QuestDB. -Using `Server`-assigned timestamps hinders the ability to deduplicate rows which is +Using `Server`-assigned timestamps hinders the ability to deduplicate rows which +is [important for exactly-once processing](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). - The same `trades` insert, but via a Pandas dataframe: ```python @@ -222,32 +225,40 @@ with Sender.from_conf(conf) as sender: ## Configuration options -The minimal configuration string needs to have the protocol, host, and port, as in: +The minimal configuration string needs to have the protocol, host, and port, as +in: ``` http::addr=localhost:9000; ``` -In the Python client, you can set the configuration options via the standard config string, -which is the same across all clients, or using [the built-in API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#sender-programmatic-construction). - +In the Python client, you can set the configuration options via the standard +config string, which is the same across all clients, or using +[the built-in API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#sender-programmatic-construction). -For all the extra options you can use, please check [the client docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#sender-conf) +For all the extra options you can use, please check +[the client docs](https://py-questdb-client.readthedocs.io/en/latest/conf.html#sender-conf) +Alternatively, for a breakdown of Configuration string options available across +all clients, see the [Configuration string](/docs/configuration-string/) page. ## Transactional flush -As described at the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), -the HTTP transport has some support for transactions. +As described at the +[ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the +HTTP transport has some support for transactions. 
-The python client exposes [an API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#http-transactions) +The python client exposes +[an API](https://py-questdb-client.readthedocs.io/en/latest/sender.html#http-transactions) to make working with transactions more convenient ## Next steps -Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for general details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. The [Python client docs](https://py-questdb-client.readthedocs.io/en/latest/sender.html) explain how to apply those concepts using the built-in API. +Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for general +details about transactions, error control, delivery guarantees, health check, or +table and column auto-creation. The +[Python client docs](https://py-questdb-client.readthedocs.io/en/latest/sender.html) +explain how to apply those concepts using the built-in API. For full docs, checkout [ReadTheDocs](https://py-questdb-client.readthedocs.io/en). diff --git a/clients/ingest-rust.md b/clients/ingest-rust.md index 2a2c1611..ed4445c0 100644 --- a/clients/ingest-rust.md +++ b/clients/ingest-rust.md @@ -44,7 +44,8 @@ let mut sender = Sender::from_conf( )?; ``` -You can also pass the connection configuration via the `QDB_CLIENT_CONF` environment variable: +You can also pass the connection configuration via the `QDB_CLIENT_CONF` +environment variable: ```bash export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" @@ -57,7 +58,8 @@ let mut sender = Sender::from_env()?; ``` When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. ## Basic insert @@ -94,9 +96,11 @@ These are the main steps it takes: In this case, the designated timestamp will be the one at execution time. -Let's see now an example with timestamps using Chrono, custom timeout, and basic auth. +Let's see now an example with timestamps using Chrono, custom timeout, and basic +auth. -You need to enable the `chrono_timestamp` feature to the QuestDB crate and add the Chrono crate. +You need to enable the `chrono_timestamp` feature to the QuestDB crate and add +the Chrono crate. ```bash cargo add questdb-rs --features chrono_timestamp @@ -150,6 +154,9 @@ general structure is: [crate documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the full details on configuration. +Alternatively, for breakdown of available params, see the +[Configuration string](/docs/configuration-string/) page. + ## Don't forget to flush The sender and buffer objects are entirely decoupled. This means that the sender @@ -157,8 +164,8 @@ won't get access to the data in the buffer until you explicitly call `sender.flush(&mut buffer)` or a variant. This may lead to a pitfall where you drop a buffer that still has some data in it, resulting in permanent data loss. -Unlike other official QuestDB clients, the rust client does not supports auto-flushing -via configuration. +Unlike other official QuestDB clients, the rust client does not supports +auto-flushing via configuration. A common technique is to flush periodically on a timer and/or once the buffer exceeds a certain size. You can check the buffer's size by calling @@ -168,16 +175,15 @@ The default `flush()` method clears the buffer after sending its data. 
If you want to preserve its contents (for example, to send the same data to multiple QuestDB instances), call `sender.flush_and_keep(&mut buffer)` instead. - ## Transactional flush -As described at the [ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), -the HTTP transport has some support for transactions. - -In order to ensure in advance that a flush will not affect more than one table, call -`sender.flush_and_keep_with_flags(&mut buffer, true)`. -This call will refuse to flush a buffer if the flush wouldn't be data-transactional. +As described at the +[ILP overview](/docs/reference/api/ilp/overview#http-transaction-semantics), the +HTTP transport has some support for transactions. +In order to ensure in advance that a flush will not affect more than one table, +call `sender.flush_and_keep_with_flags(&mut buffer, true)`. This call will +refuse to flush a buffer if the flush wouldn't be data-transactional. ## Error handling @@ -209,7 +215,6 @@ You can inspect the sender's error state by calling `sender.must_close()`. For more details about the HTTP and TCP transports, please refer to the [ILP overview](/docs/reference/api/ilp/overview#transport-selection). - ## Crate features The QuestDB client crate supports some optional features, mostly related to @@ -235,8 +240,8 @@ These features are opt-in: ## Next steps Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +about transactions, error control, delivery guarantees, health check, or table +and column auto-creation. Explore the full capabilities of the Rust client via the [Crate API page](https://docs.rs/questdb-rs/latest/questdb/). diff --git a/clients/java_ilp.md b/clients/java_ilp.md index b09dcda8..c3fda8fe 100644 --- a/clients/java_ilp.md +++ b/clients/java_ilp.md @@ -6,17 +6,22 @@ efficiency." --- import Tabs from "@theme/Tabs" + import TabItem from "@theme/TabItem" + import CodeBlock from "@theme/CodeBlock" + import InterpolateReleaseData from "../../src/components/InterpolateReleaseData" -import { RemoteRepoExample } from "@theme/RemoteRepoExample" +import { RemoteRepoExample } from "@theme/RemoteRepoExample" :::note -This is the reference for the QuestDB Java Client when QuestDB is used as a server. +This is the reference for the QuestDB Java Client when QuestDB is used as a +server. -For embedded QuestDB, please check our [Java Embedded Guide](/docs/reference/api/java-embedded/). +For embedded QuestDB, please check our +[Java Embedded Guide](/docs/reference/api/java-embedded/). ::: @@ -36,21 +41,18 @@ The client provides the following benefits: Add a QuestDB as a dependency in your project's build configuration file. - - + + ( {` - org.questdb - questdb - ${release.name} + +org.questdb questdb +${release.name} `} )} @@ -61,11 +63,7 @@ Add a QuestDB as a dependency in your project's build configuration file. renderText={(release) => ( {`compile group: 'org.questdb', name: 'questdb', version: '${release.name}'`} - - )} - /> - - + )} /> The code below creates an instance of a client configured to use HTTP transport to connect to a QuestDB server running on localhost on port 9000. It then sends @@ -102,7 +100,8 @@ connection to a QuestDB server. It also instructs the client to authenticate using HTTP Basic Authentication. When using QuestDB Enterprise, authentication can also be done via REST token. 
-Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more info. +Please check the [RBAC docs](/docs/operations/rbac/#authentication) for more +info. @@ -253,7 +252,6 @@ client receives no additional error information from the server. This limitation significantly contributes to the preference for HTTP transport over TCP transport. - ## Designated timestamp considerations The concept of [designated timestamp](/docs/concept/designated-timestamp/) is @@ -303,7 +301,6 @@ rows with older timestamps are ingested before rows with newer timestamps. ::: - ## Configuration options Client can be configured either by using a configuration string as shown in the @@ -312,77 +309,16 @@ examples above, or by using the builder API. The builder API is available via the `Sender.builder(Transport transport)` method. -When using the configuration string, the following options are available: - -### HTTP transport authentication - -- `username` : Username for HTTP basic authentication. -- `password` : Password for HTTP basic authentication. -- `token` : Bearer token for HTTP authentication. - -### TCP transport authentication - -- `username`: Username for TCP authentication. -- `token`: Token for TCP authentication. - -### Auto-flushing - -- `auto_flush` : Global switch for the auto-flushing behavior. Options are `on` - or `off`. Defaults to `on`. -- `auto_flush_rows` : The number of rows that will trigger a flush. This option - is supported for HTTP transport only. Defaults to 75,000. -- `auto_flush_interval` : The time in milliseconds that will trigger a flush. - Defaults to 1000. This option is support for HTTP transport only. - -The TCP transport for a client automatically flushes when its buffer is full. -The TCP transport utilizes a fixed-size buffer, and its maximum size is the same -as the initial size. Thus, the option `init_buf_size` (see below) effectively -controls the auto-flushing behavior of the TCP transport. - -### Buffer - -- `init_buf_size` : The initial size of the buffer in bytes. Default: 65536 - (64KiB) -- `max_buf_size` : The maximum size of the buffer in bytes. Default: 104857600 - (100MiB) This option is support for HTTP transport only. TCP transport uses a - fixed-size buffer and its maximum size is the same as the initial size. - -### HTTP Transport - -- `retry_timeout` : The time in milliseconds to continue retrying after a failed - HTTP request. The interval between retries is an exponential backoff starting - at 10ms and doubling after each failed attempt up to a maximum of 1 second. - Default: 10000 (10 seconds) -- `request_timeout` : The time in milliseconds to wait for a response from the - server. This is in addition to the calculation derived from the - `request_min_throughput` parameter. Default: 10000 (10 seconds) -- `request_min_throughput` : Minimum expected throughput in bytes per second for - HTTP requests. If the throughput is lower than this value, the connection will - time out. This is used to calculate an additional timeout on top of - `request_timeout`. This is useful for large requests. You can set this value - to `0` to disable this logic. - -### TLS encryption - -To enable TLS, select the `https` or `tcps` protocol. - -The following options are available: - -- `tls_roots` : Path to a Java keystore file containing trusted root - certificates. Defaults to the system default trust store. -- `tls_roots_password` : Password for the keystore file. It's always required - when `tls_roots` is set. 
-- `tls_verify` : Whether to verify the server's certificate. This should only be - used for testing as a last resort and never used in production as it makes the - connection vulnerable to man-in-the-middle attacks. Options are `on` or - `unsafe_off`. Defaults to `on`. - +For a breakdown of available options, see the +[Configuration string](/docs/configuration-string/) page. ## Other considerations -- Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table and -column auto-creation. +- Refer to the [ILP overview](/docs/reference/api/ilp/overview) for details + about transactions, error control, delivery guarantees, health check, or table + and column auto-creation. +- The method `flush()` can be called to force sending the internal buffer to a + server, even when the buffer is not full yet. - The Sender is not thread-safe. For multiple threads to send data to QuestDB, each thread should have its own Sender instance. An object pool can also be used to re-use Sender instances. @@ -390,5 +326,3 @@ column auto-creation. implements the `java.lang.AutoCloseable` interface, and therefore the [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) pattern can be used to ensure that the Sender is closed. -- The method `flush()` can be called to force sending the internal buffer to a - server, even when the buffer is not full yet. diff --git a/configuration-string.md b/configuration-string.md new file mode 100644 index 00000000..6df6820e --- /dev/null +++ b/configuration-string.md @@ -0,0 +1,91 @@ +--- +title: Client configuration string +description: + How to apply the configuration string used in multiple QuestDB clients. + Demonstrates available options, caveats, and more. +--- + +The QuestDB clients leverage a configuration string to pass common values. + +The presiding method will vary from client-to-client, but the string composition +is consistent. + +Naturally, languages will each have their own approach, and these will be +covered in the clients' documentation. + +This document provides a general overview. + +## Configuration string breakdown + +When using the configuration string, the following options are available: + +### HTTP transport authentication + +- `username` : Username for HTTP basic authentication. +- `password` : Password for HTTP basic authentication. +- `token` : Bearer token for HTTP authentication. + +### TCP transport authentication + +- `username`: Username for TCP authentication. +- `token`: Token for TCP authentication. + +### Auto-flushing + +- `auto_flush` : Global switch for the auto-flushing behavior. Options are `on` + or `off`. Defaults to `on`. +- `auto_flush_rows` : The number of rows that will trigger a flush. This option + is supported for HTTP transport only. Defaults to 75,000. +- `auto_flush_interval` : The time in milliseconds that will trigger a flush. + Defaults to 1000. This option is support for HTTP transport only. + +The TCP transport for a client automatically flushes when its buffer is full. +The TCP transport utilizes a fixed-size buffer, and its maximum size is the same +as the initial size. Thus, the option `init_buf_size` (see below) effectively +controls the auto-flushing behavior of the TCP transport. + +### Buffer + +- `init_buf_size` : The initial size of the buffer in bytes. Default: 65536 + (64KiB) +- `max_buf_size` : The maximum size of the buffer in bytes. 
Default: 104857600
+  (100MiB) This option is supported for HTTP transport only. TCP transport uses
+  a fixed-size buffer and its maximum size is the same as the initial size.
+
+### HTTP Transport
+
+- `retry_timeout` : The time in milliseconds to continue retrying after a failed
+  HTTP request. The interval between retries is an exponential backoff starting
+  at 10ms and doubling after each failed attempt up to a maximum of 1 second.
+  Default: 10000 (10 seconds)
+- `request_timeout` : The time in milliseconds to wait for a response from the
+  server. This is in addition to the calculation derived from the
+  `request_min_throughput` parameter. Default: 10000 (10 seconds)
+- `request_min_throughput` : Minimum expected throughput in bytes per second for
+  HTTP requests. If the throughput is lower than this value, the connection will
+  time out. This is used to calculate an additional timeout on top of
+  `request_timeout`. This is useful for large requests. You can set this value
+  to `0` to disable this logic.
+
+### TLS encryption
+
+To enable TLS, select the `https` or `tcps` protocol.
+
+The following options are available:
+
+- `tls_roots` : Path to a Java keystore file containing trusted root
+  certificates. Defaults to the system default trust store.
+- `tls_roots_password` : Password for the keystore file. It's always required
+  when `tls_roots` is set.
+- `tls_verify` : Whether to verify the server's certificate. This should only be
+  used for testing as a last resort and never used in production as it makes the
+  connection vulnerable to man-in-the-middle attacks. Options are `on` or
+  `unsafe_off`. Defaults to `on`.
+
+## Other considerations
+
+- Please refer to the [ILP overview](/docs/reference/api/ilp/overview) for
+  details about transactions, error control, delivery guarantees, health check,
+  or table and column auto-creation.
+- The method `flush()` can be called to force sending the internal buffer to a
+  server, even when the buffer is not full yet.
diff --git a/introduction.md b/introduction.md
index 7f52dcb5..5e712a67 100644
--- a/introduction.md
+++ b/introduction.md
@@ -11,9 +11,15 @@ import Screenshot from "@theme/Screenshot"
 
 import CodeBlock from "@theme/CodeBlock"
 
 QuestDB is an Apache 2.0 open source columnar database that specializes in time
-series. It offers category-leading ingestion throughput and fast SQL queries
-with operational simplicity. QuestDB reduces operational costs and overcomes
-ingestion bottlenecks, offering greatly simplified overall ingress infrastructure.
+series.
+
+It offers **category-leading ingestion throughput** and **fast SQL queries**
+with operational simplicity.
+
+Given its efficiency, QuestDB **reduces operational costs**, all while
+overcoming ingestion bottlenecks.
+
+As a result, QuestDB offers greatly simplified overall ingress infrastructure.
 
 This introduction provides a brief overview on:
 
@@ -51,8 +57,8 @@ performance degradation.
 
 #### Hardware efficiency
 
-Strong, cost-saving performance on very mninimal hardware,
-including sensors and Raspberry Pi.
+Strong, cost-saving performance on very minimal hardware, including sensors and
+Raspberry Pi.
 
 #### Time series SQL extensions
 
 Fast, SIMD-optimized SQL extensions to cruise through querying and analysis.
 
@@ -71,12 +77,23 @@ Greatest hits include:
 
 ## Benefits of QuestDB {#benefits}
 
-Time series data is seen increasingly in use cases across finance, internet of
-things, e-commerce, security, blockchain, and many emerging industries. 
As more
-and more time bound data is generated by an increasing number of clients, having
-high performance storage at the receiving end of your servers, devices or queues
-prevents ingestion bottlenecks, simplifies code and reduces costly
-infrastructure sprawl.
+Time series data is seen increasingly in use cases across:
+
+- finance
+- internet of things (IoT)
+- e-commerce
+- security
+- blockchain
+- many other emerging technical industries
+
+As more time-bound data is generated, high performance data reception is
+essential to avoid ingestion bottlenecks.
+
+The right data store greatly simplifies code and reduces costly infrastructure
+sprawl and spend.
+
+But to be _the right one_, the storage engine must be both high performance and
+efficient.
 
-With a specialized [time-series database](/glossary/time-series-database/), you
-don't need to worry about out-of-order data, duplicates, exactly one semantics,
-frequency of ingestion, or the many other details you will find in real-time
-streaming scenarios. It's simplified, hyper-fast data ingestion with tremendous
-efficiency and value.
+Beyond performance and efficiency, with a specialized
+[time-series database](/glossary/time-series-database/), you don't need to worry
+about:
+
+- out-of-order data
+- duplicates
+- exactly-once semantics
+- frequency of ingestion
+- many other details you will find in demanding real-world scenarios
+
+QuestDB provides simplified, hyper-fast data ingestion with tremendous
+efficiency and therefore value.
 
-Writing blazing-fast queries syntax and creating real-time
-[Grafana](/docs/third-party-tools/grafana/) is done via familiar SQL:
+Write blazing-fast queries and create real-time
+[Grafana](/docs/third-party-tools/grafana/) dashboards via familiar SQL:
 
 ```questdb-sql title='Navigate time with SQL' demo
 SELECT
@@ -110,18 +134,15 @@ SAMPLE BY 15m;
 
 Intrigued? The best way to see whether QuestDB is right for you is to try it
 out.
 
+Click _Demo this query_ in the snippet above to visit our demo instance and
+experiment.
+
+To bring your own data and learn more, keep reading!
+
 ## QuestDB Enterprise
 
 QuestDB Enterprise offers everything from open source, plus additional features
-for running QuestDB at larger scale or greater significance. Features within
-Enterprise include high availability, role based access control, TLS on all
-protocols, data compression, cold storage and priority support.
-
-Typically, when growing to multiple instances or to mission critical
-deployments, Enterprise provides an additional layer of official operational
-tooling with the added benefit of world-class QuestDB support. Enterprise
-increases the reliability of the already solid open source deployments, while
-providing better value for compute spend vs. existing engines and methods.
+for running QuestDB at greater scale or significance.
 
 For a breakdown of Enterprise features, see the
 [QuestDB Enterprise](/enterprise/) page.
diff --git a/reference/api/ilp/overview.md b/reference/api/ilp/overview.md
index 59d3aa32..9e5bd2de 100644
--- a/reference/api/ilp/overview.md
+++ b/reference/api/ilp/overview.md
@@ -27,13 +27,13 @@ This supporting document thus provides an overview to aid in client selection
 and initial configuration:
 
 1. [Client libraries](/docs/reference/api/ilp/overview/#client-libraries)
-2. [Server-Side Configuration](/docs/reference/api/ilp/overview/#server-side-configuration)
-3. [Transport Selection](/docs/reference/api/ilp/overview/#transport-selection)
-4. 
[Client-Side Configuration](/docs/reference/api/ilp/overview/#client-side-configuration) +2. [Server-Side configuration](/docs/reference/api/ilp/overview/#server-side-configuration) +3. [Transport selection](/docs/reference/api/ilp/overview/#transport-selection) +4. [Client-Side configuration](/docs/reference/api/ilp/overview/#client-side-configuration) 5. [Error handling](/docs/reference/api/ilp/overview/#error-handling) 6. [Authentication](/docs/reference/api/ilp/overview/#authentication) -7. [Table and Column Auto-creation](/docs/reference/api/ilp/overview/#table-and-column-auto-creation) -8. [Timestamp Column Name](/docs/reference/api/ilp/overview/#timestamp-column-name) +7. [Table and column auto-creation](/docs/reference/api/ilp/overview/#table-and-column-auto-creation) +8. [Timestamp column name](/docs/reference/api/ilp/overview/#timestamp-column-name) 9. [HTTP Transaction semantics](/docs/reference/api/ilp/overview/#http-transaction-semantics) 10. [Exactly-once delivery](/docs/reference/api/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery) 11. [Health Check](/docs/reference/api/ilp/overview/#health-check) @@ -93,10 +93,10 @@ connection failures. However, while HTTP is recommended, TCP has slightly lower overhead than HTTP and may be useful in high-throughput scenarios in high-latency networks. - ## Client-Side Configuration -Clients connect to a QuestDB using ILP via a configuration string. Configuration strings combine a set of key/value pairs. +Clients connect to a QuestDB using ILP via a configuration string. Configuration +strings combine a set of key/value pairs. The standard configuration string pattern is: @@ -109,7 +109,8 @@ schema::key1=value1;key2=value2;key3=value3; It is made up of the following parts: - **Schema**: One of the specified schemas in the - [core parameters](/docs/reference/api/ilp/overview/#core-parameters) section below + [core parameters](/docs/reference/api/ilp/overview/#core-parameters) section + below - **Key=Value**: Each key-value pair sets a specific parameter for the client - **Terminating semicolon**: A semicolon must follow the last key-value pair @@ -120,8 +121,8 @@ Below is a list of common parameters that ILP clients will accept. These params facilitate connection to QuestDB's ILP server and define client-specific behaviors. -Some are shared across all clients, while some are client specific. Refer to -the clients documentation for details. +Some are shared across all clients, while some are client specific. Refer to the +clients documentation for details. :::warning @@ -167,7 +168,8 @@ completeness and for users who have specific requirements. ::: -_See the [Authentication](/docs/reference/api/ilp/overview/#authentication) section below for configuration._ +_See the [Authentication](/docs/reference/api/ilp/overview/#authentication) +section below for configuration._ - **auth_timeout**: Timeout for TCP authentication with QuestDB server, in milliseconds. @@ -179,7 +181,6 @@ _See the [Authentication](/docs/reference/api/ilp/overview/#authentication) sect - Used in C/C++/Rust/Python clients. - **username**: Username for TCP authentication. - #### Auto-flushing behavior - **auto_flush**: Enable or disable automatic flushing (`on`/`off`). @@ -188,6 +189,7 @@ _See the [Authentication](/docs/reference/api/ilp/overview/#authentication) sect Rust). - **auto_flush_bytes** Auto-flushing is triggered above this buffer size. + - Disabled by default. 
- **auto_flush_interval**: Auto-flushing is triggered after this time period has @@ -232,8 +234,9 @@ _QuestDB Enterprise only._ #### Network configuration -- **bind_interface**: Optionally, specify the local network interface for outbound - connections. Useful if you have multiple interfaces or an accelerated network interface (e.g. Solarflare) +- **bind_interface**: Optionally, specify the local network interface for + outbound connections. Useful if you have multiple interfaces or an accelerated + network interface (e.g. Solarflare) - Not to be confused with the QuestDB port in the `addr` param. ## Error handling @@ -245,15 +248,15 @@ errors, and other client-side errors. Retrying is particularly beneficial during network issues or when the server is temporarily unavailable. The retrying behavior can be configured through the -`retry_timeout` configuration option or, in some clients, via their API. -The client continues to retry recoverable errors until they either succeed or the specified timeout is -reached. +`retry_timeout` configuration option or, in some clients, via their API. The +client continues to retry recoverable errors until they either succeed or the +specified timeout is reached. The TCP transport lacks support for error propagation from the server. In such -cases, the server merely closes the connection upon encountering an error. Consequently, the -client receives no additional error information from the server. This limitation -significantly contributes to the preference for HTTP transport over TCP -transport. +cases, the server merely closes the connection upon encountering an error. +Consequently, the client receives no additional error information from the +server. This limitation significantly contributes to the preference for HTTP +transport over TCP transport. ## Authentication @@ -266,11 +269,14 @@ provides holistic security out-of-the-box. ::: -InfluxDB Line Protocol supports authentication via HTTP Basic Authentication, using [the HTTP Parameters](/docs/reference/api/ilp/overview/#http-parameters), or via token when using the TCP transport, using [the TCP Parameters](/docs/reference/api/ilp/overview/#tcp-parameters). +InfluxDB Line Protocol supports authentication via HTTP Basic Authentication, +using [the HTTP Parameters](/docs/reference/api/ilp/overview/#http-parameters), +or via token when using the TCP transport, using +[the TCP Parameters](/docs/reference/api/ilp/overview/#tcp-parameters). -A similar pattern is used across all client libraries. If you want to use a TCP token, you need to -configure your QuestDB server. This document will break down and demonstrate the configuration keys and core -configuration options. +A similar pattern is used across all client libraries. If you want to use a TCP +token, you need to configure your QuestDB server. This document will break down +and demonstrate the configuration keys and core configuration options. Once a client has been selected and configured, resume from your language client documentation. @@ -377,10 +383,12 @@ will use the first row of data to determine the column types. If the table already exists, the server will validate that the columns match the existing table. If the columns do not match, the server will return a -non-recoverable error which, when using the HTTP/HTTPS transport, is propagated to the client. +non-recoverable error which, when using the HTTP/HTTPS transport, is propagated +to the client. 
-You can avoid table and/or column auto-creation by setting the `line.auto.create.new.columns` and
- `line.auto.create.new.tables`configuration parameters to false.
+You can avoid table and/or column auto-creation by setting the
+`line.auto.create.new.columns` and `line.auto.create.new.tables` configuration
+parameters to false.
 
 If you're using QuestDB Enterprise, you must grant further permissions to the
 authenticated user:
@@ -401,11 +409,11 @@ and the [role-based access control](/docs/operations/rbac/) guides.
 
 QuestDB's underlying ILP protocol sends timestamps to QuestDB without a name.
 
-If your table has been created beforehand, the designated timestamp will be correctly
-assigned based on the payload sent bt the client. But if your table does not
-exist, it will be automatically created and the timestamp column will be named
-`timestamp`. To use a custom name, say `my_ts`, pre-create the table with the desired
-timestamp column name.
+If your table has been created beforehand, the designated timestamp will be
+correctly assigned based on the payload sent by the client. But if your table
+does not exist, it will be automatically created and the timestamp column will
+be named `timestamp`. To use a custom name, say `my_ts`, pre-create the table
+with the desired timestamp column name.
 
 To do so, issue a `CREATE TABLE` statement to create the table in advance:
 
@@ -424,7 +432,9 @@ created, but without raising an error if the table already exists.
 
 ## HTTP transaction semantics
 
-The TCP endpoint does not support transactions. The HTTP ILP endpoint treats every requests as an individual transaction, so long as it contains rows for a single table.
+The TCP endpoint does not support transactions. The HTTP ILP endpoint treats
+every request as an individual transaction, so long as it contains rows for a
+single table.
 
 As of writing, the HTTP endpoint does not provide full transactionality in all
 cases.
 
 Specifically:
 
@@ -438,20 +448,24 @@ Specifically:
   transactionality is important for you, the best practice is to make sure you
   flush data to the server in batches that contain rows for a single table.
 
-- Even when you are sending data to a single table, when dynamically adding new columns to
-  a table, an implicit commit occurs each time a new column is added. If the request
-  is aborted or has parse errors, no data will be inserted into the corresponding
-  table, but the new column will be added and will not be rolled back.
+- Even when you are sending data to a single table, when dynamically adding new
+  columns to a table, an implicit commit occurs each time a new column is added.
+  If the request is aborted or has parse errors, no data will be inserted into
+  the corresponding table, but the new column will be added and will not be
+  rolled back.
 
-- Some clients have built-in support for controlling transactions. These APIs help to comply with the single-table-per-request pre-requisite for HTTP transactions, but they don't control if new columns
-  are being added.
+- Some clients have built-in support for controlling transactions. These APIs
+  help to comply with the single-table-per-request prerequisite for HTTP
+  transactions, but they don't control whether new columns are being added.
 
 - As of writing, if you want to make sure you have data transactionality and
-  schema/metadata transactionality, you should disable `line.auto.create.new.columns` and
-  `line.auto.create.new.tables` on your configuration. 
Be aware that if you do this, - you will not have dynamic schema capabilities and you will need to create each table - and column before you try to ingest data, via [`CREATE TABLE`](/docs/reference/sql/create-table/) and/or [`ALTER TABLE ADD COLUMN`](/docs/reference/sql/alter-table-add-column/) SQL statements. - + schema/metadata transactionality, you should disable + `line.auto.create.new.columns` and `line.auto.create.new.tables` on your + configuration. Be aware that if you do this, you will not have dynamic schema + capabilities and you will need to create each table and column before you try + to ingest data, via [`CREATE TABLE`](/docs/reference/sql/create-table/) and/or + [`ALTER TABLE ADD COLUMN`](/docs/reference/sql/alter-table-add-column/) SQL + statements. ## Exactly-once delivery vs at-least-once delivery diff --git a/reference/sql/overview.md b/reference/sql/overview.md index 40ec9b1c..f281eea5 100644 --- a/reference/sql/overview.md +++ b/reference/sql/overview.md @@ -7,20 +7,36 @@ description: --- import Screenshot from "@theme/Screenshot" + import Tabs from "@theme/Tabs" + import TabItem from "@theme/TabItem" + import CQueryPartial from "../../partials/\_c.sql.query.partial.mdx" + import CsharpQueryPartial from "../../partials/\_csharp.sql.query.partial.mdx" + import GoQueryPartial from "../../partials/\_go.sql.query.partial.mdx" + import JavaQueryPartial from "../../partials/\_java.sql.query.partial.mdx" + import NodeQueryPartial from "../../partials/\_nodejs.sql.query.partial.mdx" + import RubyQueryPartial from "../../partials/\_ruby.sql.query.partial.mdx" + import PHPQueryPartial from "../../partials/\_php.sql.query.partial.mdx" + import PythonQueryPartial from "../../partials/\_python.sql.query.partial.mdx" + import CurlExecQueryPartial from "../../partials/\_curl.exec.query.partial.mdx" + import GoExecQueryPartial from "../../partials/\_go.exec.query.partial.mdx" -import NodejsExecQueryPartial from "../../partials/\_nodejs.exec.query.partial.mdx" -import PythonExecQueryPartial from "../../partials/\_python.exec.query.partial.mdx" + +import NodejsExecQueryPartial +from"../../partials/\_nodejs.exec.query.partial.mdx" + +import PythonExecQueryPartial from +"../../partials/\_python.exec.query.partial.mdx" Querying - as a base action - is performed in three primary ways: @@ -30,16 +46,15 @@ Querying - as a base action - is performed in three primary ways: 3. Query via [REST HTTP API](/docs/reference/sql/overview/#rest-http-api) 4. Query via [Apache Parquet](/docs/reference/sql/overview/#apache-parquet) -QuestDB provides SQL with enhanced time series extensions. +For efficient and clear querying, QuestDB provides SQL with enhanced time series +extensions. This makes analyzing, downsampling, processing and reading time +series data an intuitive and flexible experience. -This makes analyzing, downsampling, processing and reading time series data an -intuitive and flexible experience. - -Queries can be written into many applications using the many rich and diverse -drivers and clients of the PostgreSQL or REST-ful ecosystems. However, querying -is also leveraged heavily by third-party tools to provide visualizations, such -as within [Grafana](docs/third-party-tools/grafana/), or for connectivity into -broad data infrastructure and application environments such as with a tool like +Queries can be written into many applications using existing drivers and clients +of the PostgreSQL or REST-ful ecosystems. 
However, querying is also leveraged +heavily by third-party tools to provide visualizations, such as within +[Grafana](/docs/third-party-tools/grafana/), or for connectivity into broad data +infrastructure and application environments such as with a tool like [Cube](/docs/third-party-tools/cube/). > Need to ingest data first? Checkout our @@ -59,22 +74,35 @@ and simplest way, apply queries via the Web Console. title="Click to zoom" /> +For an example, click _Demo this query_ in the below snippet. This will run a +query within our public demo instance and Web Console: + +```questdb-sql title='Navigate time with SQL' demo +SELECT + timestamp, symbol, + first(price) AS open, + last(price) AS close, + min(price), + max(price), + sum(amount) AS volume +FROM trades +WHERE timestamp > dateadd('d', -1, now()) +SAMPLE BY 15m; +``` + +If you see _Demo this query_ on other snippets in this docs, they can be run +against the demo instance. + ## PostgreSQL Query QuestDB using the PostgreSQL endpoint via the default port `8812`. Examples in multiple languages are shown below. - + @@ -163,10 +191,8 @@ obtaining the results as CSV. For obtaining results in JSON, use `/exec` instead, documented next. - + @@ -218,12 +244,9 @@ This is similar to the `/exp` entry point which returns results as CSV. ##### Querying Data - + @@ -246,11 +269,8 @@ This is similar to the `/exp` entry point which returns results as CSV. Alternatively, the `/exec` endpoint can be used to create a table and the `INSERT` statement can be used to populate it with values: - + @@ -278,56 +298,56 @@ curl -G \ The `node-fetch` package can be installed using `npm i node-fetch`. ```javascript -const fetch = require("node-fetch"); +const fetch = require("node-fetch") -const HOST = "http://localhost:9000"; +const HOST = "http://localhost:9000" async function createTable() { try { - const query = "CREATE TABLE IF NOT EXISTS trades (name VARCHAR, value INT)"; + const query = "CREATE TABLE IF NOT EXISTS trades (name VARCHAR, value INT)" const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ); - const json = await response.json(); + ) + const json = await response.json() - console.log(json); + console.log(json) } catch (error) { - console.log(error); + console.log(error) } } async function insertData() { try { - const query = "INSERT INTO trades VALUES('abc', 123456)"; + const query = "INSERT INTO trades VALUES('abc', 123456)" const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ); - const json = await response.json(); + ) + const json = await response.json() - console.log(json); + console.log(json) } catch (error) { - console.log(error); + console.log(error) } } async function updateData() { try { - const query = "UPDATE trades SET value = 9876 WHERE name = 'abc'"; + const query = "UPDATE trades SET value = 9876 WHERE name = 'abc'" const response = await fetch( `${HOST}/exec?query=${encodeURIComponent(query)}`, - ); - const json = await response.json(); + ) + const json = await response.json() - console.log(json); + console.log(json) } catch (error) { - console.log(error); + console.log(error) } } -createTable().then(insertData).then(updateData); +createTable().then(insertData).then(updateData) ``` @@ -410,8 +430,6 @@ For more information, see the Now... SQL! It's query time. -SQL is fairly intuitive and known to many. - Whether you want to use the Web Console, PostgreSQL or REST HTTP (or both), query construction is rich. 
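The PostgreSQL tabs above pull in per-language partials; as a quick standalone reference, a sketch of the same round trip from Node.js over the PostgreSQL wire protocol might look as follows. This is an illustrative example, not part of the patch: it assumes a local instance with QuestDB's default PGWire settings (port `8812`, user `admin`, password `quest`, database `qdb`) and the `pg` package.

```javascript
// Hedged sketch: query QuestDB over the PostgreSQL wire protocol.
// Assumes the pg package (`npm i pg`) and default QuestDB credentials.
const { Client } = require("pg")

async function run() {
  const client = new Client({
    host: "localhost",
    port: 8812, // QuestDB's default PostgreSQL endpoint
    user: "admin",
    password: "quest",
    database: "qdb",
  })
  await client.connect()

  // any QuestDB SQL works here, including time series extensions
  const res = await client.query("SELECT * FROM trades LIMIT 5")
  console.log(res.rows)

  await client.end()
}

run().catch(console.error)
```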
diff --git a/reference/sql/snapshot.md b/reference/sql/snapshot.md
index 8cac25dd..5cf2700b 100644
--- a/reference/sql/snapshot.md
+++ b/reference/sql/snapshot.md
@@ -6,7 +6,8 @@ description: SNAPSHOT SQL keyword reference documentation.
 
 Prepare the database for a full backup or a filesystem (disk) snapshot.
 
-_Are you looking for a detailed guide on how to create backups and restore them? Check out our [Backup and Restore](/docs/operations/backup/) guide!_
+_For a detailed guide on backup creation and restoration, check out our
+[Backup and Restore](/docs/operations/backup/) guide!_
 
 ## Syntax
 
@@ -16,7 +17,8 @@
 
 QuestDB currently does not support creating snapshots on Windows.
 
-If you are a Windows user and require backup functionality, please [comment on this issue](https://github.com/questdb/questdb/issues/4811).
+If you are a Windows user and require backup functionality, please
+[comment on this issue](https://github.com/questdb/questdb/issues/4811).
 
 :::
 
@@ -31,19 +33,43 @@ involves the following steps:
    flush the committed data to disk.
 2. Start a filesystem snapshot or copy the
    [root directory](/docs/concept/root-directory-structure/) to the backup
-   location on the disk.
-   learn how to create a filesystem snapshot on the most common cloud providers.
+   location on the disk. Learn how to create a filesystem snapshot on the most
+   common cloud providers.
 3. Run `SNAPSHOT COMPLETE` statement to release the reader locks and delete the
    metadata file copies.
 
+### SNAPSHOT PREPARE
+
+To support precise usage, we'll unpack `SNAPSHOT PREPARE` and answer:
+
+- What is the effect of `SNAPSHOT PREPARE`?
+- Will QuestDB write during `SNAPSHOT PREPARE` processing?
+- Does QuestDB write to WAL during `SNAPSHOT PREPARE`, for processing at
+  `SNAPSHOT COMPLETE`?
+
+Using `SNAPSHOT PREPARE` triggers an fsync to sync outstanding writes to the
+disk. It will then temporarily disable the cleanup process related to merging
+out-of-order (O3) data. Then, the latest synced transaction will be used as the
+checkpoint. After that, one can proceed with step 2 above, which is to take an
+EBS snapshot of the files. Calling `SNAPSHOT COMPLETE` then resumes the usual
+process, which cleans up old file versions that are no longer needed.
+
+In short: you can still write to the database during `SNAPSHOT PREPARE`.
+However, old file versions are only purged after `SNAPSHOT COMPLETE`. During
+`SNAPSHOT PREPARE`, O3 writes are much more expensive in terms of disk space,
+while append writes remain inexpensive.
+
 ## Snapshot recovery
 
 In case of a full backup, you should also delete the old root directory and
 copy the files from your backup to the same location or, alternatively, you can
 point the database at the new root directory.
 
-When the database starts, it checks for the presence of a file named `_restore` in the root directory. If the file is present, the database runs a
-snapshot recovery procedure restoring the metadata files from the snapshot.
+When the database starts, it checks for the presence of a file named `_restore`
+in the root directory. If the file is present, the database runs a snapshot
+recovery procedure restoring the metadata files from the snapshot.
 
 When this happens, you should see the following in the server logs:
 
@@ -77,4 +103,5 @@ SNAPSHOT COMPLETE;
 
 ## Further reading
 
-- [Backup and Restore](/docs/operations/backup/) - Detailed guide on how to create backups and restore them. 
+- [Backup and Restore](/docs/operations/backup/) - Detailed guide on how to + create backups and restore them. diff --git a/sidebars.js b/sidebars.js index fca4e2dc..e5536b2c 100644 --- a/sidebars.js +++ b/sidebars.js @@ -28,6 +28,11 @@ module.exports = { label: "Language Clients", collapsed: false, items: [ + { + id: "configuration-string", + type: "doc", + label: "Configuration string", + }, { id: "clients/ingest-python", type: "doc", @@ -450,4 +455,4 @@ module.exports = { ], }, ].filter(Boolean), -}; +}
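Rounding out the snapshot section above, here is a minimal sketch of the three-step backup flow driven through the REST `/exec` endpoint. This is an illustrative example, not part of the patch: it assumes a local instance on the default port `9000` and Node.js with `node-fetch`, matching the style of the earlier examples; the middle step is whatever filesystem tooling your platform provides.

```javascript
// Hedged sketch of the SNAPSHOT PREPARE / COMPLETE backup sequence.
// Assumes node-fetch (`npm i node-fetch`) and a local QuestDB instance.
const fetch = require("node-fetch")

const HOST = "http://localhost:9000"

async function exec(query) {
  const response = await fetch(
    `${HOST}/exec?query=${encodeURIComponent(query)}`,
  )
  return response.json()
}

async function backup() {
  // 1. Flush committed data and hold old file versions in place
  await exec("SNAPSHOT PREPARE")

  // 2. Take the filesystem snapshot or copy the root directory here,
  //    using your platform's tooling (EBS snapshot, zfs snapshot, cp, ...)

  // 3. Release reader locks and resume normal cleanup
  await exec("SNAPSHOT COMPLETE")
}

backup().catch(console.error)
```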