From 344ab0d34721cf5b40662d0f4112cde6782ee922 Mon Sep 17 00:00:00 2001 From: tserakhau Date: Fri, 27 Dec 2024 16:54:40 +0100 Subject: [PATCH] Add airbyte example Github to Clickhouse as example how to use airbyte compatibility layer. Relates #149 --- examples/airbyte_adapter/README.md | 90 +++++++++++++++++++++ examples/airbyte_adapter/docker-compose.yml | 28 +++++++ examples/airbyte_adapter/transfer.yaml | 28 +++++++ 3 files changed, 146 insertions(+) create mode 100644 examples/airbyte_adapter/README.md create mode 100644 examples/airbyte_adapter/docker-compose.yml create mode 100644 examples/airbyte_adapter/transfer.yaml diff --git a/examples/airbyte_adapter/README.md b/examples/airbyte_adapter/README.md new file mode 100644 index 00000000..c2c60a08 --- /dev/null +++ b/examples/airbyte_adapter/README.md @@ -0,0 +1,90 @@ +## Airbyte provider + +This is a bridge between native transfer and airbyte connector. +This adapter is ideal for scenarios where you need to synchronize data from an Airbyte-compatible source to a Transfer-compatible sink with minimal configuration. + +We support source airbyte [connectors](https://docs.airbyte.com/category/sources) + +This adapter enables integration between [Airbyte](https://docs.airbyte.com/using-airbyte/core-concepts/) and [Transfer](https://github.com/doublecloud/transfer), facilitating the translation of Airbyte's core concepts into Transfer-compatible constructs for streamlined data movement and transformations. + + +This example showcases how to integrate data from [Github](https://airbyte.com/connectors/github) to Clickhouse via Airbyte Connector. + +## Overview + +1. **Github Connector**: An [airbyte](https://docs.airbyte.com/integrations/sources/github) github api connector. + - **PAT**: Personal access token used to access the transfer open-source repo + +3. **Transfer CLI**: A Go-based application that loads API data from github to Clickhouse. + +4. 
**Clickhouse**: An open-source column-oriented database for real-time analytical processing. + +## Getting Started + +### Prerequisites + +- Docker and Docker Compose installed on your machine. +- Personal access token, see [here](https://github.com/settings/tokens) + +### Setup Instructions + +1. **Clone the Repository**: + ```bash + git clone https://github.com/doublecloud/transfer + cd transfer/examples/airbyte_adapter + ``` + +2. **Build and Run the Docker Compose**: + ```bash + export MY_TOKEN=TOKEN_VALUE + docker-compose up --build + ``` + +3. **Access ClickHouse**: + Connect to ClickHouse via CLI: + ```bash + clickhouse-client --host localhost --port 9000 --user default --password 'ch_password' + ``` + +### Configuration Files + +- **`transfer.yaml`**: Specifies the source (Github Airbyte) and destination (CH) settings inside docker-compose + +### Exploring results + +Once Docker Compose is up and running, you can explore the results via clickhouse-client + + +```sql + +SELECT * +FROM users +WHERE __data_transfer_delete_time = 0 + LIMIT 10 + + +┌───id─┬─email──────────────────┬─name────┬─__data_transfer_commit_time─┬─__data_transfer_delete_time─┐ +│ 3269 │ updated760@example.com │ User451 │ 1732118484000000000 │ 0 │ +│ 3281 │ updated646@example.com │ User91 │ 1732118486000000000 │ 0 │ +│ 3303 │ updated89@example.com │ User107 │ 1732118485000000000 │ 0 │ +│ 3332 │ updated907@example.com │ User7 │ 1732118485000000000 │ 0 │ +│ 3336 │ updated712@example.com │ User473 │ 1732118485000000000 │ 0 │ +│ 3338 │ updated993@example.com │ User894 │ 1732118485000000000 │ 0 │ +│ 3340 │ updated373@example.com │ User313 │ 1732118484000000000 │ 0 │ +│ 3347 │ updated994@example.com │ User589 │ 1732118484000000000 │ 0 │ +│ 3348 │ updated515@example.com │ User96 │ 1732118484000000000 │ 0 │ +│ 3354 │ updated35@example.com │ User267 │ 1732118485000000000 │ 0 │ +└──────┴────────────────────────┴─────────┴─────────────────────────────┴─────────────────────────────┘ +``` + +### Stopping the Application
+ +To stop the Docker containers, run: + +```bash +docker-compose down +``` + +## Conclusion + +This example provides a complete end-to-end Ingestion Solution using Github API, Clickhouse, and a Transfer application. You can use it to demonstrate how data can be replicated from Unstructured API Source to a Clickhouse data platform for real-time processing. diff --git a/examples/airbyte_adapter/docker-compose.yml b/examples/airbyte_adapter/docker-compose.yml new file mode 100644 index 00000000..4e4ba5bf --- /dev/null +++ b/examples/airbyte_adapter/docker-compose.yml @@ -0,0 +1,28 @@ +version: '3.8' + +services: + clickhouse: + image: clickhouse/clickhouse-server:latest + container_name: clickhouse + ports: + - "8123:8123" # HTTP interface + - "9000:9000" # Native TCP interface + environment: + CLICKHOUSE_USER: default + CLICKHOUSE_DB: default + CLICKHOUSE_PASSWORD: "ch_password" + + transfer: + build: ../.. # build main transfer docker CLI + command: + - replicate + - --transfer + - /usr/local/bin/transfer.yaml + - --log-level + - info + depends_on: + - clickhouse + volumes: + - ./transfer.yaml:/usr/local/bin/transfer.yaml + environment: + MY_TOKEN: $MY_TOKEN diff --git a/examples/airbyte_adapter/transfer.yaml b/examples/airbyte_adapter/transfer.yaml new file mode 100644 index 00000000..fc99125a --- /dev/null +++ b/examples/airbyte_adapter/transfer.yaml @@ -0,0 +1,28 @@ +id: test +type: SNAPSHOT_ONLY +src: + type: airbyte + params: + Config: | + { + "credentials":{ + "option_title":"PAT Credentials", + "personal_access_token":"${MY_TOKEN}" + }, + "repositories": ["airbytehq/airbyte", "doublecloud/transfer", "ClickHouse/ClickHouse"] + } + BatchSizeLimit: 10485760 + RecordsLimit: 10000 + MaxRowSize: 268435456 + Image: "airbyte/source-github:1.8.0" +dst: + type: ch + params: + ShardsList: + - Hosts: + - clickhouse + HTTPPort: 8123 + NativePort: 9000 + Database: default + User: default + Password: "ch_password"