forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 12
/
bigquery_to_datahub.dhub.yaml
66 lines (64 loc) · 2.78 KB
/
bigquery_to_datahub.dhub.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
---
# see https://datahubproject.io/docs/generated/ingestion/sources/bigquery for complete documentation
# Ingestion source: the connector type plus its connector-specific settings.
# Commented-out keys below are optional settings; remove the leading "#" to
# enable one, keeping it aligned with `project_id` so it stays inside `config`.
source:
  type: "bigquery"
  config:
    ## Coordinates
    project_id: project-id-1234567

    ## Credentials
    ## If GOOGLE_APPLICATION_CREDENTIALS environment variable is not set you can specify credentials here
    #credential:
    #  project_id: project-id-1234567
    #  private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
    #  private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
    #  client_email: "service-account@project-id-1234567.iam.gserviceaccount.com"
    #  client_id: "123456678890"

    #include_tables: true
    #include_views: true
    #include_table_lineage: true
    #extract_policy_tags_from_catalog: true
    #start_time: 2021-12-15T20:08:23.091Z
    #end_time: 2023-12-15T20:08:23.091Z

    #profiling:
    #  enabled: true
    #  turn_off_expensive_profiling_metrics: false
    #  query_combiner_enabled: true
    #  max_number_of_fields_to_profile: 8
    #  profile_table_level_only: false
    #  include_field_null_count: true
    #  include_field_min_value: true
    #  include_field_max_value: true
    #  include_field_mean_value: true
    #  include_field_median_value: true
    #  include_field_stddev_value: false
    #  include_field_quantiles: false
    #  include_field_distinct_value_frequencies: false
    #  include_field_histogram: false
    #  include_field_sample_values: false
    #profile_pattern:
    #  allow:
    #    - "schema.table.column"
    #  deny:
    #    - "*.*.*"
    #storage_project_id: project-id-1234567

    ## Lineage with GCS Source
    #include_column_lineage_with_gcs: true  # or false
    #gcs_lineage_config:
    #  path_specs:
    #    - include: "gs://my-bucket/foo/tests/bar.avro"
    #    - include: "gs://my-bucket/foo/tests/*.*"
    #    - include: "gs://my-bucket/foo/tests/{table}/*.avro"
    #    - include: "gs://my-bucket/foo/tests/{table}/*/*.avro"
    #    - include: "gs://my-bucket/foo/tests/{table}/*.*"
    #    - include: "gs://my-bucket/{dept}/tests/{table}/*.avro"
    #    - include: "gs://my-bucket/{dept}/tests/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.avro"
    #    - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.avro"
    #    - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #    - include: "gs://my-bucket/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #    - include: "gs://my-bucket/*/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #  NOTE(review): strip_urls is assumed to belong to gcs_lineage_config - confirm against the BigQuery source docs
    #  strip_urls: false
## see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
# Ingestion sink: `type` and `config` must be nested under `sink`, and
# `server` under `config`, so they do not collide with other top-level keys.
sink:
  type: "datahub-rest"
  config:
    # Endpoint the "datahub-rest" sink is pointed at; adjust for non-local deployments.
    server: "http://localhost:8080"