From 41fbea9512f72c08cd1382e0c144256879661edf Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 29 Oct 2024 18:16:52 +0530 Subject: [PATCH 01/64] initial commit --- go.mod | 67 +- go.sum | 150 ++-- runtime/drivers/duckdb/config.go | 43 +- runtime/drivers/duckdb/duckdb.go | 338 ++------ runtime/drivers/duckdb/duckdb_test.go | 59 -- runtime/drivers/duckdb/olap.go | 772 +----------------- runtime/drivers/duckdb/olap_crud_test.go | 46 -- .../transporter_objectStore_to_duckDB.go | 6 +- runtime/drivers/slack/slack.go | 4 - runtime/registry_test.go | 7 +- 10 files changed, 216 insertions(+), 1276 deletions(-) diff --git a/go.mod b/go.mod index dd54c1aca3c..e8cefe0ac81 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,10 @@ module github.com/rilldata/rill -go 1.22.0 +go 1.23.2 require ( - cloud.google.com/go/bigquery v1.61.0 - cloud.google.com/go/storage v1.42.0 + cloud.google.com/go/bigquery v1.62.0 + cloud.google.com/go/storage v1.43.0 connectrpc.com/connect v1.16.1 github.com/AlecAivazis/survey/v2 v2.3.7 github.com/Andrew-M-C/go.jsonvalue v1.3.4 @@ -35,7 +35,7 @@ require ( github.com/dgraph-io/ristretto v0.1.1 github.com/docker/go-connections v0.5.0 github.com/eapache/go-resiliency v1.3.0 - github.com/envoyproxy/protoc-gen-validate v1.0.4 + github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/fatih/color v1.16.0 github.com/fsnotify/fsnotify v1.7.0 github.com/getkin/kin-openapi v0.126.0 @@ -75,6 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 + github.com/rilldata/duckdb-replicator v0.0.0 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 @@ -93,8 +94,8 @@ require ( github.com/testcontainers/testcontainers-go/modules/clickhouse v0.33.0 github.com/testcontainers/testcontainers-go/modules/compose v0.33.0 
github.com/xuri/excelize/v2 v2.7.1 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0 go.opentelemetry.io/otel v1.30.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.27.0 @@ -109,15 +110,15 @@ require ( go.uber.org/zap v1.27.0 go.uber.org/zap/exp v0.2.0 gocloud.dev v0.36.0 - golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e - golang.org/x/oauth2 v0.21.0 + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c + golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.8.0 - golang.org/x/sys v0.25.0 - golang.org/x/text v0.18.0 - google.golang.org/api v0.184.0 - google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 - google.golang.org/grpc v1.64.1 - google.golang.org/protobuf v1.34.2 + golang.org/x/sys v0.26.0 + golang.org/x/text v0.19.0 + google.golang.org/api v0.191.0 + google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 + google.golang.org/grpc v1.67.1 + google.golang.org/protobuf v1.35.1 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 @@ -128,8 +129,8 @@ require ( require ( cloud.google.com/go v0.115.0 // indirect - cloud.google.com/go/compute/metadata v0.3.0 // indirect - cloud.google.com/go/iam v1.1.8 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/iam v1.1.13 // indirect dario.cat/mergo v1.0.1 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect @@ -148,7 +149,7 @@ require ( github.com/kylelemons/godebug v1.1.0 // indirect github.com/mtibben/percent v0.2.1 // indirect github.com/pkg/browser 
v0.0.0-20240102092130-5ac0b6a4141c // indirect - google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 // indirect + google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect ) require ( @@ -159,8 +160,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.5.1 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect + cloud.google.com/go/auth v0.8.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/ClickHouse/ch-go v0.61.3 // indirect github.com/ForceCLI/config v0.0.0-20230217143549-9149d42a3c99 // indirect @@ -247,7 +248,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect - github.com/golang/glog v1.2.0 // indirect + github.com/golang/glog v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect @@ -258,11 +259,11 @@ require ( github.com/google/go-github/v52 v52.0.0 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/s2a-go v0.1.7 // indirect + github.com/google/s2a-go v0.1.8 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/google/wire v0.5.0 // indirect + github.com/google/wire v0.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.12.4 // indirect + github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect @@ -288,7 +289,7 @@ require ( github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect github.com/kevinburke/ssh_config v1.2.0 
// indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect @@ -388,14 +389,14 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/goleak v1.3.0 // indirect go.uber.org/mock v0.4.0 // indirect - golang.org/x/crypto v0.26.0 // indirect - golang.org/x/mod v0.20.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.24.0 // indirect - golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3 // indirect + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/time v0.6.0 // indirect + golang.org/x/tools v0.26.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect gopkg.in/cenkalti/backoff.v1 v1.1.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect @@ -430,3 +431,5 @@ replace github.com/apache/arrow/go/v14 v14.0.2 => github.com/rilldata/arrow/go/v // security vulnerability in dgrijalva/jwt-go replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible + +replace github.com/rilldata/duckdb-replicator => /home/anshul/workspace/duckdb-replicator diff --git a/go.sum b/go.sum index 89072a6dd73..68871bd60c1 100644 --- a/go.sum +++ b/go.sum @@ -97,10 +97,10 @@ cloud.google.com/go/assuredworkloads v1.7.0/go.mod h1:z/736/oNmtGAyU47reJgGN+KVo cloud.google.com/go/assuredworkloads v1.8.0/go.mod 
h1:AsX2cqyNCOvEQC8RMPnoc0yEarXQk6WEKkxYfL6kGIo= cloud.google.com/go/assuredworkloads v1.9.0/go.mod h1:kFuI1P78bplYtT77Tb1hi0FMxM0vVpRC7VVoJC3ZoT0= cloud.google.com/go/assuredworkloads v1.10.0/go.mod h1:kwdUQuXcedVdsIaKgKTp9t0UJkE5+PAVNhdQm4ZVq2E= -cloud.google.com/go/auth v0.5.1 h1:0QNO7VThG54LUzKiQxv8C6x1YX7lUrzlAa1nVLF8CIw= -cloud.google.com/go/auth v0.5.1/go.mod h1:vbZT8GjzDf3AVqCcQmqeeM32U9HBFc32vVVAbwDsa6s= -cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= -cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= +cloud.google.com/go/auth v0.8.1 h1:QZW9FjC5lZzN864p13YxvAtGUlQ+KgRL+8Sg45Z6vxo= +cloud.google.com/go/auth v0.8.1/go.mod h1:qGVp/Y3kDRSDZ5gFD/XPUfYQ9xW1iI7q8RIRoCyBbJc= +cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= +cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= cloud.google.com/go/automl v1.5.0/go.mod h1:34EjfoFGMZ5sgJ9EoLsRtdPSNZLcfflJR39VbVNS2M0= cloud.google.com/go/automl v1.6.0/go.mod h1:ugf8a6Fx+zP0D59WLhqgTDsQI9w07o64uf/Is3Nh5p8= cloud.google.com/go/automl v1.7.0/go.mod h1:RL9MYCCsJEOmt0Wf3z9uzG0a7adTT1fe+aObgSpkCt8= @@ -128,8 +128,8 @@ cloud.google.com/go/bigquery v1.44.0/go.mod h1:0Y33VqXTEsbamHJvJHdFmtqHvMIY28aK1 cloud.google.com/go/bigquery v1.47.0/go.mod h1:sA9XOgy0A8vQK9+MWhEQTY6Tix87M/ZurWFIxmF9I/E= cloud.google.com/go/bigquery v1.48.0/go.mod h1:QAwSz+ipNgfL5jxiaK7weyOhzdoAy1zFm0Nf1fysJac= cloud.google.com/go/bigquery v1.49.0/go.mod h1:Sv8hMmTFFYBlt/ftw2uN6dFdQPzBlREY9yBh7Oy7/4Q= -cloud.google.com/go/bigquery v1.61.0 h1:w2Goy9n6gh91LVi6B2Sc+HpBl8WbWhIyzdvVvrAuEIw= -cloud.google.com/go/bigquery v1.61.0/go.mod h1:PjZUje0IocbuTOdq4DBOJLNYB0WF3pAKBHzAYyxCwFo= +cloud.google.com/go/bigquery v1.62.0 h1:SYEA2f7fKqbSRRBHb7g0iHTtZvtPSPYdXfmqsjpsBwo= +cloud.google.com/go/bigquery v1.62.0/go.mod h1:5ee+ZkF1x/ntgCsFQJAQTM3QkAZOecfCmvxhkJsWRSA= 
cloud.google.com/go/billing v1.4.0/go.mod h1:g9IdKBEFlItS8bTtlrZdVLWSSdSyFUZKXNS02zKMOZY= cloud.google.com/go/billing v1.5.0/go.mod h1:mztb1tBc3QekhjSgmpf/CV4LzWXLzCArwpLmP2Gm88s= cloud.google.com/go/billing v1.6.0/go.mod h1:WoXzguj+BeHXPbKfNWkqVtDdzORazmCjraY+vrxcyvI= @@ -180,8 +180,8 @@ cloud.google.com/go/compute/metadata v0.1.0/go.mod h1:Z1VN+bulIf6bt4P/C37K4DyZYZ cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= cloud.google.com/go/compute/metadata v0.2.1/go.mod h1:jgHgmJd2RKBGzXqF5LR2EZMGxBkeanZ9wwa75XHJgOM= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= -cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= -cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= cloud.google.com/go/contactcenterinsights v1.3.0/go.mod h1:Eu2oemoePuEFc/xKFPjbTuPSj0fYJcPls9TFlPNnHHY= cloud.google.com/go/contactcenterinsights v1.4.0/go.mod h1:L2YzkGbPsv+vMQMCADxJoT9YiTTnSEd6fEvCeHTYVck= cloud.google.com/go/contactcenterinsights v1.6.0/go.mod h1:IIDlT6CLcDoyv79kDv8iWxMSTZhLxSCofVV5W6YFM/w= @@ -201,8 +201,8 @@ cloud.google.com/go/datacatalog v1.8.0/go.mod h1:KYuoVOv9BM8EYz/4eMFxrr4DUKhGIOX cloud.google.com/go/datacatalog v1.8.1/go.mod h1:RJ58z4rMp3gvETA465Vg+ag8BGgBdnRPEMMSTr5Uv+M= cloud.google.com/go/datacatalog v1.12.0/go.mod h1:CWae8rFkfp6LzLumKOnmVh4+Zle4A3NXLzVJ1d1mRm0= cloud.google.com/go/datacatalog v1.13.0/go.mod h1:E4Rj9a5ZtAxcQJlEBTLgMTphfP11/lNaAshpoBgemX8= -cloud.google.com/go/datacatalog v1.20.1 h1:czcba5mxwRM5V//jSadyig0y+8aOHmN7gUl9GbHu59E= -cloud.google.com/go/datacatalog v1.20.1/go.mod h1:Jzc2CoHudhuZhpv78UBAjMEg3w7I9jHA11SbRshWUjk= +cloud.google.com/go/datacatalog v1.21.0 
h1:vl0pQT9TZ5rKi9e69FgtXNCR7I8MVRj4+CnbeXhz6UQ= +cloud.google.com/go/datacatalog v1.21.0/go.mod h1:DB0QWF9nelpsbB0eR/tA0xbHZZMvpoFD1XFy3Qv/McI= cloud.google.com/go/dataflow v0.6.0/go.mod h1:9QwV89cGoxjjSR9/r7eFDqqjtvbKxAK2BaYU6PVk9UM= cloud.google.com/go/dataflow v0.7.0/go.mod h1:PX526vb4ijFMesO1o202EaUmouZKBpjHsTlCtB4parQ= cloud.google.com/go/dataflow v0.8.0/go.mod h1:Rcf5YgTKPtQyYz8bLYhFoIV/vP39eL7fWNcSOyFfLJE= @@ -315,8 +315,8 @@ cloud.google.com/go/iam v0.8.0/go.mod h1:lga0/y3iH6CX7sYqypWJ33hf7kkfXJag67naqGE cloud.google.com/go/iam v0.11.0/go.mod h1:9PiLDanza5D+oWFZiH1uG+RnRCfEGKoyl6yo4cgWZGY= cloud.google.com/go/iam v0.12.0/go.mod h1:knyHGviacl11zrtZUoDuYpDgLjvr28sLQaG0YB2GYAY= cloud.google.com/go/iam v0.13.0/go.mod h1:ljOg+rcNfzZ5d6f1nAUJ8ZIxOaZUVoS14bKCtaLZ/D0= -cloud.google.com/go/iam v1.1.8 h1:r7umDwhj+BQyz0ScZMp4QrGXjSTI3ZINnpgU2nlB/K0= -cloud.google.com/go/iam v1.1.8/go.mod h1:GvE6lyMmfxXauzNq8NbgJbeVQNspG+tcdL/W8QO1+zE= +cloud.google.com/go/iam v1.1.13 h1:7zWBXG9ERbMLrzQBRhFliAV+kjcRToDTgQT3CTwYyv4= +cloud.google.com/go/iam v1.1.13/go.mod h1:K8mY0uSXwEXS30KrnVb+j54LB/ntfZu1dr+4zFMNbus= cloud.google.com/go/iap v1.4.0/go.mod h1:RGFwRJdihTINIe4wZ2iCP0zF/qu18ZwyKxrhMhygBEc= cloud.google.com/go/iap v1.5.0/go.mod h1:UH/CGgKd4KyohZL5Pt0jSKE4m3FR51qg6FKQ/z/Ix9A= cloud.google.com/go/iap v1.6.0/go.mod h1:NSuvI9C/j7UdjGjIde7t7HBz+QTwBcapPE07+sSRcLk= @@ -347,8 +347,8 @@ cloud.google.com/go/logging v1.7.0/go.mod h1:3xjP2CjkM3ZkO73aj4ASA5wRPGGCRrPIAeN cloud.google.com/go/longrunning v0.1.1/go.mod h1:UUFxuDWkv22EuY93jjmDMFT5GPQKeFVJBIF6QlTqdsE= cloud.google.com/go/longrunning v0.3.0/go.mod h1:qth9Y41RRSUE69rDcOn6DdK3HfQfsUI0YSmW3iIlLJc= cloud.google.com/go/longrunning v0.4.1/go.mod h1:4iWDqhBZ70CvZ6BfETbvam3T8FMvLK+eFj0E6AaRQTo= -cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= -cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= +cloud.google.com/go/longrunning v0.5.11 
h1:Havn1kGjz3whCfoD8dxMLP73Ph5w+ODyZB9RUsDxtGk= +cloud.google.com/go/longrunning v0.5.11/go.mod h1:rDn7//lmlfWV1Dx6IB4RatCPenTwwmqXuiP0/RgoEO4= cloud.google.com/go/managedidentities v1.3.0/go.mod h1:UzlW3cBOiPrzucO5qWkNkh0w33KFtBJU281hacNvsdE= cloud.google.com/go/managedidentities v1.4.0/go.mod h1:NWSBYbEMgqmbZsLIyKvxrYbtqOsxY1ZrGM+9RgDqInM= cloud.google.com/go/managedidentities v1.5.0/go.mod h1:+dWcZ0JlUmpuxpIDfyP5pP5y0bLdRwOS4Lp7gMni/LA= @@ -533,8 +533,8 @@ cloud.google.com/go/storage v1.23.0/go.mod h1:vOEEDNFnciUMhBeT6hsJIn3ieU5cFRmzeL cloud.google.com/go/storage v1.27.0/go.mod h1:x9DOL8TK/ygDUMieqwfhdpQryTeEkhGKMi80i/iqR2s= cloud.google.com/go/storage v1.28.1/go.mod h1:Qnisd4CqDdo6BGs2AD5LLnEsmSQ80wQ5ogcBBKhU86Y= cloud.google.com/go/storage v1.29.0/go.mod h1:4puEjyTKnku6gfKoTfNOU/W+a9JyuVNxjpS5GBrB8h4= -cloud.google.com/go/storage v1.42.0 h1:4QtGpplCVt1wz6g5o1ifXd656P5z+yNgzdw1tVfp0cU= -cloud.google.com/go/storage v1.42.0/go.mod h1:HjMXRFq65pGKFn6hxj6x3HCyR41uSB72Z0SO/Vn6JFQ= +cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= +cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= cloud.google.com/go/storagetransfer v1.5.0/go.mod h1:dxNzUopWy7RQevYFHewchb29POFv3/AaBgnhqzqiK0w= cloud.google.com/go/storagetransfer v1.6.0/go.mod h1:y77xm4CQV/ZhFZH75PLEXY0ROiS7Gh6pSKrM8dJyg6I= cloud.google.com/go/storagetransfer v1.7.0/go.mod h1:8Giuj1QNb1kfLAiWM1bN6dHzfdlDAVC9rv9abHot2W4= @@ -1177,8 +1177,8 @@ github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.6.7/go.mod h1:dyJXwwfPK2VSqiB9Klm1J6romD608Ba7Hij42vrOBCo= github.com/envoyproxy/protoc-gen-validate v0.9.1/go.mod h1:OKNgG7TCp5pF4d6XftA0++PMirau2/yoOwVac3AbF2w= -github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A= 
-github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew= +github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= +github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5/go.mod h1:a2zkGnVExMxdzMo3M0Hi/3sEU+cWnZpSni0O6/Yb/P0= github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v4.11.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= @@ -1361,8 +1361,8 @@ github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2V github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.2.0 h1:uCdmnmatrKCgMBlM4rMuJZWOkPDqdbZPnrMXDY4gI68= -github.com/golang/glog v1.2.0/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= +github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= +github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -1475,11 +1475,11 @@ github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= github.com/google/pprof 
v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= -github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/google/subcommands v1.0.1/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -1487,8 +1487,8 @@ github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/wire v0.5.0 h1:I7ELFeVBr3yfPIcc8+MWvrjk+3VjbcSzoXm3JVa+jD8= -github.com/google/wire v0.5.0/go.mod h1:ngWDr9Qvq3yZA10YrxfyGELY/AFWGVpy9c1LTRi1EoU= +github.com/google/wire v0.6.0 h1:HBkoIh4BdSxoyo9PveV8giw7ZsaBOvzWKfcg/6MrVwI= +github.com/google/wire v0.6.0/go.mod h1:F4QhpQ9EDIdJ1Mbop/NZBRB+5yrR6qg3BnctaoUk6NA= github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod 
h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= @@ -1507,8 +1507,8 @@ github.com/googleapis/gax-go/v2 v2.5.1/go.mod h1:h6B0KMMFNtI2ddbGJn3T3ZbwkeT6yqE github.com/googleapis/gax-go/v2 v2.6.0/go.mod h1:1mjbznJAPHFpesgE5ucqfYEscaz5kMdcIDwU/6+DDoY= github.com/googleapis/gax-go/v2 v2.7.0/go.mod h1:TEop28CZZQ2y+c0VxMUmu1lV+fQx57QpBWsYpwqHJx8= github.com/googleapis/gax-go/v2 v2.7.1/go.mod h1:4orTrqY6hXxxaUL4LHIPl6lGo8vAE38/qKbhSAKP6QI= -github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= -github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= +github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= +github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= @@ -1740,8 +1740,8 @@ github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress 
v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -2397,13 +2397,13 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0/go.mod h1:oVGt1LRbBOBq1A5BQLlUg9UaU/54aiHw8cgjV3aWZ/E= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0/go.mod h1:vEhqr0m4eTc+DWxfsXoXue2GBgV2uUwVznkGIHW/e5w= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0/go.mod h1:BMsdeOxN04K0L5FNUBfjFdvwWGNe/rkmSwH4Aelu/X0= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 h1:hCq2hNMwsegUvPzI7sPOvtO9cqyy5GbWt/Ybp2xrx8Q= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0/go.mod h1:LqaApwGx/oUmzsbqxkzuBvyoPpkxk3JQWnqfVrJ3wCA= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1 h1:gbhw/u49SS3gkPWiYweQNJGm/uJN5GkI/FrosxSHT7A= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1/go.mod h1:GnOaBaFQ2we3b9AGWJpsBa7v1S5RlQzlC3O7dRMxZhM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= 
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0 h1:UaQVCH34fQsyDjlgS0L070Kjs9uCrLKoQfzn2Nl7XTY= go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0/go.mod h1:Ks4aHdMgu1vAfEY0cIBHcGx2l1S0+PwFm2BE/HRzqSk= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= @@ -2518,10 +2518,12 @@ golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4 golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -2537,8 +2539,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod 
h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e h1:I88y4caeGeuDQxgdoFPUq097j7kNfw6uvuiNxUBfcBk= -golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -2582,8 +2584,10 @@ golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -2665,9 +2669,11 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -2697,8 +2703,8 @@ golang.org/x/oauth2 v0.4.0/go.mod h1:RznEsdpjGAINPTOF0UH/t+xJ75L18YO3Ho6Pyn+uRec golang.org/x/oauth2 v0.5.0/go.mod h1:9/XBHVqLaWO3/BRHs5jbpYCnOZVjj5V0ndyaAM7KB4I= golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= 
+golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -2715,6 +2721,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -2859,9 +2867,11 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -2874,9 +2884,11 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -2893,9 +2905,10 @@ golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= -golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -2907,8 +2920,8 @@ golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20220922220347-f3bd1da661af/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -2921,7 +2934,6 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190422233926-fe54fb35175b/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -2991,8 +3003,10 @@ golang.org/x/tools v0.1.11/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -3003,8 +3017,8 @@ golang.org/x/xerrors 
v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= @@ -3073,8 +3087,8 @@ google.golang.org/api v0.108.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/ google.golang.org/api v0.110.0/go.mod h1:7FC4Vvx1Mooxh8C5HWjzZHcavuS2f6pmJpZx60ca7iI= google.golang.org/api v0.111.0/go.mod h1:qtFHvU9mhgTJegR31csQ+rwxyUTHOKFqCKWp1J0fdw0= google.golang.org/api v0.114.0/go.mod h1:ifYI2ZsFK6/uGddGfAD5BMxlnkBqCmqHSDUVi45N5Yg= -google.golang.org/api v0.184.0 h1:dmEdk6ZkJNXy1JcDhn/ou0ZUq7n9zropG2/tR4z+RDg= -google.golang.org/api v0.184.0/go.mod h1:CeDTtUEiYENAf8PPG5VZW2yNp2VM3VWbCeTioAZBTBA= +google.golang.org/api v0.191.0 h1:cJcF09Z+4HAB2t5qTQM1ZtfL/PemsLFkcFG67qq2afk= +google.golang.org/api v0.191.0/go.mod h1:tD5dsFGxFza0hnQveGfVk9QQYKcfp+VzgRqyXFxE0+E= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -3218,12 +3232,12 @@ google.golang.org/genproto v0.0.0-20230303212802-e74f57abe488/go.mod h1:TvhZT5f7 google.golang.org/genproto v0.0.0-20230306155012-7f2fa6fef1f4/go.mod h1:NWraEVixdDnqcqQ30jipen1STv2r/n24Wb7twVTGR4s= google.golang.org/genproto v0.0.0-20230320184635-7606e756e683/go.mod h1:NWraEVixdDnqcqQ30jipen1STv2r/n24Wb7twVTGR4s= google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= -google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 h1:HCZ6DlkKtCDAtD8ForECsY3tKuaR+p4R3grlK80uCCc= -google.golang.org/genproto v0.0.0-20240604185151-ef581f913117/go.mod h1:lesfX/+9iA+3OdqeCpoDddJaNxVB1AB6tD7EfqMmprc= -google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 h1:QW9+G6Fir4VcRXVH8x3LilNAb6cxBGLa6+GM4hRwexE= -google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3/go.mod h1:kdrSS/OiLkPrNUpzD4aHgCq2rVuC/YRxok32HXZ4vRE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3 h1:9Xyg6I9IWQZhRVfCWjKK+l6kI0jHcPesVlMnT//aHNo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 h1:CT2Thj5AuPV9phrYMtzX11k+XkzMGfRAet42PmoTATM= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988/go.mod h1:7uvplUBj4RjHAxIZ//98LzOvrQ04JBkaixRmCMI29hc= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod 
h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.0.5/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= @@ -3269,8 +3283,8 @@ google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCD google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= -google.golang.org/grpc v1.64.1 h1:LKtvyfbX3UGVPFcGqJ9ItpVWW6oN/2XqTxfAnwRRXiA= -google.golang.org/grpc v1.64.1/go.mod h1:hiQF4LFZelK2WKaP6W0L92zGHtiQdZxk8CrSdvyjeP0= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -3289,8 +3303,8 @@ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.29.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 
h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/cenkalti/backoff.v1 v1.1.0 h1:Arh75ttbsvlpVA7WtVpH4u9h6Zl46xuptxqLxPiSo4Y= diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index c9b8220e7f3..e5469471cd1 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -31,26 +31,33 @@ type config struct { AllowHostAccess bool `mapstructure:"allow_host_access"` // ErrorOnIncompatibleVersion controls whether to return error or delete DBFile created with older duckdb version. ErrorOnIncompatibleVersion bool `mapstructure:"error_on_incompatible_version"` - // ExtTableStorage controls if every table is stored in a different db file + // ExtTableStorage controls if every table is stored in a different db file. + // Backup is only enabled when external table storage is enabled. ExtTableStorage bool `mapstructure:"external_table_storage"` // CPU cores available for the DB CPU int `mapstructure:"cpu"` // MemoryLimitGB is the amount of memory available for the DB MemoryLimitGB int `mapstructure:"memory_limit_gb"` - // MaxMemoryOverride sets a hard override for the "max_memory" DuckDB setting - MaxMemoryGBOverride int `mapstructure:"max_memory_gb_override"` - // ThreadsOverride sets a hard override for the "threads" DuckDB setting. Set to -1 for unlimited threads. - ThreadsOverride int `mapstructure:"threads_override"` + // CPUWrite is CPU available for the DB when writing data + CPUWrite int `mapstructure:"cpu_write"` + // MemoryLimitGBWrite is the amount of memory available for the DB when writing data + MemoryLimitGBWrite int `mapstructure:"memory_limit_gb_write"` // BootQueries is SQL to execute when initializing a new connection. 
It runs before any extensions are loaded or default settings are set. BootQueries string `mapstructure:"boot_queries"` // InitSQL is SQL to execute when initializing a new connection. It runs after extensions are loaded and and default settings are set. InitSQL string `mapstructure:"init_sql"` + // LogQueries controls whether to log the raw SQL passed to OLAP.Execute. (Internal queries will not be logged.) + LogQueries bool `mapstructure:"log_queries"` + // BackupBucket is gcs bucket to store db backups. Should be of the form `gs://bucket-name`. + BackupBucket string `mapstructure:"backup_bucket"` + // BackupBucketCredentialsJSON is the json credentials for the backup bucket. + BackupBucketCredentialsJSON string `mapstructure:"backup_bucket_credentials_json"` // DBFilePath is the path where the database is stored. It is inferred from the DSN (can't be provided by user). DBFilePath string `mapstructure:"-"` // DBStoragePath is the path where the database files are stored. It is inferred from the DSN (can't be provided by user). - DBStoragePath string `mapstructure:"-"` - // LogQueries controls whether to log the raw SQL passed to OLAP.Execute. (Internal queries will not be logged.) 
- LogQueries bool `mapstructure:"log_queries"` + DBStoragePath string `mapstructure:"-"` + ReadSettings map[string]string `mapstructure:"-"` + WriteSettings map[string]string `mapstructure:"-"` } func newConfig(cfgMap map[string]any) (*config, error) { @@ -83,6 +90,7 @@ func newConfig(cfgMap map[string]any) (*config, error) { // Override DSN.Path with config.Path if cfg.Path != "" { // backward compatibility, cfg.Path takes precedence over cfg.DataDir uri.Path = cfg.Path + cfg.ExtTableStorage = false } else if cfg.DataDir != "" && uri.Path == "" { // if some path is set in DSN, honour that path and ignore DataDir uri.Path = filepath.Join(cfg.DataDir, "main.db") } @@ -93,23 +101,20 @@ func newConfig(cfgMap map[string]any) (*config, error) { } // Set memory limit - maxMemory := cfg.MemoryLimitGB - if cfg.MaxMemoryGBOverride != 0 { - maxMemory = cfg.MaxMemoryGBOverride + if cfg.MemoryLimitGB > 0 { + cfg.ReadSettings["max_memory"] = fmt.Sprintf("%dGB", cfg.MemoryLimitGB) } - if maxMemory > 0 { - qry.Add("max_memory", fmt.Sprintf("%dGB", maxMemory)) + if cfg.MemoryLimitGBWrite > 0 { + cfg.WriteSettings["max_memory"] = fmt.Sprintf("%dGB", cfg.MemoryLimitGB) } // Set threads limit var threads int - if cfg.ThreadsOverride != 0 { - threads = cfg.ThreadsOverride - } else if cfg.CPU > 0 { - threads = cfg.CPU + if cfg.CPU > 0 { + cfg.ReadSettings["threads"] = strconv.Itoa(cfg.CPU) } - if threads > 0 { // NOTE: threads=0 or threads=-1 means no limit - qry.Add("threads", strconv.Itoa(threads)) + if cfg.CPUWrite > 0 { + cfg.WriteSettings["threads"] = strconv.Itoa(cfg.CPUWrite) } // Set pool size diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 142bb64a3da..b3e68299444 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -2,24 +2,19 @@ package duckdb import ( "context" - "database/sql/driver" "errors" "fmt" "io/fs" + "log/slog" "net/url" "os" - "path/filepath" - "regexp" - "strconv" "strings" "sync" "time" - 
"github.com/XSAM/otelsql" - "github.com/c2h5oh/datasize" "github.com/jmoiron/sqlx" - "github.com/marcboeker/go-duckdb" "github.com/mitchellh/mapstructure" + duckdbreplicator "github.com/rilldata/duckdb-replicator" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/drivers/duckdb/extensions" "github.com/rilldata/rill/runtime/drivers/file" @@ -30,6 +25,7 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" + "go.uber.org/zap/exp/zapslog" "golang.org/x/sync/semaphore" ) @@ -151,24 +147,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie } logger.Debug("opening duckdb handle", zap.String("dsn", cfg.DSN)) - // We've seen the DuckDB .wal and .tmp files grow to 100s of GBs in some cases. - // This prevents recovery after restarts since DuckDB hangs while trying to reprocess the files. - // This is a hacky solution that deletes the files (if they exist) before re-opening the DB. - // Generally, this should not lead to data loss since reconcile will bring the database back to the correct state. 
- if cfg.DBFilePath != "" { - // Always drop the .tmp directory - tmpPath := cfg.DBFilePath + ".tmp" - _ = os.RemoveAll(tmpPath) - - // Drop the .wal file if it's bigger than 100MB - walPath := cfg.DBFilePath + ".wal" - if stat, err := os.Stat(walPath); err == nil { - if stat.Size() >= 100*int64(datasize.MB) { - _ = os.Remove(walPath) - } - } - } - if cfg.DBStoragePath != "" { if err := os.MkdirAll(cfg.DBStoragePath, fs.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { return nil, err @@ -206,33 +184,26 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie }, connectionsInUse)) // Open the DB - err = c.reopenDB() + err = c.reopenDB(ctx, false) if err != nil { if c.config.ErrorOnIncompatibleVersion || !strings.Contains(err.Error(), "created with an older, incompatible version of Rill") { return nil, err } c.logger.Debug("Resetting .db file because it was created with an older, incompatible version of Rill") - - tmpPath := cfg.DBFilePath + ".tmp" - _ = os.RemoveAll(tmpPath) - walPath := cfg.DBFilePath + ".wal" - _ = os.Remove(walPath) - _ = os.Remove(cfg.DBFilePath) - // reopen connection again - if err := c.reopenDB(); err != nil { + if err := c.reopenDB(ctx, true); err != nil { return nil, err } } // Return nice error for old macOS versions - conn, err := c.db.Connx(context.Background()) + _, release, err := c.db.AcquireReadConnection(context.Background()) if err != nil && strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. 
See this link for details: https://support.apple.com/en-in/macos/upgrade") os.Exit(1) } else if err == nil { - conn.Close() + _ = release() } else { return nil, err } @@ -244,28 +215,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie return c, nil } -func (d Driver) Drop(cfgMap map[string]any, logger *zap.Logger) error { - cfg, err := newConfig(cfgMap) - if err != nil { - return err - } - if cfg.DBStoragePath != "" { - return os.RemoveAll(cfg.DBStoragePath) - } - if cfg.DBFilePath != "" { - err = os.Remove(cfg.DBFilePath) - if err != nil && !os.IsNotExist(err) { - return err - } - // Hacky approach to remove the wal file - _ = os.Remove(cfg.DBFilePath + ".wal") - // also temove the temp dir - _ = os.RemoveAll(cfg.DBFilePath + ".tmp") - } - - return nil -} - func (d Driver) Spec() drivers.Spec { if d.name == "motherduck" { return motherduckSpec @@ -322,7 +271,7 @@ type connection struct { instanceID string // do not use directly it can also be nil or closed // use acquireOLAPConn/acquireMetaConn - db *sqlx.DB + db duckdbreplicator.DB // driverConfig is input config passed during Open driverConfig map[string]any driverName string @@ -510,7 +459,7 @@ func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, er } // reopenDB opens the DuckDB handle anew. If c.db is already set, it closes the existing handle first. -func (c *connection) reopenDB() error { +func (c *connection) reopenDB(ctx context.Context, clean bool) error { // If c.db is already open, close it first if c.db != nil { err := c.db.Close() @@ -556,107 +505,50 @@ func (c *connection) reopenDB() error { bootQueries = append(bootQueries, c.config.InitSQL) } - // DuckDB extensions need to be loaded separately on each connection, but the built-in connection pool in database/sql doesn't enable that. - // So we use go-duckdb's custom connector to pass a callback that it invokes for each new connection. 
- connector, err := duckdb.NewConnector(c.config.DSN, func(execer driver.ExecerContext) error { - for _, qry := range bootQueries { - _, err := execer.ExecContext(context.Background(), qry, nil) - if err != nil && strings.Contains(err.Error(), "Failed to download extension") { - // Retry using another mirror. Based on: https://github.com/duckdb/duckdb/issues/9378 - _, err = execer.ExecContext(context.Background(), qry+" FROM 'http://nightly-extensions.duckdb.org'", nil) - } + logger := slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ + AddSource: true, + })) + + // Create new DB + var err error + if c.config.ExtTableStorage { + var backup *duckdbreplicator.BackupProvider + if c.config.BackupBucket != "" { + backup, err = duckdbreplicator.NewGCSBackupProvider(ctx, &duckdbreplicator.GCSBackupProviderOptions{ + UseHostCredentials: c.config.AllowHostAccess, + ApplicationCredentialsJSON: c.config.BackupBucketCredentialsJSON, + Bucket: c.config.BackupBucket, + UniqueIdentifier: c.instanceID, + }) if err != nil { return err } } - return nil - }) - if err != nil { - // Check for using incompatible database files - if strings.Contains(err.Error(), "Trying to read a database file with version number") { - return fmt.Errorf("database file %q was created with an older, incompatible version of Rill (please remove it and try again)", c.config.DSN) - } - - // Check for another process currently accessing the DB - if strings.Contains(err.Error(), "Could not set lock on file") { - return fmt.Errorf("failed to open database (is Rill already running?): %w", err) - } - - return err - } - - // Create new DB - sqlDB := otelsql.OpenDB(connector) - db := sqlx.NewDb(sqlDB, "duckdb") - db.SetMaxOpenConns(c.config.PoolSize) - c.db = db - - if !c.config.ExtTableStorage { - return nil - } - - conn, err := db.Connx(context.Background()) - if err != nil { - return err + c.db, err = duckdbreplicator.NewDB(ctx, c.instanceID, &duckdbreplicator.DBOptions{ + LocalPath: 
c.config.DBStoragePath, + Clean: clean, + BackupProvider: backup, + InitQueries: bootQueries, + StableSelect: !c.config.AllowHostAccess, + Logger: logger, + }) + } else { + c.db, err = duckdbreplicator.NewSingleDB(ctx, &duckdbreplicator.SingleDBOptions{ + DSN: c.config.DSN, + Clean: clean, + InitQueries: bootQueries, + Logger: logger, + }) } - defer conn.Close() - - c.logLimits(conn) - - // 2023-12-11: Hail mary for solving this issue: https://github.com/duckdblabs/rilldata/issues/6. - // Forces DuckDB to create catalog entries for the information schema up front (they are normally created lazily). - // Can be removed if the issue persists. - _, err = conn.ExecContext(context.Background(), ` - select - coalesce(t.table_catalog, current_database()) as "database", - t.table_schema as "schema", - t.table_name as "name", - t.table_type as "type", - array_agg(c.column_name order by c.ordinal_position) as "column_names", - array_agg(c.data_type order by c.ordinal_position) as "column_types", - array_agg(c.is_nullable = 'YES' order by c.ordinal_position) as "column_nullable" - from information_schema.tables t - join information_schema.columns c on t.table_schema = c.table_schema and t.table_name = c.table_name - group by 1, 2, 3, 4 - order by 1, 2, 3, 4 - `) if err != nil { return err } - // List the directories directly in the external storage directory - // Load the version.txt from each sub-directory - // If version.txt is found, attach only the .db file matching the version.txt. - // If attach fails, log the error and delete the version.txt and .db file (e.g. 
might be DuckDB version change) - entries, err := os.ReadDir(c.config.DBStoragePath) + _, release, err := c.db.AcquireReadConnection(context.Background()) if err != nil { return err } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - path := filepath.Join(c.config.DBStoragePath, entry.Name()) - version, exist, err := c.tableVersion(entry.Name()) - if err != nil { - c.logger.Error("error in fetching db version", zap.String("table", entry.Name()), zap.Error(err)) - _ = os.RemoveAll(path) - continue - } - if !exist { - _ = os.RemoveAll(path) - continue - } - - dbFile := filepath.Join(path, fmt.Sprintf("%s.db", version)) - db := dbName(entry.Name(), version) - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("ATTACH %s AS %s", safeSQLString(dbFile), safeSQLName(db))) - if err != nil { - c.logger.Error("attach failed clearing db file", zap.String("db", dbFile), zap.Error(err)) - _, _ = conn.ExecContext(context.Background(), fmt.Sprintf("DROP VIEW IF EXISTS %s", safeSQLName(entry.Name()))) - _ = os.RemoveAll(path) - } - } + defer release() return nil } @@ -743,6 +635,7 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn // acquireConn returns a DuckDB connection. It should only be used internally in acquireMetaConn and acquireOLAPConn. // acquireConn implements the connection tracking and DB reopening logic described in the struct definition for connection. +// TODO :: fix me ?? func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func() error, error) { c.dbCond.L.Lock() for { @@ -766,7 +659,7 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func // When tx is true, and the database is backed by a file, we reopen the database to ensure only one DuckDB connection is open. 
// This avoids the following issue: https://github.com/duckdb/duckdb/issues/9150 if c.config.DBFilePath != "" { - err := c.reopenDB() + err := c.reopenDB(ctx, false) if err != nil { c.txMu.Unlock() return nil, nil, err @@ -783,7 +676,7 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func } } - conn, err := c.db.Connx(ctx) + conn, releaseConn, err := c.db.AcquireReadConnection(ctx) if err != nil { releaseTx() return nil, nil, err @@ -796,7 +689,7 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func c.connTimesMu.Unlock() release := func() error { - err := conn.Close() + err := releaseConn() c.connTimesMu.Lock() delete(c.connTimes, connID) c.connTimesMu.Unlock() @@ -805,7 +698,7 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func c.dbConnCount-- if c.dbConnCount == 0 && c.dbReopen { c.dbReopen = false - err = c.reopenDB() + err = c.reopenDB(ctx, false) if err == nil { c.logger.Debug("reopened DuckDB successfully") } else { @@ -848,71 +741,8 @@ func (c *connection) periodicallyEmitStats(d time.Duration) { for { select { case <-statTicker.C: - estimatedDBSize := c.estimateSize(false) + estimatedDBSize := c.estimateSize() c.activity.RecordMetric(c.ctx, "duckdb_estimated_size_bytes", float64(estimatedDBSize)) - - // NOTE :: running CALL pragma_database_size() while duckdb is ingesting data is causing the WAL file to explode. - // Commenting the below code for now. 
Verify with next duckdb release - - // // Motherduck driver doesn't provide pragma stats - // if c.driverName == "motherduck" { - // continue - // } - - // var stat dbStat - // // Obtain a connection, query, release - // err := func() error { - // conn, release, err := c.acquireMetaConn(c.ctx) - // if err != nil { - // return err - // } - // defer func() { _ = release() }() - // err = conn.GetContext(c.ctx, &stat, "CALL pragma_database_size()") - // return err - // }() - // if err != nil { - // c.logger.Error("couldn't query DuckDB stats", zap.Error(err)) - // continue - // } - - // // Emit collected stats as activity events - // commonDims := []attribute.KeyValue{ - // attribute.String("duckdb.name", stat.DatabaseName), - // } - - // dbSize, err := humanReadableSizeToBytes(stat.DatabaseSize) - // if err != nil { - // c.logger.Error("couldn't convert duckdb size to bytes", zap.Error(err)) - // } else { - // c.activity.RecordMetric(c.ctx, "duckdb_size_bytes", dbSize, commonDims...) - // } - - // walSize, err := humanReadableSizeToBytes(stat.WalSize) - // if err != nil { - // c.logger.Error("couldn't convert duckdb wal size to bytes", zap.Error(err)) - // } else { - // c.activity.RecordMetric(c.ctx, "duckdb_wal_size_bytes", walSize, commonDims...) - // } - - // memoryUsage, err := humanReadableSizeToBytes(stat.MemoryUsage) - // if err != nil { - // c.logger.Error("couldn't convert duckdb memory usage to bytes", zap.Error(err)) - // } else { - // c.activity.RecordMetric(c.ctx, "duckdb_memory_usage_bytes", memoryUsage, commonDims...) - // } - - // memoryLimit, err := humanReadableSizeToBytes(stat.MemoryLimit) - // if err != nil { - // c.logger.Error("couldn't convert duckdb memory limit to bytes", zap.Error(err)) - // } else { - // c.activity.RecordMetric(c.ctx, "duckdb_memory_limit_bytes", memoryLimit, commonDims...) - // } - - // c.activity.RecordMetric(c.ctx, "duckdb_block_size_bytes", float64(stat.BlockSize), commonDims...) 
- // c.activity.RecordMetric(c.ctx, "duckdb_total_blocks", float64(stat.TotalBlocks), commonDims...) - // c.activity.RecordMetric(c.ctx, "duckdb_free_blocks", float64(stat.FreeBlocks), commonDims...) - // c.activity.RecordMetric(c.ctx, "duckdb_used_blocks", float64(stat.UsedBlocks), commonDims...) - case <-c.ctx.Done(): statTicker.Stop() return @@ -945,77 +775,3 @@ func (c *connection) periodicallyCheckConnDurations(d time.Duration) { } } } - -func (c *connection) logLimits(conn *sqlx.Conn) { - row := conn.QueryRowContext(context.Background(), "SELECT value FROM duckdb_settings() WHERE name='max_memory'") - var memory string - _ = row.Scan(&memory) - - row = conn.QueryRowContext(context.Background(), "SELECT value FROM duckdb_settings() WHERE name='threads'") - var threads string - _ = row.Scan(&threads) - - c.logger.Debug("duckdb limits", zap.String("memory", memory), zap.String("threads", threads)) -} - -// fatalInternalError logs a critical internal error and exits the process. -// This is used for errors that are completely unrecoverable. -// Ideally, we should refactor to cleanup/reopen/rebuild so that we don't need this. 
-func (c *connection) fatalInternalError(err error) { - c.logger.Fatal("duckdb: critical internal error", zap.Error(err)) -} - -// Regex to parse human-readable size returned by DuckDB -// nolint -var humanReadableSizeRegex = regexp.MustCompile(`^([\d.]+)\s*(\S+)$`) - -// Reversed logic of StringUtil::BytesToHumanReadableString -// see https://github.com/cran/duckdb/blob/master/src/duckdb/src/common/string_util.cpp#L157 -// Examples: 1 bytes, 2 bytes, 1KB, 1MB, 1TB, 1PB -// nolint -func humanReadableSizeToBytes(sizeStr string) (float64, error) { - var multiplier float64 - - match := humanReadableSizeRegex.FindStringSubmatch(sizeStr) - - if match == nil { - return 0, fmt.Errorf("invalid size format: '%s'", sizeStr) - } - - sizeFloat, err := strconv.ParseFloat(match[1], 64) - if err != nil { - return 0, err - } - - switch match[2] { - case "byte", "bytes": - multiplier = 1 - case "KB": - multiplier = 1000 - case "MB": - multiplier = 1000 * 1000 - case "GB": - multiplier = 1000 * 1000 * 1000 - case "TB": - multiplier = 1000 * 1000 * 1000 * 1000 - case "PB": - multiplier = 1000 * 1000 * 1000 * 1000 * 1000 - default: - return 0, fmt.Errorf("unknown size unit '%s' in '%s'", match[2], sizeStr) - } - - return sizeFloat * multiplier, nil -} - -// nolint -type dbStat struct { - DatabaseName string `db:"database_name"` - DatabaseSize string `db:"database_size"` - BlockSize int64 `db:"block_size"` - TotalBlocks int64 `db:"total_blocks"` - UsedBlocks int64 `db:"used_blocks"` - FreeBlocks int64 `db:"free_blocks"` - WalSize string `db:"wal_size"` - MemoryUsage string `db:"memory_usage"` - MemoryLimit string `db:"memory_limit"` -} diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 64f251ae92d..abfa1f84795 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -14,30 +14,6 @@ import ( "go.uber.org/zap" ) -func TestOpenDrop(t *testing.T) { - path := filepath.Join(t.TempDir(), "tmp.db") - walpath 
:= path + ".wal" - dsn := path - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - err = olap.Exec(context.Background(), &drivers.Statement{Query: "CREATE TABLE foo (bar INTEGER)"}) - require.NoError(t, err) - - err = handle.Close() - require.NoError(t, err) - require.FileExists(t, path) - - err = Driver{}.Drop(map[string]any{"path": dsn}, zap.NewNop()) - require.NoError(t, err) - require.NoFileExists(t, path) - require.NoFileExists(t, walpath) -} - func TestNoFatalErr(t *testing.T) { // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 @@ -207,38 +183,3 @@ func TestNoFatalErrConcurrent(t *testing.T) { err = handle.Close() require.NoError(t, err) } - -func TestHumanReadableSizeToBytes(t *testing.T) { - tests := []struct { - input string - expected float64 - shouldErr bool - }{ - {"1 byte", 1, false}, - {"2 bytes", 2, false}, - {"1KB", 1000, false}, - {"1.5KB", 1500, false}, - {"1MB", 1000 * 1000, false}, - {"2.5MB", 2.5 * 1000 * 1000, false}, - {"1GB", 1000 * 1000 * 1000, false}, - {"1.5GB", 1.5 * 1000 * 1000 * 1000, false}, - {"1TB", 1000 * 1000 * 1000 * 1000, false}, - {"1.5TB", 1.5 * 1000 * 1000 * 1000 * 1000, false}, - {"1PB", 1000 * 1000 * 1000 * 1000 * 1000, false}, - {"1.5PB", 1.5 * 1000 * 1000 * 1000 * 1000 * 1000, false}, - {"invalid", 0, true}, - {"123invalid", 0, true}, - {"123 ZZ", 0, true}, - } - - for _, tt := range tests { - result, err := humanReadableSizeToBytes(tt.input) - if (err != nil) != tt.shouldErr { - t.Errorf("expected error: %v, got error: %v for input: %s", tt.shouldErr, err, tt.input) - } - - if !tt.shouldErr && result != tt.expected { - t.Errorf("expected: %v, got: %v for input: %s", tt.expected, result, tt.input) - } - } -} diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go 
index a686c0ecfd6..02516820d00 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -2,18 +2,13 @@ package duckdb import ( "context" - dbsql "database/sql" "errors" "fmt" - "io" - "io/fs" - "os" - "path/filepath" - "strings" "time" "github.com/google/uuid" "github.com/jmoiron/sqlx" + duckdbreplicator "github.com/rilldata/duckdb-replicator" runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/observability" @@ -181,754 +176,59 @@ func (c *connection) Execute(ctx context.Context, stmt *drivers.Statement) (res return res, nil } -func (c *connection) estimateSize(includeTemp bool) int64 { - path := c.config.DBFilePath - if path == "" { - return 0 - } - - paths := []string{path} - if includeTemp { - paths = append(paths, fmt.Sprintf("%s.wal", path)) - } - if c.config.ExtTableStorage { - entries, err := os.ReadDir(c.config.DBStoragePath) - if err == nil { // ignore error - for _, entry := range entries { - if !entry.IsDir() { - continue - } - // this is to avoid counting temp tables during source ingestion - // in certain cases we only want to compute the size of the serving db files - if strings.HasPrefix(entry.Name(), "__rill_tmp_") && !includeTemp { - continue - } - path := filepath.Join(c.config.DBStoragePath, entry.Name()) - version, exist, err := c.tableVersion(entry.Name()) - if err != nil || !exist { - continue - } - paths = append(paths, filepath.Join(path, fmt.Sprintf("%s.db", version))) - if includeTemp { - paths = append(paths, filepath.Join(path, fmt.Sprintf("%s.db.wal", version))) - } - } - } - } - return fileSize(paths) +// TODO :: implement estimate size +func (c *connection) estimateSize() int64 { + return 0 } // AddTableColumn implements drivers.OLAPStore. 
func (c *connection) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - c.logger.Debug("add table column", zap.String("tableName", tableName), zap.String("columnName", columnName), zap.String("typ", typ)) - if !c.config.ExtTableStorage { - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", safeSQLName(tableName), safeSQLName(columnName), typ), - Priority: 1, - LongRunning: true, - }) - } - - version, exist, err := c.tableVersion(tableName) - if err != nil { - return err - } - - if !exist { - return fmt.Errorf("table %q does not exist", tableName) - } - dbName := dbName(tableName, version) - return c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, conn *dbsql.Conn) error { - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ALTER TABLE %s.default ADD COLUMN %s %s", safeSQLName(dbName), safeSQLName(columnName), typ)}) - if err != nil { - return err - } - // recreate view to propagate schema changes - return c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(tableName), safeSQLName(dbName))}) - }) + return c.db.AddTableColumn(ctx, tableName, columnName, typ) } // AlterTableColumn implements drivers.OLAPStore. 
func (c *connection) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - c.logger.Debug("alter table column", zap.String("tableName", tableName), zap.String("columnName", columnName), zap.String("newType", newType)) - if !c.config.ExtTableStorage { - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("ALTER TABLE %s ALTER %s TYPE %s", safeSQLName(tableName), safeSQLName(columnName), newType), - Priority: 1, - LongRunning: true, - }) - } - - version, exist, err := c.tableVersion(tableName) - if err != nil { - return err - } - - if !exist { - return fmt.Errorf("table %q does not exist", tableName) - } - dbName := dbName(tableName, version) - return c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, conn *dbsql.Conn) error { - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ALTER TABLE %s.default ALTER %s TYPE %s", safeSQLName(dbName), safeSQLName(columnName), newType)}) - if err != nil { - return err - } - - // recreate view to propagate schema changes - return c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(tableName), safeSQLName(dbName))}) - }) + return c.db.AlterTableColumn(ctx, tableName, columnName, newType) } // CreateTableAsSelect implements drivers.OLAPStore. // We add a \n at the end of the any user query to ensure any comment at the end of model doesn't make the query incomplete. 
func (c *connection) CreateTableAsSelect(ctx context.Context, name string, view bool, sql string, tableOpts map[string]any) error { - c.logger.Debug("create table", zap.String("name", name), zap.Bool("view", view)) - if view { - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(name), sql), - Priority: 1, - LongRunning: true, - }) - } - if !c.config.ExtTableStorage { - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n)", safeSQLName(name), sql), - Priority: 1, - LongRunning: true, - }) - } - - var cleanupFunc func() - err := c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - // NOTE: Running mkdir while holding the connection to avoid directory getting cleaned up when concurrent calls to RenameTable cause reopenDB to be called. - - // create a new db file in // directory - sourceDir := filepath.Join(c.config.DBStoragePath, name) - if err := os.Mkdir(sourceDir, fs.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("create: unable to create dir %q: %w", sourceDir, err) - } - - // check if some older version existed previously to detach it later - oldVersion, oldVersionExists, _ := c.tableVersion(name) - - newVersion := fmt.Sprint(time.Now().UnixMilli()) - dbFile := filepath.Join(sourceDir, fmt.Sprintf("%s.db", newVersion)) - db := dbName(name, newVersion) - - // attach new db - err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH %s AS %s", safeSQLString(dbFile), safeSQLName(db))}) - if err != nil { - removeDBFile(dbFile) - return fmt.Errorf("create: attach %q db failed: %w", dbFile, err) - } - - // Enforce storage limits - if err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE TABLE %s.default AS (%s\n)", safeSQLName(db), sql)}); err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(db, dbFile) } - return fmt.Errorf("create: create %q.default 
table failed: %w", db, err) - } - - // success update version - err = c.updateVersion(name, newVersion) - if err != nil { - // extreme bad luck - cleanupFunc = func() { c.detachAndRemoveFile(db, dbFile) } - return fmt.Errorf("create: update version %q failed: %w", newVersion, err) - } - - qry, err := c.generateSelectQuery(ctx, db) - if err != nil { - return err - } - - // create view query - err = c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeSQLName(name), qry), - }) - if err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(db, dbFile) } - return fmt.Errorf("create: create view %q failed: %w", name, err) - } - - if oldVersionExists { - oldDB := dbName(name, oldVersion) - // ignore these errors since source has been correctly ingested and attached - cleanupFunc = func() { c.detachAndRemoveFile(oldDB, filepath.Join(sourceDir, fmt.Sprintf("%s.db", oldVersion))) } - } - return nil - }) - if cleanupFunc != nil { - cleanupFunc() - } - return err + return c.db.CreateTableAsSelect(ctx, name, sql, &duckdbreplicator.CreateTableOptions{View: view}) } // InsertTableAsSelect implements drivers.OLAPStore. 
func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, byName, inPlace bool, strategy drivers.IncrementalStrategy, uniqueKey []string) error { - c.logger.Debug("insert table", zap.String("name", name), zap.Bool("byName", byName), zap.String("strategy", string(strategy)), zap.Strings("uniqueKey", uniqueKey)) - - if !c.config.ExtTableStorage { - return c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - return c.execIncrementalInsert(ctx, safeSQLName(name), sql, byName, strategy, uniqueKey) - }) - } - - if inPlace { - version, exist, err := c.tableVersion(name) - if err != nil { - return err - } - if !exist { - return fmt.Errorf("insert: table %q does not exist", name) - } - - db := dbName(name, version) - safeName := fmt.Sprintf("%s.default", safeSQLName(db)) - - return c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - return c.execIncrementalInsert(ctx, safeName, sql, byName, strategy, uniqueKey) - }) - } - - var cleanupFunc func() - err := c.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - // Get current table version - oldVersion, oldVersionExists, _ := c.tableVersion(name) - if !oldVersionExists { - return fmt.Errorf("table %q does not exist", name) - } - - // Prepare a new version - newVersion := fmt.Sprint(time.Now().UnixMilli()) - - // Prepare paths - sourceDir := filepath.Join(c.config.DBStoragePath, name) - oldDBFile := filepath.Join(sourceDir, fmt.Sprintf("%s.db", oldVersion)) - newDBFile := filepath.Join(sourceDir, fmt.Sprintf("%s.db", newVersion)) - oldDB := dbName(name, oldVersion) - newDB := dbName(name, newVersion) - - // Copy the old version to the new version - if err := copyFile(oldDBFile, newDBFile); err != nil { - return fmt.Errorf("insert: copy file failed: %w", err) - } - - // Attach the new db - err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH %s AS %s", 
safeSQLString(newDBFile), safeSQLName(newDB))}) - if err != nil { - removeDBFile(newDBFile) - return fmt.Errorf("insert: attach %q db failed: %w", newDBFile, err) - } - - // Execute the insert - safeName := fmt.Sprintf("%s.default", safeSQLName(newDB)) - err = c.execIncrementalInsert(ctx, safeName, sql, byName, strategy, uniqueKey) - if err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("insert: create %q.default table failed: %w", newDB, err) - } - - // Success: update version - err = c.updateVersion(name, newVersion) - if err != nil { - // extreme bad luck - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("insert: update version %q failed: %w", newVersion, err) - } - - // Update the view to the external table in the main DB handle - qry, err := c.generateSelectQuery(ctx, newDB) - if err != nil { - return err - } - err = c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeSQLName(name), qry), - }) - if err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("insert: create view %q failed: %w", name, err) - } - - // Delete the old version (ignoring errors since source the new data has already been correctly inserted and attached) - cleanupFunc = func() { c.detachAndRemoveFile(oldDB, oldDBFile) } - return nil + var st duckdbreplicator.IncrementalStrategy + switch strategy { + case drivers.IncrementalStrategyAppend: + st = duckdbreplicator.IncrementalStrategyAppend + case drivers.IncrementalStrategyMerge: + st = duckdbreplicator.IncrementalStrategyMerge + default: + return fmt.Errorf("incremental insert strategy %q not supported", strategy) + } + return c.db.InsertTableAsSelect(ctx, name, sql, &duckdbreplicator.InsertTableOptions{ + ByName: byName, + Strategy: st, + UniqueKey: uniqueKey, }) - if cleanupFunc != nil { - cleanupFunc() - } - return err } // DropTable implements drivers.OLAPStore. 
func (c *connection) DropTable(ctx context.Context, name string, view bool) error { - c.logger.Debug("drop table", zap.String("name", name), zap.Bool("view", view)) - if !c.config.ExtTableStorage { - var typ string - if view { - typ = "VIEW" - } else { - typ = "TABLE" - } - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("DROP %s IF EXISTS %s", typ, safeSQLName(name)), - Priority: 100, - LongRunning: true, - }) - } - // determine if it is a true view or view on externally stored table - version, exist, err := c.tableVersion(name) - if err != nil { - return err - } - - if !exist { - if !view { - return nil - } - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("DROP VIEW IF EXISTS %s", safeSQLName(name)), - Priority: 100, - LongRunning: true, - }) - } - - err = c.WithConnection(ctx, 100, true, true, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - // drop view - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("DROP VIEW IF EXISTS %s", safeSQLName(name))}) - if err != nil { - return err - } - - oldDB := dbName(name, version) - err = c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("DETACH %s", safeSQLName(oldDB))}) - if err != nil && !strings.Contains(err.Error(), "database not found") { // ignore database not found errors for idempotency - return err - } - return nil - }) - if err != nil { - return err - } - - // delete source directory - return os.RemoveAll(filepath.Join(c.config.DBStoragePath, name)) + return c.db.DropTable(ctx, name) } // RenameTable implements drivers.OLAPStore. 
-// For drop and replace (when running `RenameTable("__tmp_foo", "foo")`): -// `DROP VIEW __tmp_foo` -// `DETACH __tmp_foo__1` -// `mv __tmp_foo/1.db foo/2.db` -// `echo 2 > version.txt` -// `rm __tmp_foo` -// `ATTACH 'foo/2.db' AS foo__2` -// `CREATE OR REPLACE VIEW foo AS SELECT * FROM foo_2` -// `DETACH foo__1` -// `rm foo/1.db` func (c *connection) RenameTable(ctx context.Context, oldName, newName string, view bool) error { - c.logger.Debug("rename table", zap.String("from", oldName), zap.String("to", newName), zap.Bool("view", view), zap.Bool("ext", c.config.ExtTableStorage)) - if strings.EqualFold(oldName, newName) { - return fmt.Errorf("rename: old and new name are same case insensitive strings") - } - if !c.config.ExtTableStorage { - return c.dropAndReplace(ctx, oldName, newName, view) - } - // determine if it is a true view or a view on externally stored table - oldVersion, exist, err := c.tableVersion(oldName) - if err != nil { - return err - } - if !exist { - return c.dropAndReplace(ctx, oldName, newName, view) - } - - oldVersionInNewDir, replaceInNewTable, err := c.tableVersion(newName) - if err != nil { - return err - } - - newSrcDir := filepath.Join(c.config.DBStoragePath, newName) - oldSrcDir := filepath.Join(c.config.DBStoragePath, oldName) - - // reopen duckdb connections which should delete any temporary files built up during ingestion - // need to do detach using tx=true to isolate it from other queries - err = c.WithConnection(ctx, 100, true, true, func(currentCtx, ctx context.Context, conn *dbsql.Conn) error { - err = os.Mkdir(newSrcDir, fs.ModePerm) - if err != nil && !errors.Is(err, fs.ErrExist) { - return err - } - - // drop old view - err = c.Exec(currentCtx, &drivers.Statement{Query: fmt.Sprintf("DROP VIEW IF EXISTS %s", safeSQLName(oldName))}) - if err != nil { - return fmt.Errorf("rename: drop %q view failed: %w", oldName, err) - } - - // detach old db - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("DETACH %s", 
safeSQLName(dbName(oldName, oldVersion)))}) - if err != nil { - return fmt.Errorf("rename: detach %q db failed: %w", dbName(oldName, oldVersion), err) - } - - // move old file as a new file in source directory - newVersion := fmt.Sprint(time.Now().UnixMilli()) - newFile := filepath.Join(newSrcDir, fmt.Sprintf("%s.db", newVersion)) - err = os.Rename(filepath.Join(oldSrcDir, fmt.Sprintf("%s.db", oldVersion)), newFile) - if err != nil { - return fmt.Errorf("rename: rename file failed: %w", err) - } - // also move .db.wal file in case checkpointing was not completed - _ = os.Rename(filepath.Join(oldSrcDir, fmt.Sprintf("%s.db.wal", oldVersion)), - filepath.Join(newSrcDir, fmt.Sprintf("%s.db.wal", newVersion))) - - err = c.updateVersion(newName, newVersion) - if err != nil { - return fmt.Errorf("rename: update version failed: %w", err) - } - err = os.RemoveAll(filepath.Join(c.config.DBStoragePath, oldName)) - if err != nil { - c.logger.Error("rename: unable to delete old path", zap.Error(err)) - } - - newDB := dbName(newName, newVersion) - // attach new db - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH %s AS %s", safeSQLString(newFile), safeSQLName(newDB))}) - if err != nil { - return fmt.Errorf("rename: attach %q db failed: %w", newDB, err) - } - - qry, err := c.generateSelectQuery(ctx, newDB) - if err != nil { - return err - } - - // change view query - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeSQLName(newName), qry)}) - if err != nil { - return fmt.Errorf("rename: create %q view failed: %w", newName, err) - } - - if !replaceInNewTable { - return nil - } - // new table had some other file previously - if err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("DETACH %s", safeSQLName(dbName(newName, oldVersionInNewDir)))}); err != nil { - return err - } - removeDBFile(filepath.Join(newSrcDir, fmt.Sprintf("%s.db", oldVersionInNewDir))) - return nil - }) - return err + return c.db.RenameTable(ctx, 
oldName, newName) } func (c *connection) MayBeScaledToZero(ctx context.Context) bool { return false } -func (c *connection) execIncrementalInsert(ctx context.Context, safeName, sql string, byName bool, strategy drivers.IncrementalStrategy, uniqueKey []string) error { - var byNameClause string - if byName { - byNameClause = "BY NAME" - } - - if strategy == drivers.IncrementalStrategyAppend { - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("INSERT INTO %s %s (%s\n)", safeName, byNameClause, sql), - Priority: 1, - LongRunning: true, - }) - } - - if strategy == drivers.IncrementalStrategyMerge { - // Create a temporary table with the new data - tmp := uuid.New().String() - err := c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("CREATE TEMPORARY TABLE %s AS (%s\n)", safeSQLName(tmp), sql), - Priority: 1, - LongRunning: true, - }) - if err != nil { - return err - } - - // check the count of the new data - // skip if the count is 0 - // if there was no data in the empty file then the detected schema can be different from the current schema which leads to errors or performance issues - res, err := c.Execute(ctx, &drivers.Statement{ - Query: fmt.Sprintf("SELECT COUNT(*) == 0 FROM %s", safeSQLName(tmp)), - Priority: 1, - }) - if err != nil { - return err - } - var empty bool - for res.Next() { - if err := res.Scan(&empty); err != nil { - _ = res.Close() - return err - } - } - _ = res.Close() - if empty { - return nil - } - - // Drop the rows from the target table where the unique key is present in the temporary table - where := "" - for i, key := range uniqueKey { - key = safeSQLName(key) - if i != 0 { - where += " AND " - } - where += fmt.Sprintf("base.%s IS NOT DISTINCT FROM tmp.%s", key, key) - } - err = c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("DELETE FROM %s base WHERE EXISTS (SELECT 1 FROM %s tmp WHERE %s)", safeName, safeSQLName(tmp), where), - Priority: 1, - LongRunning: true, - }) - if err != nil { - return err - } - - // Insert the 
new data into the target table - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeName, byNameClause, safeSQLName(tmp)), - Priority: 1, - LongRunning: true, - }) - } - - return fmt.Errorf("incremental insert strategy %q not supported", strategy) -} - -func (c *connection) dropAndReplace(ctx context.Context, oldName, newName string, view bool) error { - var typ string - if view { - typ = "VIEW" - } else { - typ = "TABLE" - } - - existing, err := c.InformationSchema().Lookup(ctx, "", "", newName) - if err != nil { - if !errors.Is(err, drivers.ErrNotFound) { - return err - } - return c.Exec(ctx, &drivers.Statement{ - Query: fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName)), - Priority: 100, - LongRunning: true, - }) - } - - return c.WithConnection(ctx, 100, true, true, func(ctx, ensuredCtx context.Context, conn *dbsql.Conn) error { - // The newName may currently be occupied by a name of another type than oldName. 
- var existingTyp string - if existing.View { - existingTyp = "VIEW" - } else { - existingTyp = "TABLE" - } - - err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("DROP %s IF EXISTS %s", existingTyp, safeSQLName(newName))}) - if err != nil { - return err - } - - return c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName))}) - }) -} - -func (c *connection) detachAndRemoveFile(db, dbFile string) { - err := c.WithConnection(context.Background(), 100, false, true, func(ctx, ensuredCtx context.Context, conn *dbsql.Conn) error { - err := c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("DETACH %s", safeSQLName(db)), Priority: 100}) - removeDBFile(dbFile) - return err - }) - if err != nil { - c.logger.Debug("detach failed", zap.String("db", db), zap.Error(err)) - } -} - -func (c *connection) tableVersion(name string) (string, bool, error) { - pathToFile := filepath.Join(c.config.DBStoragePath, name, "version.txt") - contents, err := os.ReadFile(pathToFile) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - return "", false, nil - } - return "", false, err - } - return strings.TrimSpace(string(contents)), true, nil -} - -func (c *connection) updateVersion(name, version string) error { - pathToFile := filepath.Join(c.config.DBStoragePath, name, "version.txt") - file, err := os.Create(pathToFile) - if err != nil { - return err - } - defer file.Close() - - _, err = file.WriteString(version) - return err -} - -// convertToEnum converts a varchar col in table to an enum type. -// Generally to be used for low cardinality varchar columns although not enforced here. 
-func (c *connection) convertToEnum(ctx context.Context, table string, cols []string) error { - if len(cols) == 0 { - return fmt.Errorf("empty list") - } - if !c.config.ExtTableStorage { - return fmt.Errorf("`cast_to_enum` is only supported when `external_table_storage` is enabled") - } - c.logger.Debug("convert column to enum", zap.String("table", table), zap.Strings("col", cols)) - - oldVersion, exist, err := c.tableVersion(table) - if err != nil { - return err - } - - if !exist { - return fmt.Errorf("table %q does not exist", table) - } - - // scan main db and main schema - res, err := c.Execute(ctx, &drivers.Statement{ - Query: "SELECT current_database(), current_schema()", - Priority: 100, - }) - if err != nil { - return err - } - - var mainDB, mainSchema string - if res.Next() { - if err := res.Scan(&mainDB, &mainSchema); err != nil { - _ = res.Close() - return err - } - } - _ = res.Close() - - sourceDir := filepath.Join(c.config.DBStoragePath, table) - newVersion := fmt.Sprint(time.Now().UnixMilli()) - newDBFile := filepath.Join(sourceDir, fmt.Sprintf("%s.db", newVersion)) - newDB := dbName(table, newVersion) - var cleanupFunc func() - err = c.WithConnection(ctx, 100, true, false, func(ctx, ensuredCtx context.Context, _ *dbsql.Conn) error { - // attach new db - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH %s AS %s", safeSQLString(newDBFile), safeSQLName(newDB))}) - if err != nil { - removeDBFile(newDBFile) - return fmt.Errorf("create: attach %q db failed: %w", newDBFile, err) - } - - // switch to new db - // this is only required since duckdb has bugs around db scoped custom types - // TODO: remove this when https://github.com/duckdb/duckdb/pull/9622 is released - err = c.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("USE %s", safeSQLName(newDB))}) - if err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("failed switch db %q: %w", newDB, err) - } - defer func() { - // switch to original db, 
notice `db.schema` just doing USE db switches context to `main` schema in the current db if doing `USE main` - // we want to switch to original db and schema - err = c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("USE %s.%s", safeSQLName(mainDB), safeSQLName(mainSchema))}) - if err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - // This should NEVER happen - c.fatalInternalError(fmt.Errorf("failed to switch back from db %q: %w", mainDB, err)) - } - }() - - oldDB := dbName(table, oldVersion) - for _, col := range cols { - enum := fmt.Sprintf("%s_enum", col) - if err = c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("CREATE TYPE %s AS ENUM (SELECT DISTINCT %s FROM %s.default WHERE %s IS NOT NULL)", safeSQLName(enum), safeSQLName(col), safeSQLName(oldDB), safeSQLName(col))}); err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("failed to create enum %q: %w", enum, err) - } - } - - var selectQry string - for _, col := range cols { - enum := fmt.Sprintf("%s_enum", col) - selectQry += fmt.Sprintf("CAST(%s AS %s) AS %s,", safeSQLName(col), safeSQLName(enum), safeSQLName(col)) - } - selectQry += fmt.Sprintf("* EXCLUDE(%s)", strings.Join(cols, ",")) - - if err := c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE TABLE \"default\" AS SELECT %s FROM %s.default", selectQry, safeSQLName(oldDB))}); err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("failed to create table with enum values: %w", err) - } - - // recreate view to propagate schema changes - selectQry, err := c.generateSelectQuery(ctx, newDB) - if err != nil { - return err - } - - // NOTE :: db name need to be appened in the view query else query fails when switching to main db - if err := c.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("CREATE OR REPLACE VIEW %s.%s.%s AS %s", safeSQLName(mainDB), safeSQLName(mainSchema), 
safeSQLName(table), selectQry)}); err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("failed to create view %q: %w", table, err) - } - - // update version and detach old db - if err := c.updateVersion(table, newVersion); err != nil { - cleanupFunc = func() { c.detachAndRemoveFile(newDB, newDBFile) } - return fmt.Errorf("failed to update version: %w", err) - } - - cleanupFunc = func() { - c.detachAndRemoveFile(oldDB, filepath.Join(sourceDir, fmt.Sprintf("%s.db", oldVersion))) - } - return nil - }) - if cleanupFunc != nil { - cleanupFunc() - } - return err -} - -// duckDB raises Contents of view were altered: types don't match! error even when number of columns are same but sequence of column changes in underlying table. -// This causes temporary query failures till the model view is not updated to reflect the new column sequence. -// We ensure that view for external table storage is always generated using a stable order of columns of underlying table. -// Additionally we want to keep the same order as the underlying table locally so that we can show columns in the same order as they appear in source data. -// Using `AllowHostAccess` as proxy to check if we are running in local/cloud mode. 
-func (c *connection) generateSelectQuery(ctx context.Context, db string) (string, error) { - if c.config.AllowHostAccess { - return fmt.Sprintf("SELECT * FROM %s.default", safeSQLName(db)), nil - } - - rows, err := c.Execute(ctx, &drivers.Statement{ - Query: fmt.Sprintf(` - SELECT column_name AS name - FROM information_schema.columns - WHERE table_catalog = %s AND table_name = 'default' - ORDER BY name ASC`, safeSQLString(db)), - }) - if err != nil { - return "", err - } - defer rows.Close() - - cols := make([]string, 0) - var col string - for rows.Next() { - if err := rows.Scan(&col); err != nil { - return "", err - } - cols = append(cols, safeName(col)) - } - - return fmt.Sprintf("SELECT %s FROM %s.default", strings.Join(cols, ", "), safeSQLName(db)), nil -} - func RowsToSchema(r *sqlx.Rows) (*runtimev1.StructType, error) { if r == nil { return nil, nil @@ -960,17 +260,6 @@ func RowsToSchema(r *sqlx.Rows) (*runtimev1.StructType, error) { return &runtimev1.StructType{Fields: fields}, nil } -func dbName(name, version string) string { - return fmt.Sprintf("%s_%s", name, version) -} - -func removeDBFile(dbFile string) { - _ = os.Remove(dbFile) - // Hacky approach to remove the wal and tmp file - _ = os.Remove(dbFile + ".wal") - _ = os.RemoveAll(dbFile + ".tmp") -} - // safeSQLName returns a quoted SQL identifier. 
func safeSQLName(name string) string { return safeName(name) @@ -979,20 +268,3 @@ func safeSQLName(name string) string { func safeSQLString(name string) string { return drivers.DialectDuckDB.EscapeStringValue(name) } - -func copyFile(src, dst string) error { - srcFile, err := os.Open(src) - if err != nil { - return err - } - defer srcFile.Close() - - dstFile, err := os.Create(dst) - if err != nil { - return err - } - defer dstFile.Close() - - _, err = io.Copy(dstFile, srcFile) - return err -} diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 352d46e3673..71596d75a5c 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -311,52 +311,6 @@ func Test_connection_RenameToExistingTableOld(t *testing.T) { require.NoError(t, res.Close()) } -func Test_connection_CastEnum(t *testing.T) { - temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c := handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'bglr' AS city, 'IND' AS country", nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 2, 'mUm', 'IND'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, 'Perth', 'Aus'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, null, 'Aus'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, 'bglr', 
null", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.convertToEnum(context.Background(), "test", []string{"city", "country"}) - require.NoError(t, err) - - res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE column_name='city' AND table_name='test' AND table_catalog = 'view'"}) - require.NoError(t, err) - - var typ string - require.True(t, res.Next()) - require.NoError(t, res.Scan(&typ)) - require.Equal(t, "ENUM('bglr', 'Perth', 'mUm')", typ) - require.NoError(t, res.Close()) - - res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE column_name='country' AND table_name='test' AND table_catalog = 'view'"}) - require.NoError(t, err) - require.True(t, res.Next()) - require.NoError(t, res.Scan(&typ)) - require.Equal(t, "ENUM('Aus', 'IND')", typ) - require.NoError(t, res.Close()) -} - func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) diff --git a/runtime/drivers/duckdb/transporter_objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter_objectStore_to_duckDB.go index 24f86e9ffba..6b3c9ab140a 100644 --- a/runtime/drivers/duckdb/transporter_objectStore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_objectStore_to_duckDB.go @@ -112,8 +112,7 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps } // convert to enum if len(srcCfg.CastToENUM) > 0 { - conn, _ := t.to.(*connection) - return conn.convertToEnum(ctx, sinkCfg.Table, srcCfg.CastToENUM) + return fmt.Errorf("`cast_to_enum` is not implemented") } return nil } @@ -175,8 +174,7 @@ func (t *objectStoreToDuckDB) ingestDuckDBSQL(ctx context.Context, originalSQL s } // convert to enum if len(srcCfg.CastToENUM) > 0 { - conn, _ := t.to.(*connection) - return conn.convertToEnum(ctx, 
dbSink.Table, srcCfg.CastToENUM) + return fmt.Errorf("`cast_to_enum` is not implemented") } return nil } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 848d303999f..2129362de0f 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -53,10 +53,6 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. return conn, nil } -func (d driver) Drop(config map[string]any, logger *zap.Logger) error { - return nil -} - func (d driver) HasAnonymousSourceAccess(ctx context.Context, props map[string]any, logger *zap.Logger) (bool, error) { return false, fmt.Errorf("not implemented") } diff --git a/runtime/registry_test.go b/runtime/registry_test.go index 01189f06229..ad2d8ce8578 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/c2h5oh/datasize" + "github.com/marcboeker/go-duckdb" runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" @@ -478,7 +479,7 @@ func TestRuntime_DeleteInstance_DropCorrupted(t *testing.T) { err := rt.CreateInstance(context.Background(), inst) require.NoError(t, err) - dbpath := filepath.Join(rt.opts.DataDir, inst.ID, "duckdb", "main.db") + dbpath := filepath.Join(rt.opts.DataDir, inst.ID, "duckdb", "write", "main.db") // Put some data into it to create a .db file on disk olap, release, err := rt.OLAP(ctx, inst.ID, "") @@ -495,9 +496,9 @@ func TestRuntime_DeleteInstance_DropCorrupted(t *testing.T) { require.NoError(t, err) // Check we can't open it anymore - _, _, err = rt.OLAP(ctx, inst.ID, "") + conn, err := duckdb.NewConnector(dbpath, nil) require.Error(t, err) - require.FileExists(t, dbpath) + require.Nil(t, conn) // Delete instance and check it still drops the .db file for DuckDB err = rt.DeleteInstance(ctx, inst.ID) From 854a666452b266e3fa15c507ab3f09664eafe85d Mon Sep 17 00:00:00 2001 From: anshul 
khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 30 Oct 2024 15:13:19 +0530 Subject: [PATCH 02/64] use latest replicator version --- go.mod | 4 +--- go.sum | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e8cefe0ac81..f45d093d8f3 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 - github.com/rilldata/duckdb-replicator v0.0.0 + github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 @@ -431,5 +431,3 @@ replace github.com/apache/arrow/go/v14 v14.0.2 => github.com/rilldata/arrow/go/v // security vulnerability in dgrijalva/jwt-go replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible - -replace github.com/rilldata/duckdb-replicator => /home/anshul/workspace/duckdb-replicator diff --git a/go.sum b/go.sum index 68871bd60c1..98a5fa092cb 100644 --- a/go.sum +++ b/go.sum @@ -2109,6 +2109,8 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff h1:Tt67B9BQVkymWsosWgz7vyz8MXnlYzc8xbqtxYuPU1s= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= +github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106 h1:E59vOFheE3u6L9y3zzi2wxMOl+NDaijfPM+HaNhgUDI= +github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod 
h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= From c86ed09d9dc9a54bbd976b4ad076014a5a83924f Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 1 Nov 2024 17:57:51 +0530 Subject: [PATCH 03/64] transporter changes --- runtime/drivers/clickhouse/olap.go | 2 +- runtime/drivers/druid/olap.go | 2 +- runtime/drivers/duckdb/Hacks removed.md | 8 +++ runtime/drivers/duckdb/config.go | 10 ++-- runtime/drivers/duckdb/duckdb.go | 44 +++------------ runtime/drivers/duckdb/duckdb_test.go | 6 +-- runtime/drivers/duckdb/olap.go | 6 +-- .../transporter_duckDB_to_duckDB_test.go | 2 +- .../transporter_motherduck_to_duckDB.go | 54 +++++++++---------- .../transporter_mysql_to_duckDB_test.go | 2 +- .../transporter_postgres_to_duckDB_test.go | 2 +- .../transporter_sqlite_to_duckDB_test.go | 2 +- .../duckdb/transporter_sqlstore_to_duckDB.go | 52 +++++++++--------- runtime/drivers/olap.go | 2 +- runtime/drivers/pinot/olap.go | 2 +- runtime/metricsview/executor_pivot.go | 2 +- runtime/queries/column_timeseries.go | 2 +- runtime/queries/table_columns.go | 2 +- runtime/resolvers/glob.go | 2 +- 19 files changed, 88 insertions(+), 116 deletions(-) create mode 100644 runtime/drivers/duckdb/Hacks removed.md diff --git a/runtime/drivers/clickhouse/olap.go b/runtime/drivers/clickhouse/olap.go index 55fb2fbac66..c561a3cbf18 100644 --- a/runtime/drivers/clickhouse/olap.go +++ b/runtime/drivers/clickhouse/olap.go @@ -34,7 +34,7 @@ func (c *connection) Dialect() drivers.Dialect { return drivers.DialectClickHouse } -func (c *connection) WithConnection(ctx context.Context, priority int, longRunning, tx bool, fn drivers.WithConnectionFunc) error { +func (c *connection) WithConnection(ctx context.Context, priority int, longRunning bool, fn drivers.WithConnectionFunc) error { // Check not nested if connFromContext(ctx) != nil { panic("nested WithConnection") 
diff --git a/runtime/drivers/druid/olap.go b/runtime/drivers/druid/olap.go index e20a13008f6..1c6aa1605a7 100644 --- a/runtime/drivers/druid/olap.go +++ b/runtime/drivers/druid/olap.go @@ -54,7 +54,7 @@ func (c *connection) Dialect() drivers.Dialect { return drivers.DialectDruid } -func (c *connection) WithConnection(ctx context.Context, priority int, longRunning, tx bool, fn drivers.WithConnectionFunc) error { +func (c *connection) WithConnection(ctx context.Context, priority int, longRunning bool, fn drivers.WithConnectionFunc) error { return fmt.Errorf("druid: WithConnection not supported") } diff --git a/runtime/drivers/duckdb/Hacks removed.md b/runtime/drivers/duckdb/Hacks removed.md new file mode 100644 index 00000000000..64e81da0916 --- /dev/null +++ b/runtime/drivers/duckdb/Hacks removed.md @@ -0,0 +1,8 @@ +Hacks removed. + +1. Removing `.tmp` and `.wal` directory. No longer works since the main.db no longer ingest anything. + +Features removed. + +1. String to `enum` conversion. +2. No `tx=true` queries since writes now happen on a different handle. \ No newline at end of file diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index e5469471cd1..0472ec571ad 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -34,13 +34,13 @@ type config struct { // ExtTableStorage controls if every table is stored in a different db file. // Backup is only enabled when external table storage is enabled. ExtTableStorage bool `mapstructure:"external_table_storage"` - // CPU cores available for the DB + // CPU cores available for the read DB. If no CPUWrite is set and external_table_storage is enabled then this is split evenly between read and write. CPU int `mapstructure:"cpu"` - // MemoryLimitGB is the amount of memory available for the DB + // MemoryLimitGB is the amount of memory available for the read DB. 
If no MemoryLimitGBWrite is set and external_table_storage is enabled then this is split evenly between read and write. MemoryLimitGB int `mapstructure:"memory_limit_gb"` - // CPUWrite is CPU available for the DB when writing data + // CPUWrite is CPU available for the DB when writing data. CPUWrite int `mapstructure:"cpu_write"` - // MemoryLimitGBWrite is the amount of memory available for the DB when writing data + // MemoryLimitGBWrite is the amount of memory available for the DB when writing data. MemoryLimitGBWrite int `mapstructure:"memory_limit_gb_write"` // BootQueries is SQL to execute when initializing a new connection. It runs before any extensions are loaded or default settings are set. BootQueries string `mapstructure:"boot_queries"` @@ -101,6 +101,8 @@ func newConfig(cfgMap map[string]any) (*config, error) { } // Set memory limit + cfg.ReadSettings = make(map[string]string) + cfg.WriteSettings = make(map[string]string) if cfg.MemoryLimitGB > 0 { cfg.ReadSettings["max_memory"] = fmt.Sprintf("%dGB", cfg.MemoryLimitGB) } diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index b3e68299444..ec7e6fa27e4 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -290,9 +290,6 @@ type connection struct { // The OLAP interface additionally provides an option to limit the number of long-running queries, as designated by the caller. // longRunningSem enforces this limitation. longRunningSem *semaphore.Weighted - // The OLAP interface also provides an option to acquire a connection "transactionally". - // We've run into issues with DuckDB freezing up on transactions, so we just use a lock for now to serialize them (inconsistency in case of crashes is acceptable). - txMu sync.RWMutex // If DuckDB encounters a fatal error, all queries will fail until the DB has been reopened. 
// When dbReopen is set to true, dbCond will be used to stop acquisition of new connections, // and then when dbConnCount becomes 0, the DB will be reopened and dbReopen set to false again. @@ -423,13 +420,13 @@ func (c *connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter olap, _ := to.(*connection) if c == to { if from == to { - return NewDuckDBToDuckDB(olap, c.logger), true + return newDuckDBToDuckDB(c, c.logger), true } if from.Driver() == "motherduck" { - return NewMotherduckToDuckDB(from, olap, c.logger), true + return newMotherduckToDuckDB(from, olap, c.logger), true } if store, ok := from.AsSQLStore(); ok { - return NewSQLStoreToDuckDB(store, olap, c.logger), true + return newSQLStoreToDuckDB(store, olap, c.logger), true } if store, ok := from.AsWarehouse(); ok { return NewWarehouseToDuckDB(store, olap, c.logger), true @@ -568,7 +565,7 @@ func (c *connection) acquireMetaConn(ctx context.Context) (*sqlx.Conn, func() er } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx, false) + conn, releaseConn, err := c.acquireConn(ctx) if err != nil { c.metaSem.Release(1) return nil, nil, err @@ -586,7 +583,7 @@ func (c *connection) acquireMetaConn(ctx context.Context) (*sqlx.Conn, func() er // acquireOLAPConn gets a connection from the pool for OLAP queries (i.e. slow queries). // It returns a function that puts the connection back in the pool (if applicable). 
-func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunning, tx bool) (*sqlx.Conn, func() error, error) { +func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunning bool) (*sqlx.Conn, func() error, error) { // Try to get conn from context (means the call is wrapped in WithConnection) conn := connFromContext(ctx) if conn != nil { @@ -611,7 +608,7 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx, tx) + conn, releaseConn, err := c.acquireConn(ctx) if err != nil { c.olapSem.Release() if longRunning { @@ -635,8 +632,7 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn // acquireConn returns a DuckDB connection. It should only be used internally in acquireMetaConn and acquireOLAPConn. // acquireConn implements the connection tracking and DB reopening logic described in the struct definition for connection. -// TODO :: fix me ?? -func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func() error, error) { +func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, error) { c.dbCond.L.Lock() for { if c.dbErr != nil { @@ -652,33 +648,8 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func c.dbConnCount++ c.dbCond.L.Unlock() - // Poor man's transaction support – see struct docstring for details. - if tx { - c.txMu.Lock() - - // When tx is true, and the database is backed by a file, we reopen the database to ensure only one DuckDB connection is open. 
- // This avoids the following issue: https://github.com/duckdb/duckdb/issues/9150 - if c.config.DBFilePath != "" { - err := c.reopenDB(ctx, false) - if err != nil { - c.txMu.Unlock() - return nil, nil, err - } - } - } else { - c.txMu.RLock() - } - releaseTx := func() { - if tx { - c.txMu.Unlock() - } else { - c.txMu.RUnlock() - } - } - conn, releaseConn, err := c.db.AcquireReadConnection(ctx) if err != nil { - releaseTx() return nil, nil, err } @@ -693,7 +664,6 @@ func (c *connection) acquireConn(ctx context.Context, tx bool) (*sqlx.Conn, func c.connTimesMu.Lock() delete(c.connTimes, connID) c.connTimesMu.Unlock() - releaseTx() c.dbCond.L.Lock() c.dbConnCount-- if c.dbConnCount == 0 && c.dbReopen { diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index abfa1f84795..abc57f03c7c 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -138,7 +138,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { LEFT JOIN d ON b.b12 = d.d1 WHERE d.d2 IN (''); ` - err1 = olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err1 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { time.Sleep(500 * time.Millisecond) return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) @@ -151,7 +151,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { var err2 error go func() { qry := `SELECT * FROM a;` - err2 = olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err2 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { time.Sleep(1000 * time.Millisecond) return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) @@ -165,7 +165,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { go func() { time.Sleep(250 * time.Millisecond) qry := `SELECT * FROM a;` - err3 = 
olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err3 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) wg.Done() diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index 02516820d00..c65b9aa65fb 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -32,14 +32,14 @@ func (c *connection) Dialect() drivers.Dialect { return drivers.DialectDuckDB } -func (c *connection) WithConnection(ctx context.Context, priority int, longRunning, tx bool, fn drivers.WithConnectionFunc) error { +func (c *connection) WithConnection(ctx context.Context, priority int, longRunning bool, fn drivers.WithConnectionFunc) error { // Check not nested if connFromContext(ctx) != nil { panic("nested WithConnection") } // Acquire connection - conn, release, err := c.acquireOLAPConn(ctx, priority, longRunning, tx) + conn, release, err := c.acquireOLAPConn(ctx, priority, longRunning) if err != nil { return err } @@ -124,7 +124,7 @@ func (c *connection) Execute(ctx context.Context, stmt *drivers.Statement) (res }() // Acquire connection - conn, release, err := c.acquireOLAPConn(ctx, stmt.Priority, stmt.LongRunning, false) + conn, release, err := c.acquireOLAPConn(ctx, stmt.Priority, stmt.LongRunning) acquiredTime = time.Now() if err != nil { return nil, err diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 3962c46cfef..600d1a52af4 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -36,7 +36,7 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { olap, _ = to.AsOLAP("") - tr := NewDuckDBToDuckDB(olap, zap.NewNop()) + tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) // transfer 
once err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index 6a7cb2b54f0..b38d57a2d78 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -13,7 +13,7 @@ import ( ) type motherduckToDuckDB struct { - to drivers.OLAPStore + to *connection from drivers.Handle logger *zap.Logger } @@ -31,7 +31,7 @@ type mdConfigProps struct { var _ drivers.Transporter = &motherduckToDuckDB{} -func NewMotherduckToDuckDB(from drivers.Handle, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { +func newMotherduckToDuckDB(from drivers.Handle, to *connection, logger *zap.Logger) drivers.Transporter { return &motherduckToDuckDB{ to: to, from: from, @@ -75,46 +75,42 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) - // we first ingest data in a temporary table in the main db - // and then copy it to the final table to ensure that the final table is always created using CRUD APIs which takes care - // whether table goes in main db or in separate table specific db - tmpTable := fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table) - defer func() { - // ensure temporary table is cleaned - err := t.to.Exec(context.Background(), &drivers.Statement{ - Query: fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable), - Priority: 100, - LongRunning: true, - }) - if err != nil { - t.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) - } - }() - - err = t.to.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + return t.to.db.WithWriteConnection(ctx, func(wrappedCtx, 
ensuredCtx context.Context, conn *sql.Conn) error { // load motherduck extension; connect to motherduck service - err = t.to.Exec(ctx, &drivers.Statement{Query: "INSTALL 'motherduck'; LOAD 'motherduck';"}) + _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") if err != nil { return fmt.Errorf("failed to load motherduck extension %w", err) } - if err = t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("SET motherduck_token='%s'", token)}); err != nil { + if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { if !strings.Contains(err.Error(), "can only be set during initialization") { return fmt.Errorf("failed to set motherduck token %w", err) } } // ignore attach error since it might be already attached - _ = t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)}) + _, _ = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) userQuery := strings.TrimSpace(srcConfig.SQL) userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + + // we first ingest data in a temporary table in the main db + // and then copy it to the final table to ensure that the final table is always created using CRUD APIs + tmpTable := fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table) + defer func() { + // ensure temporary table is cleaned + _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) + if err != nil { + t.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + } + }() + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeName(tmpTable), userQuery) - return t.to.Exec(ctx, &drivers.Statement{Query: query}) - }) - if err != nil { - return err - } + _, err = conn.ExecContext(ctx, query) + if err != nil { + return err + } - // copy data from temp table to target table - return t.to.CreateTableAsSelect(ctx, sinkCfg.Table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), 
nil) + // copy data from temp table to target table + return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + }) } diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 7971d379508..5b5ed4db09d 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -111,7 +111,7 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { require.NoError(t, err) olap, _ := to.AsOLAP("") - tr := NewSQLStoreToDuckDB(sqlStore, olap, zap.NewNop()) + tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_data_types_table;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index a2d148ae4d1..7f501ef652b 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -76,7 +76,7 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { require.NoError(t, err) olap, _ := to.AsOLAP("") - tr := NewSQLStoreToDuckDB(sqlStore, olap, zap.NewNop()) + tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_datatypes;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index 
4aac3f3810f..c2a4de73653 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -34,7 +34,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { olap, _ := to.AsOLAP("") tr := &duckDBToDuckDB{ - to: olap, + to: to.(*connection), logger: zap.NewNop(), } query := fmt.Sprintf("SELECT * FROM sqlite_scan('%s', 't');", dbPath) diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index 862a4d131cf..2b7b54cfd17 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -14,16 +14,16 @@ import ( ) type sqlStoreToDuckDB struct { - to drivers.OLAPStore + to *connection from drivers.SQLStore logger *zap.Logger } var _ drivers.Transporter = &sqlStoreToDuckDB{} -func NewSQLStoreToDuckDB(from drivers.SQLStore, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { +func newSQLStoreToDuckDB(from drivers.SQLStore, c *connection, logger *zap.Logger) drivers.Transporter { return &sqlStoreToDuckDB{ - to: to, + to: c, from: from, logger: logger, } @@ -63,8 +63,7 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri s.logger.Debug("records to be ingested", zap.Uint64("rows", total)) } // we first ingest data in a temporary table in the main db - // and then copy it to the final table to ensure that the final table is always created using CRUD APIs which takes care - // whether table goes in main db or in separate table specific db + // and then copy it to the final table to ensure that the final table is always created using CRUD APIs tmpTable := fmt.Sprintf("__%s_tmp_sqlstore", table) // generate create table query qry, err := createTableQuery(schema, tmpTable) @@ -72,27 +71,23 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri return err } - // create table - err = 
s.to.Exec(ctx, &drivers.Statement{Query: qry, Priority: 1, LongRunning: true}) - if err != nil { - return err - } - - defer func() { - // ensure temporary table is cleaned - err := s.to.Exec(context.Background(), &drivers.Statement{ - Query: fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable), - Priority: 100, - LongRunning: true, - }) + err = s.to.db.WithWriteConnection(ctx, func(ctx, ensuredCtx context.Context, conn *sql.Conn) error { + // create table + _, err := conn.ExecContext(ctx, qry, nil) if err != nil { - s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + return err } - }() - err = s.to.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, conn *sql.Conn) error { + defer func() { + // ensure temporary table is cleaned + _, err = conn.ExecContext(ensuredCtx, fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) + if err != nil { + s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + } + }() + // append data using appender API - return rawConn(conn, func(conn driver.Conn) error { + err = rawConn(conn, func(conn driver.Conn) error { a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) if err != nil { return err @@ -133,13 +128,14 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } } }) - }) - if err != nil { - return err - } + if err != nil { + return err + } - // copy data from temp table to target table - return s.to.CreateTableAsSelect(ctx, table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + // copy data from temp table to target table + return s.to.CreateTableAsSelect(ctx, table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + }) + return err } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { diff --git a/runtime/drivers/olap.go b/runtime/drivers/olap.go index 6edefd13f13..0f27bf9e091 100644 --- a/runtime/drivers/olap.go +++ b/runtime/drivers/olap.go @@ -29,7 +29,7 @@ 
type WithConnectionFunc func(wrappedCtx context.Context, ensuredCtx context.Cont // NOTE crud APIs are not safe to be called with `WithConnection` type OLAPStore interface { Dialect() Dialect - WithConnection(ctx context.Context, priority int, longRunning, tx bool, fn WithConnectionFunc) error + WithConnection(ctx context.Context, priority int, longRunning bool, fn WithConnectionFunc) error Exec(ctx context.Context, stmt *Statement) error Execute(ctx context.Context, stmt *Statement) (*Result, error) InformationSchema() InformationSchema diff --git a/runtime/drivers/pinot/olap.go b/runtime/drivers/pinot/olap.go index f2542aeacf6..fd83df179c2 100644 --- a/runtime/drivers/pinot/olap.go +++ b/runtime/drivers/pinot/olap.go @@ -48,7 +48,7 @@ func (c *connection) Dialect() drivers.Dialect { return drivers.DialectPinot } -func (c *connection) WithConnection(ctx context.Context, priority int, longRunning, tx bool, fn drivers.WithConnectionFunc) error { +func (c *connection) WithConnection(ctx context.Context, priority int, longRunning bool, fn drivers.WithConnectionFunc) error { return fmt.Errorf("pinot: WithConnection not supported") } diff --git a/runtime/metricsview/executor_pivot.go b/runtime/metricsview/executor_pivot.go index 2ae4b6d0731..cdebbf0b943 100644 --- a/runtime/metricsview/executor_pivot.go +++ b/runtime/metricsview/executor_pivot.go @@ -146,7 +146,7 @@ func (e *Executor) executePivotExport(ctx context.Context, ast *AST, pivot *pivo } defer release() var path string - err = olap.WithConnection(ctx, e.priority, false, false, func(wrappedCtx context.Context, ensuredCtx context.Context, conn *sql.Conn) error { + err = olap.WithConnection(ctx, e.priority, false, func(wrappedCtx context.Context, ensuredCtx context.Context, conn *sql.Conn) error { // Stage the underlying data in a temporary table alias, err := randomString("t", 8) if err != nil { diff --git a/runtime/queries/column_timeseries.go b/runtime/queries/column_timeseries.go index 
7af2e3172ca..888cbf4709f 100644 --- a/runtime/queries/column_timeseries.go +++ b/runtime/queries/column_timeseries.go @@ -113,7 +113,7 @@ func (q *ColumnTimeseries) Resolve(ctx context.Context, rt *runtime.Runtime, ins timezone = q.TimeZone } - return olap.WithConnection(ctx, priority, false, false, func(ctx context.Context, ensuredCtx context.Context, _ *sql.Conn) error { + return olap.WithConnection(ctx, priority, false, func(ctx context.Context, ensuredCtx context.Context, _ *sql.Conn) error { tsAlias := tempName("_ts_") temporaryTableName := tempName("_timeseries_") diff --git a/runtime/queries/table_columns.go b/runtime/queries/table_columns.go index bce8ea59bd5..e6009b89577 100644 --- a/runtime/queries/table_columns.go +++ b/runtime/queries/table_columns.go @@ -70,7 +70,7 @@ func (q *TableColumns) Resolve(ctx context.Context, rt *runtime.Runtime, instanc switch olap.Dialect() { case drivers.DialectDuckDB: - return olap.WithConnection(ctx, priority, false, false, func(ctx context.Context, ensuredCtx context.Context, _ *sql.Conn) error { + return olap.WithConnection(ctx, priority, false, func(ctx context.Context, ensuredCtx context.Context, _ *sql.Conn) error { // views return duplicate column names, so we need to create a temporary table temporaryTableName := tempName("profile_columns_") err = olap.Exec(ctx, &drivers.Statement{ diff --git a/runtime/resolvers/glob.go b/runtime/resolvers/glob.go index a42efc7d4f2..5f2d79e0772 100644 --- a/runtime/resolvers/glob.go +++ b/runtime/resolvers/glob.go @@ -321,7 +321,7 @@ func (r *globResolver) transformResult(ctx context.Context, rows []map[string]an } defer os.Remove(jsonFile) - err = olap.WithConnection(ctx, 0, false, false, func(wrappedCtx context.Context, ensuredCtx context.Context, _ *databasesql.Conn) error { + err = olap.WithConnection(ctx, 0, false, func(wrappedCtx context.Context, ensuredCtx context.Context, _ *databasesql.Conn) error { // Load the JSON file into a temporary table err = olap.Exec(wrappedCtx, 
&drivers.Statement{ Query: fmt.Sprintf("CREATE TEMPORARY TABLE %s AS (SELECT * FROM read_ndjson_auto(%s))", olap.Dialect().EscapeIdentifier(r.tmpTableName), olap.Dialect().EscapeStringValue(jsonFile)), From c7fd73105e908760887e3f24742717a83d417526 Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 1 Nov 2024 19:57:27 +0530 Subject: [PATCH 04/64] fixed transporters --- go.mod | 4 +- go.sum | 4 +- runtime/drivers/duckdb/duckdb.go | 19 +++- .../duckdb/transporter_duckDB_to_duckDB.go | 88 ++++++--------- .../transporter_motherduck_to_duckDB.go | 72 ++++++------ .../duckdb/transporter_sqlstore_to_duckDB.go | 103 +++++++++--------- 6 files changed, 143 insertions(+), 147 deletions(-) diff --git a/go.mod b/go.mod index f45d093d8f3..0b18dcc989a 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 - github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106 + github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 @@ -431,3 +431,5 @@ replace github.com/apache/arrow/go/v14 v14.0.2 => github.com/rilldata/arrow/go/v // security vulnerability in dgrijalva/jwt-go replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible + +// replace github.com/rilldata/duckdb-replicator => /home/anshul/workspace/duckdb-replicator diff --git a/go.sum b/go.sum index 98a5fa092cb..bdbfa5fabc3 100644 --- a/go.sum +++ b/go.sum @@ -2109,8 +2109,8 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff 
h1:Tt67B9BQVkymWsosWgz7vyz8MXnlYzc8xbqtxYuPU1s= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= -github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106 h1:E59vOFheE3u6L9y3zzi2wxMOl+NDaijfPM+HaNhgUDI= -github.com/rilldata/duckdb-replicator v0.0.0-20241030092710-25561e98b106/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= +github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53 h1:qVo2Sdw0f3TD6vmU8lCXpsobMqpkUKKhiUp+zJYmSnc= +github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index ec7e6fa27e4..f46d86a5552 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -565,7 +565,7 @@ func (c *connection) acquireMetaConn(ctx context.Context) (*sqlx.Conn, func() er } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx) + rwConn, releaseConn, err := c.acquireConn(ctx, true) if err != nil { c.metaSem.Release(1) return nil, nil, err @@ -578,7 +578,7 @@ func (c *connection) acquireMetaConn(ctx context.Context) (*sqlx.Conn, func() er return err } - return conn, release, nil + return rwConn.Connx(), release, nil } // acquireOLAPConn gets a connection from the pool for OLAP queries (i.e. slow queries). 
@@ -608,7 +608,7 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx) + rwConn, releaseConn, err := c.acquireConn(ctx, true) if err != nil { c.olapSem.Release() if longRunning { @@ -627,12 +627,12 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn return err } - return conn, release, nil + return rwConn.Connx(), release, nil } // acquireConn returns a DuckDB connection. It should only be used internally in acquireMetaConn and acquireOLAPConn. // acquireConn implements the connection tracking and DB reopening logic described in the struct definition for connection. -func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, error) { +func (c *connection) acquireConn(ctx context.Context, read bool) (duckdbreplicator.Conn, func() error, error) { c.dbCond.L.Lock() for { if c.dbErr != nil { @@ -648,7 +648,14 @@ func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, c.dbConnCount++ c.dbCond.L.Unlock() - conn, releaseConn, err := c.db.AcquireReadConnection(ctx) + var conn duckdbreplicator.Conn + var releaseConn func() error + var err error + if read { + conn, releaseConn, err = c.db.AcquireReadConnection(ctx) + } else { + conn, releaseConn, err = c.db.AcquireWriteConnection(ctx) + } if err != nil { return nil, nil, err } diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index e70a7baaa79..7876203a309 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -2,7 +2,6 @@ package duckdb import ( "context" - "database/sql" "errors" "fmt" "net/url" @@ -16,13 +15,13 @@ import ( ) type duckDBToDuckDB struct { - to drivers.OLAPStore + to *connection logger *zap.Logger } -func NewDuckDBToDuckDB(to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { 
+func newDuckDBToDuckDB(c *connection, logger *zap.Logger) drivers.Transporter { return &duckDBToDuckDB{ - to: to, + to: c, logger: logger, } } @@ -116,63 +115,42 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s } func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { - var cleanupFunc func() - err := t.to.WithConnection(ctx, 1, true, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { - res, err := t.to.Execute(ctx, &drivers.Statement{Query: "SELECT current_database(),current_schema();"}) - if err != nil { - return err - } - - var localDB, localSchema string - for res.Next() { - if err := res.Scan(&localDB, &localSchema); err != nil { - _ = res.Close() - return err - } - } - _ = res.Close() - - // duckdb considers everything before first . as db name - // alternative solution can be to query `show databases()` before and after to identify db name - dbName, _, _ := strings.Cut(filepath.Base(srcProps.Database), ".") - if dbName == "main" { - return fmt.Errorf("`main` is a reserved db name") - } + rwConn, release, err := t.to.acquireConn(ctx, false) + if err != nil { + return err + } + defer release() + conn := rwConn.Connx() - if err = t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeSQLName(dbName))}); err != nil { - return fmt.Errorf("failed to attach db %q: %w", srcProps.Database, err) - } + var localDB, localSchema string + err = conn.QueryRowContext(ctx, "SELECT current_database(),current_schema()").Scan(&localDB, &localSchema) + if err != nil { + return err + } - cleanupFunc = func() { - // we don't want to run any detach db without `tx` lock - // tx=true will reopen duckdb handle(except in case of in-memory duckdb handle) which will detach the attached external db as well - err := t.to.WithConnection(context.Background(), 100, false, true, func(wrappedCtx, ensuredCtx 
context.Context, conn *sql.Conn) error { - return nil - }) - if err != nil { - t.logger.Debug("failed to detach db", zap.Error(err)) - } - } + // duckdb considers everything before first . as db name + // alternative solution can be to query `show databases()` before and after to identify db name + dbName, _, _ := strings.Cut(filepath.Base(srcProps.Database), ".") + if dbName == "main" { + return fmt.Errorf("`main` is a reserved db name") + } - if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("USE %s;", safeName(dbName))}); err != nil { - return err - } + if _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeSQLName(dbName))); err != nil { + return fmt.Errorf("failed to attach db %q: %w", srcProps.Database, err) + } - defer func() { // revert back to localdb - if err = t.to.Exec(ensuredCtx, &drivers.Statement{Query: fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))}); err != nil { - t.logger.Error("failed to switch to local database", zap.Error(err)) - } - }() + defer func() { + _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DETACH %s", safeSQLName(dbName))) + }() - userQuery := strings.TrimSpace(srcProps.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeName(sinkProps.Table), userQuery) - return t.to.Exec(ctx, &drivers.Statement{Query: query}) - }) - if cleanupFunc != nil { - cleanupFunc() + if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("USE %s;", safeName(dbName))}); err != nil { + return err } - return err + + userQuery := strings.TrimSpace(srcProps.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeName(sinkProps.Table), userQuery) + return t.to.Exec(ctx, 
&drivers.Statement{Query: query}) } // rewriteLocalPaths rewrites a DuckDB SQL statement such that relative paths become absolute paths relative to the basePath, diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index b38d57a2d78..0a31bf49725 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -2,7 +2,6 @@ package duckdb import ( "context" - "database/sql" "fmt" "os" "strings" @@ -75,42 +74,49 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) - return t.to.db.WithWriteConnection(ctx, func(wrappedCtx, ensuredCtx context.Context, conn *sql.Conn) error { - // load motherduck extension; connect to motherduck service - _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") - if err != nil { - return fmt.Errorf("failed to load motherduck extension %w", err) - } + rwConn, release, err := t.to.acquireConn(ctx, false) + if err != nil { + return err + } + defer release() + + conn := rwConn.Connx() - if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { - if !strings.Contains(err.Error(), "can only be set during initialization") { - return fmt.Errorf("failed to set motherduck token %w", err) - } + // load motherduck extension; connect to motherduck service + _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") + if err != nil { + return fmt.Errorf("failed to load motherduck extension %w", err) + } + + if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { + if !strings.Contains(err.Error(), "can only be set during initialization") { + return fmt.Errorf("failed to set motherduck token %w", err) } + } - // ignore attach error since it might be already attached - _, _ = conn.ExecContext(ctx, 
fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) - userQuery := strings.TrimSpace(srcConfig.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - - // we first ingest data in a temporary table in the main db - // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - tmpTable := fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table) - defer func() { - // ensure temporary table is cleaned - _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) - if err != nil { - t.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) - } - }() - - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeName(tmpTable), userQuery) - _, err = conn.ExecContext(ctx, query) + // ignore attach error since it might be already attached + _, _ = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) + userQuery := strings.TrimSpace(srcConfig.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + + // we first ingest data in a temporary table in the main db + // and then copy it to the final table to ensure that the final table is always created using CRUD APIs + tmpTable := fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table) + defer func() { + // ensure temporary table is cleaned + _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) if err != nil { - return err + t.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) } + }() + + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeName(tmpTable), userQuery) + _, err = conn.ExecContext(ctx, query) + if err != nil { + return err + } + + // copy data from temp table to target table + return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) - // copy data from temp table to target table - return 
t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) - }) } diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index 2b7b54cfd17..c3d0d54e5d5 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -2,7 +2,6 @@ package duckdb import ( "context" - "database/sql" "database/sql/driver" "errors" "fmt" @@ -71,71 +70,75 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri return err } - err = s.to.db.WithWriteConnection(ctx, func(ctx, ensuredCtx context.Context, conn *sql.Conn) error { - // create table - _, err := conn.ExecContext(ctx, qry, nil) + rwConn, release, err := s.to.acquireConn(ctx, false) + if err != nil { + return err + } + defer release() + conn := rwConn.Connx() + + // create table + _, err = conn.ExecContext(ctx, qry, nil) + if err != nil { + return err + } + + defer func() { + // ensure temporary table is cleaned + _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) if err != nil { - return err + s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) } + }() + // append data using appender API + err = rawConn(conn.Conn, func(conn driver.Conn) error { + a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) + if err != nil { + return err + } defer func() { - // ensure temporary table is cleaned - _, err = conn.ExecContext(ensuredCtx, fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) + err = a.Close() if err != nil { - s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + s.logger.Error("appender closed failed", zap.Error(err)) } }() - // append data using appender API - err = rawConn(conn, func(conn driver.Conn) error { - a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) - if err != nil { - return 
err - } - defer func() { - err = a.Close() - if err != nil { - s.logger.Error("appender closed failed", zap.Error(err)) - } - }() - - for num := 0; ; num++ { - select { - case <-ctx.Done(): - return ctx.Err() - default: - if num == 10000 { - num = 0 - if err := a.Flush(); err != nil { - return err - } - } - - row, err := iter.Next(ctx) - if err != nil { - if errors.Is(err, drivers.ErrIteratorDone) { - return nil - } - return err - } - if err := convert(row, schema); err != nil { // duckdb specific datatype conversion + for num := 0; ; num++ { + select { + case <-ctx.Done(): + return ctx.Err() + default: + if num == 10000 { + num = 0 + if err := a.Flush(); err != nil { return err } + } - if err := a.AppendRow(row...); err != nil { - return err + row, err := iter.Next(ctx) + if err != nil { + if errors.Is(err, drivers.ErrIteratorDone) { + return nil } + return err + } + if err := convert(row, schema); err != nil { // duckdb specific datatype conversion + return err + } + + if err := a.AppendRow(row...); err != nil { + return err } } - }) - if err != nil { - return err } - - // copy data from temp table to target table - return s.to.CreateTableAsSelect(ctx, table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) }) - return err + if err != nil { + return err + } + + // copy data from temp table to target table + return s.to.CreateTableAsSelect(ctx, table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { From c411c9f56cb1bd1ad8815dbacb8b7ff66c6c9965 Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:11:41 +0530 Subject: [PATCH 05/64] set backup directory --- cli/cmd/runtime/start.go | 21 +++++++++------ go.mod | 4 +-- go.sum | 4 +-- runtime/connections.go | 3 +++ runtime/drivers/duckdb/config.go | 23 +++++++--------- runtime/drivers/duckdb/config_test.go | 16 +++-------- runtime/drivers/duckdb/duckdb.go | 
24 ++++++++--------- runtime/drivers/duckdb/olap.go | 1 - .../duckdb/transporter_duckDB_to_duckDB.go | 27 ++++++++++++++++--- .../transporter_motherduck_to_duckDB.go | 2 +- runtime/runtime.go | 20 +++++++------- 11 files changed, 81 insertions(+), 64 deletions(-) diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index ea79ea64020..899278b4135 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -85,6 +85,9 @@ type Config struct { // DataDir stores data for all instances like duckdb file, temporary downloaded file etc. // The data for each instance is stored in a child directory named instance_id DataDir string `split_words:"true"` + // DuckDBBackupBucket is the name of the GCS bucket where DuckDB backups are stored + DuckDBBackupBucket string `split_words:"true"` + DuckDBBackupBucketCredentialsJSON string `split_words:"true"` // Sink type of activity client: noop (or empty string), kafka ActivitySinkType string `default:"" split_words:"true"` // Kafka brokers of an activity client's sink @@ -200,14 +203,16 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { // Init runtime opts := &runtime.Options{ - ConnectionCacheSize: conf.ConnectionCacheSize, - MetastoreConnector: "metastore", - QueryCacheSizeBytes: conf.QueryCacheSizeBytes, - SecurityEngineCacheSize: conf.SecurityEngineCacheSize, - ControllerLogBufferCapacity: conf.LogBufferCapacity, - ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, - AllowHostAccess: conf.AllowHostAccess, - DataDir: conf.DataDir, + ConnectionCacheSize: conf.ConnectionCacheSize, + MetastoreConnector: "metastore", + QueryCacheSizeBytes: conf.QueryCacheSizeBytes, + SecurityEngineCacheSize: conf.SecurityEngineCacheSize, + ControllerLogBufferCapacity: conf.LogBufferCapacity, + ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, + AllowHostAccess: conf.AllowHostAccess, + DataDir: conf.DataDir, + DuckDBBackupBucket: conf.DuckDBBackupBucket, + DuckDBBackupBucketCredentialsJSON: 
conf.DuckDBBackupBucketCredentialsJSON, SystemConnectors: []*runtimev1.Connector{ { Type: conf.MetastoreDriver, diff --git a/go.mod b/go.mod index 0b18dcc989a..56424b72fc2 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,6 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 - github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 @@ -344,6 +343,7 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/richardlehane/mscfb v1.0.4 // indirect github.com/richardlehane/msoleps v1.0.3 // indirect + github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef // indirect github.com/riverqueue/river/riverdriver v0.11.4 // indirect github.com/riverqueue/river/rivershared v0.11.4 // indirect github.com/rivo/uniseg v0.4.7 // indirect @@ -431,5 +431,3 @@ replace github.com/apache/arrow/go/v14 v14.0.2 => github.com/rilldata/arrow/go/v // security vulnerability in dgrijalva/jwt-go replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible - -// replace github.com/rilldata/duckdb-replicator => /home/anshul/workspace/duckdb-replicator diff --git a/go.sum b/go.sum index bdbfa5fabc3..b2a1dca14d4 100644 --- a/go.sum +++ b/go.sum @@ -2109,8 +2109,8 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff h1:Tt67B9BQVkymWsosWgz7vyz8MXnlYzc8xbqtxYuPU1s= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= -github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53 
h1:qVo2Sdw0f3TD6vmU8lCXpsobMqpkUKKhiUp+zJYmSnc= -github.com/rilldata/duckdb-replicator v0.0.0-20241101141720-e0a536d1aa53/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= +github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef h1:+FNaqFgSY+tNiUzyASBPk2OF7R/ePke8FKF/Wn1qQY4= +github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= diff --git a/runtime/connections.go b/runtime/connections.go index bfad2e9848a..169fd0281ce 100644 --- a/runtime/connections.go +++ b/runtime/connections.go @@ -259,6 +259,9 @@ func (r *Runtime) ConnectorConfig(ctx context.Context, instanceID, name string) case "motherduck": res.setPreset("token", vars["token"], false) res.setPreset("dsn", "", true) + case "duckdb": + res.setPreset("backup_bucket", r.opts.DuckDBBackupBucket, false) + res.setPreset("backup_bucket_credentials_json", r.opts.DuckDBBackupBucketCredentialsJSON, false) case "local_file": // The "local_file" connector needs to know the repo root. // TODO: This is an ugly hack. But how can we get rid of it? diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index 0472ec571ad..63122414e08 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -3,7 +3,6 @@ package duckdb import ( "fmt" "net/url" - "path/filepath" "strconv" "strings" @@ -48,14 +47,11 @@ type config struct { InitSQL string `mapstructure:"init_sql"` // LogQueries controls whether to log the raw SQL passed to OLAP.Execute. (Internal queries will not be logged.) LogQueries bool `mapstructure:"log_queries"` - // BackupBucket is gcs bucket to store db backups. Should be of the form `gs://bucket-name`. 
+ // BackupBucket is gcs bucket to store db backups. Should be of the form `bucket-name`. BackupBucket string `mapstructure:"backup_bucket"` // BackupBucketCredentialsJSON is the json credentials for the backup bucket. BackupBucketCredentialsJSON string `mapstructure:"backup_bucket_credentials_json"` - // DBFilePath is the path where the database is stored. It is inferred from the DSN (can't be provided by user). - DBFilePath string `mapstructure:"-"` - // DBStoragePath is the path where the database files are stored. It is inferred from the DSN (can't be provided by user). - DBStoragePath string `mapstructure:"-"` + ReadSettings map[string]string `mapstructure:"-"` WriteSettings map[string]string `mapstructure:"-"` } @@ -91,13 +87,9 @@ func newConfig(cfgMap map[string]any) (*config, error) { if cfg.Path != "" { // backward compatibility, cfg.Path takes precedence over cfg.DataDir uri.Path = cfg.Path cfg.ExtTableStorage = false - } else if cfg.DataDir != "" && uri.Path == "" { // if some path is set in DSN, honour that path and ignore DataDir - uri.Path = filepath.Join(cfg.DataDir, "main.db") + } else if uri.Path != "" { // if some path is set in DSN, honour that path and ignore DataDir + cfg.ExtTableStorage = false } - - // Infer DBFilePath - cfg.DBFilePath = uri.Path - cfg.DBStoragePath = filepath.Dir(cfg.DBFilePath) } // Set memory limit @@ -147,11 +139,16 @@ func newConfig(cfgMap map[string]any) (*config, error) { // useful for motherduck but safe to pass at initial connect if !qry.Has("custom_user_agent") { qry.Add("custom_user_agent", "rill") + cfg.WriteSettings["custom_user_agent"] = "rill" + } else { + cfg.WriteSettings["custom_user_agent"] = qry.Get("custom_user_agent") + } + for k, v := range cfg.ReadSettings { + qry.Add(k, v) } // Rebuild DuckDB DSN (which should be "path?key=val&...") // this is required since spaces and other special characters are valid in db file path but invalid and hence encoded in URL cfg.DSN = generateDSN(uri.Path, qry.Encode()) 
- return cfg, nil } diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 7cb2822e2c2..57236a03711 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -32,14 +32,12 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"data_dir": "path/to"}) require.NoError(t, err) - require.Equal(t, "path/to/main.db?custom_user_agent=rill", cfg.DSN) - require.Equal(t, "path/to/main.db", cfg.DBFilePath) + require.Subset(t, cfg.WriteSettings, map[string]string{"custom_user_agent": "rill"}) require.Equal(t, 2, cfg.PoolSize) cfg, err = newConfig(map[string]any{"data_dir": "path/to", "pool_size": 10}) require.NoError(t, err) - require.Equal(t, "path/to/main.db?custom_user_agent=rill", cfg.DSN) - require.Equal(t, "path/to/main.db", cfg.DBFilePath) + require.Subset(t, cfg.WriteSettings, map[string]string{"custom_user_agent": "rill"}) require.Equal(t, 10, cfg.PoolSize) cfg, err = newConfig(map[string]any{"data_dir": "path/to", "pool_size": "10"}) @@ -53,35 +51,29 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}) require.NoError(t, err) require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) - require.Equal(t, "path/to/duck.db", cfg.DBFilePath) require.Equal(t, 10, cfg.PoolSize) cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}) require.NoError(t, err) require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) require.Equal(t, 10, cfg.PoolSize) - require.Equal(t, "path/to/duck.db", cfg.DBFilePath) _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}) require.Error(t, err) - cfg, err = newConfig(map[string]any{"dsn": "duck.db"}) + _, err = newConfig(map[string]any{"dsn": "duck.db"}) require.NoError(t, err) - require.Equal(t, "duck.db", cfg.DBFilePath) - cfg, err = newConfig(map[string]any{"dsn": 
"duck.db?rill_pool_size=10"}) + _, err = newConfig(map[string]any{"dsn": "duck.db?rill_pool_size=10"}) require.NoError(t, err) - require.Equal(t, "duck.db", cfg.DBFilePath) cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}) require.NoError(t, err) - require.Equal(t, "duck.db", cfg.DBFilePath) require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=8GB&threads=2", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}) require.NoError(t, err) - require.Equal(t, "duck.db", cfg.DBFilePath) require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) require.Equal(t, 4, cfg.PoolSize) } diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index f46d86a5552..c75c013dc29 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -4,7 +4,7 @@ import ( "context" "errors" "fmt" - "io/fs" + "io" "log/slog" "net/url" "os" @@ -147,12 +147,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie } logger.Debug("opening duckdb handle", zap.String("dsn", cfg.DSN)) - if cfg.DBStoragePath != "" { - if err := os.MkdirAll(cfg.DBStoragePath, fs.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return nil, err - } - } - // See note in connection struct olapSemSize := cfg.PoolSize - 1 if olapSemSize < 1 { @@ -177,7 +171,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie } // register a callback to add a gauge on number of connections in use per db - attrs := []attribute.KeyValue{attribute.String("db", c.config.DBFilePath)} + attrs := []attribute.KeyValue{attribute.String("instance_id", instanceID)} c.registration = observability.Must(meter.RegisterCallback(func(ctx context.Context, observer metric.Observer) error { observer.ObserveInt64(connectionsInUse, int64(c.dbConnCount), metric.WithAttributes(attrs...)) return nil @@ -502,9 
+496,15 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { bootQueries = append(bootQueries, c.config.InitSQL) } - logger := slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ - AddSource: true, - })) + var logger *slog.Logger + + if c.config.LogQueries { + logger = slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ + AddSource: true, + })) + } else { + logger = slog.New(slog.NewTextHandler(io.Discard, nil)) + } // Create new DB var err error @@ -522,8 +522,8 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { } } c.db, err = duckdbreplicator.NewDB(ctx, c.instanceID, &duckdbreplicator.DBOptions{ - LocalPath: c.config.DBStoragePath, Clean: clean, + LocalPath: c.config.DataDir, BackupProvider: backup, InitQueries: bootQueries, StableSelect: !c.config.AllowHostAccess, diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index c65b9aa65fb..36e53e7ffeb 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -98,7 +98,6 @@ func (c *connection) Execute(ctx context.Context, stmt *drivers.Statement) (res queueLatency := acquiredTime.Sub(start).Milliseconds() attrs := []attribute.KeyValue{ - attribute.String("db", c.config.DBFilePath), attribute.Bool("cancelled", errors.Is(outErr, context.Canceled)), attribute.Bool("failed", outErr != nil), attribute.String("instance_id", c.instanceID), diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 7876203a309..4a609d6a3cb 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -143,14 +143,35 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DETACH %s", safeSQLName(dbName))) }() - if err := t.to.Exec(ctx, &drivers.Statement{Query: fmt.Sprintf("USE %s;", 
safeName(dbName))}); err != nil { + if _, err := conn.ExecContext(ctx, fmt.Sprintf("USE %s;", safeName(dbName))); err != nil { return err } + defer func() { + _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))) + if err != nil { + t.logger.Error("failed to switch back to original database", zap.Error(err)) + } + }() + userQuery := strings.TrimSpace(srcProps.SQL) userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeName(sinkProps.Table), userQuery) - return t.to.Exec(ctx, &drivers.Statement{Query: query}) + safeTempTable := fmt.Sprintf("%s_tmp_", sinkProps.Table) + query := fmt.Sprintf("CREATE OR REPLACE TEMPORARY TABLE %s AS (%s\n);", safeTempTable, userQuery) + _, err = conn.ExecContext(ctx, query) + if err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + + defer func() { + _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE %s;", safeTempTable)) + if err != nil { + t.logger.Error("failed to drop temporary table", zap.Error(err)) + } + }() + + // create permanent table from temp table using crud API + return rwConn.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), nil) } // rewriteLocalPaths rewrites a DuckDB SQL statement such that relative paths become absolute paths relative to the basePath, diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index 0a31bf49725..b5eff0a9ef2 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -117,6 +117,6 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m } // copy data from temp table to target table - return t.to.db.CreateTableAsSelect(ctx, 
sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + return rwConn.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) } diff --git a/runtime/runtime.go b/runtime/runtime.go index 80b1356af26..bf2601b0df2 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -20,15 +20,17 @@ import ( var tracer = otel.Tracer("github.com/rilldata/rill/runtime") type Options struct { - MetastoreConnector string - SystemConnectors []*runtimev1.Connector - ConnectionCacheSize int - QueryCacheSizeBytes int64 - SecurityEngineCacheSize int - ControllerLogBufferCapacity int - ControllerLogBufferSizeBytes int64 - AllowHostAccess bool - DataDir string + MetastoreConnector string + SystemConnectors []*runtimev1.Connector + ConnectionCacheSize int + QueryCacheSizeBytes int64 + SecurityEngineCacheSize int + ControllerLogBufferCapacity int + ControllerLogBufferSizeBytes int64 + AllowHostAccess bool + DataDir string + DuckDBBackupBucket string + DuckDBBackupBucketCredentialsJSON string } type Runtime struct { From 9330a7a8f0fe42ba0bdb387310eabcc5cb706b5c Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:57:55 +0530 Subject: [PATCH 06/64] test fixes --- go.mod | 2 +- go.sum | 2 + runtime/drivers/duckdb/config_test.go | 20 +--- runtime/drivers/duckdb/duckdb.go | 1 - runtime/drivers/duckdb/olap_crud_test.go | 92 +++---------------- .../duckdb/transporter_sqlstore_to_duckDB.go | 2 +- 6 files changed, 19 insertions(+), 100 deletions(-) diff --git a/go.mod b/go.mod index 56424b72fc2..58f6895dbbb 100644 --- a/go.mod +++ b/go.mod @@ -75,6 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 + github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 
github.com/riverqueue/river/rivertype v0.11.4 @@ -343,7 +344,6 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/richardlehane/mscfb v1.0.4 // indirect github.com/richardlehane/msoleps v1.0.3 // indirect - github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef // indirect github.com/riverqueue/river/riverdriver v0.11.4 // indirect github.com/riverqueue/river/rivershared v0.11.4 // indirect github.com/rivo/uniseg v0.4.7 // indirect diff --git a/go.sum b/go.sum index b2a1dca14d4..347e600aba0 100644 --- a/go.sum +++ b/go.sum @@ -2111,6 +2111,8 @@ github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff h1:Tt67B9BQ github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef h1:+FNaqFgSY+tNiUzyASBPk2OF7R/ePke8FKF/Wn1qQY4= github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= +github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2 h1:PPf/zD3/0+La8VUn+HiP8KpiUAWSBlt9QkLy9fjl5Fc= +github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 57236a03711..14df1399307 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -88,7 +88,7 @@ func Test_specialCharInPath(t *testing.T) { conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", 
"external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) config := conn.(*connection).config - require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) + require.Equal(t, dbFile+"?custom_user_agent=rill&max_memory=4GB&threads=1", config.DSN) require.Equal(t, 2, config.PoolSize) olap, ok := conn.AsOLAP("") @@ -99,21 +99,3 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, res.Close()) require.NoError(t, conn.Close()) } - -func TestOverrides(t *testing.T) { - cfgMap := map[string]any{"path": "duck.db", "memory_limit_gb": "4", "cpu": "2", "max_memory_gb_override": "2", "threads_override": "10", "external_table_storage": false} - handle, err := Driver{}.Open("default", cfgMap, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT value FROM duckdb_settings() WHERE name='max_memory'"}) - require.NoError(t, err) - require.True(t, res.Next()) - var mem string - require.NoError(t, res.Scan(&mem)) - require.NoError(t, res.Close()) - - require.Equal(t, "1.8 GiB", mem) -} diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index c75c013dc29..b0275cff354 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -526,7 +526,6 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { LocalPath: c.config.DataDir, BackupProvider: backup, InitQueries: bootQueries, - StableSelect: !c.config.AllowHostAccess, Logger: logger, }) } else { diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 71596d75a5c..fd98c15cb60 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -26,8 +26,8 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { 
normalConn.AsOLAP("default") require.NoError(t, normalConn.Migrate(context.Background())) - dbPath = filepath.Join(temp, "default", "view.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + dbPath = filepath.Join(temp, "default") + handle, err = Driver{}.Open("default", map[string]any{"data_dir": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -86,7 +86,7 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { require.NoError(t, res.Scan(&count)) require.Equal(t, 1, count) require.NoError(t, res.Close()) - contents, err := os.ReadFile(filepath.Join(temp, "default", tt.name, "version.txt")) + contents, err := os.ReadFile(filepath.Join(temp, "default", "read", tt.name, "version.txt")) require.NoError(t, err) version, err := strconv.ParseInt(string(contents), 10, 64) require.NoError(t, err) @@ -99,8 +99,7 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -112,7 +111,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select 'hello'", nil) require.NoError(t, err) - dirs, err := os.ReadDir(filepath.Join(temp, "test-select-multiple")) + dirs, err := 
os.ReadDir(filepath.Join(temp, "read", "test-select-multiple")) require.NoError(t, err) names := make([]string, 0) for _, dir := range dirs { @@ -122,7 +121,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select fail query", nil) require.Error(t, err) - dirs, err = os.ReadDir(filepath.Join(temp, "test-select-multiple")) + dirs, err = os.ReadDir(filepath.Join(temp, "read", "test-select-multiple")) require.NoError(t, err) newNames := make([]string, 0) for _, dir := range dirs { @@ -144,8 +143,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -158,7 +156,7 @@ func Test_connection_DropTable(t *testing.T) { err = c.DropTable(context.Background(), "test-drop", true) require.NoError(t, err) - _, err = os.ReadDir(filepath.Join(temp, "test-drop")) + _, err = os.ReadDir(filepath.Join(temp, "read", "test-drop")) require.True(t, os.IsNotExist(err)) res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) FROM information_schema.tables WHERE table_name='test-drop' AND table_type='VIEW'"}) @@ -173,8 +171,7 @@ func Test_connection_DropTable(t *testing.T) { func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + 
handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -200,10 +197,8 @@ func Test_connection_InsertTableAsSelect(t *testing.T) { func Test_connection_RenameTable(t *testing.T) { temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -226,10 +221,8 @@ func Test_connection_RenameTable(t *testing.T) { func Test_connection_RenameToExistingTable(t *testing.T) { temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - dbPath := filepath.Join(temp, "default", "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -255,10 +248,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { func Test_connection_AddTableColumn(t *testing.T) { temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), 
zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -267,7 +257,7 @@ func Test_connection_AddTableColumn(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test alter column", false, "select 1 as data", nil) require.NoError(t, err) - res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column' AND table_catalog = 'view'"}) + res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column'"}) require.NoError(t, err) require.True(t, res.Next()) var typ string @@ -278,7 +268,7 @@ func Test_connection_AddTableColumn(t *testing.T) { err = c.AlterTableColumn(context.Background(), "test alter column", "data", "VARCHAR") require.NoError(t, err) - res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column' AND table_catalog = 'view'"}) + res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column'"}) require.NoError(t, err) require.True(t, res.Next()) require.NoError(t, res.Scan(&typ)) @@ -346,60 +336,6 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { require.NoError(t, err) } -func Test_connection_ChangingOrder(t *testing.T) { - temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - - // on cloud - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c := handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - // create table - err = 
c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'India' AS 'coun\"try'", nil) - require.NoError(t, err) - - // create view - err = c.CreateTableAsSelect(context.Background(), "test_view", true, "SELECT * FROM test", nil) - require.NoError(t, err) - verifyCount(t, c, "test_view", 1) - - // change sequence - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 'India' AS 'coun\"try', 1 AS id", nil) - require.NoError(t, err) - // view should still work - verifyCount(t, c, "test_view", 1) - - // on local - dbPath = filepath.Join(temp, "local.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c = handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - // create table - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'India' AS 'coun\"try'", nil) - require.NoError(t, err) - - // create view - err = c.CreateTableAsSelect(context.Background(), "test_view", true, "SELECT * FROM test", nil) - require.NoError(t, err) - verifyCount(t, c, "test_view", 1) - - // change sequence - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 'India' AS 'coun\"try', 1 AS id", nil) - require.NoError(t, err) - - // view no longer works - _, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test_view"}) - require.Error(t, err) - require.Contains(t, err.Error(), "Binder Error: Contents of view were altered: types don't match!") -} - func verifyCount(t *testing.T, c *connection, table string, expected int) { res, err := c.Execute(context.Background(), &drivers.Statement{Query: fmt.Sprintf("SELECT count(*) from %s", table)}) require.NoError(t, err) diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go 
b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index c3d0d54e5d5..0920703c4ac 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -138,7 +138,7 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } // copy data from temp table to target table - return s.to.CreateTableAsSelect(ctx, table, false, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { From a1d4a1006fab5a9f2b6b0764b3b5045275540c21 Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:26:54 +0530 Subject: [PATCH 07/64] lint fixes --- go.mod | 2 +- go.sum | 6 ++-- runtime/drivers/duckdb/config.go | 2 +- runtime/drivers/duckdb/duckdb.go | 3 +- .../duckdb/transporter_duckDB_to_duckDB.go | 28 ++++++++++++------- .../transporter_duckDB_to_duckDB_test.go | 7 +++-- .../transporter_motherduck_to_duckDB.go | 15 +++++----- .../duckdb/transporter_sqlstore_to_duckDB.go | 16 ++++++----- 8 files changed, 44 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 58f6895dbbb..b47c17b5fe9 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 - github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2 + github.com/rilldata/duckdb-replicator v0.0.0-20241105044430-27fe98427e31 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 diff --git a/go.sum b/go.sum index 347e600aba0..ca17c8f630e 100644 --- a/go.sum +++ b/go.sum @@ -2109,10 +2109,8 @@ github.com/richardlehane/msoleps v1.0.3 
h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff h1:Tt67B9BQVkymWsosWgz7vyz8MXnlYzc8xbqtxYuPU1s= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= -github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef h1:+FNaqFgSY+tNiUzyASBPk2OF7R/ePke8FKF/Wn1qQY4= -github.com/rilldata/duckdb-replicator v0.0.0-20241104113724-8735428481ef/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= -github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2 h1:PPf/zD3/0+La8VUn+HiP8KpiUAWSBlt9QkLy9fjl5Fc= -github.com/rilldata/duckdb-replicator v0.0.0-20241104142154-c29e5da9b7e2/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= +github.com/rilldata/duckdb-replicator v0.0.0-20241105044430-27fe98427e31 h1:xU7v5LJ++FGySr7s8P+J5ZtjLIBYcnIZg1fHcTb+/W0= +github.com/rilldata/duckdb-replicator v0.0.0-20241105044430-27fe98427e31/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index 63122414e08..57285c36541 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -31,7 +31,7 @@ type config struct { // ErrorOnIncompatibleVersion controls whether to return error or delete DBFile created with older duckdb version. ErrorOnIncompatibleVersion bool `mapstructure:"error_on_incompatible_version"` // ExtTableStorage controls if every table is stored in a different db file. - // Backup is only enabled when external table storage is enabled. 
+ // Backup is only and automatically enabled when external table storage is enabled. ExtTableStorage bool `mapstructure:"external_table_storage"` // CPU cores available for the read DB. If no CPUWrite is set and external_table_storage is enabled then this is split evenly between read and write. CPU int `mapstructure:"cpu"` diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index b0275cff354..38f2f3bb964 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -544,8 +544,7 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { if err != nil { return err } - defer release() - return nil + return release() } // acquireMetaConn gets a connection from the pool for "meta" queries like catalog and information schema (i.e. fast queries). diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 4a609d6a3cb..1232b24f72c 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -119,7 +119,9 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d if err != nil { return err } - defer release() + defer func() { + _ = release() + }() conn := rwConn.Connx() var localDB, localSchema string @@ -156,20 +158,26 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d userQuery := strings.TrimSpace(srcProps.SQL) userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - safeTempTable := fmt.Sprintf("%s_tmp_", sinkProps.Table) - query := fmt.Sprintf("CREATE OR REPLACE TEMPORARY TABLE %s AS (%s\n);", safeTempTable, userQuery) - _, err = conn.ExecContext(ctx, query) - if err != nil { - return fmt.Errorf("failed to create table: %w", err) - } - + safeTempTable := safeName(fmt.Sprintf("%s_tmp_", sinkProps.Table)) defer func() { - _, err = conn.ExecContext(context.Background(), 
fmt.Sprintf("DROP TABLE %s;", safeTempTable)) + // ensure temporary table is cleaned + _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTempTable)) if err != nil { - t.logger.Error("failed to drop temporary table", zap.Error(err)) + t.logger.Error("failed to drop temp table", zap.String("table", safeTempTable), zap.Error(err)) } }() + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) + _, err = conn.ExecContext(ctx, query) + // first revert to original database + if _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); err != nil { + t.logger.Error("failed to switch back to original database", zap.Error(err)) + } + // check for the original error + if err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + // create permanent table from temp table using crud API return rwConn.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), nil) } diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 600d1a52af4..0a6d09128ff 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -34,15 +34,16 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) - olap, _ = to.AsOLAP("") - tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) // transfer once err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) require.NoError(t, err) - rows, err := 
olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) + olap, ok = to.AsOLAP("") + require.True(t, ok) + + rows, err := to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) require.NoError(t, err) var count int diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index b5eff0a9ef2..54c0b85e55e 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -78,7 +78,9 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m if err != nil { return err } - defer release() + defer func() { + _ = release() + }() conn := rwConn.Connx() @@ -101,22 +103,21 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m // we first ingest data in a temporary table in the main db // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - tmpTable := fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table) + safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table)) defer func() { // ensure temporary table is cleaned - _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) + _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) if err != nil { - t.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + t.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), zap.Error(err)) } }() - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeName(tmpTable), userQuery) + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeTmpTable, userQuery) _, err = conn.ExecContext(ctx, query) if err != nil { return err } // copy data from temp table to target table - return 
rwConn.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) - + return rwConn.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) } diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index 0920703c4ac..4332eeaa464 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -63,9 +63,9 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } // we first ingest data in a temporary table in the main db // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - tmpTable := fmt.Sprintf("__%s_tmp_sqlstore", table) + safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_sqlstore", table)) // generate create table query - qry, err := createTableQuery(schema, tmpTable) + qry, err := createTableQuery(schema, safeTmpTable) if err != nil { return err } @@ -74,7 +74,9 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri if err != nil { return err } - defer release() + defer func() { + _ = release() + }() conn := rwConn.Connx() // create table @@ -85,15 +87,15 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri defer func() { // ensure temporary table is cleaned - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", tmpTable)) + _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) if err != nil { - s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) + s.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), zap.Error(err)) } }() // append data using appender API err = rawConn(conn.Conn, func(conn driver.Conn) error { - a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) + a, err 
:= duckdb.NewAppenderFromConn(conn, "", safeTmpTable) if err != nil { return err } @@ -138,7 +140,7 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } // copy data from temp table to target table - return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", tmpTable), nil) + return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { From b27121cf05b21bde8972e5a0be36d7b78643795e Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:48:38 +0530 Subject: [PATCH 08/64] postgres tests fix --- .../drivers/duckdb/transporter_sqlstore_to_duckDB.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index 4332eeaa464..a9eedfd75f7 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -63,9 +63,9 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } // we first ingest data in a temporary table in the main db // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_sqlstore", table)) + tmpTable := fmt.Sprintf("__%s_tmp_sqlstore", table) // generate create table query - qry, err := createTableQuery(schema, safeTmpTable) + qry, err := createTableQuery(schema, tmpTable) if err != nil { return err } @@ -87,15 +87,15 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri defer func() { // ensure temporary table is cleaned - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) + _, err = 
conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeName(tmpTable))) if err != nil { - s.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), zap.Error(err)) + s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) } }() // append data using appender API err = rawConn(conn.Conn, func(conn driver.Conn) error { - a, err := duckdb.NewAppenderFromConn(conn, "", safeTmpTable) + a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) if err != nil { return err } @@ -140,7 +140,7 @@ func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter dri } // copy data from temp table to target table - return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) + return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", safeName(tmpTable)), nil) } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { From 089d4176148391964aa2b36dc3a853297594234b Mon Sep 17 00:00:00 2001 From: anshul khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 5 Nov 2024 18:27:59 +0530 Subject: [PATCH 09/64] self review --- go.mod | 4 +- go.sum | 4 +- runtime/drivers/duckdb/Hacks removed.md | 8 ---- runtime/drivers/duckdb/duckdb.go | 39 +++++++++---------- runtime/drivers/duckdb/olap.go | 3 +- .../duckdb/transporter_duckDB_to_duckDB.go | 4 +- 6 files changed, 25 insertions(+), 37 deletions(-) delete mode 100644 runtime/drivers/duckdb/Hacks removed.md diff --git a/go.mod b/go.mod index b47c17b5fe9..b2693d3e745 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/rilldata/rill -go 1.23.2 +go 1.22.0 require ( cloud.google.com/go/bigquery v1.62.0 @@ -75,7 +75,7 @@ require ( github.com/pingcap/tidb/pkg/parser v0.0.0-20231124053542-069631e2ecfe github.com/prometheus/client_golang v1.19.1 github.com/redis/go-redis/v9 v9.0.2 - github.com/rilldata/duckdb-replicator 
v0.0.0-20241105044430-27fe98427e31 + github.com/rilldata/duckdb-replicator v0.0.0-20241105125325-4015bc69dfa9 github.com/riverqueue/river v0.11.4 github.com/riverqueue/river/riverdriver/riverpgxv5 v0.11.4 github.com/riverqueue/river/rivertype v0.11.4 diff --git a/go.sum b/go.sum index ca17c8f630e..611df10cf42 100644 --- a/go.sum +++ b/go.sum @@ -2109,8 +2109,8 @@ github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff h1:Tt67B9BQVkymWsosWgz7vyz8MXnlYzc8xbqtxYuPU1s= github.com/rilldata/arrow/go/v14 v14.0.0-20240624035703-e234e04219ff/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= -github.com/rilldata/duckdb-replicator v0.0.0-20241105044430-27fe98427e31 h1:xU7v5LJ++FGySr7s8P+J5ZtjLIBYcnIZg1fHcTb+/W0= -github.com/rilldata/duckdb-replicator v0.0.0-20241105044430-27fe98427e31/go.mod h1:mRKMcdCdKJOrtHKMYXVpeEklqrP8AakXIet4RgqbpGI= +github.com/rilldata/duckdb-replicator v0.0.0-20241105125325-4015bc69dfa9 h1:wFQnKx1rjGUEKJnXmVb+DrU3NQ4WJDBLblQlzhxcYA8= +github.com/rilldata/duckdb-replicator v0.0.0-20241105125325-4015bc69dfa9/go.mod h1:bYAIEgpvXrIsKxIwSUMv1Ei5/mN6mOa2dD01gr5kBsw= github.com/riverqueue/river v0.11.4 h1:NMRsODhRgFztf080RMCjI377jldLXsx41E2r7+c0lPE= github.com/riverqueue/river v0.11.4/go.mod h1:HvgBkqon7lYKm9Su4lVOnn1qx8Q4FnSMJjf5auVial4= github.com/riverqueue/river/riverdriver v0.11.4 h1:kBg68vfTnRuSwsgcZ7UbKC4ocZ+KSCGnuZw/GwMMMP4= diff --git a/runtime/drivers/duckdb/Hacks removed.md b/runtime/drivers/duckdb/Hacks removed.md deleted file mode 100644 index 64e81da0916..00000000000 --- a/runtime/drivers/duckdb/Hacks removed.md +++ /dev/null @@ -1,8 +0,0 @@ -Hacks removed. - -1. Removing `.tmp` and `.wal` directory. No longer works since the main.db no longer ingest anything. - -Features removed. - -1. String to `enum` conversion. -2. 
No `tx=true` queries since writes now happen on a different handle. \ No newline at end of file diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 38f2f3bb964..f154e434a1c 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io" "log/slog" "net/url" "os" @@ -180,7 +179,13 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie // Open the DB err = c.reopenDB(ctx, false) if err != nil { - if c.config.ErrorOnIncompatibleVersion || !strings.Contains(err.Error(), "created with an older, incompatible version of Rill") { + // Check for another process currently accessing the DB + if strings.Contains(err.Error(), "Could not set lock on file") { + return nil, fmt.Errorf("failed to open database (is Rill already running?): %w", err) + } + + // Check for using incompatible database files + if c.config.ErrorOnIncompatibleVersion || !strings.Contains(err.Error(), "Trying to read a database file with version number") { return nil, err } @@ -496,17 +501,10 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { bootQueries = append(bootQueries, c.config.InitSQL) } - var logger *slog.Logger - - if c.config.LogQueries { - logger = slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ - AddSource: true, - })) - } else { - logger = slog.New(slog.NewTextHandler(io.Discard, nil)) - } - // Create new DB + logger := slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ + AddSource: true, + })) var err error if c.config.ExtTableStorage { var backup *duckdbreplicator.BackupProvider @@ -536,15 +534,7 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { Logger: logger, }) } - if err != nil { - return err - } - - _, release, err := c.db.AcquireReadConnection(context.Background()) - if err != nil { - return err - } - return release() + return err } // acquireMetaConn gets a 
connection from the pool for "meta" queries like catalog and information schema (i.e. fast queries). @@ -750,3 +740,10 @@ func (c *connection) periodicallyCheckConnDurations(d time.Duration) { } } } + +// fatalInternalError logs a critical internal error and exits the process. +// This is used for errors that are completely unrecoverable. +// Ideally, we should refactor to cleanup/reopen/rebuild so that we don't need this. +func (c *connection) fatalInternalError(err error) { + c.logger.Fatal("duckdb: critical internal error", zap.Error(err)) +} diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index 36e53e7ffeb..88b213bc691 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -175,9 +175,8 @@ func (c *connection) Execute(ctx context.Context, stmt *drivers.Statement) (res return res, nil } -// TODO :: implement estimate size func (c *connection) estimateSize() int64 { - return 0 + return c.db.Size() } // AddTableColumn implements drivers.OLAPStore. 
diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 1232b24f72c..703e43e9a84 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -170,8 +170,8 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) _, err = conn.ExecContext(ctx, query) // first revert to original database - if _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); err != nil { - t.logger.Error("failed to switch back to original database", zap.Error(err)) + if _, switchErr := conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); switchErr != nil { + t.to.fatalInternalError(fmt.Errorf("failed to switch back to original database: %w", err)) } // check for the original error if err != nil { From 279d207064cfad8b56b8db300d76b07c1f1ed48f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Fri, 8 Nov 2024 16:18:45 +0000 Subject: [PATCH 10/64] Import --- runtime/pkg/duckdbreplicator/README.md | 18 + runtime/pkg/duckdbreplicator/backup.go | 335 +++++ runtime/pkg/duckdbreplicator/conn.go | 127 ++ runtime/pkg/duckdbreplicator/db.go | 1202 +++++++++++++++++ runtime/pkg/duckdbreplicator/db_test.go | 77 ++ runtime/pkg/duckdbreplicator/examples/main.go | 74 + runtime/pkg/duckdbreplicator/gcs.go | 47 + runtime/pkg/duckdbreplicator/go.mod | 63 + runtime/pkg/duckdbreplicator/go.sum | 317 +++++ runtime/pkg/duckdbreplicator/io.go | 75 + runtime/pkg/duckdbreplicator/io_test.go | 78 ++ runtime/pkg/duckdbreplicator/singledb.go | 318 +++++ runtime/pkg/duckdbreplicator/singledb_test.go | 101 ++ runtime/pkg/duckdbreplicator/sqlutil.go | 17 + 14 files changed, 
2849 insertions(+) create mode 100644 runtime/pkg/duckdbreplicator/README.md create mode 100644 runtime/pkg/duckdbreplicator/backup.go create mode 100644 runtime/pkg/duckdbreplicator/conn.go create mode 100644 runtime/pkg/duckdbreplicator/db.go create mode 100644 runtime/pkg/duckdbreplicator/db_test.go create mode 100644 runtime/pkg/duckdbreplicator/examples/main.go create mode 100644 runtime/pkg/duckdbreplicator/gcs.go create mode 100644 runtime/pkg/duckdbreplicator/go.mod create mode 100644 runtime/pkg/duckdbreplicator/go.sum create mode 100644 runtime/pkg/duckdbreplicator/io.go create mode 100644 runtime/pkg/duckdbreplicator/io_test.go create mode 100644 runtime/pkg/duckdbreplicator/singledb.go create mode 100644 runtime/pkg/duckdbreplicator/singledb_test.go create mode 100644 runtime/pkg/duckdbreplicator/sqlutil.go diff --git a/runtime/pkg/duckdbreplicator/README.md b/runtime/pkg/duckdbreplicator/README.md new file mode 100644 index 00000000000..8e0e96dd433 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/README.md @@ -0,0 +1,18 @@ +# DuckDB Replicator + +## Motivation +1. As an embedded database, DuckDB does not inherently provide the same isolation for ETL and serving workloads that other OLAP databases offer. +2. We have observed significant degradation in query performance during data ingestion. +3. In a Kubernetes environment, it is recommended to use local disks instead of network disks, necessitating separate local disk backups. + +## Features +1. Utilizes separate DuckDB handles for reading and writing, each with distinct CPU and memory resources. +2. Automatically backs up writes to GCS in real-time. +3. Automatically restores from backups when starting with an empty local disk. + +## Examples +1. Refer to `examples/main.go` for a usage example. + +## Future Work +1. Enable writes and reads to be executed on separate machines. +2. Limit read operations to specific tables to support ephemeral tables (intermediate tables required only for writes). 
diff --git a/runtime/pkg/duckdbreplicator/backup.go b/runtime/pkg/duckdbreplicator/backup.go new file mode 100644 index 00000000000..3a043262bcb --- /dev/null +++ b/runtime/pkg/duckdbreplicator/backup.go @@ -0,0 +1,335 @@ +package duckdbreplicator + +import ( + "context" + "errors" + "fmt" + "io" + "io/fs" + "log/slog" + "os" + "path/filepath" + "strings" + "time" + + "gocloud.dev/blob" + "gocloud.dev/blob/gcsblob" + "gocloud.dev/gcerrors" + "golang.org/x/sync/errgroup" +) + +type BackupFormat string + +const ( + BackupFormatUnknown BackupFormat = "unknown" + BackupFormatDB BackupFormat = "db" + BackupFormatParquet BackupFormat = "parquet" +) + +type BackupProvider struct { + bucket *blob.Bucket +} + +func (b *BackupProvider) Close() error { + return b.bucket.Close() +} + +type GCSBackupProviderOptions struct { + // UseHostCredentials specifies whether to use the host's default credentials. + UseHostCredentials bool + ApplicationCredentialsJSON string + // Bucket is the GCS bucket to use for backups. Should be of the form `bucket-name`. + Bucket string + // BackupFormat specifies the format of the backup. + // TODO :: implement backup format. Fixed to DuckDB for now. + BackupFormat BackupFormat + // UnqiueIdentifier is used to store backups in a unique location. + // This must be set when multiple databases are writing to the same bucket. + UniqueIdentifier string +} + +// NewGCSBackupProvider creates a new BackupProvider based on GCS. 
+func NewGCSBackupProvider(ctx context.Context, opts *GCSBackupProviderOptions) (*BackupProvider, error) { + client, err := newClient(ctx, opts.ApplicationCredentialsJSON, opts.UseHostCredentials) + if err != nil { + return nil, err + } + + bucket, err := gcsblob.OpenBucket(ctx, client, opts.Bucket, nil) + if err != nil { + return nil, fmt.Errorf("failed to open bucket %q, %w", opts.Bucket, err) + } + + if opts.UniqueIdentifier != "" { + if !strings.HasSuffix(opts.UniqueIdentifier, "/") { + opts.UniqueIdentifier += "/" + } + bucket = blob.PrefixedBucket(bucket, opts.UniqueIdentifier) + } + return &BackupProvider{ + bucket: bucket, + }, nil +} + +// syncWrite syncs the write path with the backup location. +func (d *db) syncWrite(ctx context.Context) error { + if !d.writeDirty || d.backup == nil { + // optimisation to skip sync if write was already synced + return nil + } + d.logger.Debug("syncing from backup") + // Create an errgroup for background downloads with limited concurrency. + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(8) + + objects := d.backup.List(&blob.ListOptions{ + Delimiter: "/", // only list directories with a trailing slash and IsDir set to true + }) + + tblVersions := make(map[string]string) + for { + // Stop the loop if the ctx was cancelled + var stop bool + select { + case <-ctx.Done(): + stop = true + default: + // don't break + } + if stop { + break // can't use break inside the select + } + + obj, err := objects.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + if !obj.IsDir { + continue + } + + table := strings.TrimSuffix(obj.Key, "/") + d.logger.Debug("SyncWithObjectStorage: discovered table", slog.String("table", table)) + + // get version of the table + var backedUpVersion string + err = retry(func() error { + res, err := d.backup.ReadAll(ctx, filepath.Join(table, "version.txt")) + if err != nil { + return err + } + backedUpVersion = string(res) + return nil + }) + if err != nil { + if 
gcerrors.Code(err) == gcerrors.NotFound { + // invalid table directory + d.logger.Debug("SyncWithObjectStorage: invalid table directory", slog.String("table", table)) + _ = d.deleteBackup(ctx, table, "") + } + return err + } + tblVersions[table] = backedUpVersion + + // check with current version + version, exists, _ := tableVersion(d.writePath, table) + if exists && version == backedUpVersion { + d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) + continue + } + + tableDir := filepath.Join(d.writePath, table) + // truncate existing table directory + if err := os.RemoveAll(tableDir); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(tableDir, backedUpVersion), os.ModePerm); err != nil { + return err + } + + tblIter := d.backup.List(&blob.ListOptions{Prefix: filepath.Join(table, backedUpVersion)}) + // download all objects in the table and current version + for { + obj, err := tblIter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + g.Go(func() error { + return retry(func() error { + file, err := os.Create(filepath.Join(d.writePath, obj.Key)) + if err != nil { + return err + } + defer file.Close() + + rdr, err := d.backup.NewReader(ctx, obj.Key, nil) + if err != nil { + return err + } + defer rdr.Close() + + _, err = io.Copy(file, rdr) + return err + }) + }) + } + } + + // Wait for all outstanding downloads to complete + err := g.Wait() + if err != nil { + return err + } + + // Update table versions + for table, version := range tblVersions { + err = os.WriteFile(filepath.Join(d.writePath, table, "version.txt"), []byte(version), fs.ModePerm) + if err != nil { + return err + } + } + + // remove any tables that are not in backup + entries, err := os.ReadDir(d.writePath) + if err != nil { + return err + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + if _, ok := tblVersions[entry.Name()]; ok { + continue + } + err = 
os.RemoveAll(filepath.Join(d.writePath, entry.Name())) + if err != nil { + return err + } + } + return nil +} + +func (d *db) syncBackup(ctx context.Context, table string) error { + if d.backup == nil { + return nil + } + d.logger.Debug("syncing table", slog.String("table", table)) + version, exist, err := tableVersion(d.writePath, table) + if err != nil { + return err + } + + if !exist { + return fmt.Errorf("table %q not found", table) + } + + path := filepath.Join(d.writePath, table, version) + entries, err := os.ReadDir(path) + if err != nil { + return err + } + + for _, entry := range entries { + d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", path)) + // no directory should exist as of now + if entry.IsDir() { + d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", path)) + continue + } + + wr, err := os.Open(filepath.Join(path, entry.Name())) + if err != nil { + return err + } + + // upload to cloud storage + err = retry(func() error { + return d.backup.Upload(ctx, filepath.Join(table, version, entry.Name()), wr, &blob.WriterOptions{ + ContentType: "application/octet-stream", + }) + }) + wr.Close() + if err != nil { + return err + } + } + + // update version.txt + // Ideally if this fails it is a non recoverable error but for now we will rely on retries + err = retry(func() error { + return d.backup.WriteAll(ctx, filepath.Join(table, "version.txt"), []byte(version), nil) + }) + if err != nil { + d.logger.Error("failed to update version.txt in backup", slog.Any("error", err)) + } + return err +} + +// deleteBackup deletes backup. +// If table is specified, only that table is deleted. +// If table and version is specified, only that version of the table is deleted. 
+func (d *db) deleteBackup(ctx context.Context, table, version string) error { + if d.backup == nil { + return nil + } + if table == "" && version != "" { + return fmt.Errorf("table must be specified if version is specified") + } + var prefix string + if table != "" { + if version != "" { + prefix = filepath.Join(table, version) + "/" + } else { + // deleting the entire table + prefix = table + "/" + // delete version.txt first + err := retry(func() error { return d.backup.Delete(ctx, "version.txt") }) + if err != nil && gcerrors.Code(err) != gcerrors.NotFound { + d.logger.Error("failed to delete version.txt in backup", slog.Any("error", err)) + return err + } + } + } + + iter := d.backup.List(&blob.ListOptions{Prefix: prefix}) + for { + obj, err := iter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + err = retry(func() error { return d.backup.Delete(ctx, obj.Key) }) + if err != nil { + return err + } + } + return nil +} + +func retry(fn func() error) error { + var err error + for i := 0; i < _maxRetries; i++ { + err = fn() + if err == nil { + return nil // success + } else if strings.Contains(err.Error(), "stream error: stream ID") { + time.Sleep(_retryDelay) // retry + } else { + break // return error + } + } + return err +} + +const ( + _maxRetries = 5 + _retryDelay = 10 * time.Second +) diff --git a/runtime/pkg/duckdbreplicator/conn.go b/runtime/pkg/duckdbreplicator/conn.go new file mode 100644 index 00000000000..4bfc3d6a28e --- /dev/null +++ b/runtime/pkg/duckdbreplicator/conn.go @@ -0,0 +1,127 @@ +package duckdbreplicator + +import ( + "context" + + "github.com/jmoiron/sqlx" +) + +// Conn represents a single database connection. +// This is useful when running a chain of queries using a single write connection. +type Conn interface { + // Connx returns the underlying sqlx.Conn. + Connx() *sqlx.Conn + + // CreateTableAsSelect creates a new table by name from the results of the given SQL query. 
+ CreateTableAsSelect(ctx context.Context, name string, sql string, opts *CreateTableOptions) error + + // InsertTableAsSelect inserts the results of the given SQL query into the table. + InsertTableAsSelect(ctx context.Context, name string, sql string, opts *InsertTableOptions) error + + // DropTable removes a table from the database. + DropTable(ctx context.Context, name string) error + + // RenameTable renames a table in the database. + RenameTable(ctx context.Context, oldName, newName string) error + + // AddTableColumn adds a column to the table. + AddTableColumn(ctx context.Context, tableName, columnName, typ string) error + + // AlterTableColumn alters the type of a column in the table. + AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error +} + +type conn struct { + *sqlx.Conn + + db *db +} + +var _ Conn = (*conn)(nil) + +func (c *conn) Connx() *sqlx.Conn { + return c.Conn +} + +func (c *conn) CreateTableAsSelect(ctx context.Context, name, sql string, opts *CreateTableOptions) error { + if opts == nil { + opts = &CreateTableOptions{} + } + return c.db.createTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) +} + +// InsertTableAsSelect inserts the results of the given SQL query into the table. +func (c *conn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { + if opts == nil { + opts = &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + } + } + return c.db.insertTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) +} + +// DropTable removes a table from the database. +func (c *conn) DropTable(ctx context.Context, name string) error { + return c.db.dropTable(ctx, name) +} + +// RenameTable renames a table in the database. +func (c *conn) RenameTable(ctx context.Context, oldName, newName string) error { + return c.db.renameTable(ctx, oldName, newName) +} + +// AddTableColumn adds a column to the table. 
+func (c *conn) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { + return c.db.addTableColumn(ctx, c.Conn, func() error { return nil }, tableName, columnName, typ) +} + +// AlterTableColumn alters the type of a column in the table. +func (c *conn) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { + return c.db.alterTableColumn(ctx, c.Conn, func() error { return nil }, tableName, columnName, newType) +} + +type singledbConn struct { + *sqlx.Conn + + db *singledb +} + +var _ Conn = (*singledbConn)(nil) + +func (c *singledbConn) Connx() *sqlx.Conn { + return c.Conn +} + +func (c *singledbConn) CreateTableAsSelect(ctx context.Context, name, sql string, opts *CreateTableOptions) error { + return c.db.createTableAsSelect(ctx, c.Conn, name, sql, opts) +} + +// InsertTableAsSelect inserts the results of the given SQL query into the table. +func (c *singledbConn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { + if opts == nil { + opts = &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + } + } + return execIncrementalInsert(ctx, c.Conn, name, sql, opts) +} + +// DropTable removes a table from the database. +func (c *singledbConn) DropTable(ctx context.Context, name string) error { + return c.db.dropTable(ctx, c.Conn, name) +} + +// RenameTable renames a table in the database. +func (c *singledbConn) RenameTable(ctx context.Context, oldName, newName string) error { + return c.db.renameTable(ctx, c.Conn, oldName, newName) +} + +// AddTableColumn adds a column to the table. +func (c *singledbConn) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { + return c.db.addTableColumn(ctx, c.Conn, tableName, columnName, typ) +} + +// AlterTableColumn alters the type of a column in the table. 
+func (c *singledbConn) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { + return c.db.alterTableColumn(ctx, c.Conn, tableName, columnName, newType) +} diff --git a/runtime/pkg/duckdbreplicator/db.go b/runtime/pkg/duckdbreplicator/db.go new file mode 100644 index 00000000000..a18e1638f30 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/db.go @@ -0,0 +1,1202 @@ +package duckdbreplicator + +import ( + "context" + "database/sql" + "database/sql/driver" + "encoding/json" + "errors" + "fmt" + "io/fs" + "log/slog" + "net/url" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/XSAM/otelsql" + "github.com/google/uuid" + "github.com/jmoiron/sqlx" + "github.com/marcboeker/go-duckdb" + "github.com/mitchellh/mapstructure" + "go.opentelemetry.io/otel/attribute" + "gocloud.dev/blob" +) + +type DB interface { + // Close closes the database. + Close() error + + // AcquireReadConnection returns a connection to the database for reading. + // Once done the connection should be released by calling the release function. + // This connection must only be used for select queries or for creating and working with temporary tables. + AcquireReadConnection(ctx context.Context) (conn Conn, release func() error, err error) + + // AcquireWriteConnection returns a connection to the database for writing. + // Once done the connection should be released by calling the release function. + // Any persistent changes to the database should be done by calling CRUD APIs on this connection. + AcquireWriteConnection(ctx context.Context) (conn Conn, release func() error, err error) + + // Size returns the size of the database in bytes. + // It is currently implemented as sum of the size of all serving `.db` files. + Size() int64 + + // CRUD APIs + + // CreateTableAsSelect creates a new table by name from the results of the given SQL query. 
+ CreateTableAsSelect(ctx context.Context, name string, sql string, opts *CreateTableOptions) error + + // InsertTableAsSelect inserts the results of the given SQL query into the table. + InsertTableAsSelect(ctx context.Context, name string, sql string, opts *InsertTableOptions) error + + // DropTable removes a table from the database. + DropTable(ctx context.Context, name string) error + + // RenameTable renames a table in the database. + RenameTable(ctx context.Context, oldName, newName string) error + + // AddTableColumn adds a column to the table. + AddTableColumn(ctx context.Context, tableName, columnName, typ string) error + + // AlterTableColumn alters the type of a column in the table. + AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error +} + +type DBOptions struct { + // Clean specifies whether to start with a clean database or download data from cloud storage and start with backed up data. + Clean bool + // LocalPath is the path where local db files will be stored. Should be unique for each database. + LocalPath string + + BackupProvider *BackupProvider + + // ReadSettings are settings applied the read duckDB handle. + ReadSettings map[string]string + // WriteSettings are settings applied the write duckDB handle. + WriteSettings map[string]string + // InitQueries are the queries to run when the database is first created. 
+ InitQueries []string + + Logger *slog.Logger +} + +// TODO :: revisit this logic +func (d *DBOptions) ValidateSettings() error { + read := &settings{} + err := mapstructure.Decode(d.ReadSettings, read) + if err != nil { + return fmt.Errorf("read settings: %w", err) + } + + write := &settings{} + err = mapstructure.Decode(d.WriteSettings, write) + if err != nil { + return fmt.Errorf("write settings: %w", err) + } + + // no memory limits defined + // divide memory equally between read and write + if read.MaxMemory == "" && write.MaxMemory == "" { + connector, err := duckdb.NewConnector("", nil) + if err != nil { + return fmt.Errorf("unable to create duckdb connector: %w", err) + } + defer connector.Close() + db := sql.OpenDB(connector) + defer db.Close() + + row := db.QueryRow("SELECT value FROM duckdb_settings() WHERE name = 'max_memory'") + var maxMemory string + err = row.Scan(&maxMemory) + if err != nil { + return fmt.Errorf("unable to get max_memory: %w", err) + } + + bytes, err := humanReadableSizeToBytes(maxMemory) + if err != nil { + return fmt.Errorf("unable to parse max_memory: %w", err) + } + + read.MaxMemory = fmt.Sprintf("%d bytes", int64(bytes)/2) + write.MaxMemory = fmt.Sprintf("%d bytes", int64(bytes)/2) + } + + if read.MaxMemory == "" != (write.MaxMemory == "") { + // only one is defined + var mem string + if read.MaxMemory != "" { + mem = read.MaxMemory + } else { + mem = write.MaxMemory + } + + bytes, err := humanReadableSizeToBytes(mem) + if err != nil { + return fmt.Errorf("unable to parse max_memory: %w", err) + } + + read.MaxMemory = fmt.Sprintf("%d bytes", int64(bytes)/2) + write.MaxMemory = fmt.Sprintf("%d bytes", int64(bytes)/2) + } + + var readThread, writeThread int + if read.Threads != "" { + readThread, err = strconv.Atoi(read.Threads) + if err != nil { + return fmt.Errorf("unable to parse read threads: %w", err) + } + } + if write.Threads != "" { + writeThread, err = strconv.Atoi(write.Threads) + if err != nil { + return 
fmt.Errorf("unable to parse write threads: %w", err) + } + } + + if readThread == 0 && writeThread == 0 { + connector, err := duckdb.NewConnector("", nil) + if err != nil { + return fmt.Errorf("unable to create duckdb connector: %w", err) + } + defer connector.Close() + db := sql.OpenDB(connector) + defer db.Close() + + row := db.QueryRow("SELECT value FROM duckdb_settings() WHERE name = 'threads'") + var threads int + err = row.Scan(&threads) + if err != nil { + return fmt.Errorf("unable to get threads: %w", err) + } + + read.Threads = strconv.Itoa((threads + 1) / 2) + write.Threads = strconv.Itoa(threads / 2) + } + + if readThread == 0 != (writeThread == 0) { + // only one is defined + var threads int + if readThread != 0 { + threads = readThread + } else { + threads = writeThread + } + + read.Threads = strconv.Itoa((threads + 1) / 2) + write.Threads = strconv.Itoa(threads / 2) + } + + err = mapstructure.WeakDecode(read, &d.ReadSettings) + if err != nil { + return fmt.Errorf("failed to update read settings: %w", err) + } + + err = mapstructure.WeakDecode(write, &d.WriteSettings) + if err != nil { + return fmt.Errorf("failed to update write settings: %w", err) + } + return nil +} + +type CreateTableOptions struct { + // View specifies whether the created table is a view. + View bool +} + +type IncrementalStrategy string + +const ( + IncrementalStrategyUnspecified IncrementalStrategy = "" + IncrementalStrategyAppend IncrementalStrategy = "append" + IncrementalStrategyMerge IncrementalStrategy = "merge" +) + +type InsertTableOptions struct { + ByName bool + Strategy IncrementalStrategy + UniqueKey []string +} + +// NewDB creates a new DB instance. +// This can be a slow operation if the backup is large. +// dbIdentifier is a unique identifier for the database reported in metrics. 
+func NewDB(ctx context.Context, dbIdentifier string, opts *DBOptions) (DB, error) { + if dbIdentifier == "" { + return nil, fmt.Errorf("db identifier cannot be empty") + } + err := opts.ValidateSettings() + if err != nil { + return nil, err + } + db := &db{ + dbIdentifier: dbIdentifier, + opts: opts, + readPath: filepath.Join(opts.LocalPath, "read"), + writePath: filepath.Join(opts.LocalPath, "write"), + writeDirty: true, + logger: opts.Logger, + } + if opts.BackupProvider != nil { + db.backup = opts.BackupProvider.bucket + } + // if clean is true, remove the backup + if opts.Clean { + err = db.deleteBackup(ctx, "", "") + if err != nil { + return nil, fmt.Errorf("unable to clean backup: %w", err) + } + } + + // create read and write paths + err = os.MkdirAll(db.readPath, fs.ModePerm) + if err != nil { + return nil, fmt.Errorf("unable to create read path: %w", err) + } + err = os.MkdirAll(db.writePath, fs.ModePerm) + if err != nil { + return nil, fmt.Errorf("unable to create write path: %w", err) + } + + // sync write path + err = db.syncWrite(ctx) + if err != nil { + return nil, err + } + + // sync read path + err = db.syncRead(ctx) + if err != nil { + return nil, err + } + + // create read handle + db.readHandle, err = db.openDBAndAttach(ctx, true) + if err != nil { + if strings.Contains(err.Error(), "Symbol not found") { + fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. 
See this link for details: https://support.apple.com/en-in/macos/upgrade") + os.Exit(1) + } + return nil, err + } + + return db, nil +} + +type db struct { + dbIdentifier string + opts *DBOptions + + readHandle *sqlx.DB + readPath string + writePath string + readMu sync.RWMutex + writeMu sync.Mutex + writeDirty bool + + backup *blob.Bucket + + logger *slog.Logger +} + +var _ DB = &db{} + +func (d *db) Close() error { + d.writeMu.Lock() + defer d.writeMu.Unlock() + + d.readMu.Lock() + defer d.readMu.Unlock() + + return d.readHandle.Close() +} + +func (d *db) AcquireReadConnection(ctx context.Context) (Conn, func() error, error) { + d.readMu.RLock() + + c, err := d.readHandle.Connx(ctx) + if err != nil { + d.readMu.RUnlock() + return nil, nil, err + } + + return &conn{ + Conn: c, + db: d, + }, func() error { + err = c.Close() + d.readMu.RUnlock() + return err + }, nil +} + +func (d *db) AcquireWriteConnection(ctx context.Context) (Conn, func() error, error) { + d.writeMu.Lock() + defer d.writeMu.Unlock() + c, release, err := d.acquireWriteConn(ctx) + if err != nil { + return nil, nil, err + } + + return &conn{ + Conn: c, + db: d, + }, release, nil +} + +func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts *CreateTableOptions) error { + if opts == nil { + opts = &CreateTableOptions{} + } + d.logger.Debug("create table", slog.String("name", name), slog.Bool("view", opts.View)) + d.writeMu.Lock() + defer d.writeMu.Unlock() + conn, release, err := d.acquireWriteConn(ctx) + if err != nil { + return err + } + defer func() { + _ = release() + }() + return d.createTableAsSelect(ctx, conn, release, name, query, opts) +} + +func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *CreateTableOptions) error { + // check if some older version exists + oldVersion, oldVersionExists, _ := tableVersion(d.writePath, name) + d.logger.Debug("old version", slog.String("version", oldVersion), 
slog.Bool("exists", oldVersionExists)) + + // create new version directory + newVersion := newVersion() + newVersionDir := filepath.Join(d.writePath, name, newVersion) + err := os.MkdirAll(newVersionDir, fs.ModePerm) + if err != nil { + return fmt.Errorf("create: unable to create dir %q: %w", name, err) + } + + var m meta + if opts.View { + // create view - validates that SQL is correct + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(name), query)) + if err != nil { + return err + } + + m = meta{ViewSQL: query} + } else { + // create db file + dbFile := filepath.Join(newVersionDir, "data.db") + safeDBName := safeSQLName(dbName(name)) + + // detach existing db + _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE IF EXISTS %s", safeDBName), nil) + if err != nil { + _ = os.RemoveAll(newVersionDir) + return fmt.Errorf("create: detach %q db failed: %w", safeDBName, err) + } + + // attach new db + _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(dbFile), safeDBName), nil) + if err != nil { + _ = os.RemoveAll(newVersionDir) + return fmt.Errorf("create: attach %q db failed: %w", dbFile, err) + } + + // ingest data + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE TABLE %s.default AS (%s\n)", safeDBName, query), nil) + if err != nil { + _ = os.RemoveAll(newVersionDir) + return fmt.Errorf("create: create %q.default table failed: %w", safeDBName, err) + } + + m = meta{Format: BackupFormatDB} + } + + d.writeDirty = true + // write meta + err = writeMeta(newVersionDir, m) + if err != nil { + _ = os.RemoveAll(newVersionDir) + return err + } + + // update version.txt + err = os.WriteFile(filepath.Join(d.writePath, name, "version.txt"), []byte(newVersion), fs.ModePerm) + if err != nil { + _ = os.RemoveAll(newVersionDir) + return fmt.Errorf("create: write version file failed: %w", err) + } + + // close write handle before syncing read so that temp files or wal files if any are removed 
+ err = releaseConn() + if err != nil { + return err + } + + if err := d.syncBackup(ctx, name); err != nil { + return fmt.Errorf("create: replicate failed: %w", err) + } + d.logger.Debug("table created", slog.String("name", name)) + // both backups and write are now in sync + d.writeDirty = false + if oldVersionExists { + _ = os.RemoveAll(filepath.Join(d.writePath, name, oldVersion)) + _ = d.deleteBackup(ctx, name, oldVersion) + } + return d.syncRead(ctx) +} + +func (d *db) InsertTableAsSelect(ctx context.Context, name, query string, opts *InsertTableOptions) error { + if opts == nil { + opts = &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + } + } + + d.logger.Debug("insert table", slog.String("name", name), slog.Group("option", "by_name", opts.ByName, "strategy", string(opts.Strategy), "unique_key", opts.UniqueKey)) + d.writeMu.Lock() + defer d.writeMu.Unlock() + conn, release, err := d.acquireWriteConn(ctx) + if err != nil { + return err + } + + defer func() { + _ = release() + }() + return d.insertTableAsSelect(ctx, conn, release, name, query, opts) +} + +func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *InsertTableOptions) error { + // Get current table version + oldVersion, oldVersionExists, err := tableVersion(d.writePath, name) + if err != nil || !oldVersionExists { + return fmt.Errorf("table %q does not exist", name) + } + + d.writeDirty = true + // Execute the insert + err = execIncrementalInsert(ctx, conn, fmt.Sprintf("%s.default", safeSQLName(dbName(name))), query, opts) + if err != nil { + return fmt.Errorf("insert: insert into table %q failed: %w", name, err) + } + + // rename db directory + newVersion := newVersion() + oldVersionDir := filepath.Join(d.writePath, name, oldVersion) + err = os.Rename(oldVersionDir, filepath.Join(d.writePath, name, newVersion)) + if err != nil { + return fmt.Errorf("insert: update version %q failed: %w", newVersion, err) + } + + // 
update version.txt + err = os.WriteFile(filepath.Join(d.writePath, name, "version.txt"), []byte(newVersion), fs.ModePerm) + if err != nil { + return fmt.Errorf("insert: write version file failed: %w", err) + } + + err = releaseConn() + if err != nil { + return err + } + // replicate + err = d.syncBackup(ctx, name) + if err != nil { + return fmt.Errorf("insert: replicate failed: %w", err) + } + // both backups and write are now in sync + d.writeDirty = false + + // Delete the old version (ignoring errors since source the new data has already been correctly inserted) + _ = os.RemoveAll(oldVersionDir) + _ = d.deleteBackup(ctx, name, oldVersion) + return d.syncRead(ctx) +} + +// DropTable implements DB. +func (d *db) DropTable(ctx context.Context, name string) error { + d.logger.Debug("drop table", slog.String("name", name)) + d.writeMu.Lock() + defer d.writeMu.Unlock() + _, release, err := d.acquireWriteConn(ctx) // we don't need the handle but need to sync the write + if err != nil { + return err + } + defer func() { + _ = release() + }() + + return d.dropTable(ctx, name) +} + +func (d *db) dropTable(ctx context.Context, name string) error { + _, exist, _ := tableVersion(d.writePath, name) + if !exist { + return fmt.Errorf("drop: table %q not found", name) + } + + d.writeDirty = true + // drop the table from backup location + err := d.deleteBackup(ctx, name, "") + if err != nil { + return fmt.Errorf("drop: unable to drop table %q from backup: %w", name, err) + } + + // delete the table directory + err = os.RemoveAll(filepath.Join(d.writePath, name)) + if err != nil { + return fmt.Errorf("drop: unable to drop table %q: %w", name, err) + } + // both backups and write are now in sync + d.writeDirty = false + return d.syncRead(ctx) +} + +func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { + d.logger.Debug("rename table", slog.String("from", oldName), slog.String("to", newName)) + if strings.EqualFold(oldName, newName) { + return 
fmt.Errorf("rename: Table with name %q already exists", newName) + } + d.writeMu.Lock() + defer d.writeMu.Unlock() + _, release, err := d.acquireWriteConn(ctx) // we don't need the handle but need to sync the write + if err != nil { + return err + } + defer func() { + _ = release() + }() + return d.renameTable(ctx, oldName, newName) +} + +func (d *db) renameTable(ctx context.Context, oldName, newName string) error { + oldVersion, exist, err := d.writeTableVersion(oldName) + if err != nil { + return err + } + if !exist { + return fmt.Errorf("rename: Table %q not found", oldName) + } + + newTableVersion, replaceInNewTable, _ := d.writeTableVersion(newName) + + d.writeDirty = true + err = os.RemoveAll(filepath.Join(d.writePath, newName)) + if err != nil { + return fmt.Errorf("rename: unable to delete existing new table: %w", err) + } + + err = os.Rename(filepath.Join(d.writePath, oldName), filepath.Join(d.writePath, newName)) + if err != nil { + return fmt.Errorf("rename: rename file failed: %w", err) + } + + // rename to a new version + version := newVersion() + err = os.Rename(filepath.Join(d.writePath, newName, oldVersion), filepath.Join(d.writePath, newName, version)) + if err != nil { + return fmt.Errorf("rename: rename version failed: %w", err) + } + + // update version.txt + writeErr := os.WriteFile(filepath.Join(d.writePath, newName, "version.txt"), []byte(newVersion()), fs.ModePerm) + if writeErr != nil { + return fmt.Errorf("rename: write version file failed: %w", writeErr) + } + + if d.syncBackup(ctx, newName) != nil { + return fmt.Errorf("rename: unable to replicate new table") + } + err = d.deleteBackup(ctx, oldName, "") + if err != nil { + return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) + } + d.writeDirty = false + if replaceInNewTable { + _ = d.deleteBackup(ctx, newName, newTableVersion) + } + return d.syncRead(ctx) +} + +func (d *db) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { 
+ d.logger.Debug("AddTableColumn", slog.String("table", tableName), slog.String("column", columnName), slog.String("typ", typ)) + d.writeMu.Lock() + defer d.writeMu.Unlock() + conn, release, err := d.acquireWriteConn(ctx) + if err != nil { + return err + } + defer func() { + _ = release() + }() + + return d.addTableColumn(ctx, conn, release, tableName, columnName, typ) +} + +func (d *db) addTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, typ string) error { + version, exist, err := tableVersion(d.writePath, tableName) + if err != nil { + return err + } + + if !exist { + return fmt.Errorf("table %q does not exist", tableName) + } + + d.writeDirty = true + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ADD COLUMN %s %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), typ)) + if err != nil { + return err + } + + // rename to new version + newVersion := newVersion() + err = os.Rename(filepath.Join(d.writePath, tableName, version), filepath.Join(d.writePath, tableName, newVersion)) + if err != nil { + return err + } + + // update version.txt + err = os.WriteFile(filepath.Join(d.writePath, tableName, "version.txt"), []byte(newVersion), fs.ModePerm) + if err != nil { + return err + } + + err = releaseConn() + if err != nil { + return err + } + + // replicate + err = d.syncBackup(ctx, tableName) + if err != nil { + return err + } + d.writeDirty = false + // remove old version + _ = d.deleteBackup(ctx, tableName, version) + return d.syncRead(ctx) +} + +// AlterTableColumn implements drivers.OLAPStore. 
+func (d *db) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { + d.logger.Debug("AlterTableColumn", slog.String("table", tableName), slog.String("column", columnName), slog.String("typ", newType)) + d.writeMu.Lock() + defer d.writeMu.Unlock() + conn, release, err := d.acquireWriteConn(ctx) + if err != nil { + return err + } + defer func() { + _ = release() + }() + + return d.alterTableColumn(ctx, conn, release, tableName, columnName, newType) +} + +func (d *db) alterTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, newType string) error { + version, exist, err := tableVersion(d.writePath, tableName) + if err != nil { + return err + } + + if !exist { + return fmt.Errorf("table %q does not exist", tableName) + } + + d.writeDirty = true + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ALTER %s TYPE %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), newType)) + if err != nil { + return err + } + + // rename to new version + newVersion := fmt.Sprint(time.Now().UnixMilli()) + err = os.Rename(filepath.Join(d.writePath, tableName, version), filepath.Join(d.writePath, tableName, newVersion)) + if err != nil { + return err + } + + // update version.txt + err = os.WriteFile(filepath.Join(d.writePath, tableName, "version.txt"), []byte(newVersion), fs.ModePerm) + if err != nil { + return err + } + + err = releaseConn() + if err != nil { + return err + } + + // replicate + err = d.syncBackup(ctx, tableName) + if err != nil { + return err + } + d.writeDirty = false + // remove old version + _ = d.deleteBackup(ctx, tableName, version) + return d.syncRead(ctx) +} + +func (d *db) syncRead(ctx context.Context) error { + entries, err := os.ReadDir(d.writePath) + if err != nil { + return err + } + + tableVersion := make(map[string]string) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + // Check if there is already a table with the same version + 
writeVersion, exist, _ := d.writeTableVersion(entry.Name()) + if !exist { + continue + } + tableVersion[entry.Name()] = writeVersion + readVersion, _, _ := d.readTableVersion(entry.Name()) + if writeVersion == readVersion { + continue + } + + d.logger.Debug("Sync: copying table", slog.String("table", entry.Name())) + err = copyDir(filepath.Join(d.readPath, entry.Name()), filepath.Join(d.writePath, entry.Name())) + if err != nil { + return err + } + } + + handle, err := d.openDBAndAttach(ctx, true) + if err != nil { + return err + } + + var oldDBHandle *sqlx.DB + d.readMu.Lock() + // swap read handle + oldDBHandle = d.readHandle + d.readHandle = handle + d.readMu.Unlock() + + // close old read handle + if oldDBHandle != nil { + err = oldDBHandle.Close() + if err != nil { + d.logger.Warn("error in closing old read handle", slog.String("error", err.Error())) + } + } + + // delete data for tables/versions that have been removed from write + entries, err = os.ReadDir(d.readPath) + if err != nil { + return err + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + readVersion, ok, _ := d.readTableVersion(entry.Name()) + if !ok { + // invalid table + _ = os.RemoveAll(filepath.Join(d.readPath, entry.Name())) + continue + } + + writeVersion, ok := tableVersion[entry.Name()] + if !ok { + // table not in write + d.logger.Debug("Sync: removing table", slog.String("table", entry.Name())) + err = os.RemoveAll(filepath.Join(d.readPath, entry.Name())) + if err != nil { + return err + } + continue + } + + if readVersion == writeVersion { + continue + } + + d.logger.Debug("Sync: removing old version", slog.String("table", entry.Name()), slog.String("version", readVersion)) + err = os.RemoveAll(filepath.Join(d.readPath, entry.Name(), readVersion)) + if err != nil { + return err + } + } + return nil +} + +func (d *db) Size() int64 { + var paths []string + entries, err := os.ReadDir(d.readPath) + if err != nil { // ignore error + return 0 + } + for _, entry := 
range entries { + if !entry.IsDir() { + continue + } + // this is to avoid counting temp tables during source ingestion + // in certain cases we only want to compute the size of the serving db files + // TODO :: remove this when removing staged table concepts + if strings.HasPrefix(entry.Name(), "__rill_tmp_") { + continue + } + path := filepath.Join(d.readPath, entry.Name()) + version, exist, _ := d.readTableVersion(entry.Name()) + if !exist { + continue + } + paths = append(paths, filepath.Join(path, fmt.Sprintf("%s.db", version))) + } + return fileSize(paths) +} + +// acquireWriteConn syncs the write database, initializes the write handle and returns a write connection. +// The release function should be called to release the connection. +// It should be called with the writeMu locked. +func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, error) { + err := d.syncWrite(ctx) + if err != nil { + return nil, nil, err + } + + db, err := d.openDBAndAttach(ctx, false) + if err != nil { + return nil, nil, err + } + conn, err := db.Connx(ctx) + if err != nil { + _ = db.Close() + return nil, nil, err + } + return conn, func() error { + _ = conn.Close() + err = db.Close() + return err + }, nil +} + +func (d *db) openDBAndAttach(ctx context.Context, read bool) (*sqlx.DB, error) { + // open the db + var ( + dsn *url.URL + err error + settings map[string]string + path string + ) + if read { + dsn, err = url.Parse("") // in-memory + settings = d.opts.ReadSettings + path = d.readPath + } else { + path = d.writePath + dsn, err = url.Parse(filepath.Join(path, "stage.db")) + settings = d.opts.WriteSettings + } + if err != nil { + return nil, err + } + + query := dsn.Query() + for k, v := range settings { + query.Set(k, v) + } + dsn.RawQuery = query.Encode() + connector, err := duckdb.NewConnector(dsn.String(), func(execer driver.ExecerContext) error { + for _, qry := range d.opts.InitQueries { + _, err := execer.ExecContext(context.Background(), qry, nil) 
+ if err != nil && strings.Contains(err.Error(), "Failed to download extension") { + // Retry using another mirror. Based on: https://github.com/duckdb/duckdb/issues/9378 + _, err = execer.ExecContext(context.Background(), qry+" FROM 'http://nightly-extensions.duckdb.org'", nil) + } + if err != nil { + return err + } + } + return nil + }) + if err != nil { + return nil, err + } + + db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") + + err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(attribute.String("db.system", "duckdb"), attribute.String("db_identifier", d.dbIdentifier))) + if err != nil { + return nil, fmt.Errorf("registering db stats metrics: %w", err) + } + + err = db.PingContext(ctx) + if err != nil { + db.Close() + return nil, err + } + + err = d.attachDBs(ctx, db, path, read) + if err != nil { + db.Close() + return nil, err + } + + // 2023-12-11: Hail mary for solving this issue: https://github.com/duckdblabs/rilldata/issues/6. + // Forces DuckDB to create catalog entries for the information schema up front (they are normally created lazily). + // Can be removed if the issue persists. 
+ _, err = db.ExecContext(context.Background(), ` + select + coalesce(t.table_catalog, current_database()) as "database", + t.table_schema as "schema", + t.table_name as "name", + t.table_type as "type", + array_agg(c.column_name order by c.ordinal_position) as "column_names", + array_agg(c.data_type order by c.ordinal_position) as "column_types", + array_agg(c.is_nullable = 'YES' order by c.ordinal_position) as "column_nullable" + from information_schema.tables t + join information_schema.columns c on t.table_schema = c.table_schema and t.table_name = c.table_name + group by 1, 2, 3, 4 + order by 1, 2, 3, 4 + `) + if err != nil { + return nil, err + } + + return db, nil +} + +func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, path string, read bool) error { + entries, err := os.ReadDir(path) + if err != nil { + return err + } + + var views []string + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + // NOTE :: we always look at the write version + // Tables in read path are removed after getting a new handle + // So we need to always look at the write version to ensure we do not reattach dropped tables + version, exist, _ := d.writeTableVersion(entry.Name()) + if !exist { + continue + } + versionPath := filepath.Join(path, entry.Name(), version) + + // read meta file + f, err := os.ReadFile(filepath.Join(versionPath, "meta.json")) + if err != nil { + _ = os.RemoveAll(versionPath) + d.logger.Warn("error in reading meta file", slog.String("table", entry.Name()), slog.Any("error", err)) + return err + } + var meta meta + err = json.Unmarshal(f, &meta) + if err != nil { + _ = os.RemoveAll(versionPath) + d.logger.Warn("error in unmarshalling meta file", slog.String("table", entry.Name()), slog.Any("error", err)) + return err + } + + if meta.ViewSQL != "" { + // table is a view + views = append(views, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(entry.Name()), meta.ViewSQL)) + continue + } + switch meta.Format { + case 
BackupFormatDB: + dbName := dbName(entry.Name()) + var readMode string + if read { + readMode = " (READ_ONLY)" + } + _, err := db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s %s", safeSQLString(filepath.Join(versionPath, "data.db")), safeSQLName(dbName), readMode)) + if err != nil { + d.logger.Error("error in attaching db", slog.String("table", entry.Name()), slog.Any("error", err)) + _ = os.RemoveAll(filepath.Join(path, entry.Name())) + return err + } + + _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(entry.Name()), safeSQLName(dbName))) + if err != nil { + return err + } + case BackupFormatParquet: + panic("unimplemented") + default: + return fmt.Errorf("unknown backup format %q", meta.Format) + } + } + // create views after attaching all the DBs since views can depend on other tables + for _, view := range views { + _, err := db.ExecContext(ctx, view) + if err != nil { + return err + } + } + return nil +} + +func (d *db) readTableVersion(name string) (string, bool, error) { + return tableVersion(d.readPath, name) +} + +func (d *db) writeTableVersion(name string) (string, bool, error) { + return tableVersion(d.writePath, name) +} + +func execIncrementalInsert(ctx context.Context, conn *sqlx.Conn, safeTableName, query string, opts *InsertTableOptions) error { + var byNameClause string + if opts.ByName { + byNameClause = "BY NAME" + } + + if opts.Strategy == IncrementalStrategyAppend { + _, err := conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s (%s\n)", safeTableName, byNameClause, query)) + return err + } + + if opts.Strategy == IncrementalStrategyMerge { + // Create a temporary table with the new data + tmp := uuid.New().String() + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE TEMPORARY TABLE %s AS (%s\n)", safeSQLName(tmp), query)) + if err != nil { + return err + } + + // check the count of the new data + // skip if the count is 0 + // if there was no data in the empty file then the 
detected schema can be different from the current schema which leads to errors or performance issues + res := conn.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) == 0 FROM %s", safeSQLName(tmp))) + var empty bool + if err := res.Scan(&empty); err != nil { + return err + } + + if empty { + return nil + } + + // Drop the rows from the target table where the unique key is present in the temporary table + where := "" + for i, key := range opts.UniqueKey { + key = safeSQLName(key) + if i != 0 { + where += " AND " + } + where += fmt.Sprintf("base.%s IS NOT DISTINCT FROM tmp.%s", key, key) + } + _, err = conn.ExecContext(ctx, fmt.Sprintf("DELETE FROM %s base WHERE EXISTS (SELECT 1 FROM %s tmp WHERE %s)", safeTableName, safeSQLName(tmp), where)) + if err != nil { + return err + } + + // Insert the new data into the target table + _, err = conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeTableName, byNameClause, safeSQLName(tmp))) + return err + } + + return fmt.Errorf("incremental insert strategy %q not supported", opts.Strategy) +} + +func tableVersion(path, name string) (string, bool, error) { + pathToFile := filepath.Join(path, name, "version.txt") + contents, err := os.ReadFile(pathToFile) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return "", false, nil + } + return "", false, err + } + return strings.TrimSpace(string(contents)), true, nil +} + +func newVersion() string { + return strconv.FormatInt(time.Now().UnixMilli(), 10) +} + +type meta struct { + ViewSQL string + Format BackupFormat +} + +func writeMeta(path string, meta meta) error { + metaBytes, err := json.Marshal(meta) + if err != nil { + return fmt.Errorf("create: marshal meta failed: %w", err) + } + err = os.WriteFile(filepath.Join(path, "meta.json"), metaBytes, fs.ModePerm) + if err != nil { + return fmt.Errorf("create: write meta failed: %w", err) + } + return nil +} + +func dbName(name string) string { + return fmt.Sprintf("%s__data__db", name) +} + +type 
settings struct { + MaxMemory string `mapstructure:"max_memory"` + Threads string `mapstructure:"threads"` + // Can be more settings +} + +// Regex to parse human-readable size returned by DuckDB +// nolint +var humanReadableSizeRegex = regexp.MustCompile(`^([\d.]+)\s*(\S+)$`) + +// Reversed logic of StringUtil::BytesToHumanReadableString +// see https://github.com/cran/duckdb/blob/master/src/duckdb/src/common/string_util.cpp#L157 +// Examples: 1 bytes, 2 bytes, 1KB, 1MB, 1TB, 1PB +// nolint +func humanReadableSizeToBytes(sizeStr string) (float64, error) { + var multiplier float64 + + match := humanReadableSizeRegex.FindStringSubmatch(sizeStr) + + if match == nil { + return 0, fmt.Errorf("invalid size format: '%s'", sizeStr) + } + + sizeFloat, err := strconv.ParseFloat(match[1], 64) + if err != nil { + return 0, err + } + + switch match[2] { + case "byte", "bytes": + multiplier = 1 + case "KB": + multiplier = 1000 + case "MB": + multiplier = 1000 * 1000 + case "GB": + multiplier = 1000 * 1000 * 1000 + case "TB": + multiplier = 1000 * 1000 * 1000 * 1000 + case "PB": + multiplier = 1000 * 1000 * 1000 * 1000 * 1000 + case "KiB": + multiplier = 1024 + case "MiB": + multiplier = 1024 * 1024 + case "GiB": + multiplier = 1024 * 1024 * 1024 + case "TiB": + multiplier = 1024 * 1024 * 1024 * 1024 + case "PiB": + multiplier = 1024 * 1024 * 1024 * 1024 * 1024 + default: + return 0, fmt.Errorf("unknown size unit '%s' in '%s'", match[2], sizeStr) + } + + return sizeFloat * multiplier, nil +} diff --git a/runtime/pkg/duckdbreplicator/db_test.go b/runtime/pkg/duckdbreplicator/db_test.go new file mode 100644 index 00000000000..64968bb29eb --- /dev/null +++ b/runtime/pkg/duckdbreplicator/db_test.go @@ -0,0 +1,77 @@ +package duckdbreplicator + +import ( + "context" + "io" + "log/slog" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDB(t *testing.T) { + dir := t.TempDir() + ctx := context.Background() + db, err := NewDB(ctx, "test", &DBOptions{ + LocalPath: dir, + 
BackupProvider: nil, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + }) + require.NoError(t, err) + + // create table + err = db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", nil) + require.NoError(t, err) + + // query table + var ( + id int + country string + ) + conn, release, err := db.AcquireReadConnection(ctx) + require.NoError(t, err) + err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM test").Scan(&id, &country) + require.NoError(t, err) + require.Equal(t, 1, id) + require.Equal(t, "India", country) + require.NoError(t, release()) + + // rename table + err = db.RenameTable(ctx, "test", "test2") + require.NoError(t, err) + + // drop old table + err = db.DropTable(ctx, "test") + require.Error(t, err) + + // insert into table + err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'US' AS country", nil) + require.NoError(t, err) + + // merge into table + err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'USA' AS country", &InsertTableOptions{ + Strategy: IncrementalStrategyMerge, + UniqueKey: []string{"id"}, + }) + require.NoError(t, err) + + // query table + conn, release, err = db.AcquireReadConnection(ctx) + require.NoError(t, err) + err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM test2 where id = 2").Scan(&id, &country) + require.NoError(t, err) + require.Equal(t, 2, id) + require.Equal(t, "USA", country) + require.NoError(t, release()) + + // Add column + err = db.AddTableColumn(ctx, "test2", "city", "TEXT") + require.NoError(t, err) + + // drop table + err = db.DropTable(ctx, "test2") + require.NoError(t, err) +} diff --git a/runtime/pkg/duckdbreplicator/examples/main.go b/runtime/pkg/duckdbreplicator/examples/main.go new 
file mode 100644 index 00000000000..f34b8b40594 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/examples/main.go @@ -0,0 +1,74 @@ +package main + +import ( + "context" + "fmt" + "log/slog" + "time" + + duckdbreplicator "github.com/rilldata/duckdb-replicator" + _ "gocloud.dev/blob/gcsblob" +) + +func main() { + backup, err := duckdbreplicator.NewGCSBackupProvider(context.Background(), &duckdbreplicator.GCSBackupProviderOptions{ + UseHostCredentials: true, + Bucket: "", + UniqueIdentifier: "756c6367-e807-43ff-8b07-df1bae29c57e/", + }) + if err != nil { + panic(err) + } + + dbOptions := &duckdbreplicator.DBOptions{ + LocalPath: "", + BackupProvider: backup, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "8GB", "threads": "2"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: slog.Default(), + } + + db, err := duckdbreplicator.NewDB(context.Background(), "756c6367-e807-43ff-8b07-df1bae29c57e", dbOptions) + if err != nil { + panic(err) + } + defer db.Close() + + t := time.Now() + // create table + err = db.CreateTableAsSelect(context.Background(), "test-2", `SELECT * FROM read_parquet('data*.parquet')`, nil) + if err != nil { + panic(err) + } + fmt.Printf("time taken %v\n", time.Since(t)) + + // rename table + err = db.RenameTable(context.Background(), "test-2", "test") + if err != nil { + panic(err) + } + + // insert into renamed table + err = db.InsertTableAsSelect(context.Background(), "test", `SELECT * FROM read_parquet('data*.parquet')`, &duckdbreplicator.InsertTableOptions{ + Strategy: duckdbreplicator.IncrementalStrategyAppend, + }) + if err != nil { + panic(err) + } + + // get count + conn, release, err := db.AcquireReadConnection(context.Background()) + if err != nil { + panic(err) + } + defer release() + + var count int + err = conn.Connx().QueryRowxContext(context.Background(), `SELECT count(*) FROM 
"test"`).Scan(&count) + if err != nil { + fmt.Printf("error %v\n", err) + } + fmt.Println(count) + +} diff --git a/runtime/pkg/duckdbreplicator/gcs.go b/runtime/pkg/duckdbreplicator/gcs.go new file mode 100644 index 00000000000..e3258cd28d1 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/gcs.go @@ -0,0 +1,47 @@ +package duckdbreplicator + +import ( + "context" + "errors" + "strings" + + "gocloud.dev/gcp" + "golang.org/x/oauth2/google" +) + +var ErrNoCredentials = errors.New("empty credentials: set `google_application_credentials` env variable") + +func newClient(ctx context.Context, jsonData string, allowHostAccess bool) (*gcp.HTTPClient, error) { + creds, err := credentials(ctx, jsonData, allowHostAccess) + if err != nil { + if !errors.Is(err, ErrNoCredentials) { + return nil, err + } + + // no credentials set, we try with a anonymous client in case user is trying to access public buckets + return gcp.NewAnonymousHTTPClient(gcp.DefaultTransport()), nil + } + // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
+ return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) +} + +func credentials(ctx context.Context, jsonData string, allowHostAccess bool) (*google.Credentials, error) { + if jsonData != "" { + // google_application_credentials is set, use credentials from json string provided by user + return google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") + } + // google_application_credentials is not set + if allowHostAccess { + // use host credentials + creds, err := gcp.DefaultCredentials(ctx) + if err != nil { + if strings.Contains(err.Error(), "google: could not find default credentials") { + return nil, ErrNoCredentials + } + + return nil, err + } + return creds, nil + } + return nil, ErrNoCredentials +} diff --git a/runtime/pkg/duckdbreplicator/go.mod b/runtime/pkg/duckdbreplicator/go.mod new file mode 100644 index 00000000000..3aaa94b52eb --- /dev/null +++ b/runtime/pkg/duckdbreplicator/go.mod @@ -0,0 +1,63 @@ +module github.com/rilldata/duckdb-replicator + +go 1.22.0 + +require ( + github.com/XSAM/otelsql v0.27.0 + github.com/google/uuid v1.6.0 + github.com/jmoiron/sqlx v1.3.5 + github.com/marcboeker/go-duckdb v1.8.2 + github.com/mitchellh/mapstructure v1.5.0 + github.com/stretchr/testify v1.9.0 + go.opentelemetry.io/otel v1.30.0 + gocloud.dev v0.36.0 + golang.org/x/oauth2 v0.22.0 + golang.org/x/sync v0.8.0 +) + +require ( + cloud.google.com/go v0.115.0 // indirect + cloud.google.com/go/auth v0.8.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/iam v1.1.13 // indirect + cloud.google.com/go/storage v1.43.0 // indirect + github.com/apache/arrow/go/v17 v17.0.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/goccy/go-json v0.10.3 
// indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/google/s2a-go v0.1.8 // indirect + github.com/google/wire v0.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect + github.com/googleapis/gax-go/v2 v2.13.0 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 // indirect + go.opentelemetry.io/otel/metric v1.30.0 // indirect + go.opentelemetry.io/otel/trace v1.30.0 // indirect + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/text v0.19.0 // indirect + golang.org/x/time v0.6.0 // indirect + golang.org/x/tools v0.26.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/api v0.191.0 // indirect + google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/runtime/pkg/duckdbreplicator/go.sum b/runtime/pkg/duckdbreplicator/go.sum new file mode 100644 index 00000000000..3f23d84c197 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/go.sum @@ 
-0,0 +1,317 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.115.0 h1:CnFSK6Xo3lDYRoBKEcAtia6VSC837/ZkJuRduSFnr14= +cloud.google.com/go v0.115.0/go.mod h1:8jIM5vVgoAEoiVxQ/O4BFTfHqulPZgs/ufEzMcFMdWU= +cloud.google.com/go/auth v0.8.1 h1:QZW9FjC5lZzN864p13YxvAtGUlQ+KgRL+8Sg45Z6vxo= +cloud.google.com/go/auth v0.8.1/go.mod h1:qGVp/Y3kDRSDZ5gFD/XPUfYQ9xW1iI7q8RIRoCyBbJc= +cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= +cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= +cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +cloud.google.com/go/iam v1.1.13 h1:7zWBXG9ERbMLrzQBRhFliAV+kjcRToDTgQT3CTwYyv4= +cloud.google.com/go/iam v1.1.13/go.mod h1:K8mY0uSXwEXS30KrnVb+j54LB/ntfZu1dr+4zFMNbus= +cloud.google.com/go/longrunning v0.5.11 h1:Havn1kGjz3whCfoD8dxMLP73Ph5w+ODyZB9RUsDxtGk= +cloud.google.com/go/longrunning v0.5.11/go.mod h1:rDn7//lmlfWV1Dx6IB4RatCPenTwwmqXuiP0/RgoEO4= +cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= +cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/XSAM/otelsql v0.27.0 h1:i9xtxtdcqXV768a5C6SoT/RkG+ue3JTOgkYInzlTOqs= +github.com/XSAM/otelsql v0.27.0/go.mod h1:0mFB3TvLa7NCuhm/2nU7/b2wEtsczkj8Rey8ygO7V+A= +github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54= +github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc= +github.com/aws/aws-sdk-go v1.49.0 h1:g9BkW1fo9GqKfwg2+zCD+TW/D36Ux+vtfJ8guF4AYmY= +github.com/aws/aws-sdk-go v1.49.0/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= +github.com/aws/aws-sdk-go-v2 
v1.24.0 h1:890+mqQ+hTpNuw0gGP6/4akolQkSToDJgHfQE7AwGuk= +github.com/aws/aws-sdk-go-v2 v1.24.0/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4/go.mod h1:usURWEKSNNAcAZuzRn/9ZYPT8aZQkR7xcCtunK/LkJo= +github.com/aws/aws-sdk-go-v2/config v1.26.1 h1:z6DqMxclFGL3Zfo+4Q0rLnAZ6yVkzCRxhRMsiRQnD1o= +github.com/aws/aws-sdk-go-v2/config v1.26.1/go.mod h1:ZB+CuKHRbb5v5F0oJtGdhFTelmrxd4iWO1lf0rQwSAg= +github.com/aws/aws-sdk-go-v2/credentials v1.16.12 h1:v/WgB8NxprNvr5inKIiVVrXPuuTegM+K8nncFkr1usU= +github.com/aws/aws-sdk-go-v2/credentials v1.16.12/go.mod h1:X21k0FjEJe+/pauud82HYiQbEr9jRKY3kXEIQ4hXeTQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10 h1:w98BT5w+ao1/r5sUuiH6JkVzjowOKeOJRHERyy1vh58= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10/go.mod h1:K2WGI7vUvkIv1HoNbfBA1bvIZ+9kL3YVmWxeKuLQsiw= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.7 h1:FnLf60PtjXp8ZOzQfhJVsqF0OtYKQZWQfqOLshh8YXg= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.7/go.mod h1:tDVvl8hyU6E9B8TrnNrZQEVkQlB8hjJwcgpPhgtlnNg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 h1:v+HbZaCGmOwnTTVS86Fleq0vPzOd7tnJGbFhP0stNLs= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9/go.mod h1:Xjqy+Nyj7VDLBtCMkQYOw1QYfAEZCVLrfI0ezve8wd4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 h1:N94sVhRACtXyVcjXxrwK1SKFIJrA9pOJ5yu2eSHnmls= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9/go.mod h1:hqamLz7g1/4EJP+GH5NBhcUMLjW+gKLQabgyz6/7WAU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 h1:GrSw8s0Gs/5zZ0SX+gX4zQjRnRsMJDJ2sLur1gRBhEM= +github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2/go.mod h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.9 h1:ugD6qzjYtB7zM5PN/ZIeaAIyefPaD82G8+SJopgvUpw= +github.com/aws/aws-sdk-go-v2/internal/v4a 
v1.2.9/go.mod h1:YD0aYBWCrPENpHolhKw2XDlTIWae2GKXT1T4o6N6hiM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 h1:/b31bi3YVNlkzkBrm9LfpaKoaYZUxIAj4sHfOTmLfqw= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4/go.mod h1:2aGXHFmbInwgP9ZfpmdIfOELL79zhdNYNmReK8qDfdQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.9 h1:/90OR2XbSYfXucBMJ4U14wrjlfleq/0SB6dZDPncgmo= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.9/go.mod h1:dN/Of9/fNZet7UrQQ6kTDo/VSwKPIq94vjlU16bRARc= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9 h1:Nf2sHxjMJR8CSImIVCONRi4g0Su3J+TSTbS7G0pUeMU= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9/go.mod h1:idky4TER38YIjr2cADF1/ugFMKvZV7p//pVeV5LZbF0= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.9 h1:iEAeF6YC3l4FzlJPP9H3Ko1TXpdjdqWffxXjp8SY6uk= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.9/go.mod h1:kjsXoK23q9Z/tLBrckZLLyvjhZoS+AGrzqzUfEClvMM= +github.com/aws/aws-sdk-go-v2/service/s3 v1.47.5 h1:Keso8lIOS+IzI2MkPZyK6G0LYcK3My2LQ+T5bxghEAY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.47.5/go.mod h1:vADO6Jn+Rq4nDtfwNjhgR84qkZwiC6FqCaXdw/kYwjA= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.5 h1:ldSFWz9tEHAwHNmjx2Cvy1MjP5/L9kNoR0skc6wyOOM= +github.com/aws/aws-sdk-go-v2/service/sso v1.18.5/go.mod h1:CaFfXLYL376jgbP7VKC96uFcU8Rlavak0UlAwk1Dlhc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5 h1:2k9KmFawS63euAkY4/ixVNsYYwrwnd5fIvgEKkfZFNM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5/go.mod h1:W+nd4wWDVkSUIox9bacmkBP5NMFQeTJ/xqNabpzSR38= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.5 h1:5UYvv8JUvllZsRnfrcMQ+hJ9jNICmcgKPAO1CER25Wg= +github.com/aws/aws-sdk-go-v2/service/sts v1.26.5/go.mod h1:XX5gh4CB7wAs4KhcF46G6C8a2i7eupU19dcAAE+EydU= +github.com/aws/smithy-go v1.19.0 h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= +github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= 
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= 
+github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.2.0/go.mod 
h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-replayers/grpcreplay v1.1.0 h1:S5+I3zYyZ+GQz68OfbURDdt/+cSMqCK1wrvNx7WBzTE= +github.com/google/go-replayers/grpcreplay v1.1.0/go.mod h1:qzAvJ8/wi57zq7gWqaE6AwLM6miiXUQwP1S+I9icmhk= +github.com/google/go-replayers/httpreplay v1.2.0 h1:VM1wEyyjaoU53BwrOnaf9VhAyQQEEioJvFYxYcLRKzk= +github.com/google/go-replayers/httpreplay v1.2.0/go.mod h1:WahEFFZZ7a1P4VM1qEeHy+tME4bwyqPcwWbNlUI1Mcg= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/wire v0.6.0 h1:HBkoIh4BdSxoyo9PveV8giw7ZsaBOvzWKfcg/6MrVwI= +github.com/google/wire v0.6.0/go.mod h1:F4QhpQ9EDIdJ1Mbop/NZBRB+5yrR6qg3BnctaoUk6NA= +github.com/googleapis/enterprise-certificate-proxy v0.3.2 
h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= +github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= +github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= +github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= +github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/marcboeker/go-duckdb v1.8.2 h1:gHcFjt+HcPSpDVjPSzwof+He12RS+KZPwxcfoVP8Yx4= +github.com/marcboeker/go-duckdb v1.8.2/go.mod h1:2oV8BZv88S16TKGKM+Lwd0g7DX84x0jMxjTInThC8Is= +github.com/mattn/go-sqlite3 v1.14.6 h1:dNPt6NO46WmLVt2DLNpwczCmdV5boIZ6g/tlDrlRUbg= +github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= 
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= 
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 h1:hCq2hNMwsegUvPzI7sPOvtO9cqyy5GbWt/Ybp2xrx8Q= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0/go.mod h1:LqaApwGx/oUmzsbqxkzuBvyoPpkxk3JQWnqfVrJ3wCA= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= +go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= +go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= +go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= +go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= +go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= +go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= +go.opentelemetry.io/otel/sdk/metric v1.21.0 h1:smhI5oD714d6jHE6Tie36fPx4WDFIg+Y6RfAY4ICcR0= +go.opentelemetry.io/otel/sdk/metric v1.21.0/go.mod h1:FJ8RAsoPGv/wYMgBdUJXOm+6pzFY3YdljnXtv1SBE8Q= +go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= +go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +gocloud.dev v0.36.0 h1:q5zoXux4xkOZP473e1EZbG8Gq9f0vlg1VNH5Du/ybus= +gocloud.dev v0.36.0/go.mod h1:bLxah6JQVKBaIxzsr5BQLYB4IYdWHkMZdzCXlo6F0gg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= 
+golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys 
v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= +gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= +google.golang.org/api v0.191.0 h1:cJcF09Z+4HAB2t5qTQM1ZtfL/PemsLFkcFG67qq2afk= +google.golang.org/api v0.191.0/go.mod h1:tD5dsFGxFza0hnQveGfVk9QQYKcfp+VzgRqyXFxE0+E= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 h1:CT2Thj5AuPV9phrYMtzX11k+XkzMGfRAet42PmoTATM= +google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988/go.mod h1:7uvplUBj4RjHAxIZ//98LzOvrQ04JBkaixRmCMI29hc= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= 
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 
v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/runtime/pkg/duckdbreplicator/io.go b/runtime/pkg/duckdbreplicator/io.go new file mode 100644 index 00000000000..62db7bb5d3f --- /dev/null +++ b/runtime/pkg/duckdbreplicator/io.go @@ -0,0 +1,75 @@ +package duckdbreplicator + +import ( + "io" + "os" + "path/filepath" +) + +// copyDir copies a directory from source to destination +// It recursively copies all the contents of the source directory to the destination directory. +// Files with the same name in the destination directory will be overwritten. 
+func copyDir(dst, src string) error { + // Create the destination directory + err := os.MkdirAll(dst, os.ModePerm) + if err != nil { + return err + } + // Read the contents of the source directory + entries, err := os.ReadDir(src) + if err != nil { + return err + } + + // Copy the contents of the source directory + for _, entry := range entries { + srcPath := filepath.Join(src, entry.Name()) + dstPath := filepath.Join(dst, entry.Name()) + + if entry.IsDir() { + err = copyDir(dstPath, srcPath) + if err != nil { + return err + } + } else { + err = copyFile(dstPath, srcPath) + if err != nil { + return err + } + } + } + return nil +} + +func copyFile(dst, src string) error { + // Open the source file + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + // Create the destination file + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + // Copy the content from source to destination + _, err = io.Copy(dstFile, srcFile) + if err != nil { + return err + } + return nil +} + +func fileSize(paths []string) int64 { + var size int64 + for _, path := range paths { + if info, err := os.Stat(path); err == nil { // ignoring error since only error possible is *PathError + size += info.Size() + } + } + return size +} diff --git a/runtime/pkg/duckdbreplicator/io_test.go b/runtime/pkg/duckdbreplicator/io_test.go new file mode 100644 index 00000000000..d07d83dbe3d --- /dev/null +++ b/runtime/pkg/duckdbreplicator/io_test.go @@ -0,0 +1,78 @@ +package duckdbreplicator + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCopyDirEmptyDir(t *testing.T) { + src := t.TempDir() + dest := t.TempDir() + err := os.RemoveAll(dest) + require.NoError(t, err) + require.NoDirExists(t, dest) + + err = copyDir(dest, src) + require.NoError(t, err) + + require.DirExists(t, dest) + require.DirExists(t, src) +} + +func TestCopyDirEmptyNestedDir(t *testing.T) { + src := 
t.TempDir() + dest := t.TempDir() + err := os.RemoveAll(dest) + require.NoError(t, err) + require.NoDirExists(t, dest) + + err = os.MkdirAll(filepath.Join(src, "nested1", "nested"), os.ModePerm) + require.NoError(t, err) + + err = os.MkdirAll(filepath.Join(src, "nested2"), os.ModePerm) + require.NoError(t, err) + + err = copyDir(dest, src) + require.NoError(t, err) + + require.DirExists(t, dest) + require.DirExists(t, filepath.Join(dest, "nested1")) + require.DirExists(t, filepath.Join(dest, "nested2")) + require.DirExists(t, filepath.Join(dest, "nested1", "nested")) +} + +func TestCopyDirWithFile(t *testing.T) { + src := t.TempDir() + dest := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(dest, "existing"), os.ModePerm)) + + err := os.MkdirAll(filepath.Join(src, "nested1", "nested"), os.ModePerm) + require.NoError(t, err) + + require.NoError(t, os.WriteFile(filepath.Join(src, "nested1", "file.txt"), []byte("nested1"), os.ModePerm)) + require.NoError(t, os.WriteFile(filepath.Join(src, "nested1", "nested", "file.txt"), []byte("nested1-nested"), os.ModePerm)) + + err = os.MkdirAll(filepath.Join(src, "nested2"), os.ModePerm) + require.NoError(t, os.WriteFile(filepath.Join(src, "nested2", "file.txt"), []byte("nested2"), os.ModePerm)) + require.NoError(t, err) + + err = copyDir(dest, src) + require.NoError(t, err) + + contents, err := os.ReadFile(filepath.Join(dest, "nested1", "file.txt")) + require.NoError(t, err) + require.Equal(t, "nested1", string(contents)) + + contents, err = os.ReadFile(filepath.Join(dest, "nested1", "nested", "file.txt")) + require.NoError(t, err) + require.Equal(t, "nested1-nested", string(contents)) + + contents, err = os.ReadFile(filepath.Join(dest, "nested2", "file.txt")) + require.NoError(t, err) + require.Equal(t, "nested2", string(contents)) + + require.DirExists(t, filepath.Join(dest, "existing")) +} diff --git a/runtime/pkg/duckdbreplicator/singledb.go b/runtime/pkg/duckdbreplicator/singledb.go new file mode 100644 index 
00000000000..4c0e821a04d --- /dev/null +++ b/runtime/pkg/duckdbreplicator/singledb.go @@ -0,0 +1,318 @@ +package duckdbreplicator + +import ( + "context" + "database/sql" + "database/sql/driver" + "errors" + "fmt" + "io" + "log/slog" + "net/url" + "os" + "strings" + "sync" + + "github.com/XSAM/otelsql" + "github.com/jmoiron/sqlx" + "github.com/marcboeker/go-duckdb" + "go.opentelemetry.io/otel/attribute" +) + +type singledb struct { + db *sqlx.DB + writeMU *sync.Mutex // limits write queries to one at a time. Does not block read queries. + logger *slog.Logger +} + +type SingleDBOptions struct { + DSN string + Clean bool + InitQueries []string + Logger *slog.Logger +} + +var _ DB = &singledb{} + +// NewSingleDB creates a new DB that writes to and reads from a single DuckDB database. +// This is useful for testing only. +func NewSingleDB(ctx context.Context, opts *SingleDBOptions) (DB, error) { + if opts.Clean { + u, err := url.Parse(opts.DSN) + if err != nil { + return nil, err + } + if u.Path != "" { + err = os.Remove(u.Path) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + } + } + connector, err := duckdb.NewConnector(opts.DSN, func(execer driver.ExecerContext) error { + for _, qry := range opts.InitQueries { + _, err := execer.ExecContext(context.Background(), qry, nil) + if err != nil && strings.Contains(err.Error(), "Failed to download extension") { + // Retry using another mirror. Based on: https://github.com/duckdb/duckdb/issues/9378 + _, err = execer.ExecContext(context.Background(), qry+" FROM 'http://nightly-extensions.duckdb.org'", nil) + } + if err != nil { + return err + } + } + return nil + }) + if err != nil { + if strings.Contains(err.Error(), "Symbol not found") { + fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. 
See this link for details: https://support.apple.com/en-in/macos/upgrade") + os.Exit(1) + } + return nil, err + } + + db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") + err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(attribute.String("db.system", "duckdb"))) + if err != nil { + db.Close() + return nil, fmt.Errorf("registering db stats metrics: %w", err) + } + + err = db.PingContext(context.Background()) + if err != nil { + db.Close() + return nil, err + } + if opts.Logger == nil { + opts.Logger = slog.New(slog.NewJSONHandler(io.Discard, nil)) + } + return &singledb{ + db: db, + writeMU: &sync.Mutex{}, + logger: opts.Logger, + }, nil +} + +// Close implements DB. +func (s *singledb) Close() error { + return s.db.Close() +} + +// AcquireReadConnection implements DB. +func (s *singledb) AcquireReadConnection(ctx context.Context) (Conn, func() error, error) { + conn, err := s.db.Connx(ctx) + if err != nil { + return nil, nil, err + } + + return &singledbConn{ + Conn: conn, + db: s, + }, conn.Close, nil +} + +func (s *singledb) AcquireWriteConnection(ctx context.Context) (Conn, func() error, error) { + s.writeMU.Lock() + c, err := s.db.Connx(ctx) + if err != nil { + s.writeMU.Unlock() + return nil, nil, err + } + + return &singledbConn{ + Conn: c, + db: s, + }, func() error { + err := c.Close() + s.writeMU.Unlock() + return err + }, nil +} + +// CreateTableAsSelect implements DB. 
+func (s *singledb) CreateTableAsSelect(ctx context.Context, name, uery string, opts *CreateTableOptions) error { + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + return s.createTableAsSelect(ctx, conn, name, uery, opts) +} + +func (s *singledb) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, name, query string, opts *CreateTableOptions) error { + var typ string + if opts != nil && opts.View { + typ = "VIEW" + } else { + typ = "TABLE" + } + + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE %s %s AS (%s\n)", typ, safeSQLName(name), query)) + return err +} + +// DropTable implements DB. +func (s *singledb) DropTable(ctx context.Context, name string) error { + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + return s.dropTable(ctx, conn, name) +} + +func (s *singledb) dropTable(ctx context.Context, conn *sqlx.Conn, name string) error { + view, err := isView(ctx, conn, name) + if err != nil { + return err + } + var typ string + if view { + typ = "VIEW" + } else { + typ = "TABLE" + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("DROP %s %s", typ, safeSQLName(name))) + return err +} + +// InsertTableAsSelect implements DB. +func (s *singledb) InsertTableAsSelect(ctx context.Context, name, query string, opts *InsertTableOptions) error { + if opts == nil { + opts = &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + } + } + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + if opts == nil { + opts = &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + } + } + return execIncrementalInsert(ctx, conn, safeSQLName(name), query, opts) +} + +// RenameTable implements DB. 
+func (s *singledb) RenameTable(ctx context.Context, oldName, newName string) error { + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + return s.renameTable(ctx, conn, oldName, newName) +} + +func (s *singledb) renameTable(ctx context.Context, conn *sqlx.Conn, oldName, newName string) error { + view, err := isView(ctx, conn, oldName) + if err != nil { + return err + } + + var typ string + if view { + typ = "VIEW" + } else { + typ = "TABLE" + } + + newNameIsView, err := isView(ctx, conn, newName) + if err != nil { + if !errors.Is(err, sql.ErrNoRows) { + return err + } + // The newName does not exist. + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName))) + return err + } + + // The newName is already occupied. + var existingTyp string + if newNameIsView { + existingTyp = "VIEW" + } else { + existingTyp = "TABLE" + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("DROP %s IF EXISTS %s", existingTyp, safeSQLName(newName))) + if err != nil { + return err + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName))) + return err +} + +// AddTableColumn implements DB. +func (s *singledb) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + return s.addTableColumn(ctx, conn, tableName, columnName, typ) +} + +func (s *singledb) addTableColumn(ctx context.Context, conn *sqlx.Conn, tableName, columnName, typ string) error { + _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", safeSQLString(tableName), safeSQLName(columnName), typ)) + return err +} + +// AlterTableColumn implements DB. 
+func (s *singledb) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { + s.writeMU.Lock() + defer s.writeMU.Unlock() + + conn, err := s.db.Connx(ctx) + if err != nil { + return err + } + + return s.alterTableColumn(ctx, conn, tableName, columnName, newType) +} + +func (s *singledb) alterTableColumn(ctx context.Context, conn *sqlx.Conn, tableName, columnName, newType string) error { + _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ALTER COLUMN %s TYPE %s", safeSQLName(tableName), safeSQLName(columnName), newType)) + return err +} + +// TODO :: fix by calling pragma_database_size +func (s *singledb) Size() int64 { + return 0 +} + +func isView(ctx context.Context, conn *sqlx.Conn, name string) (bool, error) { + var view bool + err := conn.QueryRowxContext(ctx, ` + SELECT + UPPER(table_type) = 'VIEW' + FROM + information_schema.tables + WHERE + table_catalog = current_database() + AND table_schema = 'main' + AND LOWER(table_name) = LOWER(?) + `, name).Scan(&view) + if err != nil { + return false, err + } + return view, nil +} diff --git a/runtime/pkg/duckdbreplicator/singledb_test.go b/runtime/pkg/duckdbreplicator/singledb_test.go new file mode 100644 index 00000000000..e21cf793618 --- /dev/null +++ b/runtime/pkg/duckdbreplicator/singledb_test.go @@ -0,0 +1,101 @@ +package duckdbreplicator + +import ( + "context" + "database/sql" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSingleDB_test(t *testing.T) { + ctx := context.Background() + db, err := NewSingleDB(ctx, &SingleDBOptions{ + DSN: "", + }) + require.NoError(t, err) + + // create table + rw, release, err := db.AcquireWriteConnection(ctx) + require.NoError(t, err) + + err = rw.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", nil) + require.NoError(t, err) + + // rename table + err = rw.RenameTable(ctx, "test-2", "test") + require.NoError(t, err) + + // insert into table + err = rw.InsertTableAsSelect(ctx, "test", "SELECT 
2 AS id, 'USA' AS country", nil) + require.NoError(t, err) + + // add column + err = rw.AddTableColumn(ctx, "test", "currency_score", "INT") + require.NoError(t, err) + + // alter column + err = rw.AlterTableColumn(ctx, "test", "currency_score", "FLOAT") + require.NoError(t, err) + require.NoError(t, release()) + + // select from table + conn, release, err := db.AcquireReadConnection(ctx) + require.NoError(t, err) + + var ( + id int + country string + currencyScore sql.NullFloat64 + ) + + err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country, currency_score FROM test WHERE id = 2").Scan(&id, &country, ¤cyScore) + require.NoError(t, err) + require.Equal(t, 2, id) + require.Equal(t, "USA", country) + require.Equal(t, false, currencyScore.Valid) + + err = release() + require.NoError(t, err) + + // drop table + err = db.DropTable(ctx, "test") + require.NoError(t, err) +} + +func TestSingleDB_testRenameExisting(t *testing.T) { + ctx := context.Background() + db, err := NewSingleDB(ctx, &SingleDBOptions{ + DSN: "", + }) + require.NoError(t, err) + + // create table + err = db.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", nil) + require.NoError(t, err) + + // create another table + err = db.CreateTableAsSelect(ctx, "test-3", "SELECT 2 AS id, 'USA' AS country", nil) + require.NoError(t, err) + + // rename table + err = db.RenameTable(ctx, "test-2", "test-3") + require.NoError(t, err) + + // select from table + conn, release, err := db.AcquireReadConnection(ctx) + require.NoError(t, err) + + var ( + id int + country string + ) + + err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM \"test-3\" WHERE id = 1").Scan(&id, &country) + require.NoError(t, err) + require.Equal(t, 1, id) + require.Equal(t, "India", country) + + err = release() + require.NoError(t, err) +} diff --git a/runtime/pkg/duckdbreplicator/sqlutil.go b/runtime/pkg/duckdbreplicator/sqlutil.go new file mode 100644 index 00000000000..8183f916472 --- /dev/null +++ 
b/runtime/pkg/duckdbreplicator/sqlutil.go @@ -0,0 +1,17 @@ +package duckdbreplicator + +import ( + "fmt" + "strings" +) + +func safeSQLString(s string) string { + return fmt.Sprintf("'%s'", strings.ReplaceAll(s, "'", "''")) +} + +func safeSQLName(ident string) string { + if ident == "" { + return ident + } + return fmt.Sprintf("\"%s\"", strings.ReplaceAll(ident, "\"", "\"\"")) // nolint:gocritic // Because SQL escaping is different +} From 05a060338a27b32e2163bd09574485872dcc362a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Fri, 8 Nov 2024 18:35:35 +0000 Subject: [PATCH 11/64] Remove go.mod --- runtime/pkg/duckdbreplicator/go.mod | 63 ------ runtime/pkg/duckdbreplicator/go.sum | 317 ---------------------------- 2 files changed, 380 deletions(-) delete mode 100644 runtime/pkg/duckdbreplicator/go.mod delete mode 100644 runtime/pkg/duckdbreplicator/go.sum diff --git a/runtime/pkg/duckdbreplicator/go.mod b/runtime/pkg/duckdbreplicator/go.mod deleted file mode 100644 index 3aaa94b52eb..00000000000 --- a/runtime/pkg/duckdbreplicator/go.mod +++ /dev/null @@ -1,63 +0,0 @@ -module github.com/rilldata/duckdb-replicator - -go 1.22.0 - -require ( - github.com/XSAM/otelsql v0.27.0 - github.com/google/uuid v1.6.0 - github.com/jmoiron/sqlx v1.3.5 - github.com/marcboeker/go-duckdb v1.8.2 - github.com/mitchellh/mapstructure v1.5.0 - github.com/stretchr/testify v1.9.0 - go.opentelemetry.io/otel v1.30.0 - gocloud.dev v0.36.0 - golang.org/x/oauth2 v0.22.0 - golang.org/x/sync v0.8.0 -) - -require ( - cloud.google.com/go v0.115.0 // indirect - cloud.google.com/go/auth v0.8.1 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect - cloud.google.com/go/iam v1.1.13 // indirect - cloud.google.com/go/storage v1.43.0 // indirect - github.com/apache/arrow/go/v17 v17.0.0 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/felixge/httpsnoop v1.0.4 // indirect - 
github.com/go-logr/logr v1.4.2 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/goccy/go-json v0.10.3 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/google/flatbuffers v24.3.25+incompatible // indirect - github.com/google/s2a-go v0.1.8 // indirect - github.com/google/wire v0.6.0 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.13.0 // indirect - github.com/klauspost/compress v1.17.11 // indirect - github.com/klauspost/cpuid/v2 v2.2.8 // indirect - github.com/pierrec/lz4/v4 v4.1.21 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/zeebo/xxh3 v1.0.2 // indirect - go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 // indirect - go.opentelemetry.io/otel/metric v1.30.0 // indirect - go.opentelemetry.io/otel/trace v1.30.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect - golang.org/x/mod v0.21.0 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect - golang.org/x/time v0.6.0 // indirect - golang.org/x/tools v0.26.0 // indirect - golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/api v0.191.0 // indirect - google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect - google.golang.org/grpc v1.67.1 // indirect - google.golang.org/protobuf v1.35.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/runtime/pkg/duckdbreplicator/go.sum 
b/runtime/pkg/duckdbreplicator/go.sum deleted file mode 100644 index 3f23d84c197..00000000000 --- a/runtime/pkg/duckdbreplicator/go.sum +++ /dev/null @@ -1,317 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.115.0 h1:CnFSK6Xo3lDYRoBKEcAtia6VSC837/ZkJuRduSFnr14= -cloud.google.com/go v0.115.0/go.mod h1:8jIM5vVgoAEoiVxQ/O4BFTfHqulPZgs/ufEzMcFMdWU= -cloud.google.com/go/auth v0.8.1 h1:QZW9FjC5lZzN864p13YxvAtGUlQ+KgRL+8Sg45Z6vxo= -cloud.google.com/go/auth v0.8.1/go.mod h1:qGVp/Y3kDRSDZ5gFD/XPUfYQ9xW1iI7q8RIRoCyBbJc= -cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= -cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= -cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= -cloud.google.com/go/iam v1.1.13 h1:7zWBXG9ERbMLrzQBRhFliAV+kjcRToDTgQT3CTwYyv4= -cloud.google.com/go/iam v1.1.13/go.mod h1:K8mY0uSXwEXS30KrnVb+j54LB/ntfZu1dr+4zFMNbus= -cloud.google.com/go/longrunning v0.5.11 h1:Havn1kGjz3whCfoD8dxMLP73Ph5w+ODyZB9RUsDxtGk= -cloud.google.com/go/longrunning v0.5.11/go.mod h1:rDn7//lmlfWV1Dx6IB4RatCPenTwwmqXuiP0/RgoEO4= -cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= -cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/XSAM/otelsql v0.27.0 h1:i9xtxtdcqXV768a5C6SoT/RkG+ue3JTOgkYInzlTOqs= -github.com/XSAM/otelsql v0.27.0/go.mod h1:0mFB3TvLa7NCuhm/2nU7/b2wEtsczkj8Rey8ygO7V+A= -github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54= -github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc= -github.com/aws/aws-sdk-go v1.49.0 
h1:g9BkW1fo9GqKfwg2+zCD+TW/D36Ux+vtfJ8guF4AYmY= -github.com/aws/aws-sdk-go v1.49.0/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= -github.com/aws/aws-sdk-go-v2 v1.24.0 h1:890+mqQ+hTpNuw0gGP6/4akolQkSToDJgHfQE7AwGuk= -github.com/aws/aws-sdk-go-v2 v1.24.0/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4/go.mod h1:usURWEKSNNAcAZuzRn/9ZYPT8aZQkR7xcCtunK/LkJo= -github.com/aws/aws-sdk-go-v2/config v1.26.1 h1:z6DqMxclFGL3Zfo+4Q0rLnAZ6yVkzCRxhRMsiRQnD1o= -github.com/aws/aws-sdk-go-v2/config v1.26.1/go.mod h1:ZB+CuKHRbb5v5F0oJtGdhFTelmrxd4iWO1lf0rQwSAg= -github.com/aws/aws-sdk-go-v2/credentials v1.16.12 h1:v/WgB8NxprNvr5inKIiVVrXPuuTegM+K8nncFkr1usU= -github.com/aws/aws-sdk-go-v2/credentials v1.16.12/go.mod h1:X21k0FjEJe+/pauud82HYiQbEr9jRKY3kXEIQ4hXeTQ= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10 h1:w98BT5w+ao1/r5sUuiH6JkVzjowOKeOJRHERyy1vh58= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.10/go.mod h1:K2WGI7vUvkIv1HoNbfBA1bvIZ+9kL3YVmWxeKuLQsiw= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.7 h1:FnLf60PtjXp8ZOzQfhJVsqF0OtYKQZWQfqOLshh8YXg= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.7/go.mod h1:tDVvl8hyU6E9B8TrnNrZQEVkQlB8hjJwcgpPhgtlnNg= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9 h1:v+HbZaCGmOwnTTVS86Fleq0vPzOd7tnJGbFhP0stNLs= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.9/go.mod h1:Xjqy+Nyj7VDLBtCMkQYOw1QYfAEZCVLrfI0ezve8wd4= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9 h1:N94sVhRACtXyVcjXxrwK1SKFIJrA9pOJ5yu2eSHnmls= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.9/go.mod h1:hqamLz7g1/4EJP+GH5NBhcUMLjW+gKLQabgyz6/7WAU= -github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2 h1:GrSw8s0Gs/5zZ0SX+gX4zQjRnRsMJDJ2sLur1gRBhEM= -github.com/aws/aws-sdk-go-v2/internal/ini v1.7.2/go.mod 
h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.9 h1:ugD6qzjYtB7zM5PN/ZIeaAIyefPaD82G8+SJopgvUpw= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.9/go.mod h1:YD0aYBWCrPENpHolhKw2XDlTIWae2GKXT1T4o6N6hiM= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4 h1:/b31bi3YVNlkzkBrm9LfpaKoaYZUxIAj4sHfOTmLfqw= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.4/go.mod h1:2aGXHFmbInwgP9ZfpmdIfOELL79zhdNYNmReK8qDfdQ= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.9 h1:/90OR2XbSYfXucBMJ4U14wrjlfleq/0SB6dZDPncgmo= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.9/go.mod h1:dN/Of9/fNZet7UrQQ6kTDo/VSwKPIq94vjlU16bRARc= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9 h1:Nf2sHxjMJR8CSImIVCONRi4g0Su3J+TSTbS7G0pUeMU= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.9/go.mod h1:idky4TER38YIjr2cADF1/ugFMKvZV7p//pVeV5LZbF0= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.9 h1:iEAeF6YC3l4FzlJPP9H3Ko1TXpdjdqWffxXjp8SY6uk= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.9/go.mod h1:kjsXoK23q9Z/tLBrckZLLyvjhZoS+AGrzqzUfEClvMM= -github.com/aws/aws-sdk-go-v2/service/s3 v1.47.5 h1:Keso8lIOS+IzI2MkPZyK6G0LYcK3My2LQ+T5bxghEAY= -github.com/aws/aws-sdk-go-v2/service/s3 v1.47.5/go.mod h1:vADO6Jn+Rq4nDtfwNjhgR84qkZwiC6FqCaXdw/kYwjA= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.5 h1:ldSFWz9tEHAwHNmjx2Cvy1MjP5/L9kNoR0skc6wyOOM= -github.com/aws/aws-sdk-go-v2/service/sso v1.18.5/go.mod h1:CaFfXLYL376jgbP7VKC96uFcU8Rlavak0UlAwk1Dlhc= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5 h1:2k9KmFawS63euAkY4/ixVNsYYwrwnd5fIvgEKkfZFNM= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.5/go.mod h1:W+nd4wWDVkSUIox9bacmkBP5NMFQeTJ/xqNabpzSR38= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.5 h1:5UYvv8JUvllZsRnfrcMQ+hJ9jNICmcgKPAO1CER25Wg= -github.com/aws/aws-sdk-go-v2/service/sts v1.26.5/go.mod 
h1:XX5gh4CB7wAs4KhcF46G6C8a2i7eupU19dcAAE+EydU= -github.com/aws/smithy-go v1.19.0 h1:KWFKQV80DpP3vJrrA9sVAHQ5gc2z8i4EzrLhLlWXcBM= -github.com/aws/smithy-go v1.19.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= -github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-sql-driver/mysql v1.7.1 
h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= -github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= -github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= -github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/flatbuffers v24.3.25+incompatible 
h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= -github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-replayers/grpcreplay v1.1.0 h1:S5+I3zYyZ+GQz68OfbURDdt/+cSMqCK1wrvNx7WBzTE= -github.com/google/go-replayers/grpcreplay v1.1.0/go.mod h1:qzAvJ8/wi57zq7gWqaE6AwLM6miiXUQwP1S+I9icmhk= -github.com/google/go-replayers/httpreplay v1.2.0 h1:VM1wEyyjaoU53BwrOnaf9VhAyQQEEioJvFYxYcLRKzk= -github.com/google/go-replayers/httpreplay v1.2.0/go.mod h1:WahEFFZZ7a1P4VM1qEeHy+tME4bwyqPcwWbNlUI1Mcg= -github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= -github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= -github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= -github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= -github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/wire v0.6.0 
h1:HBkoIh4BdSxoyo9PveV8giw7ZsaBOvzWKfcg/6MrVwI= -github.com/google/wire v0.6.0/go.mod h1:F4QhpQ9EDIdJ1Mbop/NZBRB+5yrR6qg3BnctaoUk6NA= -github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= -github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= -github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= -github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= -github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= -github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= -github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= -github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= -github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/marcboeker/go-duckdb v1.8.2 h1:gHcFjt+HcPSpDVjPSzwof+He12RS+KZPwxcfoVP8Yx4= -github.com/marcboeker/go-duckdb v1.8.2/go.mod h1:2oV8BZv88S16TKGKM+Lwd0g7DX84x0jMxjTInThC8Is= 
-github.com/mattn/go-sqlite3 v1.14.6 h1:dNPt6NO46WmLVt2DLNpwczCmdV5boIZ6g/tlDrlRUbg= -github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= -github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= -github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= -github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= -github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= -github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -go.opencensus.io v0.24.0 
h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= -go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 h1:hCq2hNMwsegUvPzI7sPOvtO9cqyy5GbWt/Ybp2xrx8Q= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0/go.mod h1:LqaApwGx/oUmzsbqxkzuBvyoPpkxk3JQWnqfVrJ3wCA= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= -go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= -go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= -go.opentelemetry.io/otel/sdk/metric v1.21.0 h1:smhI5oD714d6jHE6Tie36fPx4WDFIg+Y6RfAY4ICcR0= -go.opentelemetry.io/otel/sdk/metric v1.21.0/go.mod h1:FJ8RAsoPGv/wYMgBdUJXOm+6pzFY3YdljnXtv1SBE8Q= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= -gocloud.dev v0.36.0 h1:q5zoXux4xkOZP473e1EZbG8Gq9f0vlg1VNH5Du/ybus= -gocloud.dev v0.36.0/go.mod h1:bLxah6JQVKBaIxzsr5BQLYB4IYdWHkMZdzCXlo6F0gg= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod 
h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= -golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= -golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net 
v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sync v0.8.0 
h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= -golang.org/x/term v0.16.0/go.mod 
h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= -golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= 
-golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= -gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= -gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= -google.golang.org/api v0.191.0 h1:cJcF09Z+4HAB2t5qTQM1ZtfL/PemsLFkcFG67qq2afk= -google.golang.org/api v0.191.0/go.mod h1:tD5dsFGxFza0hnQveGfVk9QQYKcfp+VzgRqyXFxE0+E= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 h1:CT2Thj5AuPV9phrYMtzX11k+XkzMGfRAet42PmoTATM= -google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988/go.mod h1:7uvplUBj4RjHAxIZ//98LzOvrQ04JBkaixRmCMI29hc= -google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= -google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= 
-google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= -google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 
v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 13653fd3609ec755c181416d8de12fbec13438f0 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:10:01 +0530 Subject: [PATCH 12/64] use single local directory --- runtime/pkg/duckdbreplicator/examples/main.go | 74 --- runtime/pkg/duckdbreplicator/gcs.go | 47 -- .../{duckdbreplicator => rduckdb}/README.md | 2 +- .../{duckdbreplicator => rduckdb}/backup.go | 147 ++--- .../pkg/{duckdbreplicator => rduckdb}/conn.go | 15 +- .../pkg/{duckdbreplicator => rduckdb}/db.go | 505 ++++++++---------- .../{duckdbreplicator => rduckdb}/db_test.go | 22 +- runtime/pkg/rduckdb/examples/main.go | 74 +++ .../pkg/{duckdbreplicator => rduckdb}/io.go | 2 +- .../{duckdbreplicator => rduckdb}/io_test.go | 2 +- .../{duckdbreplicator => rduckdb}/singledb.go | 12 +- .../singledb_test.go | 8 +- .../{duckdbreplicator => rduckdb}/sqlutil.go | 2 +- 13 files changed, 375 insertions(+), 537 deletions(-) delete mode 100644 runtime/pkg/duckdbreplicator/examples/main.go delete mode 100644 runtime/pkg/duckdbreplicator/gcs.go rename runtime/pkg/{duckdbreplicator => rduckdb}/README.md (97%) rename runtime/pkg/{duckdbreplicator => rduckdb}/backup.go (58%) rename runtime/pkg/{duckdbreplicator => rduckdb}/conn.go (93%) rename runtime/pkg/{duckdbreplicator => rduckdb}/db.go (72%) 
rename runtime/pkg/{duckdbreplicator => rduckdb}/db_test.go (75%) create mode 100644 runtime/pkg/rduckdb/examples/main.go rename runtime/pkg/{duckdbreplicator => rduckdb}/io.go (98%) rename runtime/pkg/{duckdbreplicator => rduckdb}/io_test.go (98%) rename runtime/pkg/{duckdbreplicator => rduckdb}/singledb.go (97%) rename runtime/pkg/{duckdbreplicator => rduckdb}/singledb_test.go (94%) rename runtime/pkg/{duckdbreplicator => rduckdb}/sqlutil.go (93%) diff --git a/runtime/pkg/duckdbreplicator/examples/main.go b/runtime/pkg/duckdbreplicator/examples/main.go deleted file mode 100644 index f34b8b40594..00000000000 --- a/runtime/pkg/duckdbreplicator/examples/main.go +++ /dev/null @@ -1,74 +0,0 @@ -package main - -import ( - "context" - "fmt" - "log/slog" - "time" - - duckdbreplicator "github.com/rilldata/duckdb-replicator" - _ "gocloud.dev/blob/gcsblob" -) - -func main() { - backup, err := duckdbreplicator.NewGCSBackupProvider(context.Background(), &duckdbreplicator.GCSBackupProviderOptions{ - UseHostCredentials: true, - Bucket: "", - UniqueIdentifier: "756c6367-e807-43ff-8b07-df1bae29c57e/", - }) - if err != nil { - panic(err) - } - - dbOptions := &duckdbreplicator.DBOptions{ - LocalPath: "", - BackupProvider: backup, - ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, - WriteSettings: map[string]string{"memory_limit": "8GB", "threads": "2"}, - InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, - Logger: slog.Default(), - } - - db, err := duckdbreplicator.NewDB(context.Background(), "756c6367-e807-43ff-8b07-df1bae29c57e", dbOptions) - if err != nil { - panic(err) - } - defer db.Close() - - t := time.Now() - // create table - err = db.CreateTableAsSelect(context.Background(), "test-2", `SELECT * FROM read_parquet('data*.parquet')`, nil) - if err != nil { - panic(err) - } - fmt.Printf("time taken %v\n", time.Since(t)) - - // rename table - err = db.RenameTable(context.Background(), "test-2", "test") 
- if err != nil { - panic(err) - } - - // insert into renamed table - err = db.InsertTableAsSelect(context.Background(), "test", `SELECT * FROM read_parquet('data*.parquet')`, &duckdbreplicator.InsertTableOptions{ - Strategy: duckdbreplicator.IncrementalStrategyAppend, - }) - if err != nil { - panic(err) - } - - // get count - conn, release, err := db.AcquireReadConnection(context.Background()) - if err != nil { - panic(err) - } - defer release() - - var count int - err = conn.Connx().QueryRowxContext(context.Background(), `SELECT count(*) FROM "test"`).Scan(&count) - if err != nil { - fmt.Printf("error %v\n", err) - } - fmt.Println(count) - -} diff --git a/runtime/pkg/duckdbreplicator/gcs.go b/runtime/pkg/duckdbreplicator/gcs.go deleted file mode 100644 index e3258cd28d1..00000000000 --- a/runtime/pkg/duckdbreplicator/gcs.go +++ /dev/null @@ -1,47 +0,0 @@ -package duckdbreplicator - -import ( - "context" - "errors" - "strings" - - "gocloud.dev/gcp" - "golang.org/x/oauth2/google" -) - -var ErrNoCredentials = errors.New("empty credentials: set `google_application_credentials` env variable") - -func newClient(ctx context.Context, jsonData string, allowHostAccess bool) (*gcp.HTTPClient, error) { - creds, err := credentials(ctx, jsonData, allowHostAccess) - if err != nil { - if !errors.Is(err, ErrNoCredentials) { - return nil, err - } - - // no credentials set, we try with a anonymous client in case user is trying to access public buckets - return gcp.NewAnonymousHTTPClient(gcp.DefaultTransport()), nil - } - // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
- return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) -} - -func credentials(ctx context.Context, jsonData string, allowHostAccess bool) (*google.Credentials, error) { - if jsonData != "" { - // google_application_credentials is set, use credentials from json string provided by user - return google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") - } - // google_application_credentials is not set - if allowHostAccess { - // use host credentials - creds, err := gcp.DefaultCredentials(ctx) - if err != nil { - if strings.Contains(err.Error(), "google: could not find default credentials") { - return nil, ErrNoCredentials - } - - return nil, err - } - return creds, nil - } - return nil, ErrNoCredentials -} diff --git a/runtime/pkg/duckdbreplicator/README.md b/runtime/pkg/rduckdb/README.md similarity index 97% rename from runtime/pkg/duckdbreplicator/README.md rename to runtime/pkg/rduckdb/README.md index 8e0e96dd433..00694e81b9a 100644 --- a/runtime/pkg/duckdbreplicator/README.md +++ b/runtime/pkg/rduckdb/README.md @@ -1,4 +1,4 @@ -# DuckDB Replicator +# rduckdb ## Motivation 1. As an embedded database, DuckDB does not inherently provide the same isolation for ETL and serving workloads that other OLAP databases offer. 
diff --git a/runtime/pkg/duckdbreplicator/backup.go b/runtime/pkg/rduckdb/backup.go similarity index 58% rename from runtime/pkg/duckdbreplicator/backup.go rename to runtime/pkg/rduckdb/backup.go index 3a043262bcb..f6ea5e5169d 100644 --- a/runtime/pkg/duckdbreplicator/backup.go +++ b/runtime/pkg/rduckdb/backup.go @@ -1,78 +1,25 @@ -package duckdbreplicator +package rduckdb import ( "context" "errors" "fmt" "io" - "io/fs" "log/slog" "os" + "path" "path/filepath" "strings" "time" "gocloud.dev/blob" - "gocloud.dev/blob/gcsblob" "gocloud.dev/gcerrors" "golang.org/x/sync/errgroup" ) -type BackupFormat string - -const ( - BackupFormatUnknown BackupFormat = "unknown" - BackupFormatDB BackupFormat = "db" - BackupFormatParquet BackupFormat = "parquet" -) - -type BackupProvider struct { - bucket *blob.Bucket -} - -func (b *BackupProvider) Close() error { - return b.bucket.Close() -} - -type GCSBackupProviderOptions struct { - // UseHostCredentials specifies whether to use the host's default credentials. - UseHostCredentials bool - ApplicationCredentialsJSON string - // Bucket is the GCS bucket to use for backups. Should be of the form `bucket-name`. - Bucket string - // BackupFormat specifies the format of the backup. - // TODO :: implement backup format. Fixed to DuckDB for now. - BackupFormat BackupFormat - // UnqiueIdentifier is used to store backups in a unique location. - // This must be set when multiple databases are writing to the same bucket. - UniqueIdentifier string -} - -// NewGCSBackupProvider creates a new BackupProvider based on GCS. 
-func NewGCSBackupProvider(ctx context.Context, opts *GCSBackupProviderOptions) (*BackupProvider, error) { - client, err := newClient(ctx, opts.ApplicationCredentialsJSON, opts.UseHostCredentials) - if err != nil { - return nil, err - } - - bucket, err := gcsblob.OpenBucket(ctx, client, opts.Bucket, nil) - if err != nil { - return nil, fmt.Errorf("failed to open bucket %q, %w", opts.Bucket, err) - } - - if opts.UniqueIdentifier != "" { - if !strings.HasSuffix(opts.UniqueIdentifier, "/") { - opts.UniqueIdentifier += "/" - } - bucket = blob.PrefixedBucket(bucket, opts.UniqueIdentifier) - } - return &BackupProvider{ - bucket: bucket, - }, nil -} - -// syncWrite syncs the write path with the backup location. -func (d *db) syncWrite(ctx context.Context) error { +// syncLocalWithBackup syncs the write path with the backup location. +// This is not safe for concurrent calls. +func (d *db) syncLocalWithBackup(ctx context.Context) error { if !d.writeDirty || d.backup == nil { // optimisation to skip sync if write was already synced return nil @@ -116,8 +63,8 @@ func (d *db) syncWrite(ctx context.Context) error { // get version of the table var backedUpVersion string - err = retry(func() error { - res, err := d.backup.ReadAll(ctx, filepath.Join(table, "version.txt")) + err = retry(ctx, func() error { + res, err := d.backup.ReadAll(ctx, path.Join(table, "version.txt")) if err != nil { return err } @@ -135,13 +82,13 @@ func (d *db) syncWrite(ctx context.Context) error { tblVersions[table] = backedUpVersion // check with current version - version, exists, _ := tableVersion(d.writePath, table) + version, exists, _ := tableVersion(d.localPath, table) if exists && version == backedUpVersion { d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) continue } - tableDir := filepath.Join(d.writePath, table) + tableDir := filepath.Join(d.localPath, table) // truncate existing table directory if err := os.RemoveAll(tableDir); err != nil { 
return err @@ -150,7 +97,7 @@ func (d *db) syncWrite(ctx context.Context) error { return err } - tblIter := d.backup.List(&blob.ListOptions{Prefix: filepath.Join(table, backedUpVersion)}) + tblIter := d.backup.List(&blob.ListOptions{Prefix: path.Join(table, backedUpVersion)}) // download all objects in the table and current version for { obj, err := tblIter.Next(ctx) @@ -161,8 +108,8 @@ func (d *db) syncWrite(ctx context.Context) error { return err } g.Go(func() error { - return retry(func() error { - file, err := os.Create(filepath.Join(d.writePath, obj.Key)) + return retry(ctx, func() error { + file, err := os.Create(filepath.Join(d.localPath, obj.Key)) if err != nil { return err } @@ -189,14 +136,14 @@ func (d *db) syncWrite(ctx context.Context) error { // Update table versions for table, version := range tblVersions { - err = os.WriteFile(filepath.Join(d.writePath, table, "version.txt"), []byte(version), fs.ModePerm) + err = d.setTableVersion(table, version) if err != nil { return err } } // remove any tables that are not in backup - entries, err := os.ReadDir(d.writePath) + entries, err := os.ReadDir(d.localPath) if err != nil { return err } @@ -207,7 +154,7 @@ func (d *db) syncWrite(ctx context.Context) error { if _, ok := tblVersions[entry.Name()]; ok { continue } - err = os.RemoveAll(filepath.Join(d.writePath, entry.Name())) + err = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) if err != nil { return err } @@ -215,12 +162,14 @@ func (d *db) syncWrite(ctx context.Context) error { return nil } -func (d *db) syncBackup(ctx context.Context, table string) error { +// syncBackupWithLocal syncs the backup location with the local path for given table. +// If oldVersion is specified, it is deleted after successful sync. 
+func (d *db) syncBackupWithLocal(ctx context.Context, table, oldVersion string) error { if d.backup == nil { return nil } d.logger.Debug("syncing table", slog.String("table", table)) - version, exist, err := tableVersion(d.writePath, table) + version, exist, err := tableVersion(d.localPath, table) if err != nil { return err } @@ -229,45 +178,53 @@ func (d *db) syncBackup(ctx context.Context, table string) error { return fmt.Errorf("table %q not found", table) } - path := filepath.Join(d.writePath, table, version) - entries, err := os.ReadDir(path) + localPath := filepath.Join(d.localPath, table, version) + entries, err := os.ReadDir(localPath) if err != nil { return err } for _, entry := range entries { - d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", path)) + d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) // no directory should exist as of now if entry.IsDir() { - d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", path)) + d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) continue } - wr, err := os.Open(filepath.Join(path, entry.Name())) + wr, err := os.Open(filepath.Join(localPath, entry.Name())) if err != nil { return err } // upload to cloud storage - err = retry(func() error { - return d.backup.Upload(ctx, filepath.Join(table, version, entry.Name()), wr, &blob.WriterOptions{ + err = retry(ctx, func() error { + return d.backup.Upload(ctx, path.Join(table, version, entry.Name()), wr, &blob.WriterOptions{ ContentType: "application/octet-stream", }) }) - wr.Close() + _ = wr.Close() if err != nil { return err } } // update version.txt - // Ideally if this fails it is a non recoverable error but for now we will rely on retries - err = retry(func() error { - return d.backup.WriteAll(ctx, filepath.Join(table, 
"version.txt"), []byte(version), nil) + // Ideally if this fails it leaves backup in inconsistent state but for now we will rely on retries + // ignore context cancellation errors for version.txt updates + ctxWithTimeout, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + err = retry(context.Background(), func() error { + return d.backup.WriteAll(ctxWithTimeout, path.Join(table, "version.txt"), []byte(version), nil) }) if err != nil { d.logger.Error("failed to update version.txt in backup", slog.Any("error", err)) } + + // success -- remove old version + if oldVersion != "" { + _ = d.deleteBackup(ctx, table, oldVersion) + } return err } @@ -284,18 +241,22 @@ func (d *db) deleteBackup(ctx context.Context, table, version string) error { var prefix string if table != "" { if version != "" { - prefix = filepath.Join(table, version) + "/" + prefix = path.Join(table, version) + "/" } else { // deleting the entire table prefix = table + "/" // delete version.txt first - err := retry(func() error { return d.backup.Delete(ctx, "version.txt") }) + // also ignore context cancellation errors since it can leave the backup in inconsistent state + ctxWithTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + err := retry(context.Background(), func() error { return d.backup.Delete(ctxWithTimeout, "version.txt") }) if err != nil && gcerrors.Code(err) != gcerrors.NotFound { d.logger.Error("failed to delete version.txt in backup", slog.Any("error", err)) return err } } } + // ignore errors since version.txt is already removed iter := d.backup.List(&blob.ListOptions{Prefix: prefix}) for { @@ -304,26 +265,30 @@ func (d *db) deleteBackup(ctx context.Context, table, version string) error { if errors.Is(err, io.EOF) { break } - return err + d.logger.Debug("failed to list object", slog.Any("error", err)) } - err = retry(func() error { return d.backup.Delete(ctx, obj.Key) }) + err = retry(ctx, func() error { return 
d.backup.Delete(ctx, obj.Key) }) if err != nil { - return err + d.logger.Debug("failed to delete object", slog.String("object", obj.Key), slog.Any("error", err)) } } return nil } -func retry(fn func() error) error { +func retry(ctx context.Context, fn func() error) error { var err error for i := 0; i < _maxRetries; i++ { + select { + case <-ctx.Done(): + return ctx.Err() // return on context cancellation + case <-time.After(_retryDelay): + } err = fn() if err == nil { return nil // success - } else if strings.Contains(err.Error(), "stream error: stream ID") { - time.Sleep(_retryDelay) // retry - } else { - break // return error + } + if !strings.Contains(err.Error(), "stream error: stream ID") { + break // break and return error } } return err diff --git a/runtime/pkg/duckdbreplicator/conn.go b/runtime/pkg/rduckdb/conn.go similarity index 93% rename from runtime/pkg/duckdbreplicator/conn.go rename to runtime/pkg/rduckdb/conn.go index 4bfc3d6a28e..a375e2a5815 100644 --- a/runtime/pkg/duckdbreplicator/conn.go +++ b/runtime/pkg/rduckdb/conn.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "context" @@ -44,19 +44,11 @@ func (c *conn) Connx() *sqlx.Conn { } func (c *conn) CreateTableAsSelect(ctx context.Context, name, sql string, opts *CreateTableOptions) error { - if opts == nil { - opts = &CreateTableOptions{} - } return c.db.createTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) } // InsertTableAsSelect inserts the results of the given SQL query into the table. 
func (c *conn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { - if opts == nil { - opts = &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - } - } return c.db.insertTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) } @@ -98,11 +90,6 @@ func (c *singledbConn) CreateTableAsSelect(ctx context.Context, name, sql string // InsertTableAsSelect inserts the results of the given SQL query into the table. func (c *singledbConn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { - if opts == nil { - opts = &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - } - } return execIncrementalInsert(ctx, c.Conn, name, sql, opts) } diff --git a/runtime/pkg/duckdbreplicator/db.go b/runtime/pkg/rduckdb/db.go similarity index 72% rename from runtime/pkg/duckdbreplicator/db.go rename to runtime/pkg/rduckdb/db.go index a18e1638f30..e923547b361 100644 --- a/runtime/pkg/duckdbreplicator/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "context" @@ -72,7 +72,7 @@ type DBOptions struct { // LocalPath is the path where local db files will be stored. Should be unique for each database. LocalPath string - BackupProvider *BackupProvider + Backup *blob.Bucket // ReadSettings are settings applied the read duckDB handle. ReadSettings map[string]string @@ -81,7 +81,8 @@ type DBOptions struct { // InitQueries are the queries to run when the database is first created. 
InitQueries []string - Logger *slog.Logger + Logger *slog.Logger + OtelAttributes []attribute.KeyValue } // TODO :: revisit this logic @@ -157,26 +158,6 @@ func (d *DBOptions) ValidateSettings() error { } } - if readThread == 0 && writeThread == 0 { - connector, err := duckdb.NewConnector("", nil) - if err != nil { - return fmt.Errorf("unable to create duckdb connector: %w", err) - } - defer connector.Close() - db := sql.OpenDB(connector) - defer db.Close() - - row := db.QueryRow("SELECT value FROM duckdb_settings() WHERE name = 'threads'") - var threads int - err = row.Scan(&threads) - if err != nil { - return fmt.Errorf("unable to get threads: %w", err) - } - - read.Threads = strconv.Itoa((threads + 1) / 2) - write.Threads = strconv.Itoa(threads / 2) - } - if readThread == 0 != (writeThread == 0) { // only one is defined var threads int @@ -186,8 +167,8 @@ func (d *DBOptions) ValidateSettings() error { threads = writeThread } - read.Threads = strconv.Itoa((threads + 1) / 2) - write.Threads = strconv.Itoa(threads / 2) + read.Threads = strconv.Itoa(threads) + write.Threads = strconv.Itoa(threads) } err = mapstructure.WeakDecode(read, &d.ReadSettings) @@ -224,24 +205,19 @@ type InsertTableOptions struct { // NewDB creates a new DB instance. // This can be a slow operation if the backup is large. // dbIdentifier is a unique identifier for the database reported in metrics. 
-func NewDB(ctx context.Context, dbIdentifier string, opts *DBOptions) (DB, error) { - if dbIdentifier == "" { - return nil, fmt.Errorf("db identifier cannot be empty") - } +func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { err := opts.ValidateSettings() if err != nil { return nil, err } db := &db{ - dbIdentifier: dbIdentifier, - opts: opts, - readPath: filepath.Join(opts.LocalPath, "read"), - writePath: filepath.Join(opts.LocalPath, "write"), - writeDirty: true, - logger: opts.Logger, + opts: opts, + localPath: opts.LocalPath, + writeDirty: true, + logger: opts.Logger, } - if opts.BackupProvider != nil { - db.backup = opts.BackupProvider.bucket + if opts.Backup != nil { + db.backup = opts.Backup } // if clean is true, remove the backup if opts.Clean { @@ -251,30 +227,20 @@ func NewDB(ctx context.Context, dbIdentifier string, opts *DBOptions) (DB, error } } - // create read and write paths - err = os.MkdirAll(db.readPath, fs.ModePerm) + // create local path + err = os.MkdirAll(db.localPath, fs.ModePerm) if err != nil { return nil, fmt.Errorf("unable to create read path: %w", err) } - err = os.MkdirAll(db.writePath, fs.ModePerm) - if err != nil { - return nil, fmt.Errorf("unable to create write path: %w", err) - } - // sync write path - err = db.syncWrite(ctx) - if err != nil { - return nil, err - } - - // sync read path - err = db.syncRead(ctx) + // sync local data + err = db.syncLocalWithBackup(ctx) if err != nil { return nil, err } // create read handle - db.readHandle, err = db.openDBAndAttach(ctx, true) + db.readHandle, err = db.openDBAndAttach(ctx, true, "") if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. 
See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -287,15 +253,14 @@ func NewDB(ctx context.Context, dbIdentifier string, opts *DBOptions) (DB, error } type db struct { - dbIdentifier string - opts *DBOptions + opts *DBOptions - readHandle *sqlx.DB - readPath string - writePath string - readMu sync.RWMutex - writeMu sync.Mutex - writeDirty bool + localPath string + readHandle *sqlx.DB + readMu sync.RWMutex + writeMu sync.Mutex + writeDirty bool + inconsistent bool backup *blob.Bucket @@ -323,14 +288,16 @@ func (d *db) AcquireReadConnection(ctx context.Context) (Conn, func() error, err return nil, nil, err } - return &conn{ - Conn: c, - db: d, - }, func() error { - err = c.Close() - d.readMu.RUnlock() - return err - }, nil + release := func() error { + err := c.Close() + d.readMu.RUnlock() + return err + } + conn := &conn{ + Conn: c, + db: d, + } + return conn, release, nil } func (d *db) AcquireWriteConnection(ctx context.Context) (Conn, func() error, error) { @@ -348,9 +315,6 @@ func (d *db) AcquireWriteConnection(ctx context.Context) (Conn, func() error, er } func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts *CreateTableOptions) error { - if opts == nil { - opts = &CreateTableOptions{} - } d.logger.Debug("create table", slog.String("name", name), slog.Bool("view", opts.View)) d.writeMu.Lock() defer d.writeMu.Unlock() @@ -366,18 +330,18 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *CreateTableOptions) error { // check if some older version exists - oldVersion, oldVersionExists, _ := tableVersion(d.writePath, name) + oldVersion, oldVersionExists, _ := tableVersion(d.localPath, name) d.logger.Debug("old version", slog.String("version", oldVersion), slog.Bool("exists", oldVersionExists)) // create new version directory newVersion := newVersion() - 
newVersionDir := filepath.Join(d.writePath, name, newVersion) + newVersionDir := filepath.Join(d.localPath, name, newVersion) err := os.MkdirAll(newVersionDir, fs.ModePerm) if err != nil { return fmt.Errorf("create: unable to create dir %q: %w", name, err) } - var m meta + var m *meta if opts.View { // create view - validates that SQL is correct _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(name), query)) @@ -385,7 +349,7 @@ func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo return err } - m = meta{ViewSQL: query} + m = &meta{ViewSQL: query} } else { // create db file dbFile := filepath.Join(newVersionDir, "data.db") @@ -411,11 +375,8 @@ func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo _ = os.RemoveAll(newVersionDir) return fmt.Errorf("create: create %q.default table failed: %w", safeDBName, err) } - - m = meta{Format: BackupFormatDB} } - d.writeDirty = true // write meta err = writeMeta(newVersionDir, m) if err != nil { @@ -424,7 +385,8 @@ func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo } // update version.txt - err = os.WriteFile(filepath.Join(d.writePath, name, "version.txt"), []byte(newVersion), fs.ModePerm) + d.writeDirty = true + err = d.setTableVersion(name, newVersion) if err != nil { _ = os.RemoveAll(newVersionDir) return fmt.Errorf("create: write version file failed: %w", err) @@ -436,26 +398,20 @@ func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo return err } - if err := d.syncBackup(ctx, name); err != nil { + if err := d.syncBackupWithLocal(ctx, name, oldVersion); err != nil { return fmt.Errorf("create: replicate failed: %w", err) } d.logger.Debug("table created", slog.String("name", name)) - // both backups and write are now in sync + // backup and local are now in sync d.writeDirty = false if oldVersionExists { - _ = os.RemoveAll(filepath.Join(d.writePath, name, oldVersion)) - _ = 
d.deleteBackup(ctx, name, oldVersion) + _ = d.deleteLocalTable(name, oldVersion) } - return d.syncRead(ctx) + + return d.reopen("") } func (d *db) InsertTableAsSelect(ctx context.Context, name, query string, opts *InsertTableOptions) error { - if opts == nil { - opts = &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - } - } - d.logger.Debug("insert table", slog.String("name", name), slog.Group("option", "by_name", opts.ByName, "strategy", string(opts.Strategy), "unique_key", opts.UniqueKey)) d.writeMu.Lock() defer d.writeMu.Unlock() @@ -472,7 +428,7 @@ func (d *db) InsertTableAsSelect(ctx context.Context, name, query string, opts * func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *InsertTableOptions) error { // Get current table version - oldVersion, oldVersionExists, err := tableVersion(d.writePath, name) + oldVersion, oldVersionExists, err := tableVersion(d.localPath, name) if err != nil || !oldVersionExists { return fmt.Errorf("table %q does not exist", name) } @@ -486,14 +442,14 @@ func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo // rename db directory newVersion := newVersion() - oldVersionDir := filepath.Join(d.writePath, name, oldVersion) - err = os.Rename(oldVersionDir, filepath.Join(d.writePath, name, newVersion)) + oldVersionDir := filepath.Join(d.localPath, name, oldVersion) + err = os.Rename(oldVersionDir, filepath.Join(d.localPath, name, newVersion)) if err != nil { return fmt.Errorf("insert: update version %q failed: %w", newVersion, err) } // update version.txt - err = os.WriteFile(filepath.Join(d.writePath, name, "version.txt"), []byte(newVersion), fs.ModePerm) + err = os.WriteFile(filepath.Join(d.localPath, name, "version.txt"), []byte(newVersion), fs.ModePerm) if err != nil { return fmt.Errorf("insert: write version file failed: %w", err) } @@ -503,7 +459,7 @@ func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, 
releaseCo return err } // replicate - err = d.syncBackup(ctx, name) + err = d.syncBackupWithLocal(ctx, name, oldVersion) if err != nil { return fmt.Errorf("insert: replicate failed: %w", err) } @@ -512,8 +468,7 @@ func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseCo // Delete the old version (ignoring errors since source the new data has already been correctly inserted) _ = os.RemoveAll(oldVersionDir) - _ = d.deleteBackup(ctx, name, oldVersion) - return d.syncRead(ctx) + return d.reopen("") } // DropTable implements DB. @@ -533,26 +488,31 @@ func (d *db) DropTable(ctx context.Context, name string) error { } func (d *db) dropTable(ctx context.Context, name string) error { - _, exist, _ := tableVersion(d.writePath, name) + _, exist, _ := tableVersion(d.localPath, name) if !exist { return fmt.Errorf("drop: table %q not found", name) } d.writeDirty = true + // drop the table from backup location err := d.deleteBackup(ctx, name, "") if err != nil { return fmt.Errorf("drop: unable to drop table %q from backup: %w", name, err) } + d.writeDirty = false - // delete the table directory - err = os.RemoveAll(filepath.Join(d.writePath, name)) + // reopen db handle + err = d.reopen(name) if err != nil { - return fmt.Errorf("drop: unable to drop table %q: %w", name, err) + return fmt.Errorf("drop: unable to reopen: %w", err) } - // both backups and write are now in sync - d.writeDirty = false - return d.syncRead(ctx) + + err = d.deleteLocalTable(name, "") + if err != nil { + d.logger.Debug("drop: unable to delete local table data", slog.String("table", name), slog.String("error", err.Error())) + } + return nil } func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { @@ -573,7 +533,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { } func (d *db) renameTable(ctx context.Context, oldName, newName string) error { - oldVersion, exist, err := d.writeTableVersion(oldName) + oldVersion, exist, err := 
d.tableVersion(oldName, false) if err != nil { return err } @@ -581,44 +541,55 @@ func (d *db) renameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: Table %q not found", oldName) } - newTableVersion, replaceInNewTable, _ := d.writeTableVersion(newName) + oldVersionInNewTable, replaceInNewTable, _ := d.tableVersion(newName, false) d.writeDirty = true - err = os.RemoveAll(filepath.Join(d.writePath, newName)) + // copy the old table version to new table version + version := newVersion() + err = copyDir(filepath.Join(d.localPath, newName, version), filepath.Join(d.localPath, oldName, oldVersion)) if err != nil { - return fmt.Errorf("rename: unable to delete existing new table: %w", err) + return fmt.Errorf("rename: copy table failed: %w", err) } - err = os.Rename(filepath.Join(d.writePath, oldName), filepath.Join(d.writePath, newName)) + // update version.txt + err = d.setTableVersion(newName, version) if err != nil { - return fmt.Errorf("rename: rename file failed: %w", err) + return fmt.Errorf("rename: write version file failed: %w", err) } - // rename to a new version - version := newVersion() - err = os.Rename(filepath.Join(d.writePath, newName, oldVersion), filepath.Join(d.writePath, newName, version)) + // sync the new table and new version + if err := d.syncBackupWithLocal(ctx, newName, oldVersionInNewTable); err != nil { + return fmt.Errorf("rename: unable to replicate new table: %w", err) + } + + // drop the old table in backup + err = d.deleteBackup(ctx, oldName, "") if err != nil { - return fmt.Errorf("rename: rename version failed: %w", err) + // at this point both is inconsistent + // has both old table and new table + return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) } - // update version.txt - writeErr := os.WriteFile(filepath.Join(d.writePath, newName, "version.txt"), []byte(newVersion()), fs.ModePerm) - if writeErr != nil { - return fmt.Errorf("rename: write version file 
failed: %w", writeErr) + // reopen db handle ignoring old name + err = d.reopen(oldName) + if err != nil { + return fmt.Errorf("rename: unable to reopen: %w", err) } - if d.syncBackup(ctx, newName) != nil { - return fmt.Errorf("rename: unable to replicate new table") + d.inconsistent = false + + if replaceInNewTable { + _ = d.deleteLocalTable(newName, oldVersionInNewTable) } - err = d.deleteBackup(ctx, oldName, "") + + // delete old table from local + err = d.deleteLocalTable(oldName, "") if err != nil { - return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) + d.logger.Debug("rename: unable to delete old table", slog.String("table", oldName), slog.String("error", err.Error())) } + d.writeDirty = false - if replaceInNewTable { - _ = d.deleteBackup(ctx, newName, newTableVersion) - } - return d.syncRead(ctx) + return nil } func (d *db) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { @@ -637,7 +608,7 @@ func (d *db) AddTableColumn(ctx context.Context, tableName, columnName, typ stri } func (d *db) addTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, typ string) error { - version, exist, err := tableVersion(d.writePath, tableName) + oldVersion, exist, err := tableVersion(d.localPath, tableName) if err != nil { return err } @@ -646,21 +617,32 @@ func (d *db) addTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn fu return fmt.Errorf("table %q does not exist", tableName) } - d.writeDirty = true - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ADD COLUMN %s %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), typ)) + newVersion := newVersion() + err = copyDir(filepath.Join(d.localPath, tableName, newVersion), filepath.Join(d.localPath, tableName, oldVersion)) if err != nil { return err } - // rename to new version - newVersion := newVersion() - err = os.Rename(filepath.Join(d.writePath, tableName, version), 
filepath.Join(d.writePath, tableName, newVersion)) + // detach old db + _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE %s", safeSQLName(dbName(tableName)))) + if err != nil { + return err + } + + // reattach new db + _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(filepath.Join(d.localPath, tableName, newVersion, "data.db")), safeSQLName(dbName(tableName)))) + if err != nil { + return err + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ADD COLUMN %s %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), typ)) if err != nil { return err } // update version.txt - err = os.WriteFile(filepath.Join(d.writePath, tableName, "version.txt"), []byte(newVersion), fs.ModePerm) + d.writeDirty = true + err = d.setTableVersion(tableName, newVersion) if err != nil { return err } @@ -671,14 +653,16 @@ func (d *db) addTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn fu } // replicate - err = d.syncBackup(ctx, tableName) + err = d.syncBackupWithLocal(ctx, tableName, oldVersion) if err != nil { return err } d.writeDirty = false - // remove old version - _ = d.deleteBackup(ctx, tableName, version) - return d.syncRead(ctx) + + // remove old local version + _ = d.deleteLocalTable(tableName, oldVersion) + + return d.reopen("") } // AlterTableColumn implements drivers.OLAPStore. 
@@ -698,7 +682,7 @@ func (d *db) AlterTableColumn(ctx context.Context, tableName, columnName, newTyp } func (d *db) alterTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, newType string) error { - version, exist, err := tableVersion(d.writePath, tableName) + oldVersion, exist, err := tableVersion(d.localPath, tableName) if err != nil { return err } @@ -707,72 +691,61 @@ func (d *db) alterTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn return fmt.Errorf("table %q does not exist", tableName) } - d.writeDirty = true - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ALTER %s TYPE %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), newType)) + newVersion := newVersion() + err = copyDir(filepath.Join(d.localPath, tableName, newVersion), filepath.Join(d.localPath, tableName, oldVersion)) if err != nil { return err } - // rename to new version - newVersion := fmt.Sprint(time.Now().UnixMilli()) - err = os.Rename(filepath.Join(d.writePath, tableName, version), filepath.Join(d.writePath, tableName, newVersion)) + // detach old db + _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE %s", safeSQLName(dbName(tableName)))) if err != nil { return err } - // update version.txt - err = os.WriteFile(filepath.Join(d.writePath, tableName, "version.txt"), []byte(newVersion), fs.ModePerm) + // reattach new db + _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(filepath.Join(d.localPath, tableName, newVersion, "data.db")), safeSQLName(dbName(tableName)))) if err != nil { return err } - err = releaseConn() + _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ALTER %s TYPE %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), newType)) if err != nil { return err } - // replicate - err = d.syncBackup(ctx, tableName) + // update version.txt + d.writeDirty = true + err = d.setTableVersion(tableName, newVersion) if err != nil { return err } - 
d.writeDirty = false - // remove old version - _ = d.deleteBackup(ctx, tableName, version) - return d.syncRead(ctx) -} -func (d *db) syncRead(ctx context.Context) error { - entries, err := os.ReadDir(d.writePath) + err = releaseConn() if err != nil { return err } - tableVersion := make(map[string]string) - for _, entry := range entries { - if !entry.IsDir() { - continue - } + // replicate + err = d.syncBackupWithLocal(ctx, tableName, oldVersion) + if err != nil { + return err + } + d.writeDirty = false - // Check if there is already a table with the same version - writeVersion, exist, _ := d.writeTableVersion(entry.Name()) - if !exist { - continue - } - tableVersion[entry.Name()] = writeVersion - readVersion, _, _ := d.readTableVersion(entry.Name()) - if writeVersion == readVersion { - continue - } + // remove old local version + _ = d.deleteLocalTable(tableName, oldVersion) - d.logger.Debug("Sync: copying table", slog.String("table", entry.Name())) - err = copyDir(filepath.Join(d.readPath, entry.Name()), filepath.Join(d.writePath, entry.Name())) - if err != nil { - return err - } - } + return d.reopen("") +} - handle, err := d.openDBAndAttach(ctx, true) +func (d *db) reopen(deletedTable string) error { + // reopen should ignore context cancellations since cancellation errors can leave read inconsistent from write + // Also it is expected to be a fast operation so should be okay to ignore context cancellations + // extensions are already downloaded in NewDB + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + handle, err := d.openDBAndAttach(ctx, true, deletedTable) if err != nil { return err } @@ -791,50 +764,12 @@ func (d *db) syncRead(ctx context.Context) error { d.logger.Warn("error in closing old read handle", slog.String("error", err.Error())) } } - - // delete data for tables/versions that have been removed from write - entries, err = os.ReadDir(d.readPath) - if err != nil { - return err - } - for _, entry := range 
entries { - if !entry.IsDir() { - continue - } - readVersion, ok, _ := d.readTableVersion(entry.Name()) - if !ok { - // invalid table - _ = os.RemoveAll(filepath.Join(d.readPath, entry.Name())) - continue - } - - writeVersion, ok := tableVersion[entry.Name()] - if !ok { - // table not in write - d.logger.Debug("Sync: removing table", slog.String("table", entry.Name())) - err = os.RemoveAll(filepath.Join(d.readPath, entry.Name())) - if err != nil { - return err - } - continue - } - - if readVersion == writeVersion { - continue - } - - d.logger.Debug("Sync: removing old version", slog.String("table", entry.Name()), slog.String("version", readVersion)) - err = os.RemoveAll(filepath.Join(d.readPath, entry.Name(), readVersion)) - if err != nil { - return err - } - } return nil } func (d *db) Size() int64 { var paths []string - entries, err := os.ReadDir(d.readPath) + entries, err := os.ReadDir(d.localPath) if err != nil { // ignore error return 0 } @@ -848,8 +783,8 @@ func (d *db) Size() int64 { if strings.HasPrefix(entry.Name(), "__rill_tmp_") { continue } - path := filepath.Join(d.readPath, entry.Name()) - version, exist, _ := d.readTableVersion(entry.Name()) + path := filepath.Join(d.localPath, entry.Name()) + version, exist, _ := d.tableVersion(entry.Name(), true) if !exist { continue } @@ -862,12 +797,12 @@ func (d *db) Size() int64 { // The release function should be called to release the connection. // It should be called with the writeMu locked. 
func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, error) { - err := d.syncWrite(ctx) + err := d.syncLocalWithBackup(ctx) if err != nil { return nil, nil, err } - db, err := d.openDBAndAttach(ctx, false) + db, err := d.openDBAndAttach(ctx, false, "") if err != nil { return nil, nil, err } @@ -883,27 +818,18 @@ func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, er }, nil } -func (d *db) openDBAndAttach(ctx context.Context, read bool) (*sqlx.DB, error) { +func (d *db) openDBAndAttach(ctx context.Context, read bool, ignoreTable string) (*sqlx.DB, error) { // open the db - var ( - dsn *url.URL - err error - settings map[string]string - path string - ) + var settings map[string]string + dsn, err := url.Parse("") // in-memory + if err != nil { + return nil, err + } if read { - dsn, err = url.Parse("") // in-memory settings = d.opts.ReadSettings - path = d.readPath } else { - path = d.writePath - dsn, err = url.Parse(filepath.Join(path, "stage.db")) settings = d.opts.WriteSettings } - if err != nil { - return nil, err - } - query := dsn.Query() for k, v := range settings { query.Set(k, v) @@ -928,7 +854,7 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool) (*sqlx.DB, error) { db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") - err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(attribute.String("db.system", "duckdb"), attribute.String("db_identifier", d.dbIdentifier))) + err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(d.opts.OtelAttributes...)) if err != nil { return nil, fmt.Errorf("registering db stats metrics: %w", err) } @@ -939,7 +865,7 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool) (*sqlx.DB, error) { return nil, err } - err = d.attachDBs(ctx, db, path, read) + err = d.attachDBs(ctx, db, read, ignoreTable) if err != nil { db.Close() return nil, err @@ -969,8 +895,8 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool) (*sqlx.DB, error) { 
return db, nil } -func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, path string, read bool) error { - entries, err := os.ReadDir(path) +func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, read bool, ignoreTable string) error { + entries, err := os.ReadDir(d.localPath) if err != nil { return err } @@ -980,60 +906,58 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, path string, read bool) if !entry.IsDir() { continue } + if entry.Name() == ignoreTable { + continue + } - // NOTE :: we always look at the write version - // Tables in read path are removed after getting a new handle - // So we need to always look at the write version to ensure we do not reattach dropped tables - version, exist, _ := d.writeTableVersion(entry.Name()) + version, exist, _ := d.tableVersion(entry.Name(), false) if !exist { continue } - versionPath := filepath.Join(path, entry.Name(), version) + versionPath := filepath.Join(d.localPath, entry.Name(), version) // read meta file + isView := true f, err := os.ReadFile(filepath.Join(versionPath, "meta.json")) if err != nil { - _ = os.RemoveAll(versionPath) - d.logger.Warn("error in reading meta file", slog.String("table", entry.Name()), slog.Any("error", err)) - return err - } - var meta meta - err = json.Unmarshal(f, &meta) - if err != nil { - _ = os.RemoveAll(versionPath) - d.logger.Warn("error in unmarshalling meta file", slog.String("table", entry.Name()), slog.Any("error", err)) - return err + pathErr := &fs.PathError{} + if !errors.As(err, &pathErr) { + _ = os.RemoveAll(versionPath) + d.logger.Warn("error in reading meta file", slog.String("table", entry.Name()), slog.Any("error", err)) + return err + } + isView = false } - - if meta.ViewSQL != "" { + if isView { + var meta meta + err = json.Unmarshal(f, &meta) + if err != nil { + _ = os.RemoveAll(versionPath) + d.logger.Warn("error in unmarshalling meta file", slog.String("table", entry.Name()), slog.Any("error", err)) + return err + } // table is a view views = 
append(views, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(entry.Name()), meta.ViewSQL)) continue } - switch meta.Format { - case BackupFormatDB: - dbName := dbName(entry.Name()) - var readMode string - if read { - readMode = " (READ_ONLY)" - } - _, err := db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s %s", safeSQLString(filepath.Join(versionPath, "data.db")), safeSQLName(dbName), readMode)) - if err != nil { - d.logger.Error("error in attaching db", slog.String("table", entry.Name()), slog.Any("error", err)) - _ = os.RemoveAll(filepath.Join(path, entry.Name())) - return err - } + dbName := dbName(entry.Name()) + var readMode string + if read { + readMode = " (READ_ONLY)" + } + _, err = db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s %s", safeSQLString(filepath.Join(versionPath, "data.db")), safeSQLName(dbName), readMode)) + if err != nil { + d.logger.Error("error in attaching db", slog.String("table", entry.Name()), slog.Any("error", err)) + _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) + return err + } - _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(entry.Name()), safeSQLName(dbName))) - if err != nil { - return err - } - case BackupFormatParquet: - panic("unimplemented") - default: - return fmt.Errorf("unknown backup format %q", meta.Format) + _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(entry.Name()), safeSQLName(dbName))) + if err != nil { + return err } } + // create views after attaching all the DBs since views can depend on other tables for _, view := range views { _, err := db.ExecContext(ctx, view) @@ -1044,12 +968,25 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, path string, read bool) return nil } -func (d *db) readTableVersion(name string) (string, bool, error) { - return tableVersion(d.readPath, name) +func (d *db) tableVersion(name string, read bool) (string, bool, error) { + 
if read { + return tableVersion(d.localPath, name) + } + return tableVersion(d.localPath, name) } -func (d *db) writeTableVersion(name string) (string, bool, error) { - return tableVersion(d.writePath, name) +func (d *db) setTableVersion(name, version string) error { + return os.WriteFile(filepath.Join(d.localPath, name, "version.txt"), []byte(version), fs.ModePerm) +} + +func (d *db) deleteLocalTable(table, version string) error { + var path string + if version == "" { + path = filepath.Join(d.localPath, table) + } else { + path = filepath.Join(d.localPath, table, version) + } + return os.RemoveAll(path) } func execIncrementalInsert(ctx context.Context, conn *sqlx.Conn, safeTableName, query string, opts *InsertTableOptions) error { @@ -1124,10 +1061,12 @@ func newVersion() string { type meta struct { ViewSQL string - Format BackupFormat } -func writeMeta(path string, meta meta) error { +func writeMeta(path string, meta *meta) error { + if meta == nil { + return nil + } metaBytes, err := json.Marshal(meta) if err != nil { return fmt.Errorf("create: marshal meta failed: %w", err) diff --git a/runtime/pkg/duckdbreplicator/db_test.go b/runtime/pkg/rduckdb/db_test.go similarity index 75% rename from runtime/pkg/duckdbreplicator/db_test.go rename to runtime/pkg/rduckdb/db_test.go index 64968bb29eb..780178f353c 100644 --- a/runtime/pkg/duckdbreplicator/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "context" @@ -12,18 +12,18 @@ import ( func TestDB(t *testing.T) { dir := t.TempDir() ctx := context.Background() - db, err := NewDB(ctx, "test", &DBOptions{ - LocalPath: dir, - BackupProvider: nil, - ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, - WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, - InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, - Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + 
db, err := NewDB(ctx, &DBOptions{ + LocalPath: dir, + Backup: nil, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), }) require.NoError(t, err) // create table - err = db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", nil) + err = db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) require.NoError(t, err) // query table @@ -48,7 +48,9 @@ func TestDB(t *testing.T) { require.Error(t, err) // insert into table - err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'US' AS country", nil) + err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'US' AS country", &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + }) require.NoError(t, err) // merge into table diff --git a/runtime/pkg/rduckdb/examples/main.go b/runtime/pkg/rduckdb/examples/main.go new file mode 100644 index 00000000000..60c6e21a4ff --- /dev/null +++ b/runtime/pkg/rduckdb/examples/main.go @@ -0,0 +1,74 @@ +package main + +// import ( +// "context" +// "fmt" +// "log/slog" +// "time" + +// "github.com/rilldata/rill/runtime/pkg/rduckdb" +// _ "gocloud.dev/blob/gcsblob" +// ) + +// func main() { +// // backup, err := rduckdb.NewGCSBackupProvider(context.Background(), &rduckdb.GCSBackupProviderOptions{ +// // UseHostCredentials: true, +// // Bucket: "", +// // UniqueIdentifier: "756c6367-e807-43ff-8b07-df1bae29c57e/", +// // }) +// // if err != nil { +// // panic(err) +// // } + +// dbOptions := &rduckdb.DBOptions{ +// LocalPath: "", +// // BackupProvider: backup, +// ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, +// WriteSettings: map[string]string{"memory_limit": "8GB", "threads": "2"}, +// InitQueries: []string{"SET 
autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, +// Logger: slog.Default(), +// } + +// db, err := rduckdb.NewDB(context.Background(), "756c6367-e807-43ff-8b07-df1bae29c57e", dbOptions) +// if err != nil { +// panic(err) +// } +// defer db.Close() + +// t := time.Now() +// // create table +// err = db.CreateTableAsSelect(context.Background(), "test-2", `SELECT * FROM read_parquet('data*.parquet')`, &rduckdb.CreateTableOptions{}) +// if err != nil { +// panic(err) +// } +// fmt.Printf("time taken %v\n", time.Since(t)) + +// // rename table +// err = db.RenameTable(context.Background(), "test-2", "test") +// if err != nil { +// panic(err) +// } + +// // insert into renamed table +// err = db.InsertTableAsSelect(context.Background(), "test", `SELECT * FROM read_parquet('data*.parquet')`, &rduckdb.InsertTableOptions{ +// Strategy: rduckdb.IncrementalStrategyAppend, +// }) +// if err != nil { +// panic(err) +// } + +// // get count +// conn, release, err := db.AcquireReadConnection(context.Background()) +// if err != nil { +// panic(err) +// } +// defer release() + +// var count int +// err = conn.Connx().QueryRowxContext(context.Background(), `SELECT count(*) FROM "test"`).Scan(&count) +// if err != nil { +// fmt.Printf("error %v\n", err) +// } +// fmt.Println(count) + +// } diff --git a/runtime/pkg/duckdbreplicator/io.go b/runtime/pkg/rduckdb/io.go similarity index 98% rename from runtime/pkg/duckdbreplicator/io.go rename to runtime/pkg/rduckdb/io.go index 62db7bb5d3f..eb2437f6161 100644 --- a/runtime/pkg/duckdbreplicator/io.go +++ b/runtime/pkg/rduckdb/io.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "io" diff --git a/runtime/pkg/duckdbreplicator/io_test.go b/runtime/pkg/rduckdb/io_test.go similarity index 98% rename from runtime/pkg/duckdbreplicator/io_test.go rename to runtime/pkg/rduckdb/io_test.go index d07d83dbe3d..b54447ceb70 100644 --- a/runtime/pkg/duckdbreplicator/io_test.go +++ 
b/runtime/pkg/rduckdb/io_test.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "os" diff --git a/runtime/pkg/duckdbreplicator/singledb.go b/runtime/pkg/rduckdb/singledb.go similarity index 97% rename from runtime/pkg/duckdbreplicator/singledb.go rename to runtime/pkg/rduckdb/singledb.go index 4c0e821a04d..be0e060dd43 100644 --- a/runtime/pkg/duckdbreplicator/singledb.go +++ b/runtime/pkg/rduckdb/singledb.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "context" @@ -184,11 +184,6 @@ func (s *singledb) dropTable(ctx context.Context, conn *sqlx.Conn, name string) // InsertTableAsSelect implements DB. func (s *singledb) InsertTableAsSelect(ctx context.Context, name, query string, opts *InsertTableOptions) error { - if opts == nil { - opts = &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - } - } s.writeMU.Lock() defer s.writeMU.Unlock() @@ -197,11 +192,6 @@ func (s *singledb) InsertTableAsSelect(ctx context.Context, name, query string, return err } - if opts == nil { - opts = &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - } - } return execIncrementalInsert(ctx, conn, safeSQLName(name), query, opts) } diff --git a/runtime/pkg/duckdbreplicator/singledb_test.go b/runtime/pkg/rduckdb/singledb_test.go similarity index 94% rename from runtime/pkg/duckdbreplicator/singledb_test.go rename to runtime/pkg/rduckdb/singledb_test.go index e21cf793618..86edc038538 100644 --- a/runtime/pkg/duckdbreplicator/singledb_test.go +++ b/runtime/pkg/rduckdb/singledb_test.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "context" @@ -19,7 +19,7 @@ func TestSingleDB_test(t *testing.T) { rw, release, err := db.AcquireWriteConnection(ctx) require.NoError(t, err) - err = rw.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", nil) + err = rw.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) require.NoError(t, err) // rename table @@ -27,7 
+27,9 @@ func TestSingleDB_test(t *testing.T) { require.NoError(t, err) // insert into table - err = rw.InsertTableAsSelect(ctx, "test", "SELECT 2 AS id, 'USA' AS country", nil) + err = rw.InsertTableAsSelect(ctx, "test", "SELECT 2 AS id, 'USA' AS country", &InsertTableOptions{ + Strategy: IncrementalStrategyAppend, + }) require.NoError(t, err) // add column diff --git a/runtime/pkg/duckdbreplicator/sqlutil.go b/runtime/pkg/rduckdb/sqlutil.go similarity index 93% rename from runtime/pkg/duckdbreplicator/sqlutil.go rename to runtime/pkg/rduckdb/sqlutil.go index 8183f916472..28710d676be 100644 --- a/runtime/pkg/duckdbreplicator/sqlutil.go +++ b/runtime/pkg/rduckdb/sqlutil.go @@ -1,4 +1,4 @@ -package duckdbreplicator +package rduckdb import ( "fmt" From e9a8c6c1966084962771546ab8ace5af7db8c500 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 15 Nov 2024 18:32:20 +0530 Subject: [PATCH 13/64] use metadata.json for each table --- runtime/pkg/rduckdb/conn.go | 114 --- runtime/pkg/rduckdb/db.go | 834 ++++++++----------- runtime/pkg/rduckdb/db_test.go | 234 +++++- runtime/pkg/rduckdb/examples/main.go | 74 -- runtime/pkg/rduckdb/{backup.go => remote.go} | 154 ++-- runtime/pkg/rduckdb/singledb.go | 308 ------- runtime/pkg/rduckdb/singledb_test.go | 103 --- 7 files changed, 613 insertions(+), 1208 deletions(-) delete mode 100644 runtime/pkg/rduckdb/conn.go delete mode 100644 runtime/pkg/rduckdb/examples/main.go rename runtime/pkg/rduckdb/{backup.go => remote.go} (61%) delete mode 100644 runtime/pkg/rduckdb/singledb.go delete mode 100644 runtime/pkg/rduckdb/singledb_test.go diff --git a/runtime/pkg/rduckdb/conn.go b/runtime/pkg/rduckdb/conn.go deleted file mode 100644 index a375e2a5815..00000000000 --- a/runtime/pkg/rduckdb/conn.go +++ /dev/null @@ -1,114 +0,0 @@ -package rduckdb - -import ( - "context" - - "github.com/jmoiron/sqlx" -) - -// Conn represents a single database connection. 
-// This is useful when running a chain of queries using a single write connection. -type Conn interface { - // Connx returns the underlying sqlx.Conn. - Connx() *sqlx.Conn - - // CreateTableAsSelect creates a new table by name from the results of the given SQL query. - CreateTableAsSelect(ctx context.Context, name string, sql string, opts *CreateTableOptions) error - - // InsertTableAsSelect inserts the results of the given SQL query into the table. - InsertTableAsSelect(ctx context.Context, name string, sql string, opts *InsertTableOptions) error - - // DropTable removes a table from the database. - DropTable(ctx context.Context, name string) error - - // RenameTable renames a table in the database. - RenameTable(ctx context.Context, oldName, newName string) error - - // AddTableColumn adds a column to the table. - AddTableColumn(ctx context.Context, tableName, columnName, typ string) error - - // AlterTableColumn alters the type of a column in the table. - AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error -} - -type conn struct { - *sqlx.Conn - - db *db -} - -var _ Conn = (*conn)(nil) - -func (c *conn) Connx() *sqlx.Conn { - return c.Conn -} - -func (c *conn) CreateTableAsSelect(ctx context.Context, name, sql string, opts *CreateTableOptions) error { - return c.db.createTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) -} - -// InsertTableAsSelect inserts the results of the given SQL query into the table. -func (c *conn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { - return c.db.insertTableAsSelect(ctx, c.Conn, func() error { return nil }, name, sql, opts) -} - -// DropTable removes a table from the database. -func (c *conn) DropTable(ctx context.Context, name string) error { - return c.db.dropTable(ctx, name) -} - -// RenameTable renames a table in the database. 
-func (c *conn) RenameTable(ctx context.Context, oldName, newName string) error { - return c.db.renameTable(ctx, oldName, newName) -} - -// AddTableColumn adds a column to the table. -func (c *conn) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - return c.db.addTableColumn(ctx, c.Conn, func() error { return nil }, tableName, columnName, typ) -} - -// AlterTableColumn alters the type of a column in the table. -func (c *conn) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - return c.db.alterTableColumn(ctx, c.Conn, func() error { return nil }, tableName, columnName, newType) -} - -type singledbConn struct { - *sqlx.Conn - - db *singledb -} - -var _ Conn = (*singledbConn)(nil) - -func (c *singledbConn) Connx() *sqlx.Conn { - return c.Conn -} - -func (c *singledbConn) CreateTableAsSelect(ctx context.Context, name, sql string, opts *CreateTableOptions) error { - return c.db.createTableAsSelect(ctx, c.Conn, name, sql, opts) -} - -// InsertTableAsSelect inserts the results of the given SQL query into the table. -func (c *singledbConn) InsertTableAsSelect(ctx context.Context, name, sql string, opts *InsertTableOptions) error { - return execIncrementalInsert(ctx, c.Conn, name, sql, opts) -} - -// DropTable removes a table from the database. -func (c *singledbConn) DropTable(ctx context.Context, name string) error { - return c.db.dropTable(ctx, c.Conn, name) -} - -// RenameTable renames a table in the database. -func (c *singledbConn) RenameTable(ctx context.Context, oldName, newName string) error { - return c.db.renameTable(ctx, c.Conn, oldName, newName) -} - -// AddTableColumn adds a column to the table. -func (c *singledbConn) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - return c.db.addTableColumn(ctx, c.Conn, tableName, columnName, typ) -} - -// AlterTableColumn alters the type of a column in the table. 
-func (c *singledbConn) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - return c.db.alterTableColumn(ctx, c.Conn, tableName, columnName, newType) -} diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index e923547b361..f5c4b176e4a 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -13,13 +13,13 @@ import ( "os" "path/filepath" "regexp" + "slices" "strconv" "strings" "sync" "time" "github.com/XSAM/otelsql" - "github.com/google/uuid" "github.com/jmoiron/sqlx" "github.com/marcboeker/go-duckdb" "github.com/mitchellh/mapstructure" @@ -27,6 +27,8 @@ import ( "gocloud.dev/blob" ) +var errNotFound = errors.New("not found") + type DB interface { // Close closes the database. Close() error @@ -34,12 +36,7 @@ type DB interface { // AcquireReadConnection returns a connection to the database for reading. // Once done the connection should be released by calling the release function. // This connection must only be used for select queries or for creating and working with temporary tables. - AcquireReadConnection(ctx context.Context) (conn Conn, release func() error, err error) - - // AcquireWriteConnection returns a connection to the database for writing. - // Once done the connection should be released by calling the release function. - // Any persistent changes to the database should be done by calling CRUD APIs on this connection. - AcquireWriteConnection(ctx context.Context) (conn Conn, release func() error, err error) + AcquireReadConnection(ctx context.Context) (conn *sqlx.Conn, release func() error, err error) // Size returns the size of the database in bytes. // It is currently implemented as sum of the size of all serving `.db` files. @@ -50,20 +47,14 @@ type DB interface { // CreateTableAsSelect creates a new table by name from the results of the given SQL query. 
CreateTableAsSelect(ctx context.Context, name string, sql string, opts *CreateTableOptions) error - // InsertTableAsSelect inserts the results of the given SQL query into the table. - InsertTableAsSelect(ctx context.Context, name string, sql string, opts *InsertTableOptions) error + // MutateTable allows mutating a table in the database by calling the mutateFn. + MutateTable(ctx context.Context, name string, mutateFn func(ctx context.Context, conn *sqlx.Conn) error) error // DropTable removes a table from the database. DropTable(ctx context.Context, name string) error // RenameTable renames a table in the database. RenameTable(ctx context.Context, oldName, newName string) error - - // AddTableColumn adds a column to the table. - AddTableColumn(ctx context.Context, tableName, columnName, typ string) error - - // AlterTableColumn alters the type of a column in the table. - AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error } type DBOptions struct { @@ -188,20 +179,6 @@ type CreateTableOptions struct { View bool } -type IncrementalStrategy string - -const ( - IncrementalStrategyUnspecified IncrementalStrategy = "" - IncrementalStrategyAppend IncrementalStrategy = "append" - IncrementalStrategyMerge IncrementalStrategy = "merge" -) - -type InsertTableOptions struct { - ByName bool - Strategy IncrementalStrategy - UniqueKey []string -} - // NewDB creates a new DB instance. // This can be a slow operation if the backup is large. // dbIdentifier is a unique identifier for the database reported in metrics. 
@@ -234,13 +211,13 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } // sync local data - err = db.syncLocalWithBackup(ctx) + err = db.pullFromRemote(ctx) if err != nil { return nil, err } // create read handle - db.readHandle, err = db.openDBAndAttach(ctx, true, "") + db.readHandle, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -255,12 +232,11 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { type db struct { opts *DBOptions - localPath string - readHandle *sqlx.DB - readMu sync.RWMutex - writeMu sync.Mutex - writeDirty bool - inconsistent bool + localPath string + readHandle *sqlx.DB + readMu sync.RWMutex + writeMu sync.Mutex + writeDirty bool backup *blob.Bucket @@ -279,473 +255,318 @@ func (d *db) Close() error { return d.readHandle.Close() } -func (d *db) AcquireReadConnection(ctx context.Context) (Conn, func() error, error) { +func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() error, error) { d.readMu.RLock() - c, err := d.readHandle.Connx(ctx) + conn, err := d.readHandle.Connx(ctx) if err != nil { d.readMu.RUnlock() return nil, nil, err } release := func() error { - err := c.Close() + err := conn.Close() d.readMu.RUnlock() return err } - conn := &conn{ - Conn: c, - db: d, - } return conn, release, nil } -func (d *db) AcquireWriteConnection(ctx context.Context) (Conn, func() error, error) { - d.writeMu.Lock() - defer d.writeMu.Unlock() - c, release, err := d.acquireWriteConn(ctx) - if err != nil { - return nil, nil, err - } - - return &conn{ - Conn: c, - db: d, - }, release, nil -} - func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts *CreateTableOptions) error { d.logger.Debug("create table", slog.String("name", name), 
slog.Bool("view", opts.View)) d.writeMu.Lock() defer d.writeMu.Unlock() - conn, release, err := d.acquireWriteConn(ctx) + + // pull latest changes from remote + err := d.pullFromRemote(ctx) if err != nil { return err } - defer func() { - _ = release() - }() - return d.createTableAsSelect(ctx, conn, release, name, query, opts) -} -func (d *db) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *CreateTableOptions) error { // check if some older version exists - oldVersion, oldVersionExists, _ := tableVersion(d.localPath, name) - d.logger.Debug("old version", slog.String("version", oldVersion), slog.Bool("exists", oldVersionExists)) + oldMeta, _ := d.tableMeta(name) + if oldMeta != nil { + d.logger.Debug("old version", slog.String("version", oldMeta.Version)) + } // create new version directory newVersion := newVersion() - newVersionDir := filepath.Join(d.localPath, name, newVersion) - err := os.MkdirAll(newVersionDir, fs.ModePerm) - if err != nil { - return fmt.Errorf("create: unable to create dir %q: %w", name, err) + newMeta := &tableMeta{ + Name: name, + Version: newVersion, + CreatedVersion: newVersion, } - - var m *meta + var dsn string if opts.View { - // create view - validates that SQL is correct - _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(name), query)) - if err != nil { - return err + newMeta.SQL = query + dsn = "" + // SPECIAL CASE + if oldMeta != nil && oldMeta.Type == "VIEW" { + newMeta.CreatedVersion = oldMeta.CreatedVersion } - - m = &meta{ViewSQL: query} - } else { - // create db file - dbFile := filepath.Join(newVersionDir, "data.db") - safeDBName := safeSQLName(dbName(name)) - - // detach existing db - _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE IF EXISTS %s", safeDBName), nil) + err = os.MkdirAll(filepath.Join(d.localPath, name), fs.ModePerm) if err != nil { - _ = os.RemoveAll(newVersionDir) - return fmt.Errorf("create: 
detach %q db failed: %w", safeDBName, err) + return fmt.Errorf("create: unable to create dir %q: %w", name, err) } - - // attach new db - _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(dbFile), safeDBName), nil) - if err != nil { - _ = os.RemoveAll(newVersionDir) - return fmt.Errorf("create: attach %q db failed: %w", dbFile, err) - } - - // ingest data - _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE TABLE %s.default AS (%s\n)", safeDBName, query), nil) + } else { + newVersionDir := filepath.Join(d.localPath, name, newVersion) + err = os.MkdirAll(newVersionDir, fs.ModePerm) if err != nil { - _ = os.RemoveAll(newVersionDir) - return fmt.Errorf("create: create %q.default table failed: %w", safeDBName, err) + return fmt.Errorf("create: unable to create dir %q: %w", name, err) } + dsn = filepath.Join(newVersionDir, "data.db") + newMeta.CreatedVersion = newVersion } - // write meta - err = writeMeta(newVersionDir, m) + // need to attach existing table so that any views dependent on this table are correctly attached + conn, release, err := d.acquireWriteConn(ctx, dsn, name, true) if err != nil { - _ = os.RemoveAll(newVersionDir) return err } - - // update version.txt - d.writeDirty = true - err = d.setTableVersion(name, newVersion) - if err != nil { - _ = os.RemoveAll(newVersionDir) - return fmt.Errorf("create: write version file failed: %w", err) - } - - // close write handle before syncing read so that temp files or wal files if any are removed - err = releaseConn() - if err != nil { - return err - } - - if err := d.syncBackupWithLocal(ctx, name, oldVersion); err != nil { - return fmt.Errorf("create: replicate failed: %w", err) - } - d.logger.Debug("table created", slog.String("name", name)) - // backup and local are now in sync - d.writeDirty = false - if oldVersionExists { - _ = d.deleteLocalTable(name, oldVersion) - } - - return d.reopen("") -} - -func (d *db) InsertTableAsSelect(ctx context.Context, name, query string, 
opts *InsertTableOptions) error { - d.logger.Debug("insert table", slog.String("name", name), slog.Group("option", "by_name", opts.ByName, "strategy", string(opts.Strategy), "unique_key", opts.UniqueKey)) - d.writeMu.Lock() - defer d.writeMu.Unlock() - conn, release, err := d.acquireWriteConn(ctx) - if err != nil { - return err - } - defer func() { _ = release() }() - return d.insertTableAsSelect(ctx, conn, release, name, query, opts) -} -func (d *db) insertTableAsSelect(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, name, query string, opts *InsertTableOptions) error { - // Get current table version - oldVersion, oldVersionExists, err := tableVersion(d.localPath, name) - if err != nil || !oldVersionExists { - return fmt.Errorf("table %q does not exist", name) + safeName := safeSQLName(name) + var typ string + if opts.View { + typ = "VIEW" + newMeta.Type = "VIEW" + } else { + typ = "TABLE" + newMeta.Type = "TABLE" + newMeta.SQL = "" } - - d.writeDirty = true - // Execute the insert - err = execIncrementalInsert(ctx, conn, fmt.Sprintf("%s.default", safeSQLName(dbName(name))), query, opts) + // ingest data + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE %s %s AS (%s\n)", typ, safeName, query), nil) if err != nil { - return fmt.Errorf("insert: insert into table %q failed: %w", name, err) + return fmt.Errorf("create: create %s %q failed: %w", typ, name, err) } - // rename db directory - newVersion := newVersion() - oldVersionDir := filepath.Join(d.localPath, name, oldVersion) - err = os.Rename(oldVersionDir, filepath.Join(d.localPath, name, newVersion)) + // close write handle before syncing read so that temp files or wal files are removed + err = release() if err != nil { - return fmt.Errorf("insert: update version %q failed: %w", newVersion, err) + return err } - // update version.txt - err = os.WriteFile(filepath.Join(d.localPath, name, "version.txt"), []byte(newVersion), fs.ModePerm) - if err != nil { - return fmt.Errorf("insert: 
write version file failed: %w", err) + d.writeDirty = true + // update remote data and metadata + if err := d.pushToRemote(ctx, name, oldMeta, newMeta); err != nil { + return fmt.Errorf("create: replicate failed: %w", err) } + d.logger.Debug("remote table updated", slog.String("name", name)) - err = releaseConn() + // update local metadata + err = d.writeTableMeta(name, newMeta) if err != nil { - return err + return fmt.Errorf("create: write version file failed: %w", err) } - // replicate - err = d.syncBackupWithLocal(ctx, name, oldVersion) + + d.writeDirty = false + err = d.reopen(ctx) if err != nil { - return fmt.Errorf("insert: replicate failed: %w", err) + // TODO :: this means reads will not be in sync with remote till another write happens + // Should we mark db as reopen and wait for outstanding queries to become zero and then reopen? + return fmt.Errorf("create: db reopen failed: %w", err) } - // both backups and write are now in sync - d.writeDirty = false - - // Delete the old version (ignoring errors since source the new data has already been correctly inserted) - _ = os.RemoveAll(oldVersionDir) - return d.reopen("") + return nil } -// DropTable implements DB. 
-func (d *db) DropTable(ctx context.Context, name string) error { - d.logger.Debug("drop table", slog.String("name", name)) +func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx context.Context, conn *sqlx.Conn) error) error { + d.logger.Debug("mutate table", slog.String("name", name)) d.writeMu.Lock() defer d.writeMu.Unlock() - _, release, err := d.acquireWriteConn(ctx) // we don't need the handle but need to sync the write - if err != nil { - return err - } - defer func() { - _ = release() - }() - - return d.dropTable(ctx, name) -} -func (d *db) dropTable(ctx context.Context, name string) error { - _, exist, _ := tableVersion(d.localPath, name) - if !exist { - return fmt.Errorf("drop: table %q not found", name) - } - - d.writeDirty = true - - // drop the table from backup location - err := d.deleteBackup(ctx, name, "") + oldMeta, err := d.tableMeta(name) if err != nil { - return fmt.Errorf("drop: unable to drop table %q from backup: %w", name, err) + if errors.Is(err, errNotFound) { + return fmt.Errorf("mutate: Table %q not found", name) + } + return fmt.Errorf("rename: unable to get table meta: %w", err) } - d.writeDirty = false - // reopen db handle - err = d.reopen(name) + // create new version directory + newVersion := newVersion() + newVersionDir := filepath.Join(d.localPath, name, newVersion) + err = os.MkdirAll(newVersionDir, fs.ModePerm) if err != nil { - return fmt.Errorf("drop: unable to reopen: %w", err) + return fmt.Errorf("mutate: unable to create dir %q: %w", name, err) } - err = d.deleteLocalTable(name, "") + err = copyDir(newVersionDir, filepath.Join(d.localPath, name, oldMeta.Version)) if err != nil { - d.logger.Debug("drop: unable to delete local table data", slog.String("table", name), slog.String("error", err.Error())) + return fmt.Errorf("mutate: copy table failed: %w", err) } - return nil -} -func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { - d.logger.Debug("rename table", 
slog.String("from", oldName), slog.String("to", newName)) - if strings.EqualFold(oldName, newName) { - return fmt.Errorf("rename: Table with name %q already exists", newName) - } - d.writeMu.Lock() - defer d.writeMu.Unlock() - _, release, err := d.acquireWriteConn(ctx) // we don't need the handle but need to sync the write + // acquire write connection + // need to ignore attaching table since it is already present in the db file + conn, release, err := d.acquireWriteConn(ctx, filepath.Join(newVersionDir, "data.db"), name, false) if err != nil { return err } - defer func() { - _ = release() - }() - return d.renameTable(ctx, oldName, newName) -} -func (d *db) renameTable(ctx context.Context, oldName, newName string) error { - oldVersion, exist, err := d.tableVersion(oldName, false) + err = mutateFn(ctx, conn) if err != nil { - return err - } - if !exist { - return fmt.Errorf("rename: Table %q not found", oldName) + _ = release() + return fmt.Errorf("mutate: mutate failed: %w", err) } - oldVersionInNewTable, replaceInNewTable, _ := d.tableVersion(newName, false) - + // push to remote + _ = release() d.writeDirty = true - // copy the old table version to new table version - version := newVersion() - err = copyDir(filepath.Join(d.localPath, newName, version), filepath.Join(d.localPath, oldName, oldVersion)) - if err != nil { - return fmt.Errorf("rename: copy table failed: %w", err) + meta := &tableMeta{ + Name: name, + Version: newVersion, + CreatedVersion: oldMeta.CreatedVersion, + Type: oldMeta.Type, + SQL: oldMeta.SQL, } - - // update version.txt - err = d.setTableVersion(newName, version) + err = d.pushToRemote(ctx, name, oldMeta, meta) if err != nil { - return fmt.Errorf("rename: write version file failed: %w", err) + return fmt.Errorf("mutate: replicate failed: %w", err) } - // sync the new table and new version - if err := d.syncBackupWithLocal(ctx, newName, oldVersionInNewTable); err != nil { - return fmt.Errorf("rename: unable to replicate new table: %w", err) 
- } - - // drop the old table in backup - err = d.deleteBackup(ctx, oldName, "") + // update local meta + err = d.writeTableMeta(name, meta) if err != nil { - // at this point both is inconsistent - // has both old table and new table - return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) + return fmt.Errorf("rename: write version file failed: %w", err) } + d.writeDirty = false // reopen db handle ignoring old name - err = d.reopen(oldName) + err = d.reopen(ctx) if err != nil { return fmt.Errorf("rename: unable to reopen: %w", err) } - - d.inconsistent = false - - if replaceInNewTable { - _ = d.deleteLocalTable(newName, oldVersionInNewTable) - } - - // delete old table from local - err = d.deleteLocalTable(oldName, "") - if err != nil { - d.logger.Debug("rename: unable to delete old table", slog.String("table", oldName), slog.String("error", err.Error())) - } - - d.writeDirty = false return nil } -func (d *db) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - d.logger.Debug("AddTableColumn", slog.String("table", tableName), slog.String("column", columnName), slog.String("typ", typ)) +// DropTable implements DB. 
+func (d *db) DropTable(ctx context.Context, name string) error { + d.logger.Debug("drop table", slog.String("name", name)) d.writeMu.Lock() defer d.writeMu.Unlock() - conn, release, err := d.acquireWriteConn(ctx) - if err != nil { - return err - } - defer func() { - _ = release() - }() - - return d.addTableColumn(ctx, conn, release, tableName, columnName, typ) -} - -func (d *db) addTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, typ string) error { - oldVersion, exist, err := tableVersion(d.localPath, tableName) - if err != nil { - return err - } - - if !exist { - return fmt.Errorf("table %q does not exist", tableName) - } - - newVersion := newVersion() - err = copyDir(filepath.Join(d.localPath, tableName, newVersion), filepath.Join(d.localPath, tableName, oldVersion)) - if err != nil { - return err - } - - // detach old db - _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE %s", safeSQLName(dbName(tableName)))) - if err != nil { - return err - } - // reattach new db - _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(filepath.Join(d.localPath, tableName, newVersion, "data.db")), safeSQLName(dbName(tableName)))) + // pull latest changes from remote + err := d.pullFromRemote(ctx) if err != nil { - return err + return fmt.Errorf("drop: unable to pull from remote: %w", err) } - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ADD COLUMN %s %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), typ)) + // check if table exists + meta, err := d.tableMeta(name) if err != nil { - return err + if errors.Is(err, errNotFound) { + return fmt.Errorf("drop: Table %q not found", name) + } + return fmt.Errorf("drop: unable to get table meta: %w", err) } - // update version.txt + // drop the table from backup location d.writeDirty = true - err = d.setTableVersion(tableName, newVersion) + err = d.deleteBackup(ctx, name, "") if err != nil { - return err + return 
fmt.Errorf("drop: unable to drop table %q from backup: %w", name, err) } - err = releaseConn() + // mark table as deleted in local + meta.Deleted = true + err = d.writeTableMeta(name, meta) if err != nil { - return err + return fmt.Errorf("drop: write meta failed: %w", err) } - // replicate - err = d.syncBackupWithLocal(ctx, tableName, oldVersion) + // reopen db handle + err = d.reopen(ctx) if err != nil { - return err + return fmt.Errorf("drop: unable to reopen: %w", err) } - d.writeDirty = false - - // remove old local version - _ = d.deleteLocalTable(tableName, oldVersion) - return d.reopen("") + d.writeDirty = false + return nil } -// AlterTableColumn implements drivers.OLAPStore. -func (d *db) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - d.logger.Debug("AlterTableColumn", slog.String("table", tableName), slog.String("column", columnName), slog.String("typ", newType)) +func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { + d.logger.Debug("rename table", slog.String("from", oldName), slog.String("to", newName)) + if strings.EqualFold(oldName, newName) { + return fmt.Errorf("rename: Table with name %q already exists", newName) + } d.writeMu.Lock() defer d.writeMu.Unlock() - conn, release, err := d.acquireWriteConn(ctx) - if err != nil { - return err - } - defer func() { - _ = release() - }() - return d.alterTableColumn(ctx, conn, release, tableName, columnName, newType) -} - -func (d *db) alterTableColumn(ctx context.Context, conn *sqlx.Conn, releaseConn func() error, tableName, columnName, newType string) error { - oldVersion, exist, err := tableVersion(d.localPath, tableName) + // pull latest changes from remote + err := d.pullFromRemote(ctx) if err != nil { - return err + return fmt.Errorf("rename: unable to pull from remote: %w", err) } - if !exist { - return fmt.Errorf("table %q does not exist", tableName) + oldMeta, err := d.tableMeta(oldName) + if err != nil { + if errors.Is(err, 
errNotFound) { + return fmt.Errorf("rename: Table %q not found", oldName) + } + return fmt.Errorf("rename: unable to get table meta: %w", err) } + // copy the old table to new table newVersion := newVersion() - err = copyDir(filepath.Join(d.localPath, tableName, newVersion), filepath.Join(d.localPath, tableName, oldVersion)) + err = copyDir(filepath.Join(d.localPath, newName, newVersion), filepath.Join(d.localPath, oldName, oldMeta.Version)) if err != nil { - return err + return fmt.Errorf("rename: copy table failed: %w", err) } - // detach old db - _, err = conn.ExecContext(ctx, fmt.Sprintf("DETACH DATABASE %s", safeSQLName(dbName(tableName)))) + // rename the underlying table + err = renameTable(ctx, filepath.Join(d.localPath, newName, newVersion, "data.db"), oldName, newName) if err != nil { - return err + return fmt.Errorf("rename: rename table failed: %w", err) } - // reattach new db - _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(filepath.Join(d.localPath, tableName, newVersion, "data.db")), safeSQLName(dbName(tableName)))) - if err != nil { - return err + d.writeDirty = true + // sync the new table and new version + meta := &tableMeta{ + Name: newName, + Version: newVersion, + CreatedVersion: newVersion, + Type: oldMeta.Type, + SQL: oldMeta.SQL, + } + if err := d.pushToRemote(ctx, newName, oldMeta, meta); err != nil { + return fmt.Errorf("rename: unable to replicate new table: %w", err) } - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.default ALTER %s TYPE %s", safeSQLName(dbName(tableName)), safeSQLName(columnName), newType)) + // drop the old table in backup + err = d.deleteBackup(ctx, oldName, "") if err != nil { - return err + // at this point db is inconsistent + // has both old table and new table + return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) } - // update version.txt - d.writeDirty = true - err = d.setTableVersion(tableName, newVersion) + // update local meta + 
err = d.writeTableMeta(newName, meta) if err != nil { - return err + return fmt.Errorf("rename: write version file failed: %w", err) } - err = releaseConn() + // mark table as deleted in local + oldMeta.Deleted = true + err = d.writeTableMeta(oldName, oldMeta) if err != nil { - return err + return fmt.Errorf("drop: write meta failed: %w", err) } - // replicate - err = d.syncBackupWithLocal(ctx, tableName, oldVersion) + // reopen db handle ignoring old name + err = d.reopen(ctx) if err != nil { - return err + return fmt.Errorf("rename: unable to reopen: %w", err) } - d.writeDirty = false - - // remove old local version - _ = d.deleteLocalTable(tableName, oldVersion) - return d.reopen("") + d.writeDirty = false + return nil } -func (d *db) reopen(deletedTable string) error { - // reopen should ignore context cancellations since cancellation errors can leave read inconsistent from write - // Also it is expected to be a fast operation so should be okay to ignore context cancellations - // extensions are already downloaded in NewDB - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - handle, err := d.openDBAndAttach(ctx, true, deletedTable) +func (d *db) reopen(ctx context.Context) error { + handle, err := d.openDBAndAttach(ctx, "", "", true) if err != nil { return err } @@ -764,6 +585,50 @@ func (d *db) reopen(deletedTable string) error { d.logger.Warn("error in closing old read handle", slog.String("error", err.Error())) } } + + // do another scan on local data and remove old versions, deleted tables etc + entries, err := os.ReadDir(d.localPath) + if err != nil { + return err + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + bytes, err := os.ReadFile(filepath.Join(d.localPath, entry.Name(), "meta.json")) + if err != nil { + d.logger.Debug("error in reading meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) + // no meta.json, delete the directory + _ = 
os.RemoveAll(filepath.Join(d.localPath, entry.Name())) + } + oldMeta := &tableMeta{} + err = json.Unmarshal(bytes, oldMeta) + if err != nil { + d.logger.Debug("error in unmarshalling meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) + _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) + } + + if oldMeta.Deleted { + d.logger.Debug("deleting deleted table", slog.String("table", entry.Name())) + _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) + continue + } + + // remove old versions + versions, err := os.ReadDir(filepath.Join(d.localPath, entry.Name())) + if err != nil { + return err + } + for _, version := range versions { + if !version.IsDir() { + continue + } + if version.Name() != oldMeta.Version { + d.logger.Debug("deleting old version", slog.String("table", entry.Name()), slog.String("version", version.Name())) + _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name(), version.Name())) + } + } + } return nil } @@ -783,12 +648,10 @@ func (d *db) Size() int64 { if strings.HasPrefix(entry.Name(), "__rill_tmp_") { continue } - path := filepath.Join(d.localPath, entry.Name()) - version, exist, _ := d.tableVersion(entry.Name(), true) - if !exist { - continue + meta, _ := d.tableMeta(entry.Name()) + if meta != nil { + paths = append(paths, filepath.Join(d.localPath, entry.Name(), meta.Version, "data.db")) } - paths = append(paths, filepath.Join(path, fmt.Sprintf("%s.db", version))) } return fileSize(paths) } @@ -796,13 +659,12 @@ func (d *db) Size() int64 { // acquireWriteConn syncs the write database, initializes the write handle and returns a write connection. // The release function should be called to release the connection. // It should be called with the writeMu locked. 
-func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, error) { - err := d.syncLocalWithBackup(ctx) - if err != nil { - return nil, nil, err +func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExisting bool) (*sqlx.Conn, func() error, error) { + var ignoreTable string + if !attachExisting { + ignoreTable = table } - - db, err := d.openDBAndAttach(ctx, false, "") + db, err := d.openDBAndAttach(ctx, dsn, ignoreTable, false) if err != nil { return nil, nil, err } @@ -811,6 +673,16 @@ func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, er _ = db.Close() return nil, nil, err } + + if attachExisting { + _, err = conn.ExecContext(ctx, "DROP VIEW IF EXISTS "+safeSQLName(table)) + if err != nil { + _ = conn.Close() + _ = db.Close() + return nil, nil, err + } + } + return conn, func() error { _ = conn.Close() err = db.Close() @@ -818,10 +690,11 @@ func (d *db) acquireWriteConn(ctx context.Context) (*sqlx.Conn, func() error, er }, nil } -func (d *db) openDBAndAttach(ctx context.Context, read bool, ignoreTable string) (*sqlx.DB, error) { +func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, error) { + d.logger.Debug("open db", slog.Bool("read", read), slog.String("uri", uri)) // open the db var settings map[string]string - dsn, err := url.Parse("") // in-memory + dsn, err := url.Parse(uri) // in-memory if err != nil { return nil, err } @@ -853,7 +726,6 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool, ignoreTable string) } db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") - err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(d.opts.OtelAttributes...)) if err != nil { return nil, fmt.Errorf("registering db stats metrics: %w", err) @@ -865,7 +737,7 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool, ignoreTable string) return nil, err } - err = d.attachDBs(ctx, db, read, ignoreTable) + err = d.attachDBs(ctx, db, 
ignoreTable) if err != nil { db.Close() return nil, err @@ -889,19 +761,20 @@ func (d *db) openDBAndAttach(ctx context.Context, read bool, ignoreTable string) order by 1, 2, 3, 4 `) if err != nil { + db.Close() return nil, err } return db, nil } -func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, read bool, ignoreTable string) error { +func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, ignoreTable string) error { entries, err := os.ReadDir(d.localPath) if err != nil { return err } - var views []string + tables := make([]*tableMeta, 0) for _, entry := range entries { if !entry.IsDir() { continue @@ -910,57 +783,49 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, read bool, ignoreTable continue } - version, exist, _ := d.tableVersion(entry.Name(), false) - if !exist { + meta, _ := d.tableMeta(entry.Name()) + if meta == nil || meta.Deleted { continue } - versionPath := filepath.Join(d.localPath, entry.Name(), version) + d.logger.Debug("discovered table", slog.String("table", entry.Name()), slog.String("version", meta.Version)) + tables = append(tables, meta) + } - // read meta file - isView := true - f, err := os.ReadFile(filepath.Join(versionPath, "meta.json")) - if err != nil { - pathErr := &fs.PathError{} - if !errors.As(err, &pathErr) { - _ = os.RemoveAll(versionPath) - d.logger.Warn("error in reading meta file", slog.String("table", entry.Name()), slog.Any("error", err)) - return err - } - isView = false + // sort tables by created_version + // this is to ensure that views/tables on which other views depend are attached first + slices.SortFunc(tables, func(a, b *tableMeta) int { + // all tables should be attached first + if a.Type == "TABLE" && b.Type == "TABLE" { + return 0 + } + if a.Type == "TABLE" { + return -1 + } + if b.Type == "TABLE" { + return 1 } - if isView { - var meta meta - err = json.Unmarshal(f, &meta) + return strings.Compare(a.CreatedVersion, b.CreatedVersion) + }) + + for _, table := range tables { + safeTable := 
safeSQLName(table.Name) + if table.Type == "VIEW" { + _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) if err != nil { - _ = os.RemoveAll(versionPath) - d.logger.Warn("error in unmarshalling meta file", slog.String("table", entry.Name()), slog.Any("error", err)) return err } - // table is a view - views = append(views, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS (%s\n)", safeSQLName(entry.Name()), meta.ViewSQL)) continue } - dbName := dbName(entry.Name()) - var readMode string - if read { - readMode = " (READ_ONLY)" - } - _, err = db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s %s", safeSQLString(filepath.Join(versionPath, "data.db")), safeSQLName(dbName), readMode)) - if err != nil { - d.logger.Error("error in attaching db", slog.String("table", entry.Name()), slog.Any("error", err)) - _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) - return err - } - - _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.default", safeSQLName(entry.Name()), safeSQLName(dbName))) + versionPath := filepath.Join(d.localPath, table.Name, table.Version) + safeDBName := safeSQLName(dbName(table.Name)) + _, err = db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(versionPath, "data.db")), safeDBName)) if err != nil { + d.logger.Error("error in attaching db", slog.String("table", table.Name), slog.Any("error", err)) + _ = os.RemoveAll(filepath.Join(d.localPath, table.Name)) return err } - } - // create views after attaching all the DBs since views can depend on other tables - for _, view := range views { - _, err := db.ExecContext(ctx, view) + _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) if err != nil { return err } @@ -968,116 +833,73 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, read bool, ignoreTable return nil } -func (d *db) tableVersion(name string, read 
bool) (string, bool, error) { - if read { - return tableVersion(d.localPath, name) +func (d *db) tableMeta(name string) (*tableMeta, error) { + contents, err := os.ReadFile(filepath.Join(d.localPath, name, "meta.json")) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil, errNotFound + } + return nil, err } - return tableVersion(d.localPath, name) -} - -func (d *db) setTableVersion(name, version string) error { - return os.WriteFile(filepath.Join(d.localPath, name, "version.txt"), []byte(version), fs.ModePerm) + m := &tableMeta{} + err = json.Unmarshal(contents, m) + if err != nil { + return nil, err + } + return m, nil } -func (d *db) deleteLocalTable(table, version string) error { - var path string - if version == "" { - path = filepath.Join(d.localPath, table) - } else { - path = filepath.Join(d.localPath, table, version) +func (d *db) writeTableMeta(name string, meta *tableMeta) error { + metaBytes, err := json.Marshal(meta) + if err != nil { + return fmt.Errorf("create: marshal meta failed: %w", err) + } + err = os.WriteFile(filepath.Join(d.localPath, name, "meta.json"), metaBytes, fs.ModePerm) + if err != nil { + return fmt.Errorf("create: write meta failed: %w", err) } - return os.RemoveAll(path) + return nil } -func execIncrementalInsert(ctx context.Context, conn *sqlx.Conn, safeTableName, query string, opts *InsertTableOptions) error { - var byNameClause string - if opts.ByName { - byNameClause = "BY NAME" - } +type tableMeta struct { + Name string `json:"name"` + Version string `json:"version"` + CreatedVersion string `json:"created_version"` + Type string `json:"type"` // either table or view + SQL string `json:"sql"` // populated for views + // Deleted is set to true if the table is deleted. + // This is only used for local tables since local copy can only be removed when db handle has been reattached. 
+ Deleted bool `json:"deleted"` +} - if opts.Strategy == IncrementalStrategyAppend { - _, err := conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s (%s\n)", safeTableName, byNameClause, query)) +func renameTable(ctx context.Context, dbFile, old, newName string) error { + db, err := sql.Open("duckdb", dbFile) + if err != nil { return err } + defer db.Close() - if opts.Strategy == IncrementalStrategyMerge { - // Create a temporary table with the new data - tmp := uuid.New().String() - _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE TEMPORARY TABLE %s AS (%s\n)", safeSQLName(tmp), query)) - if err != nil { - return err - } - - // check the count of the new data - // skip if the count is 0 - // if there was no data in the empty file then the detected schema can be different from the current schema which leads to errors or performance issues - res := conn.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) == 0 FROM %s", safeSQLName(tmp))) - var empty bool - if err := res.Scan(&empty); err != nil { - return err - } - - if empty { - return nil - } - - // Drop the rows from the target table where the unique key is present in the temporary table - where := "" - for i, key := range opts.UniqueKey { - key = safeSQLName(key) - if i != 0 { - where += " AND " - } - where += fmt.Sprintf("base.%s IS NOT DISTINCT FROM tmp.%s", key, key) - } - _, err = conn.ExecContext(ctx, fmt.Sprintf("DELETE FROM %s base WHERE EXISTS (SELECT 1 FROM %s tmp WHERE %s)", safeTableName, safeSQLName(tmp), where)) - if err != nil { - return err - } - - // Insert the new data into the target table - _, err = conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeTableName, byNameClause, safeSQLName(tmp))) + var isView bool + err = db.QueryRowContext(ctx, "SELECT lower(table_type) = 'view' FROM INFORMATION_SCHEMA.TABLES WHERE table_name = ?", old).Scan(&isView) + if err != nil { return err } - return fmt.Errorf("incremental insert strategy %q not supported", opts.Strategy) -} - 
-func tableVersion(path, name string) (string, bool, error) { - pathToFile := filepath.Join(path, name, "version.txt") - contents, err := os.ReadFile(pathToFile) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - return "", false, nil - } - return "", false, err + var typ string + if isView { + typ = "VIEW" + } else { + typ = "TABLE" } - return strings.TrimSpace(string(contents)), true, nil + + _, err = db.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, old, newName)) + return err } func newVersion() string { return strconv.FormatInt(time.Now().UnixMilli(), 10) } -type meta struct { - ViewSQL string -} - -func writeMeta(path string, meta *meta) error { - if meta == nil { - return nil - } - metaBytes, err := json.Marshal(meta) - if err != nil { - return fmt.Errorf("create: marshal meta failed: %w", err) - } - err = os.WriteFile(filepath.Join(path, "meta.json"), metaBytes, fs.ModePerm) - if err != nil { - return fmt.Errorf("create: write meta failed: %w", err) - } - return nil -} - func dbName(name string) string { return fmt.Sprintf("%s__data__db", name) } diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 780178f353c..0191821bf44 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -4,26 +4,19 @@ import ( "context" "io" "log/slog" + "os" "testing" + "github.com/jmoiron/sqlx" "github.com/stretchr/testify/require" + "gocloud.dev/blob/fileblob" ) func TestDB(t *testing.T) { - dir := t.TempDir() + db, _, _ := prepareDB(t) ctx := context.Background() - db, err := NewDB(ctx, &DBOptions{ - LocalPath: dir, - Backup: nil, - ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, - WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, - InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, - Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), - }) - require.NoError(t, err) - // create table - err = 
db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) require.NoError(t, err) // query table @@ -33,8 +26,8 @@ func TestDB(t *testing.T) { ) conn, release, err := db.AcquireReadConnection(ctx) require.NoError(t, err) - err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM test").Scan(&id, &country) - require.NoError(t, err) + + conn.QueryRowContext(ctx, "SELECT id, country FROM test").Scan(&id, &country) require.Equal(t, 1, id) require.Equal(t, "India", country) require.NoError(t, release()) @@ -48,32 +41,221 @@ func TestDB(t *testing.T) { require.Error(t, err) // insert into table - err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'US' AS country", &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - }) - require.NoError(t, err) - - // merge into table - err = db.InsertTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'USA' AS country", &InsertTableOptions{ - Strategy: IncrementalStrategyMerge, - UniqueKey: []string{"id"}, + err = db.MutateTable(ctx, "test2", func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, "INSERT INTO test2 (id, country) VALUES (2, 'USA')") + return err }) require.NoError(t, err) // query table conn, release, err = db.AcquireReadConnection(ctx) require.NoError(t, err) - err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM test2 where id = 2").Scan(&id, &country) + err = conn.QueryRowxContext(ctx, "SELECT id, country FROM test2 where id = 2").Scan(&id, &country) require.NoError(t, err) require.Equal(t, 2, id) require.Equal(t, "USA", country) require.NoError(t, release()) // Add column - err = db.AddTableColumn(ctx, "test2", "city", "TEXT") - require.NoError(t, err) + db.MutateTable(ctx, "test2", func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, "ALTER TABLE test2 ADD COLUMN city TEXT") + 
return err + }) // drop table err = db.DropTable(ctx, "test2") require.NoError(t, err) + require.NoError(t, db.Close()) +} + +func TestCreateTable(t *testing.T) { + db, _, _ := prepareDB(t) + ctx := context.Background() + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // replace table + err = db.CreateTableAsSelect(ctx, "test", "SELECT 2 AS id, 'USA' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 2, Country: "USA"}}) + + // create another table that ingests from first table + err = db.CreateTableAsSelect(ctx, "test2", "SELECT * FROM test", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test2", []testData{{ID: 2, Country: "USA"}}) + + // create view + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test_view", []testData{{ID: 2, Country: "USA"}}) + + // view on top of view + err = db.CreateTableAsSelect(ctx, "pest_view", "SELECT * FROM test_view", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM pest_view", []testData{{ID: 2, Country: "USA"}}) + + // replace underlying table + err = db.CreateTableAsSelect(ctx, "test", "SELECT 3 AS id, 'UK' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 3, Country: "UK"}}) + + // view should reflect the change + verifyTable(t, db, "SELECT id, country FROM test_view", []testData{{ID: 3, Country: "UK"}}) + + // create table that was previously view + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + 
require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test_view", []testData{{ID: 1, Country: "India"}}) + + // create view that was previously table + err = db.CreateTableAsSelect(ctx, "test", "SELECT * FROM test_view", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + require.NoError(t, db.Close()) +} + +func TestDropTable(t *testing.T) { + db, _, _ := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // create view + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test_view", []testData{{ID: 1, Country: "India"}}) + + // drop view + err = db.DropTable(ctx, "test_view") + require.NoError(t, err) + + // verify table data is still there + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // drop table + err = db.DropTable(ctx, "test") + require.NoError(t, err) + require.NoError(t, db.Close()) +} + +func TestMutateTable(t *testing.T) { + db, _, _ := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'Delhi' AS city", &CreateTableOptions{}) + require.NoError(t, err) + + // insert into table + err = db.MutateTable(ctx, "test", func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, "INSERT INTO test (id, city) VALUES (2, 'NY')") + return err + }) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, city FROM test", []testData{{ID: 1, City: "Delhi"}, {ID: 2, City: "NY"}}) + + // add column and update existing entries in parallel query 
existing table + alterDone := make(chan struct{}) + queryDone := make(chan struct{}) + testDone := make(chan struct{}) + + go func() { + db.MutateTable(ctx, "test", func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, "ALTER TABLE test ADD COLUMN country TEXT") + require.NoError(t, err) + _, err = conn.ExecContext(ctx, "UPDATE test SET country = 'USA' WHERE id = 2") + require.NoError(t, err) + _, err = conn.ExecContext(ctx, "UPDATE test SET country = 'India' WHERE id = 1") + require.NoError(t, err) + + close(alterDone) + <-queryDone + return nil + }) + close(testDone) + }() + + go func() { + <-alterDone + verifyTable(t, db, "SELECT * FROM test", []testData{{ID: 1, City: "Delhi"}, {ID: 2, City: "NY"}}) + close(queryDone) + }() + + <-testDone + verifyTable(t, db, "SELECT * FROM test", []testData{{ID: 1, City: "Delhi", Country: "India"}, {ID: 2, City: "NY", Country: "USA"}}) + require.NoError(t, db.Close()) +} + +func TestResetLocal(t *testing.T) { + db, localDir, remoteDir := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // reset local + require.NoError(t, db.Close()) + require.NoError(t, os.RemoveAll(localDir)) + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + bucket, err := fileblob.OpenBucket(remoteDir, nil) + require.NoError(t, err) + db, err = NewDB(ctx, &DBOptions{ + LocalPath: localDir, + Backup: bucket, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: logger, + }) + require.NoError(t, err) + verifyTable(t, db, "SELECT 
id, country FROM test", []testData{{ID: 1, Country: "India"}}) +} + +func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) { + localDir = t.TempDir() + ctx := context.Background() + // logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + // Level: slog.LevelDebug, + // })) + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + remoteDir = t.TempDir() + bucket, err := fileblob.OpenBucket(remoteDir, nil) + require.NoError(t, err) + db, err = NewDB(ctx, &DBOptions{ + LocalPath: localDir, + Backup: bucket, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: logger, + }) + require.NoError(t, err) + return +} + +func verifyTable(t *testing.T, db DB, query string, data []testData) { + ctx := context.Background() + conn, release, err := db.AcquireReadConnection(ctx) + require.NoError(t, err) + defer release() + + var scannedData []testData + err = conn.SelectContext(ctx, &scannedData, query) + require.NoError(t, err) + require.Equal(t, data, scannedData) +} + +type testData struct { + ID int `db:"id"` + Country string `db:"country"` + City string `db:"city"` } diff --git a/runtime/pkg/rduckdb/examples/main.go b/runtime/pkg/rduckdb/examples/main.go deleted file mode 100644 index 60c6e21a4ff..00000000000 --- a/runtime/pkg/rduckdb/examples/main.go +++ /dev/null @@ -1,74 +0,0 @@ -package main - -// import ( -// "context" -// "fmt" -// "log/slog" -// "time" - -// "github.com/rilldata/rill/runtime/pkg/rduckdb" -// _ "gocloud.dev/blob/gcsblob" -// ) - -// func main() { -// // backup, err := rduckdb.NewGCSBackupProvider(context.Background(), &rduckdb.GCSBackupProviderOptions{ -// // UseHostCredentials: true, -// // Bucket: "", -// // UniqueIdentifier: "756c6367-e807-43ff-8b07-df1bae29c57e/", -// // }) -// // if err != nil { -// // 
panic(err) -// // } - -// dbOptions := &rduckdb.DBOptions{ -// LocalPath: "", -// // BackupProvider: backup, -// ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, -// WriteSettings: map[string]string{"memory_limit": "8GB", "threads": "2"}, -// InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, -// Logger: slog.Default(), -// } - -// db, err := rduckdb.NewDB(context.Background(), "756c6367-e807-43ff-8b07-df1bae29c57e", dbOptions) -// if err != nil { -// panic(err) -// } -// defer db.Close() - -// t := time.Now() -// // create table -// err = db.CreateTableAsSelect(context.Background(), "test-2", `SELECT * FROM read_parquet('data*.parquet')`, &rduckdb.CreateTableOptions{}) -// if err != nil { -// panic(err) -// } -// fmt.Printf("time taken %v\n", time.Since(t)) - -// // rename table -// err = db.RenameTable(context.Background(), "test-2", "test") -// if err != nil { -// panic(err) -// } - -// // insert into renamed table -// err = db.InsertTableAsSelect(context.Background(), "test", `SELECT * FROM read_parquet('data*.parquet')`, &rduckdb.InsertTableOptions{ -// Strategy: rduckdb.IncrementalStrategyAppend, -// }) -// if err != nil { -// panic(err) -// } - -// // get count -// conn, release, err := db.AcquireReadConnection(context.Background()) -// if err != nil { -// panic(err) -// } -// defer release() - -// var count int -// err = conn.Connx().QueryRowxContext(context.Background(), `SELECT count(*) FROM "test"`).Scan(&count) -// if err != nil { -// fmt.Printf("error %v\n", err) -// } -// fmt.Println(count) - -// } diff --git a/runtime/pkg/rduckdb/backup.go b/runtime/pkg/rduckdb/remote.go similarity index 61% rename from runtime/pkg/rduckdb/backup.go rename to runtime/pkg/rduckdb/remote.go index f6ea5e5169d..263aff4ca47 100644 --- a/runtime/pkg/rduckdb/backup.go +++ b/runtime/pkg/rduckdb/remote.go @@ -2,6 +2,7 @@ package rduckdb import ( "context" + "encoding/json" "errors" "fmt" "io" @@ -17,9 
+18,9 @@ import ( "golang.org/x/sync/errgroup" ) -// syncLocalWithBackup syncs the write path with the backup location. +// pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. -func (d *db) syncLocalWithBackup(ctx context.Context) error { +func (d *db) pullFromRemote(ctx context.Context) error { if !d.writeDirty || d.backup == nil { // optimisation to skip sync if write was already synced return nil @@ -33,7 +34,7 @@ func (d *db) syncLocalWithBackup(ctx context.Context) error { Delimiter: "/", // only list directories with a trailing slash and IsDir set to true }) - tblVersions := make(map[string]string) + tblMetas := make(map[string]*tableMeta) for { // Stop the loop if the ctx was cancelled var stop bool @@ -62,42 +63,42 @@ func (d *db) syncLocalWithBackup(ctx context.Context) error { d.logger.Debug("SyncWithObjectStorage: discovered table", slog.String("table", table)) // get version of the table - var backedUpVersion string + var b []byte err = retry(ctx, func() error { - res, err := d.backup.ReadAll(ctx, path.Join(table, "version.txt")) + res, err := d.backup.ReadAll(ctx, path.Join(table, "meta.json")) if err != nil { return err } - backedUpVersion = string(res) + b = res return nil }) if err != nil { if gcerrors.Code(err) == gcerrors.NotFound { // invalid table directory d.logger.Debug("SyncWithObjectStorage: invalid table directory", slog.String("table", table)) - _ = d.deleteBackup(ctx, table, "") + continue } return err } - tblVersions[table] = backedUpVersion + backedUpMeta := &tableMeta{} + err = json.Unmarshal(b, backedUpMeta) + if err != nil { + d.logger.Debug("SyncWithObjectStorage: failed to unmarshal table metadata", slog.String("table", table), slog.Any("error", err)) + continue + } // check with current version - version, exists, _ := tableVersion(d.localPath, table) - if exists && version == backedUpVersion { + meta, _ := d.tableMeta(table) + if meta != nil && meta.Version == 
backedUpMeta.Version { d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) continue } - - tableDir := filepath.Join(d.localPath, table) - // truncate existing table directory - if err := os.RemoveAll(tableDir); err != nil { - return err - } - if err := os.MkdirAll(filepath.Join(tableDir, backedUpVersion), os.ModePerm); err != nil { + tblMetas[table] = backedUpMeta + if err := os.MkdirAll(filepath.Join(d.localPath, table, backedUpMeta.Version), os.ModePerm); err != nil { return err } - tblIter := d.backup.List(&blob.ListOptions{Prefix: path.Join(table, backedUpVersion)}) + tblIter := d.backup.List(&blob.ListOptions{Prefix: path.Join(table, backedUpMeta.Version)}) // download all objects in the table and current version for { obj, err := tblIter.Next(ctx) @@ -135,14 +136,14 @@ func (d *db) syncLocalWithBackup(ctx context.Context) error { } // Update table versions - for table, version := range tblVersions { - err = d.setTableVersion(table, version) + for table, meta := range tblMetas { + err = d.writeTableMeta(table, meta) if err != nil { return err } } - // remove any tables that are not in backup + // mark tables that are not in backup for delete later entries, err := os.ReadDir(d.localPath) if err != nil { return err @@ -151,79 +152,77 @@ func (d *db) syncLocalWithBackup(ctx context.Context) error { if !entry.IsDir() { continue } - if _, ok := tblVersions[entry.Name()]; ok { + if _, ok := tblMetas[entry.Name()]; ok { continue } - err = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) - if err != nil { - return err + // get current meta + meta, _ := d.tableMeta(entry.Name()) + if meta == nil { + // cleanup ?? + continue } + meta.Deleted = true + _ = d.writeTableMeta(entry.Name(), meta) } return nil } -// syncBackupWithLocal syncs the backup location with the local path for given table. +// pushToRemote syncs the backup location with the local path for given table. 
// If oldVersion is specified, it is deleted after successful sync. -func (d *db) syncBackupWithLocal(ctx context.Context, table, oldVersion string) error { +func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { if d.backup == nil { return nil } - d.logger.Debug("syncing table", slog.String("table", table)) - version, exist, err := tableVersion(d.localPath, table) - if err != nil { - return err - } - if !exist { - return fmt.Errorf("table %q not found", table) - } - - localPath := filepath.Join(d.localPath, table, version) - entries, err := os.ReadDir(localPath) - if err != nil { - return err - } - - for _, entry := range entries { - d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) - // no directory should exist as of now - if entry.IsDir() { - d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) - continue - } - - wr, err := os.Open(filepath.Join(localPath, entry.Name())) + if meta.Type == "TABLE" { + localPath := filepath.Join(d.localPath, table, meta.Version) + entries, err := os.ReadDir(localPath) if err != nil { return err } - // upload to cloud storage - err = retry(ctx, func() error { - return d.backup.Upload(ctx, path.Join(table, version, entry.Name()), wr, &blob.WriterOptions{ - ContentType: "application/octet-stream", + for _, entry := range entries { + d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) + // no directory should exist as of now + if entry.IsDir() { + d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) + continue + } + + wr, err := os.Open(filepath.Join(localPath, entry.Name())) + if err != nil { + return err + } + + // upload to cloud storage + err = retry(ctx, func() error { + return d.backup.Upload(ctx, path.Join(table, meta.Version, 
entry.Name()), wr, &blob.WriterOptions{ + ContentType: "application/octet-stream", + }) }) - }) - _ = wr.Close() - if err != nil { - return err + _ = wr.Close() + if err != nil { + return err + } } } - // update version.txt - // Ideally if this fails it leaves backup in inconsistent state but for now we will rely on retries - // ignore context cancellation errors for version.txt updates - ctxWithTimeout, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - err = retry(context.Background(), func() error { - return d.backup.WriteAll(ctxWithTimeout, path.Join(table, "version.txt"), []byte(version), nil) + // update table meta + // todo :: also use etag to avoid overwriting + m, err := json.Marshal(meta) + if err != nil { + return fmt.Errorf("failed to marshal table metadata: %w", err) + } + err = retry(ctx, func() error { + return d.backup.WriteAll(ctx, path.Join(table, "meta.json"), m, nil) }) if err != nil { d.logger.Error("failed to update version.txt in backup", slog.Any("error", err)) } // success -- remove old version - if oldVersion != "" { - _ = d.deleteBackup(ctx, table, oldVersion) + if oldMeta != nil { + _ = d.deleteBackup(ctx, table, oldMeta.Version) } return err } @@ -246,10 +245,7 @@ func (d *db) deleteBackup(ctx context.Context, table, version string) error { // deleting the entire table prefix = table + "/" // delete version.txt first - // also ignore context cancellation errors since it can leave the backup in inconsistent state - ctxWithTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - err := retry(context.Background(), func() error { return d.backup.Delete(ctxWithTimeout, "version.txt") }) + err := retry(ctx, func() error { return d.backup.Delete(ctx, "version.txt") }) if err != nil && gcerrors.Code(err) != gcerrors.NotFound { d.logger.Error("failed to delete version.txt in backup", slog.Any("error", err)) return err @@ -278,11 +274,6 @@ func (d *db) deleteBackup(ctx context.Context, 
table, version string) error { func retry(ctx context.Context, fn func() error) error { var err error for i := 0; i < _maxRetries; i++ { - select { - case <-ctx.Done(): - return ctx.Err() // return on context cancellation - case <-time.After(_retryDelay): - } err = fn() if err == nil { return nil // success @@ -290,6 +281,15 @@ func retry(ctx context.Context, fn func() error) error { if !strings.Contains(err.Error(), "stream error: stream ID") { break // break and return error } + + timer := time.NewTimer(_retryDelay) + select { + case <-ctx.Done(): + timer.Stop() + return ctx.Err() // return on context cancellation + case <-time.After(_retryDelay): + timer.Stop() + } } return err } diff --git a/runtime/pkg/rduckdb/singledb.go b/runtime/pkg/rduckdb/singledb.go deleted file mode 100644 index be0e060dd43..00000000000 --- a/runtime/pkg/rduckdb/singledb.go +++ /dev/null @@ -1,308 +0,0 @@ -package rduckdb - -import ( - "context" - "database/sql" - "database/sql/driver" - "errors" - "fmt" - "io" - "log/slog" - "net/url" - "os" - "strings" - "sync" - - "github.com/XSAM/otelsql" - "github.com/jmoiron/sqlx" - "github.com/marcboeker/go-duckdb" - "go.opentelemetry.io/otel/attribute" -) - -type singledb struct { - db *sqlx.DB - writeMU *sync.Mutex // limits write queries to one at a time. Does not block read queries. - logger *slog.Logger -} - -type SingleDBOptions struct { - DSN string - Clean bool - InitQueries []string - Logger *slog.Logger -} - -var _ DB = &singledb{} - -// NewSingleDB creates a new DB that writes to and reads from a single DuckDB database. -// This is useful for testing only. 
-func NewSingleDB(ctx context.Context, opts *SingleDBOptions) (DB, error) { - if opts.Clean { - u, err := url.Parse(opts.DSN) - if err != nil { - return nil, err - } - if u.Path != "" { - err = os.Remove(u.Path) - if err != nil && !os.IsNotExist(err) { - return nil, err - } - } - } - connector, err := duckdb.NewConnector(opts.DSN, func(execer driver.ExecerContext) error { - for _, qry := range opts.InitQueries { - _, err := execer.ExecContext(context.Background(), qry, nil) - if err != nil && strings.Contains(err.Error(), "Failed to download extension") { - // Retry using another mirror. Based on: https://github.com/duckdb/duckdb/issues/9378 - _, err = execer.ExecContext(context.Background(), qry+" FROM 'http://nightly-extensions.duckdb.org'", nil) - } - if err != nil { - return err - } - } - return nil - }) - if err != nil { - if strings.Contains(err.Error(), "Symbol not found") { - fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") - os.Exit(1) - } - return nil, err - } - - db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") - err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(attribute.String("db.system", "duckdb"))) - if err != nil { - db.Close() - return nil, fmt.Errorf("registering db stats metrics: %w", err) - } - - err = db.PingContext(context.Background()) - if err != nil { - db.Close() - return nil, err - } - if opts.Logger == nil { - opts.Logger = slog.New(slog.NewJSONHandler(io.Discard, nil)) - } - return &singledb{ - db: db, - writeMU: &sync.Mutex{}, - logger: opts.Logger, - }, nil -} - -// Close implements DB. -func (s *singledb) Close() error { - return s.db.Close() -} - -// AcquireReadConnection implements DB. 
-func (s *singledb) AcquireReadConnection(ctx context.Context) (Conn, func() error, error) { - conn, err := s.db.Connx(ctx) - if err != nil { - return nil, nil, err - } - - return &singledbConn{ - Conn: conn, - db: s, - }, conn.Close, nil -} - -func (s *singledb) AcquireWriteConnection(ctx context.Context) (Conn, func() error, error) { - s.writeMU.Lock() - c, err := s.db.Connx(ctx) - if err != nil { - s.writeMU.Unlock() - return nil, nil, err - } - - return &singledbConn{ - Conn: c, - db: s, - }, func() error { - err := c.Close() - s.writeMU.Unlock() - return err - }, nil -} - -// CreateTableAsSelect implements DB. -func (s *singledb) CreateTableAsSelect(ctx context.Context, name, uery string, opts *CreateTableOptions) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return s.createTableAsSelect(ctx, conn, name, uery, opts) -} - -func (s *singledb) createTableAsSelect(ctx context.Context, conn *sqlx.Conn, name, query string, opts *CreateTableOptions) error { - var typ string - if opts != nil && opts.View { - typ = "VIEW" - } else { - typ = "TABLE" - } - - _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE %s %s AS (%s\n)", typ, safeSQLName(name), query)) - return err -} - -// DropTable implements DB. -func (s *singledb) DropTable(ctx context.Context, name string) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return s.dropTable(ctx, conn, name) -} - -func (s *singledb) dropTable(ctx context.Context, conn *sqlx.Conn, name string) error { - view, err := isView(ctx, conn, name) - if err != nil { - return err - } - var typ string - if view { - typ = "VIEW" - } else { - typ = "TABLE" - } - - _, err = conn.ExecContext(ctx, fmt.Sprintf("DROP %s %s", typ, safeSQLName(name))) - return err -} - -// InsertTableAsSelect implements DB. 
-func (s *singledb) InsertTableAsSelect(ctx context.Context, name, query string, opts *InsertTableOptions) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return execIncrementalInsert(ctx, conn, safeSQLName(name), query, opts) -} - -// RenameTable implements DB. -func (s *singledb) RenameTable(ctx context.Context, oldName, newName string) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return s.renameTable(ctx, conn, oldName, newName) -} - -func (s *singledb) renameTable(ctx context.Context, conn *sqlx.Conn, oldName, newName string) error { - view, err := isView(ctx, conn, oldName) - if err != nil { - return err - } - - var typ string - if view { - typ = "VIEW" - } else { - typ = "TABLE" - } - - newNameIsView, err := isView(ctx, conn, newName) - if err != nil { - if !errors.Is(err, sql.ErrNoRows) { - return err - } - // The newName does not exist. - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName))) - return err - } - - // The newName is already occupied. - var existingTyp string - if newNameIsView { - existingTyp = "VIEW" - } else { - existingTyp = "TABLE" - } - - _, err = conn.ExecContext(ctx, fmt.Sprintf("DROP %s IF EXISTS %s", existingTyp, safeSQLName(newName))) - if err != nil { - return err - } - - _, err = conn.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(oldName), safeSQLName(newName))) - return err -} - -// AddTableColumn implements DB. 
-func (s *singledb) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return s.addTableColumn(ctx, conn, tableName, columnName, typ) -} - -func (s *singledb) addTableColumn(ctx context.Context, conn *sqlx.Conn, tableName, columnName, typ string) error { - _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", safeSQLString(tableName), safeSQLName(columnName), typ)) - return err -} - -// AlterTableColumn implements DB. -func (s *singledb) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - s.writeMU.Lock() - defer s.writeMU.Unlock() - - conn, err := s.db.Connx(ctx) - if err != nil { - return err - } - - return s.alterTableColumn(ctx, conn, tableName, columnName, newType) -} - -func (s *singledb) alterTableColumn(ctx context.Context, conn *sqlx.Conn, tableName, columnName, newType string) error { - _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ALTER COLUMN %s TYPE %s", safeSQLName(tableName), safeSQLName(columnName), newType)) - return err -} - -// TODO :: fix by calling pragma_database_size -func (s *singledb) Size() int64 { - return 0 -} - -func isView(ctx context.Context, conn *sqlx.Conn, name string) (bool, error) { - var view bool - err := conn.QueryRowxContext(ctx, ` - SELECT - UPPER(table_type) = 'VIEW' - FROM - information_schema.tables - WHERE - table_catalog = current_database() - AND table_schema = 'main' - AND LOWER(table_name) = LOWER(?) 
- `, name).Scan(&view) - if err != nil { - return false, err - } - return view, nil -} diff --git a/runtime/pkg/rduckdb/singledb_test.go b/runtime/pkg/rduckdb/singledb_test.go deleted file mode 100644 index 86edc038538..00000000000 --- a/runtime/pkg/rduckdb/singledb_test.go +++ /dev/null @@ -1,103 +0,0 @@ -package rduckdb - -import ( - "context" - "database/sql" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestSingleDB_test(t *testing.T) { - ctx := context.Background() - db, err := NewSingleDB(ctx, &SingleDBOptions{ - DSN: "", - }) - require.NoError(t, err) - - // create table - rw, release, err := db.AcquireWriteConnection(ctx) - require.NoError(t, err) - - err = rw.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) - require.NoError(t, err) - - // rename table - err = rw.RenameTable(ctx, "test-2", "test") - require.NoError(t, err) - - // insert into table - err = rw.InsertTableAsSelect(ctx, "test", "SELECT 2 AS id, 'USA' AS country", &InsertTableOptions{ - Strategy: IncrementalStrategyAppend, - }) - require.NoError(t, err) - - // add column - err = rw.AddTableColumn(ctx, "test", "currency_score", "INT") - require.NoError(t, err) - - // alter column - err = rw.AlterTableColumn(ctx, "test", "currency_score", "FLOAT") - require.NoError(t, err) - require.NoError(t, release()) - - // select from table - conn, release, err := db.AcquireReadConnection(ctx) - require.NoError(t, err) - - var ( - id int - country string - currencyScore sql.NullFloat64 - ) - - err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country, currency_score FROM test WHERE id = 2").Scan(&id, &country, ¤cyScore) - require.NoError(t, err) - require.Equal(t, 2, id) - require.Equal(t, "USA", country) - require.Equal(t, false, currencyScore.Valid) - - err = release() - require.NoError(t, err) - - // drop table - err = db.DropTable(ctx, "test") - require.NoError(t, err) -} - -func TestSingleDB_testRenameExisting(t *testing.T) { - ctx := 
context.Background() - db, err := NewSingleDB(ctx, &SingleDBOptions{ - DSN: "", - }) - require.NoError(t, err) - - // create table - err = db.CreateTableAsSelect(ctx, "test-2", "SELECT 1 AS id, 'India' AS country", nil) - require.NoError(t, err) - - // create another table - err = db.CreateTableAsSelect(ctx, "test-3", "SELECT 2 AS id, 'USA' AS country", nil) - require.NoError(t, err) - - // rename table - err = db.RenameTable(ctx, "test-2", "test-3") - require.NoError(t, err) - - // select from table - conn, release, err := db.AcquireReadConnection(ctx) - require.NoError(t, err) - - var ( - id int - country string - ) - - err = conn.Connx().QueryRowxContext(ctx, "SELECT id, country FROM \"test-3\" WHERE id = 1").Scan(&id, &country) - require.NoError(t, err) - require.Equal(t, 1, id) - require.Equal(t, "India", country) - - err = release() - require.NoError(t, err) -} From 0acae1cfffd314c6fc49e1251e5606683f2510fc Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 18 Nov 2024 12:45:02 +0530 Subject: [PATCH 14/64] use semaphore instead of mutex for write locks --- runtime/pkg/rduckdb/db.go | 132 +++++++++++++++++++++------------ runtime/pkg/rduckdb/db_test.go | 4 +- runtime/pkg/rduckdb/remote.go | 43 +++++------ 3 files changed, 108 insertions(+), 71 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index f5c4b176e4a..fc03bc2b3ce 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -25,6 +25,7 @@ import ( "github.com/mitchellh/mapstructure" "go.opentelemetry.io/otel/attribute" "gocloud.dev/blob" + "golang.org/x/sync/semaphore" ) var errNotFound = errors.New("not found") @@ -58,12 +59,9 @@ type DB interface { } type DBOptions struct { - // Clean specifies whether to start with a clean database or download data from cloud storage and start with backed up data. - Clean bool // LocalPath is the path where local db files will be stored. 
Should be unique for each database. LocalPath string - - Backup *blob.Bucket + Remote *blob.Bucket // ReadSettings are settings applied the read duckDB handle. ReadSettings map[string]string @@ -149,6 +147,26 @@ func (d *DBOptions) ValidateSettings() error { } } + if readThread == 0 && writeThread == 0 { + connector, err := duckdb.NewConnector("", nil) + if err != nil { + return fmt.Errorf("unable to create duckdb connector: %w", err) + } + defer connector.Close() + db := sql.OpenDB(connector) + defer db.Close() + + row := db.QueryRow("SELECT value FROM duckdb_settings() WHERE name = 'threads'") + var threads int + err = row.Scan(&threads) + if err != nil { + return fmt.Errorf("unable to get threads: %w", err) + } + + read.Threads = strconv.Itoa((threads + 1) / 2) + write.Threads = strconv.Itoa(threads / 2) + } + if readThread == 0 != (writeThread == 0) { // only one is defined var threads int @@ -158,8 +176,12 @@ func (d *DBOptions) ValidateSettings() error { threads = writeThread } - read.Threads = strconv.Itoa(threads) - write.Threads = strconv.Itoa(threads) + read.Threads = strconv.Itoa((threads + 1) / 2) + if threads <= 3 { + write.Threads = "1" + } else { + write.Threads = strconv.Itoa(threads / 2) + } } err = mapstructure.WeakDecode(read, &d.ReadSettings) @@ -177,10 +199,11 @@ func (d *DBOptions) ValidateSettings() error { type CreateTableOptions struct { // View specifies whether the created table is a view. View bool + // InitSQL is the SQL to run before creating the table. + InitSQL string } // NewDB creates a new DB instance. -// This can be a slow operation if the backup is large. // dbIdentifier is a unique identifier for the database reported in metrics. 
func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { err := opts.ValidateSettings() @@ -190,18 +213,13 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { db := &db{ opts: opts, localPath: opts.LocalPath, + readMu: &sync.RWMutex{}, + writeSem: semaphore.NewWeighted(1), writeDirty: true, logger: opts.Logger, } - if opts.Backup != nil { - db.backup = opts.Backup - } - // if clean is true, remove the backup - if opts.Clean { - err = db.deleteBackup(ctx, "", "") - if err != nil { - return nil, fmt.Errorf("unable to clean backup: %w", err) - } + if opts.Remote != nil { + db.remote = opts.Remote } // create local path @@ -234,11 +252,11 @@ type db struct { localPath string readHandle *sqlx.DB - readMu sync.RWMutex - writeMu sync.Mutex + readMu *sync.RWMutex + writeSem *semaphore.Weighted writeDirty bool - backup *blob.Bucket + remote *blob.Bucket logger *slog.Logger } @@ -246,8 +264,8 @@ type db struct { var _ DB = &db{} func (d *db) Close() error { - d.writeMu.Lock() - defer d.writeMu.Unlock() + _ = d.writeSem.Acquire(context.Background(), 1) + defer d.writeSem.Release(1) d.readMu.Lock() defer d.readMu.Unlock() @@ -274,11 +292,14 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts *CreateTableOptions) error { d.logger.Debug("create table", slog.String("name", name), slog.Bool("view", opts.View)) - d.writeMu.Lock() - defer d.writeMu.Unlock() + err := d.writeSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.writeSem.Release(1) // pull latest changes from remote - err := d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx) if err != nil { return err } @@ -300,7 +321,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * if opts.View { newMeta.SQL = query dsn = "" - // SPECIAL CASE + // special handling to ensure that if a view is recreated with the same name and schema then any views on top of 
this view still works if oldMeta != nil && oldMeta.Type == "VIEW" { newMeta.CreatedVersion = oldMeta.CreatedVersion } @@ -335,7 +356,12 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * } else { typ = "TABLE" newMeta.Type = "TABLE" - newMeta.SQL = "" + } + if opts.InitSQL != "" { + _, err = conn.ExecContext(ctx, opts.InitSQL, nil) + if err != nil { + return fmt.Errorf("create: init sql failed: %w", err) + } } // ingest data _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE %s %s AS (%s\n)", typ, safeName, query), nil) @@ -374,8 +400,11 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx context.Context, conn *sqlx.Conn) error) error { d.logger.Debug("mutate table", slog.String("name", name)) - d.writeMu.Lock() - defer d.writeMu.Unlock() + err := d.writeSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.writeSem.Release(1) oldMeta, err := d.tableMeta(name) if err != nil { @@ -444,11 +473,14 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con // DropTable implements DB. 
func (d *db) DropTable(ctx context.Context, name string) error { d.logger.Debug("drop table", slog.String("name", name)) - d.writeMu.Lock() - defer d.writeMu.Unlock() + err := d.writeSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.writeSem.Release(1) // pull latest changes from remote - err := d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx) if err != nil { return fmt.Errorf("drop: unable to pull from remote: %w", err) } @@ -462,11 +494,11 @@ func (d *db) DropTable(ctx context.Context, name string) error { return fmt.Errorf("drop: unable to get table meta: %w", err) } - // drop the table from backup location + // drop the table from remote d.writeDirty = true - err = d.deleteBackup(ctx, name, "") + err = d.deleteRemote(ctx, name, "") if err != nil { - return fmt.Errorf("drop: unable to drop table %q from backup: %w", name, err) + return fmt.Errorf("drop: unable to drop table %q from remote: %w", name, err) } // mark table as deleted in local @@ -491,11 +523,14 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { if strings.EqualFold(oldName, newName) { return fmt.Errorf("rename: Table with name %q already exists", newName) } - d.writeMu.Lock() - defer d.writeMu.Unlock() + err := d.writeSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.writeSem.Release(1) // pull latest changes from remote - err := d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx) if err != nil { return fmt.Errorf("rename: unable to pull from remote: %w", err) } @@ -534,12 +569,14 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: unable to replicate new table: %w", err) } - // drop the old table in backup - err = d.deleteBackup(ctx, oldName, "") + // TODO :: fix this + // at this point db is inconsistent + // has both old table and new table + + // drop the old table in remote + err = d.deleteRemote(ctx, oldName, "") if err != nil { - // at this point db is inconsistent - // 
has both old table and new table - return fmt.Errorf("rename: unable to delete old table %q from backup: %w", oldName, err) + return fmt.Errorf("rename: unable to delete old table %q from remote: %w", oldName, err) } // update local meta @@ -554,14 +591,13 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { if err != nil { return fmt.Errorf("drop: write meta failed: %w", err) } + d.writeDirty = false // reopen db handle ignoring old name err = d.reopen(ctx) if err != nil { return fmt.Errorf("rename: unable to reopen: %w", err) } - - d.writeDirty = false return nil } @@ -601,14 +637,14 @@ func (d *db) reopen(ctx context.Context) error { // no meta.json, delete the directory _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) } - oldMeta := &tableMeta{} - err = json.Unmarshal(bytes, oldMeta) + meta := &tableMeta{} + err = json.Unmarshal(bytes, meta) if err != nil { d.logger.Debug("error in unmarshalling meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) } - if oldMeta.Deleted { + if meta.Deleted { d.logger.Debug("deleting deleted table", slog.String("table", entry.Name())) _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) continue @@ -623,7 +659,7 @@ func (d *db) reopen(ctx context.Context) error { if !version.IsDir() { continue } - if version.Name() != oldMeta.Version { + if version.Name() != meta.Version { d.logger.Debug("deleting old version", slog.String("table", entry.Name()), slog.String("version", version.Name())) _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name(), version.Name())) } diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 0191821bf44..120eed00f15 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -210,7 +210,7 @@ func TestResetLocal(t *testing.T) { require.NoError(t, err) db, err = NewDB(ctx, &DBOptions{ LocalPath: localDir, - 
Backup: bucket, + Remote: bucket, ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, @@ -232,7 +232,7 @@ func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) { require.NoError(t, err) db, err = NewDB(ctx, &DBOptions{ LocalPath: localDir, - Backup: bucket, + Remote: bucket, ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 263aff4ca47..86538bc7d87 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -21,16 +21,16 @@ import ( // pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. func (d *db) pullFromRemote(ctx context.Context) error { - if !d.writeDirty || d.backup == nil { + if !d.writeDirty || d.remote == nil { // optimisation to skip sync if write was already synced return nil } - d.logger.Debug("syncing from backup") + d.logger.Debug("syncing from remote") // Create an errgroup for background downloads with limited concurrency. 
g, ctx := errgroup.WithContext(ctx) g.SetLimit(8) - objects := d.backup.List(&blob.ListOptions{ + objects := d.remote.List(&blob.ListOptions{ Delimiter: "/", // only list directories with a trailing slash and IsDir set to true }) @@ -65,7 +65,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { // get version of the table var b []byte err = retry(ctx, func() error { - res, err := d.backup.ReadAll(ctx, path.Join(table, "meta.json")) + res, err := d.remote.ReadAll(ctx, path.Join(table, "meta.json")) if err != nil { return err } @@ -98,7 +98,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { return err } - tblIter := d.backup.List(&blob.ListOptions{Prefix: path.Join(table, backedUpMeta.Version)}) + tblIter := d.remote.List(&blob.ListOptions{Prefix: path.Join(table, backedUpMeta.Version)}) // download all objects in the table and current version for { obj, err := tblIter.Next(ctx) @@ -116,7 +116,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { } defer file.Close() - rdr, err := d.backup.NewReader(ctx, obj.Key, nil) + rdr, err := d.remote.NewReader(ctx, obj.Key, nil) if err != nil { return err } @@ -143,7 +143,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { } } - // mark tables that are not in backup for delete later + // mark tables that are not in remote for delete later entries, err := os.ReadDir(d.localPath) if err != nil { return err @@ -167,14 +167,15 @@ func (d *db) pullFromRemote(ctx context.Context) error { return nil } -// pushToRemote syncs the backup location with the local path for given table. +// pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. 
func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { - if d.backup == nil { + if d.remote == nil { return nil } if meta.Type == "TABLE" { + // for views no db files exists, the SQL is stored in meta.json localPath := filepath.Join(d.localPath, table, meta.Version) entries, err := os.ReadDir(localPath) if err != nil { @@ -196,7 +197,7 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl // upload to cloud storage err = retry(ctx, func() error { - return d.backup.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ + return d.remote.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ ContentType: "application/octet-stream", }) }) @@ -208,30 +209,30 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl } // update table meta - // todo :: also use etag to avoid overwriting + // todo :: also use etag to avoid concurrent writer conflicts m, err := json.Marshal(meta) if err != nil { return fmt.Errorf("failed to marshal table metadata: %w", err) } err = retry(ctx, func() error { - return d.backup.WriteAll(ctx, path.Join(table, "meta.json"), m, nil) + return d.remote.WriteAll(ctx, path.Join(table, "meta.json"), m, nil) }) if err != nil { - d.logger.Error("failed to update version.txt in backup", slog.Any("error", err)) + d.logger.Error("failed to update version.txt in remote", slog.Any("error", err)) } // success -- remove old version if oldMeta != nil { - _ = d.deleteBackup(ctx, table, oldMeta.Version) + _ = d.deleteRemote(ctx, table, oldMeta.Version) } return err } -// deleteBackup deletes backup. +// deleteRemote deletes remote. // If table is specified, only that table is deleted. // If table and version is specified, only that version of the table is deleted. 
-func (d *db) deleteBackup(ctx context.Context, table, version string) error { - if d.backup == nil { +func (d *db) deleteRemote(ctx context.Context, table, version string) error { + if d.remote == nil { return nil } if table == "" && version != "" { @@ -245,16 +246,16 @@ func (d *db) deleteBackup(ctx context.Context, table, version string) error { // deleting the entire table prefix = table + "/" // delete version.txt first - err := retry(ctx, func() error { return d.backup.Delete(ctx, "version.txt") }) + err := retry(ctx, func() error { return d.remote.Delete(ctx, "version.txt") }) if err != nil && gcerrors.Code(err) != gcerrors.NotFound { - d.logger.Error("failed to delete version.txt in backup", slog.Any("error", err)) + d.logger.Error("failed to delete version.txt in remote", slog.Any("error", err)) return err } } } // ignore errors since version.txt is already removed - iter := d.backup.List(&blob.ListOptions{Prefix: prefix}) + iter := d.remote.List(&blob.ListOptions{Prefix: prefix}) for { obj, err := iter.Next(ctx) if err != nil { @@ -263,7 +264,7 @@ func (d *db) deleteBackup(ctx context.Context, table, version string) error { } d.logger.Debug("failed to list object", slog.Any("error", err)) } - err = retry(ctx, func() error { return d.backup.Delete(ctx, obj.Key) }) + err = retry(ctx, func() error { return d.remote.Delete(ctx, obj.Key) }) if err != nil { d.logger.Debug("failed to delete object", slog.String("object", obj.Key), slog.Any("error", err)) } From 9a6f9e6e7137d2445fa059ee4a6b916ca133ecf9 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 18 Nov 2024 13:31:26 +0530 Subject: [PATCH 15/64] local db monitor --- runtime/pkg/rduckdb/db.go | 135 ++++++++++++++++++++++++--------- runtime/pkg/rduckdb/db_test.go | 9 +-- runtime/pkg/rduckdb/remote.go | 2 +- 3 files changed, 106 insertions(+), 40 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index fc03bc2b3ce..e213fe62188 
100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -16,13 +16,13 @@ import ( "slices" "strconv" "strings" - "sync" "time" "github.com/XSAM/otelsql" "github.com/jmoiron/sqlx" "github.com/marcboeker/go-duckdb" "github.com/mitchellh/mapstructure" + "github.com/rilldata/rill/runtime/pkg/ctxsync" "go.opentelemetry.io/otel/attribute" "gocloud.dev/blob" "golang.org/x/sync/semaphore" @@ -210,13 +210,17 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { if err != nil { return nil, err } + bgctx, cancel := context.WithCancel(ctx) db := &db{ opts: opts, localPath: opts.LocalPath, - readMu: &sync.RWMutex{}, + readMu: ctxsync.NewRWMutex(), writeSem: semaphore.NewWeighted(1), - writeDirty: true, + localDirty: true, + ticker: time.NewTicker(5 * time.Minute), logger: opts.Logger, + ctx: bgctx, + cancel: cancel, } if opts.Remote != nil { db.remote = opts.Remote @@ -243,38 +247,60 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } return nil, err } - + go db.localDBMonitor() return db, nil } type db struct { opts *DBOptions - localPath string - readHandle *sqlx.DB - readMu *sync.RWMutex - writeSem *semaphore.Weighted - writeDirty bool + localPath string + remote *blob.Bucket - remote *blob.Bucket + // readHandle serves read queries + readHandle *sqlx.DB + // readMu controls access to readHandle + readMu ctxsync.RWMutex + // writeSem ensures only one write operation is allowed at a time + writeSem *semaphore.Weighted + // localDirty is set to true when a change is committed to the remote but not yet reflected in the local db + localDirty bool + // ticker to periodically check if local db is in sync with remote + ticker *time.Ticker logger *slog.Logger + + // ctx and cancel to cancel background operations + ctx context.Context + cancel context.CancelFunc } var _ DB = &db{} func (d *db) Close() error { - _ = d.writeSem.Acquire(context.Background(), 1) + // close background operations + d.cancel() + d.ticker.Stop() + + ctx, cancel
:= context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + _ = d.writeSem.Acquire(ctx, 1) defer d.writeSem.Release(1) - d.readMu.Lock() + err := d.readMu.Lock(ctx) + if err != nil { + return err + } defer d.readMu.Unlock() return d.readHandle.Close() } func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() error, error) { - d.readMu.RLock() + if err := d.readMu.RLock(ctx); err != nil { + return nil, nil, err + } conn, err := d.readHandle.Connx(ctx) if err != nil { @@ -375,26 +401,27 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return err } - d.writeDirty = true + d.localDirty = true // update remote data and metadata if err := d.pushToRemote(ctx, name, oldMeta, newMeta); err != nil { return fmt.Errorf("create: replicate failed: %w", err) } d.logger.Debug("remote table updated", slog.String("name", name)) + // no errors after this point since background goroutine will eventually sync the local db // update local metadata err = d.writeTableMeta(name, newMeta) if err != nil { - return fmt.Errorf("create: write version file failed: %w", err) + d.logger.Debug("create: error in writing table meta", slog.String("name", name), slog.String("error", err.Error())) + return nil } - d.writeDirty = false err = d.reopen(ctx) if err != nil { - // TODO :: this means reads will not be in sync with remote till another write happens - // Should we mark db as reopen and wait for outstanding queries to become zero and then reopen? 
- return fmt.Errorf("create: db reopen failed: %w", err) + d.logger.Debug("create: error in reopening db", slog.String("error", err.Error())) + return nil } + d.localDirty = false return nil } @@ -442,7 +469,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con // push to remote _ = release() - d.writeDirty = true + d.localDirty = true meta := &tableMeta{ Name: name, Version: newVersion, @@ -454,19 +481,22 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con if err != nil { return fmt.Errorf("mutate: replicate failed: %w", err) } + // no errors after this point since background goroutine will eventually sync the local db // update local meta err = d.writeTableMeta(name, meta) if err != nil { - return fmt.Errorf("rename: write version file failed: %w", err) + d.logger.Debug("mutate: error in writing table meta", slog.String("name", name), slog.String("error", err.Error())) + return nil } - d.writeDirty = false // reopen db handle ignoring old name err = d.reopen(ctx) if err != nil { - return fmt.Errorf("rename: unable to reopen: %w", err) + d.logger.Debug("mutate: error in reopening db", slog.String("error", err.Error())) + return nil } + d.localDirty = false return nil } @@ -495,26 +525,28 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // drop the table from remote - d.writeDirty = true + d.localDirty = true err = d.deleteRemote(ctx, name, "") if err != nil { return fmt.Errorf("drop: unable to drop table %q from remote: %w", name, err) } + // no errors after this point since background goroutine will eventually sync the local db // mark table as deleted in local meta.Deleted = true err = d.writeTableMeta(name, meta) if err != nil { - return fmt.Errorf("drop: write meta failed: %w", err) + d.logger.Debug("drop: error in writing table meta", slog.String("name", name), slog.String("error", err.Error())) + return nil } // reopen db handle err = d.reopen(ctx) if err != nil { - return 
fmt.Errorf("drop: unable to reopen: %w", err) + d.logger.Debug("drop: error in reopening db", slog.String("error", err.Error())) + return nil } - - d.writeDirty = false + d.localDirty = false return nil } @@ -556,7 +588,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: rename table failed: %w", err) } - d.writeDirty = true + d.localDirty = true // sync the new table and new version meta := &tableMeta{ Name: newName, @@ -579,28 +611,60 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: unable to delete old table %q from remote: %w", oldName, err) } + // no errors after this point since background goroutine will eventually sync the local db + // update local meta err = d.writeTableMeta(newName, meta) if err != nil { - return fmt.Errorf("rename: write version file failed: %w", err) + d.logger.Debug("rename: error in writing table meta", slog.String("name", newName), slog.String("error", err.Error())) + return nil } // mark table as deleted in local oldMeta.Deleted = true err = d.writeTableMeta(oldName, oldMeta) if err != nil { - return fmt.Errorf("drop: write meta failed: %w", err) + d.logger.Debug("rename: error in writing table meta", slog.String("name", oldName), slog.String("error", err.Error())) + return nil } - d.writeDirty = false // reopen db handle ignoring old name err = d.reopen(ctx) if err != nil { - return fmt.Errorf("rename: unable to reopen: %w", err) + d.logger.Debug("rename: error in reopening db", slog.String("error", err.Error())) + return nil } + d.localDirty = false return nil } +func (d *db) localDBMonitor() { + for { + select { + case <-d.ctx.Done(): + return + case <-d.ticker.C: + err := d.writeSem.Acquire(d.ctx, 1) + if err != nil && errors.Is(err, context.Canceled) { + d.logger.Error("localDBMonitor: error in acquiring write sem", slog.String("error", err.Error())) + continue + } + if !d.localDirty { + // all good + continue 
+ } + err = d.pullFromRemote(d.ctx) + if err != nil { + d.logger.Error("localDBMonitor: error in pulling from remote", slog.String("error", err.Error())) + } + err = d.reopen(d.ctx) + if err != nil { + d.logger.Error("localDBMonitor: error in reopening db", slog.String("error", err.Error())) + } + } + } +} + func (d *db) reopen(ctx context.Context) error { handle, err := d.openDBAndAttach(ctx, "", "", true) if err != nil { @@ -608,7 +672,10 @@ func (d *db) reopen(ctx context.Context) error { } var oldDBHandle *sqlx.DB - d.readMu.Lock() + err = d.readMu.Lock(ctx) + if err != nil { + return err + } // swap read handle oldDBHandle = d.readHandle d.readHandle = handle diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 120eed00f15..d6a21e0168e 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -203,7 +203,7 @@ func TestResetLocal(t *testing.T) { require.NoError(t, db.Close()) require.NoError(t, os.RemoveAll(localDir)) - logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{ Level: slog.LevelDebug, })) bucket, err := fileblob.OpenBucket(remoteDir, nil) @@ -223,10 +223,9 @@ func TestResetLocal(t *testing.T) { func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) { localDir = t.TempDir() ctx := context.Background() - // logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ - // Level: slog.LevelDebug, - // })) - logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) remoteDir = t.TempDir() bucket, err := fileblob.OpenBucket(remoteDir, nil) require.NoError(t, err) diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 86538bc7d87..b6ba5120e4b 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -21,7 +21,7 @@ import ( // 
pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. func (d *db) pullFromRemote(ctx context.Context) error { - if !d.writeDirty || d.remote == nil { + if !d.localDirty || d.remote == nil { // optimisation to skip sync if write was already synced return nil } From eac6d1b42900aab418bc03ff7eab5e6f2fbce9a3 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 18 Nov 2024 16:02:58 +0530 Subject: [PATCH 16/64] small fixes --- runtime/pkg/rduckdb/db.go | 28 +++++++++++++++------------- runtime/pkg/rduckdb/db_test.go | 4 ++++ runtime/pkg/rduckdb/remote.go | 19 ++++++------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index e213fe62188..d1ebe95996d 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -61,7 +61,9 @@ type DB interface { type DBOptions struct { // LocalPath is the path where local db files will be stored. Should be unique for each database. LocalPath string - Remote *blob.Bucket + // Remote is the blob storage bucket where the database files will be stored. This is the source of truth. + // The local db will be eventually synced with the remote. + Remote *blob.Bucket // ReadSettings are settings applied the read duckDB handle. 
ReadSettings map[string]string @@ -214,6 +216,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { db := &db{ opts: opts, localPath: opts.LocalPath, + remote: opts.Remote, readMu: ctxsync.NewRWMutex(), writeSem: semaphore.NewWeighted(1), localDirty: true, @@ -222,9 +225,6 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { ctx: bgctx, cancel: cancel, } - if opts.Remote != nil { - db.remote = opts.Remote - } // create local path err = os.MkdirAll(db.localPath, fs.ModePerm) @@ -362,7 +362,6 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return fmt.Errorf("create: unable to create dir %q: %w", name, err) } dsn = filepath.Join(newVersionDir, "data.db") - newMeta.CreatedVersion = newVersion } // need to attach existing table so that any views dependent on this table are correctly attached @@ -438,7 +437,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con if errors.Is(err, errNotFound) { return fmt.Errorf("mutate: Table %q not found", name) } - return fmt.Errorf("rename: unable to get table meta: %w", err) + return fmt.Errorf("mutate: unable to get table meta: %w", err) } // create new version directory @@ -468,7 +467,10 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con } // push to remote - _ = release() + err = release() + if err != nil { + return fmt.Errorf("mutate: failed to close connection: %w", err) + } d.localDirty = true meta := &tableMeta{ Name: name, @@ -613,14 +615,14 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { // no errors after this point since background goroutine will eventually sync the local db - // update local meta + // update local meta for new table err = d.writeTableMeta(newName, meta) if err != nil { d.logger.Debug("rename: error in writing table meta", slog.String("name", newName), slog.String("error", err.Error())) return nil } - // mark table as deleted in local + // mark old 
table as deleted in local oldMeta.Deleted = true err = d.writeTableMeta(oldName, oldMeta) if err != nil { @@ -628,7 +630,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return nil } - // reopen db handle ignoring old name + // reopen db handle err = d.reopen(ctx) if err != nil { d.logger.Debug("rename: error in reopening db", slog.String("error", err.Error())) @@ -645,7 +647,7 @@ func (d *db) localDBMonitor() { return case <-d.ticker.C: err := d.writeSem.Acquire(d.ctx, 1) - if err != nil && errors.Is(err, context.Canceled) { + if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in acquiring write sem", slog.String("error", err.Error())) continue } @@ -897,7 +899,7 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, ignoreTable string) err // sort tables by created_version // this is to ensure that views/tables on which other views depend are attached first slices.SortFunc(tables, func(a, b *tableMeta) int { - // all tables should be attached first + // all tables should be attached first and can be attached in any order if a.Type == "TABLE" && b.Type == "TABLE" { return 0 } @@ -968,7 +970,7 @@ type tableMeta struct { Name string `json:"name"` Version string `json:"version"` CreatedVersion string `json:"created_version"` - Type string `json:"type"` // either table or view + Type string `json:"type"` // either TABLE or VIEW SQL string `json:"sql"` // populated for views // Deleted is set to true if the table is deleted. // This is only used for local tables since local copy can only be removed when db handle has been reattached. 
diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index d6a21e0168e..ef956f3a5d1 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -150,6 +150,10 @@ func TestMutateTable(t *testing.T) { err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'Delhi' AS city", &CreateTableOptions{}) require.NoError(t, err) + // create dependent view + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + // insert into table err = db.MutateTable(ctx, "test", func(ctx context.Context, conn *sqlx.Conn) error { _, err := conn.ExecContext(ctx, "INSERT INTO test (id, city) VALUES (2, 'NY')") diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index b6ba5120e4b..c3ba52d0b93 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -21,7 +21,7 @@ import ( // pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. func (d *db) pullFromRemote(ctx context.Context) error { - if !d.localDirty || d.remote == nil { + if !d.localDirty { // optimisation to skip sync if write was already synced return nil } @@ -170,10 +170,6 @@ func (d *db) pullFromRemote(ctx context.Context) error { // pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. 
func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { - if d.remote == nil { - return nil - } - if meta.Type == "TABLE" { // for views no db files exists, the SQL is stored in meta.json localPath := filepath.Join(d.localPath, table, meta.Version) @@ -218,7 +214,7 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl return d.remote.WriteAll(ctx, path.Join(table, "meta.json"), m, nil) }) if err != nil { - d.logger.Error("failed to update version.txt in remote", slog.Any("error", err)) + d.logger.Error("failed to update meta.json in remote", slog.Any("error", err)) } // success -- remove old version @@ -232,9 +228,6 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl // If table is specified, only that table is deleted. // If table and version is specified, only that version of the table is deleted. func (d *db) deleteRemote(ctx context.Context, table, version string) error { - if d.remote == nil { - return nil - } if table == "" && version != "" { return fmt.Errorf("table must be specified if version is specified") } @@ -245,15 +238,15 @@ func (d *db) deleteRemote(ctx context.Context, table, version string) error { } else { // deleting the entire table prefix = table + "/" - // delete version.txt first - err := retry(ctx, func() error { return d.remote.Delete(ctx, "version.txt") }) + // delete meta.json first + err := retry(ctx, func() error { return d.remote.Delete(ctx, "meta.json") }) if err != nil && gcerrors.Code(err) != gcerrors.NotFound { - d.logger.Error("failed to delete version.txt in remote", slog.Any("error", err)) + d.logger.Error("failed to delete meta.json in remote", slog.Any("error", err)) return err } } } - // ignore errors since version.txt is already removed + // ignore errors since meta.json is already removed iter := d.remote.List(&blob.ListOptions{Prefix: prefix}) for { From ad95df0c9d4c7e82c932e07dcff0abc9dc155644 Mon Sep 17 00:00:00 2001 
From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:30:40 +0530 Subject: [PATCH 17/64] add data bucket --- cli/cmd/runtime/install_duckdb_extensions.go | 3 +- cli/cmd/runtime/start.go | 49 +++- cli/pkg/cmdutil/project.go | 3 +- cli/pkg/local/app.go | 8 +- runtime/compilers/rillv1/parser_test.go | 3 +- runtime/connection_cache.go | 8 +- runtime/connections.go | 3 - runtime/drivers/admin/admin.go | 3 +- runtime/drivers/athena/athena.go | 3 +- runtime/drivers/azure/azure.go | 3 +- runtime/drivers/bigquery/bigquery.go | 3 +- runtime/drivers/clickhouse/clickhouse.go | 3 +- .../clickhouse/information_schema_test.go | 3 +- runtime/drivers/clickhouse/olap_test.go | 5 +- runtime/drivers/drivers.go | 7 +- runtime/drivers/drivers_test.go | 3 +- runtime/drivers/druid/druid.go | 3 +- runtime/drivers/druid/druid_test.go | 3 +- runtime/drivers/druid/sql_driver_test.go | 11 +- runtime/drivers/duckdb/config.go | 73 +---- runtime/drivers/duckdb/config_test.go | 63 +---- runtime/drivers/duckdb/duckdb.go | 22 +- runtime/drivers/duckdb/duckdb_test.go | 5 +- runtime/drivers/duckdb/olap.go | 4 +- runtime/drivers/duckdb/olap_crud_test.go | 21 +- runtime/drivers/duckdb/olap_test.go | 7 +- .../duckdb/transporter_duckDB_to_duckDB.go | 130 +++++---- .../transporter_duckDB_to_duckDB_test.go | 100 +++---- .../transporter_motherduck_to_duckDB.go | 167 ++++++------ .../transporter_mysql_to_duckDB_test.go | 250 +++++++++--------- .../transporter_postgres_to_duckDB_test.go | 5 +- .../transporter_sqlite_to_duckDB_test.go | 3 +- .../duckdb/transporter_sqlstore_to_duckDB.go | 92 +------ runtime/drivers/duckdb/transporter_test.go | 5 +- runtime/drivers/file/file.go | 3 +- runtime/drivers/gcs/gcs.go | 3 +- runtime/drivers/https/https.go | 3 +- .../drivers/mock/object_store/object_store.go | 2 +- runtime/drivers/mysql/mysql.go | 3 +- runtime/drivers/pinot/pinot.go | 3 +- runtime/drivers/postgres/postgres.go | 3 +- runtime/drivers/redshift/redshift.go | 3 +- 
runtime/drivers/s3/s3.go | 3 +- runtime/drivers/salesforce/salesforce.go | 3 +- runtime/drivers/slack/slack.go | 3 +- runtime/drivers/sqlite/sqlite.go | 3 +- runtime/registry.go | 11 + runtime/registry_test.go | 6 +- runtime/runtime.go | 25 +- runtime/server/queries_test.go | 3 +- runtime/testruntime/testruntime.go | 5 +- 51 files changed, 510 insertions(+), 649 deletions(-) diff --git a/cli/cmd/runtime/install_duckdb_extensions.go b/cli/cmd/runtime/install_duckdb_extensions.go index e7a45dc3804..9834f032dfc 100644 --- a/cli/cmd/runtime/install_duckdb_extensions.go +++ b/cli/cmd/runtime/install_duckdb_extensions.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/spf13/cobra" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) // InstallDuckDBExtensionsCmd adds a CLI command that forces DuckDB to install all required extensions. @@ -17,7 +18,7 @@ func InstallDuckDBExtensionsCmd(ch *cmdutil.Helper) *cobra.Command { Use: "install-duckdb-extensions", RunE: func(cmd *cobra.Command, args []string) error { cfg := map[string]any{"dsn": ":memory:"} // In-memory - h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), zap.NewNop()) + h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) if err != nil { return fmt.Errorf("failed to open ephemeral duckdb: %w", err) } diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index 899278b4135..d98f7299aa0 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -23,6 +23,9 @@ import ( "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" + "gocloud.dev/blob/gcsblob" + "gocloud.dev/gcp" + "golang.org/x/oauth2/google" "golang.org/x/sync/errgroup" // Load connectors and reconcilers for runtime @@ -85,9 +88,9 @@ type Config struct { // DataDir stores data for all instances like duckdb file, temporary downloaded file etc. 
// The data for each instance is stored in a child directory named instance_id DataDir string `split_words:"true"` - // DuckDBBackupBucket is the name of the GCS bucket where DuckDB backups are stored - DuckDBBackupBucket string `split_words:"true"` - DuckDBBackupBucketCredentialsJSON string `split_words:"true"` + // DataBucket is the name of the GCS bucket where DuckDB backups are stored + DataBucket string `split_words:"true"` + DataBucketCredentialsJSON string `split_words:"true"` // Sink type of activity client: noop (or empty string), kafka ActivitySinkType string `default:"" split_words:"true"` // Kafka brokers of an activity client's sink @@ -201,18 +204,27 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { // Create ctx that cancels on termination signals ctx := graceful.WithCancelOnTerminate(context.Background()) + // Init dataBucket + client, err := newClient(ctx, conf.DataBucketCredentialsJSON) + if err != nil { + logger.Fatal("error: could not create GCP client", zap.Error(err)) + } + + bucket, err := gcsblob.OpenBucket(ctx, client, conf.DataBucket, nil) + if err != nil { + logger.Fatal("failed to open bucket %q, %w", zap.String("bucket", conf.DataBucket), zap.Error(err)) + } + // Init runtime opts := &runtime.Options{ - ConnectionCacheSize: conf.ConnectionCacheSize, - MetastoreConnector: "metastore", - QueryCacheSizeBytes: conf.QueryCacheSizeBytes, - SecurityEngineCacheSize: conf.SecurityEngineCacheSize, - ControllerLogBufferCapacity: conf.LogBufferCapacity, - ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, - AllowHostAccess: conf.AllowHostAccess, - DataDir: conf.DataDir, - DuckDBBackupBucket: conf.DuckDBBackupBucket, - DuckDBBackupBucketCredentialsJSON: conf.DuckDBBackupBucketCredentialsJSON, + ConnectionCacheSize: conf.ConnectionCacheSize, + MetastoreConnector: "metastore", + QueryCacheSizeBytes: conf.QueryCacheSizeBytes, + SecurityEngineCacheSize: conf.SecurityEngineCacheSize, + ControllerLogBufferCapacity: conf.LogBufferCapacity, + 
ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, + AllowHostAccess: conf.AllowHostAccess, + DataDir: conf.DataDir, SystemConnectors: []*runtimev1.Connector{ { Type: conf.MetastoreDriver, @@ -221,7 +233,7 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { }, }, } - rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient) + rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient, bucket) if err != nil { logger.Fatal("error: could not create runtime", zap.Error(err)) } @@ -272,3 +284,12 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { } return startCmd } + +func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) { + creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") + if err != nil { + return nil, fmt.Errorf("failed to create credentials: %w", err) + } + // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
+ return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) +} diff --git a/cli/pkg/cmdutil/project.go b/cli/pkg/cmdutil/project.go index b1a3293edc4..e80829d6b60 100644 --- a/cli/pkg/cmdutil/project.go +++ b/cli/pkg/cmdutil/project.go @@ -6,6 +6,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob/memblob" // Ensure file driver is loaded _ "github.com/rilldata/rill/runtime/drivers/file" @@ -14,7 +15,7 @@ import ( // RepoForProjectPath creates an ad-hoc drivers.RepoStore for a local project file path func RepoForProjectPath(path string) (drivers.RepoStore, string, error) { instanceID := "default" - repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), zap.NewNop()) + repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) if err != nil { return nil, "", err } diff --git a/cli/pkg/local/app.go b/cli/pkg/local/app.go index 1099fe22c4e..9e26813ae4e 100644 --- a/cli/pkg/local/app.go +++ b/cli/pkg/local/app.go @@ -31,6 +31,7 @@ import ( "go.uber.org/zap" "go.uber.org/zap/buffer" "go.uber.org/zap/zapcore" + "gocloud.dev/blob/fileblob" "golang.org/x/sync/errgroup" "gopkg.in/natefinch/lumberjack.v2" ) @@ -156,7 +157,10 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { // if err != nil { // return nil, fmt.Errorf("failed to create email sender: %w", err) // } - + bkt, err := fileblob.OpenBucket(filepath.Join(dbDirPath, "remote"), &fileblob.Options{CreateDir: true}) + if err != nil { + return nil, err + } rtOpts := &runtime.Options{ ConnectionCacheSize: 100, MetastoreConnector: "metastore", @@ -168,7 +172,7 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := runtime.New(ctx, 
rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender)) + rt, err := runtime.New(ctx, rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender), bkt) if err != nil { return nil, err } diff --git a/runtime/compilers/rillv1/parser_test.go b/runtime/compilers/rillv1/parser_test.go index 3974c616b0d..16d7bad7aa4 100644 --- a/runtime/compilers/rillv1/parser_test.go +++ b/runtime/compilers/rillv1/parser_test.go @@ -14,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" "google.golang.org/protobuf/types/known/structpb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -2037,7 +2038,7 @@ func requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc func makeRepo(t testing.TB, files map[string]string) drivers.RepoStore { root := t.TempDir() - handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), zap.NewNop()) + handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) repo, ok := handle.AsRepoStore("") diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 72e271f404c..3bec04da536 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/exp/maps" ) @@ -93,6 +94,7 @@ func (r *Runtime) evictInstanceConnections(instanceID string) { func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig) (drivers.Handle, error) { logger := r.Logger activityClient := r.activity + var dataBucket *blob.Bucket if cfg.instanceID != "" { // Not shared across multiple instances inst, err := r.Instance(ctx, cfg.instanceID) if err != nil { @@ -108,9 +110,13 @@ func (r *Runtime) openAndMigrate(ctx 
context.Context, cfg cachedConnectionConfig if activityClient != nil { activityClient = activityClient.With(activityDims...) } + + dataBucket = r.DataBucket(cfg.instanceID) + } else { + dataBucket = r.DataBucket("__global__") } - handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, logger) + handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, dataBucket, logger) if err == nil && ctx.Err() != nil { err = fmt.Errorf("timed out while opening driver %q", cfg.driver) } diff --git a/runtime/connections.go b/runtime/connections.go index 169fd0281ce..bfad2e9848a 100644 --- a/runtime/connections.go +++ b/runtime/connections.go @@ -259,9 +259,6 @@ func (r *Runtime) ConnectorConfig(ctx context.Context, instanceID, name string) case "motherduck": res.setPreset("token", vars["token"], false) res.setPreset("dsn", "", true) - case "duckdb": - res.setPreset("backup_bucket", r.opts.DuckDBBackupBucket, false) - res.setPreset("backup_bucket_credentials_json", r.opts.DuckDBBackupBucketCredentialsJSON, false) case "local_file": // The "local_file" connector needs to know the repo root. // TODO: This is an ugly hack. But how can we get rid of it? 
diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index 93cd1be6ef0..9ac4dc46e66 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -23,6 +23,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/ctxsync" "go.opentelemetry.io/otel" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/sync/singleflight" "gopkg.in/yaml.v3" ) @@ -63,7 +64,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("admin driver can't be shared") } diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index bb687306076..50bbd73636d 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -85,7 +86,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("athena driver can't be shared") } diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index 30bf08daaba..0282b0cbc63 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + 
"gocloud.dev/blob" ) func init() { @@ -82,7 +83,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("azure driver can't be shared") } diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index ea555731faa..86644749f10 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/gcputil" "go.uber.org/zap" + "gocloud.dev/blob" "google.golang.org/api/option" ) @@ -77,7 +78,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("bigquery driver can't be shared") } diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index 7e81e1a2f1d..fa8bef6edad 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -16,6 +16,7 @@ import ( "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -121,7 +122,7 @@ type configProperties struct { // Open connects to Clickhouse using std API. 
// Connection string format : https://github.com/ClickHouse/clickhouse-go?tab=readme-ov-file#dsn -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("clickhouse driver can't be shared") } diff --git a/runtime/drivers/clickhouse/information_schema_test.go b/runtime/drivers/clickhouse/information_schema_test.go index 074a9047fc0..62b40198917 100644 --- a/runtime/drivers/clickhouse/information_schema_test.go +++ b/runtime/drivers/clickhouse/information_schema_test.go @@ -12,6 +12,7 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/clickhouse" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestInformationSchema(t *testing.T) { @@ -37,7 +38,7 @@ func TestInformationSchema(t *testing.T) { port, err := clickHouseContainer.MappedPort(ctx, "9000/tcp") require.NoError(t, err) - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) prepareConn(t, conn) t.Run("testInformationSchemaAll", func(t *testing.T) { testInformationSchemaAll(t, conn) }) diff --git a/runtime/drivers/clickhouse/olap_test.go b/runtime/drivers/clickhouse/olap_test.go index 4b0ac79080b..d78b29a3bbb 100644 --- a/runtime/drivers/clickhouse/olap_test.go +++ b/runtime/drivers/clickhouse/olap_test.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/testruntime" 
"github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestClickhouseCrudOps(t *testing.T) { @@ -24,7 +25,7 @@ func TestClickhouseCrudOps(t *testing.T) { } func testClickhouseSingleHost(t *testing.T, dsn string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) defer conn.Close() prepareConn(t, conn) @@ -41,7 +42,7 @@ func testClickhouseSingleHost(t *testing.T, dsn string) { } func testClickhouseCluster(t *testing.T, dsn, cluster string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) defer conn.Close() diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 14f0c988bb8..6d3afb3018b 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) // ErrNotFound indicates the resource wasn't found. @@ -35,13 +36,13 @@ func Register(name string, driver Driver) { // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. // If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. 
-func Open(driver, instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, client, logger) + conn, err := d.Open(instanceID, config, client, data, logger) if err != nil { return nil, err } @@ -56,7 +57,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. - Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. 
HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/drivers_test.go b/runtime/drivers/drivers_test.go index f6bb9fd3d3e..8091cc0aa18 100644 --- a/runtime/drivers/drivers_test.go +++ b/runtime/drivers/drivers_test.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" _ "github.com/rilldata/rill/runtime/drivers/duckdb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -31,7 +32,7 @@ func TestAll(t *testing.T) { for _, withDriver := range matrix { err := withDriver(t, func(driver, instanceID string, cfg map[string]any) { // Open - conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NotNil(t, conn) diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index 2148ea93951..2e02543cecd 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -14,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" // Load Druid database/sql driver _ "github.com/rilldata/rill/runtime/drivers/druid/druidsqldriver" @@ -101,7 +102,7 @@ type configProperties struct { // Opens a connection to Apache Druid using HTTP API. // Note that the Druid connection string must have the form "http://user:password@host:port/druid/v2/sql". 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("druid driver can't be shared") } diff --git a/runtime/drivers/druid/druid_test.go b/runtime/drivers/druid/druid_test.go index 6e981bf40af..f32c8add77c 100644 --- a/runtime/drivers/druid/druid_test.go +++ b/runtime/drivers/druid/druid_test.go @@ -15,6 +15,7 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) const testTable = "test_data" @@ -108,7 +109,7 @@ func TestDruid(t *testing.T) { require.NoError(t, err) dd := &driver{} - conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), zap.NewNop()) + conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/druid/sql_driver_test.go b/runtime/drivers/druid/sql_driver_test.go index 07036e30dac..71f85066360 100644 --- a/runtime/drivers/druid/sql_driver_test.go +++ b/runtime/drivers/druid/sql_driver_test.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/stretchr/testify/require" + "gocloud.dev/blob/memblob" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/pbutil" @@ -19,7 +20,7 @@ import ( */ func Ignore_TestDriver_types(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, 
activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -56,7 +57,7 @@ func Ignore_TestDriver_types(t *testing.T) { func Ignore_TestDriver_array_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -82,7 +83,7 @@ func Ignore_TestDriver_array_type(t *testing.T) { func Ignore_TestDriver_json_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -107,7 +108,7 @@ func Ignore_TestDriver_json_type(t *testing.T) { func Ignore_TestDriver_multiple_rows(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -143,7 +144,7 @@ func Ignore_TestDriver_multiple_rows(t *testing.T) { func Ignore_TestDriver_error(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), 
zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index 57285c36541..77391bae8ae 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -2,9 +2,7 @@ package duckdb import ( "fmt" - "net/url" "strconv" - "strings" "github.com/mitchellh/mapstructure" ) @@ -16,11 +14,6 @@ const ( // config represents the DuckDB driver config type config struct { - // DSN is the connection string. Also allows a special `:memory:` path to initialize an in-memory database. - DSN string `mapstructure:"dsn"` - // Path is a path to the database file. If set, it will take precedence over the path contained in DSN. - // This is a convenience option for setting the path in a more human-readable way. - Path string `mapstructure:"path"` // DataDir is the path to directory where duckdb file named `main.db` will be created. In case of external table storage all the files will also be present in DataDir's subdirectories. // If path is set then DataDir is ignored. DataDir string `mapstructure:"data_dir"` @@ -28,11 +21,6 @@ type config struct { PoolSize int `mapstructure:"pool_size"` // AllowHostAccess denotes whether to limit access to the local environment and file system AllowHostAccess bool `mapstructure:"allow_host_access"` - // ErrorOnIncompatibleVersion controls whether to return error or delete DBFile created with older duckdb version. - ErrorOnIncompatibleVersion bool `mapstructure:"error_on_incompatible_version"` - // ExtTableStorage controls if every table is stored in a different db file. - // Backup is only and automatically enabled when external table storage is enabled. - ExtTableStorage bool `mapstructure:"external_table_storage"` // CPU cores available for the read DB. 
If no CPUWrite is set and external_table_storage is enabled then this is split evenly between read and write. CPU int `mapstructure:"cpu"` // MemoryLimitGB is the amount of memory available for the read DB. If no MemoryLimitGBWrite is set and external_table_storage is enabled then this is split evenly between read and write. @@ -47,51 +35,18 @@ type config struct { InitSQL string `mapstructure:"init_sql"` // LogQueries controls whether to log the raw SQL passed to OLAP.Execute. (Internal queries will not be logged.) LogQueries bool `mapstructure:"log_queries"` - // BackupBucket is gcs bucket to store db backups. Should be of the form `bucket-name`. - BackupBucket string `mapstructure:"backup_bucket"` - // BackupBucketCredentialsJSON is the json credentials for the backup bucket. - BackupBucketCredentialsJSON string `mapstructure:"backup_bucket_credentials_json"` ReadSettings map[string]string `mapstructure:"-"` WriteSettings map[string]string `mapstructure:"-"` } func newConfig(cfgMap map[string]any) (*config, error) { - cfg := &config{ - ExtTableStorage: true, - } + cfg := &config{} err := mapstructure.WeakDecode(cfgMap, cfg) if err != nil { return nil, fmt.Errorf("could not decode config: %w", err) } - inMemory := false - if strings.HasPrefix(cfg.DSN, ":memory:") { - inMemory = true - cfg.DSN = strings.Replace(cfg.DSN, ":memory:", "", 1) - cfg.ExtTableStorage = false - } - - // Parse DSN as URL - uri, err := url.Parse(cfg.DSN) - if err != nil { - return nil, fmt.Errorf("could not parse dsn: %w", err) - } - qry, err := url.ParseQuery(uri.RawQuery) - if err != nil { - return nil, fmt.Errorf("could not parse dsn: %w", err) - } - - if !inMemory { - // Override DSN.Path with config.Path - if cfg.Path != "" { // backward compatibility, cfg.Path takes precedence over cfg.DataDir - uri.Path = cfg.Path - cfg.ExtTableStorage = false - } else if uri.Path != "" { // if some path is set in DSN, honour that path and ignore DataDir - cfg.ExtTableStorage = false - } - } - // Set 
memory limit cfg.ReadSettings = make(map[string]string) cfg.WriteSettings = make(map[string]string) @@ -113,19 +68,6 @@ func newConfig(cfgMap map[string]any) (*config, error) { // Set pool size poolSize := cfg.PoolSize - if qry.Has("rill_pool_size") { - // For backwards compatibility, we also support overriding the pool size via the DSN when "rill_pool_size" is a query argument. - - // Remove from query string (so not passed into DuckDB config) - val := qry.Get("rill_pool_size") - qry.Del("rill_pool_size") - - // Parse as integer - poolSize, err = strconv.Atoi(val) - if err != nil { - return nil, fmt.Errorf("could not parse dsn: 'rill_pool_size' is not an integer") - } - } if poolSize == 0 && threads != 0 { poolSize = threads if cfg.CPU != 0 && cfg.CPU < poolSize { @@ -137,18 +79,7 @@ func newConfig(cfgMap map[string]any) (*config, error) { cfg.PoolSize = poolSize // useful for motherduck but safe to pass at initial connect - if !qry.Has("custom_user_agent") { - qry.Add("custom_user_agent", "rill") - cfg.WriteSettings["custom_user_agent"] = "rill" - } else { - cfg.WriteSettings["custom_user_agent"] = qry.Get("custom_user_agent") - } - for k, v := range cfg.ReadSettings { - qry.Add(k, v) - } - // Rebuild DuckDB DSN (which should be "path?key=val&...") - // this is required since spaces and other special characters are valid in db file path but invalid and hence encoded in URL - cfg.DSN = generateDSN(uri.Path, qry.Encode()) + cfg.WriteSettings["custom_user_agent"] = "rill" return cfg, nil } diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 14df1399307..93917ebf32f 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -1,34 +1,25 @@ package duckdb import ( - "context" - "io/fs" - "os" - "path/filepath" "testing" - "github.com/rilldata/rill/runtime/drivers" - activity "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" - "go.uber.org/zap" ) func 
TestConfig(t *testing.T) { cfg, err := newConfig(map[string]any{}) require.NoError(t, err) - require.Equal(t, "?custom_user_agent=rill", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) cfg, err = newConfig(map[string]any{"dsn": ":memory:?memory_limit=2GB"}) require.NoError(t, err) - require.Equal(t, "?custom_user_agent=rill&memory_limit=2GB", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) cfg, err = newConfig(map[string]any{"dsn": "", "memory_limit_gb": "1", "cpu": 2}) require.NoError(t, err) - require.Equal(t, "?custom_user_agent=rill&max_memory=1GB&threads=2", cfg.DSN) + require.Equal(t, "1", cfg.ReadSettings["threads"]) + require.Equal(t, "1", cfg.WriteSettings["threads"]) require.Equal(t, 2, cfg.PoolSize) - require.Equal(t, true, cfg.ExtTableStorage) cfg, err = newConfig(map[string]any{"data_dir": "path/to"}) require.NoError(t, err) @@ -40,24 +31,6 @@ func TestConfig(t *testing.T) { require.Subset(t, cfg.WriteSettings, map[string]string{"custom_user_agent": "rill"}) require.Equal(t, 10, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"data_dir": "path/to", "pool_size": "10"}) - require.NoError(t, err) - require.Equal(t, 10, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"data_dir": "path/to", "dsn": "?rill_pool_size=4", "pool_size": "10"}) - require.NoError(t, err) - require.Equal(t, 4, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}) - require.NoError(t, err) - require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) - require.Equal(t, 10, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}) - require.NoError(t, err) - require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) - require.Equal(t, 10, cfg.PoolSize) - _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}) require.Error(t, err) @@ -69,33 +42,9 @@ func TestConfig(t *testing.T) { cfg, err = 
newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}) require.NoError(t, err) - require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=8GB&threads=2", cfg.DSN) + require.Equal(t, "1", cfg.ReadSettings["threads"]) + require.Equal(t, "1", cfg.WriteSettings["threads"]) + require.Equal(t, "4", cfg.ReadSettings["max_memory"]) + require.Equal(t, "4", cfg.WriteSettings["max_memory"]) require.Equal(t, 2, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}) - require.NoError(t, err) - require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) - require.Equal(t, 4, cfg.PoolSize) -} - -func Test_specialCharInPath(t *testing.T) { - tempDir := t.TempDir() - path := filepath.Join(tempDir, "let's t@st \"weird\" dirs") - err := os.Mkdir(path, fs.ModePerm) - require.NoError(t, err) - - dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - config := conn.(*connection).config - require.Equal(t, dbFile+"?custom_user_agent=rill&max_memory=4GB&threads=1", config.DSN) - require.Equal(t, 2, config.PoolSize) - - olap, ok := conn.AsOLAP("") - require.True(t, ok) - - res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT 1"}) - require.NoError(t, err) - require.NoError(t, res.Close()) - require.NoError(t, conn.Close()) } diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index a21ba616735..d1c1e7f57f1 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -25,6 +25,7 @@ import ( "go.opentelemetry.io/otel/metric" "go.uber.org/zap" "go.uber.org/zap/exp/zapslog" + "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -130,7 +131,7 @@ type Driver struct { name string } -func (d Driver) Open(instanceID string, cfgMap 
map[string]any, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, data *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("duckdb driver can't be shared") } @@ -144,7 +145,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie if err != nil { return nil, err } - logger.Debug("opening duckdb handle", zap.String("dsn", cfg.DSN)) // See note in connection struct olapSemSize := cfg.PoolSize - 1 @@ -158,6 +158,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie config: cfg, logger: logger, activity: ac, + data: blob.PrefixedBucket(data, "duckdb"), // todo : ideally the drivers should get name prefixed buckets metaSem: semaphore.NewWeighted(1), olapSem: priorityqueue.NewSemaphore(olapSemSize), longRunningSem: semaphore.NewWeighted(1), // Currently hard-coded to 1 @@ -184,11 +185,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie return nil, fmt.Errorf("failed to open database (is Rill already running?): %w", err) } - // Check for using incompatible database files - if c.config.ErrorOnIncompatibleVersion || !strings.Contains(err.Error(), "Trying to read a database file with version number") { - return nil, err - } - c.logger.Debug("Resetting .db file because it was created with an older, incompatible version of Rill") // reopen connection again if err := c.reopenDB(ctx, true); err != nil { @@ -278,6 +274,7 @@ type connection struct { config *config logger *zap.Logger activity *activity.Client + data *blob.Bucket // This driver may issue both OLAP and "meta" queries (like catalog info) against DuckDB. // Meta queries are usually fast, but OLAP queries may take a long time. To enable predictable parallel performance, // we gate queries with semaphores that limits the number of concurrent queries of each type. 
@@ -507,10 +504,13 @@ func (c *connection) reopenDB(ctx context.Context, clean bool) error { })) var err error c.db, err = rduckdb.NewDB(ctx, &rduckdb.DBOptions{ - LocalPath: c.config.DataDir, - Remote: backup, - InitQueries: bootQueries, - Logger: logger, + LocalPath: c.config.DataDir, + Remote: c.data, + ReadSettings: c.config.ReadSettings, + WriteSettings: c.config.WriteSettings, + InitQueries: bootQueries, + Logger: logger, + OtelAttributes: []attribute.KeyValue{attribute.String("instance_id", c.instanceID)}, }) return err } diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index abc57f03c7c..b55f319de28 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestNoFatalErr(t *testing.T) { @@ -19,7 +20,7 @@ func TestNoFatalErr(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -81,7 +82,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/olap.go 
b/runtime/drivers/duckdb/olap.go index 8df771316e8..0108364df27 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -265,13 +265,13 @@ func (c *connection) execIncrementalInsert(ctx context.Context, name, sql string } where += fmt.Sprintf("base.%s IS NOT DISTINCT FROM tmp.%s", key, key) } - _, err = conn.ExecContext(ctx, fmt.Sprintf("DELETE FROM %s base WHERE EXISTS (SELECT 1 FROM %s tmp WHERE %s)", safeName, safeSQLName(tmp), where)) + _, err = conn.ExecContext(ctx, fmt.Sprintf("DELETE FROM %s base WHERE EXISTS (SELECT 1 FROM %s tmp WHERE %s)", safeSQLName(name), safeSQLName(tmp), where)) if err != nil { return err } // Insert the new data into the target table - _, err = conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeName, byNameClause, safeSQLName(tmp))) + _, err = conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeSQLName(name), byNameClause, safeSQLName(tmp))) return err }) } diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index fd98c15cb60..5c9e83d9ebf 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -14,20 +14,21 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") 
require.NoError(t, normalConn.Migrate(context.Background())) dbPath = filepath.Join(temp, "default") - handle, err = Driver{}.Open("default", map[string]any{"data_dir": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"data_dir": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -99,7 +100,7 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -143,7 +144,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -171,7 +172,7 @@ func Test_connection_DropTable(t *testing.T) { func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), 
zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -198,7 +199,7 @@ func Test_connection_InsertTableAsSelect(t *testing.T) { func Test_connection_RenameTable(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -222,7 +223,7 @@ func Test_connection_RenameTable(t *testing.T) { func Test_connection_RenameToExistingTable(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -248,7 +249,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { func Test_connection_AddTableColumn(t *testing.T) { temp := t.TempDir() - handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"data_dir": temp, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, 
c.Migrate(context.Background())) @@ -277,7 +278,7 @@ func Test_connection_AddTableColumn(t *testing.T) { } func Test_connection_RenameToExistingTableOld(t *testing.T) { - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -305,7 +306,7 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 1678a762c71..ee2ba123425 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" "golang.org/x/sync/errgroup" ) @@ -212,7 +213,7 @@ func TestClose(t *testing.T) { } func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"dsn": 
":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -248,11 +249,11 @@ func Test_safeSQLString(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NoError(t, conn.Close()) - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 703e43e9a84..aba534b2d6e 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "net/url" - "path/filepath" "strings" "github.com/rilldata/rill/runtime/drivers" @@ -48,7 +47,7 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s return err } } - return t.transferFromExternalDB(ctx, srcCfg, sinkCfg) + // return t.transferFromExternalDB(ctx, srcCfg, sinkCfg) } // We can't just pass the SQL statement to DuckDB outright. 
@@ -114,73 +113,66 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s return t.to.CreateTableAsSelect(ctx, sinkCfg.Table, false, srcCfg.SQL, nil) } -func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { - rwConn, release, err := t.to.acquireConn(ctx, false) - if err != nil { - return err - } - defer func() { - _ = release() - }() - conn := rwConn.Connx() - - var localDB, localSchema string - err = conn.QueryRowContext(ctx, "SELECT current_database(),current_schema()").Scan(&localDB, &localSchema) - if err != nil { - return err - } - - // duckdb considers everything before first . as db name - // alternative solution can be to query `show databases()` before and after to identify db name - dbName, _, _ := strings.Cut(filepath.Base(srcProps.Database), ".") - if dbName == "main" { - return fmt.Errorf("`main` is a reserved db name") - } - - if _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeSQLName(dbName))); err != nil { - return fmt.Errorf("failed to attach db %q: %w", srcProps.Database, err) - } - - defer func() { - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DETACH %s", safeSQLName(dbName))) - }() - - if _, err := conn.ExecContext(ctx, fmt.Sprintf("USE %s;", safeName(dbName))); err != nil { - return err - } - - defer func() { - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))) - if err != nil { - t.logger.Error("failed to switch back to original database", zap.Error(err)) - } - }() - - userQuery := strings.TrimSpace(srcProps.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - safeTempTable := safeName(fmt.Sprintf("%s_tmp_", sinkProps.Table)) - defer func() { - // ensure temporary table is cleaned - _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", 
safeTempTable)) - if err != nil { - t.logger.Error("failed to drop temp table", zap.String("table", safeTempTable), zap.Error(err)) - } - }() - - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) - _, err = conn.ExecContext(ctx, query) - // first revert to original database - if _, switchErr := conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); switchErr != nil { - t.to.fatalInternalError(fmt.Errorf("failed to switch back to original database: %w", err)) - } - // check for the original error - if err != nil { - return fmt.Errorf("failed to create table: %w", err) - } - - // create permanent table from temp table using crud API - return rwConn.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), nil) -} +// func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { +// t.to.db.CreateTableAsSelect(ctx, sinkProps.Table, ) + +// var localDB, localSchema string +// err = conn.QueryRowContext(ctx, "SELECT current_database(),current_schema()").Scan(&localDB, &localSchema) +// if err != nil { +// return err +// } + +// // duckdb considers everything before first . 
as db name +// // alternative solution can be to query `show databases()` before and after to identify db name +// dbName, _, _ := strings.Cut(filepath.Base(srcProps.Database), ".") +// if dbName == "main" { +// return fmt.Errorf("`main` is a reserved db name") +// } + +// if _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeSQLName(dbName))); err != nil { +// return fmt.Errorf("failed to attach db %q: %w", srcProps.Database, err) +// } + +// defer func() { +// _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DETACH %s", safeSQLName(dbName))) +// }() + +// if _, err := conn.ExecContext(ctx, fmt.Sprintf("USE %s;", safeName(dbName))); err != nil { +// return err +// } + +// defer func() { +// _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))) +// if err != nil { +// t.logger.Error("failed to switch back to original database", zap.Error(err)) +// } +// }() + +// userQuery := strings.TrimSpace(srcProps.SQL) +// userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon +// safeTempTable := safeName(fmt.Sprintf("%s_tmp_", sinkProps.Table)) +// defer func() { +// // ensure temporary table is cleaned +// _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTempTable)) +// if err != nil { +// t.logger.Error("failed to drop temp table", zap.String("table", safeTempTable), zap.Error(err)) +// } +// }() + +// query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) +// _, err = conn.ExecContext(ctx, query) +// // first revert to original database +// if _, switchErr := conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); switchErr != nil { +// t.to.fatalInternalError(fmt.Errorf("failed to switch back to original database: %w", err)) +// } +// // check for the 
original error +// if err != nil { +// return fmt.Errorf("failed to create table: %w", err) +// } + +// // create permanent table from temp table using crud API +// return rwConn.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), nil) +// } // rewriteLocalPaths rewrites a DuckDB SQL statement such that relative paths become absolute paths relative to the basePath, // and if allowHostAccess is false, returns an error if any of the paths resolve to a path outside of the basePath. diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 0a6d09128ff..89ce8fdc807 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -1,66 +1,66 @@ package duckdb -import ( - "context" - "fmt" - "path/filepath" - "testing" +// import ( +// "context" +// "fmt" +// "path/filepath" +// "testing" - "github.com/rilldata/rill/runtime/drivers" - activity "github.com/rilldata/rill/runtime/pkg/activity" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) +// "github.com/rilldata/rill/runtime/drivers" +// activity "github.com/rilldata/rill/runtime/pkg/activity" +// "github.com/stretchr/testify/require" +// "go.uber.org/zap" +// ) -func TestDuckDBToDuckDBTransfer(t *testing.T) { - tempDir := t.TempDir() - conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) +// func TestDuckDBToDuckDBTransfer(t *testing.T) { +// tempDir := t.TempDir() +// conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) - olap, ok := conn.AsOLAP("") - require.True(t, ok) +// olap, ok 
:= conn.AsOLAP("") +// require.True(t, ok) - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE TABLE foo(bar VARCHAR, baz INTEGER)", - }) - require.NoError(t, err) +// err = olap.Exec(context.Background(), &drivers.Statement{ +// Query: "CREATE TABLE foo(bar VARCHAR, baz INTEGER)", +// }) +// require.NoError(t, err) - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", - }) - require.NoError(t, err) - require.NoError(t, conn.Close()) +// err = olap.Exec(context.Background(), &drivers.Statement{ +// Query: "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", +// }) +// require.NoError(t, err) +// require.NoError(t, conn.Close()) - to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) +// to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) - tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) +// tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) - // transfer once - err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) - require.NoError(t, err) +// // transfer once +// err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) +// require.NoError(t, err) - olap, ok = to.AsOLAP("") - require.True(t, ok) +// olap, ok = to.AsOLAP("") +// require.True(t, ok) - rows, err := to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) - require.NoError(t, err) +// 
rows, err := to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) +// require.NoError(t, err) - var count int - rows.Next() - require.NoError(t, rows.Scan(&count)) - require.Equal(t, 4, count) - require.NoError(t, rows.Close()) +// var count int +// rows.Next() +// require.NoError(t, rows.Scan(&count)) +// require.Equal(t, 4, count) +// require.NoError(t, rows.Close()) - // transfer again - err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) - require.NoError(t, err) +// // transfer again +// err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) +// require.NoError(t, err) - rows, err = olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) - require.NoError(t, err) +// rows, err = olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) +// require.NoError(t, err) - rows.Next() - require.NoError(t, rows.Scan(&count)) - require.Equal(t, 4, count) - require.NoError(t, rows.Close()) -} +// rows.Next() +// require.NoError(t, rows.Scan(&count)) +// require.Equal(t, 4, count) +// require.NoError(t, rows.Close()) +// } diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index 54c0b85e55e..8f69048331d 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -2,11 +2,7 @@ package duckdb import ( "context" - "fmt" - "os" - "strings" - "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "go.uber.org/zap" ) @@ -39,85 +35,86 @@ func newMotherduckToDuckDB(from drivers.Handle, to *connection, logger 
*zap.Logg } func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOptions) error { - srcConfig := &mdSrcProps{} - err := mapstructure.WeakDecode(srcProps, srcConfig) - if err != nil { - return err - } - if srcConfig.SQL == "" { - return fmt.Errorf("property \"sql\" is mandatory for connector \"motherduck\"") - } - - sinkCfg, err := parseSinkProperties(sinkProps) - if err != nil { - return err - } - - mdConfig := &mdConfigProps{} - err = mapstructure.WeakDecode(t.from.Config(), mdConfig) - if err != nil { - return err - } - - // get token - var token string - if srcConfig.Token != "" { - token = srcConfig.Token - } else if mdConfig.Token != "" { - token = mdConfig.Token - } else if mdConfig.AllowHostAccess { - token = os.Getenv("motherduck_token") - } - if token == "" { - return fmt.Errorf("no motherduck token found. Refer to this documentation for instructions: https://docs.rilldata.com/reference/connectors/motherduck") - } - - t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) - - rwConn, release, err := t.to.acquireConn(ctx, false) - if err != nil { - return err - } - defer func() { - _ = release() - }() - - conn := rwConn.Connx() - - // load motherduck extension; connect to motherduck service - _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") - if err != nil { - return fmt.Errorf("failed to load motherduck extension %w", err) - } - - if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { - if !strings.Contains(err.Error(), "can only be set during initialization") { - return fmt.Errorf("failed to set motherduck token %w", err) - } - } - - // ignore attach error since it might be already attached - _, _ = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) - userQuery := strings.TrimSpace(srcConfig.SQL) - userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - - // we first ingest data in 
a temporary table in the main db - // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table)) - defer func() { - // ensure temporary table is cleaned - _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) - if err != nil { - t.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), zap.Error(err)) - } - }() - - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeTmpTable, userQuery) - _, err = conn.ExecContext(ctx, query) - if err != nil { - return err - } - - // copy data from temp table to target table - return rwConn.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) + // srcConfig := &mdSrcProps{} + // err := mapstructure.WeakDecode(srcProps, srcConfig) + // if err != nil { + // return err + // } + // if srcConfig.SQL == "" { + // return fmt.Errorf("property \"sql\" is mandatory for connector \"motherduck\"") + // } + + // sinkCfg, err := parseSinkProperties(sinkProps) + // if err != nil { + // return err + // } + + // mdConfig := &mdConfigProps{} + // err = mapstructure.WeakDecode(t.from.Config(), mdConfig) + // if err != nil { + // return err + // } + + // // get token + // var token string + // if srcConfig.Token != "" { + // token = srcConfig.Token + // } else if mdConfig.Token != "" { + // token = mdConfig.Token + // } else if mdConfig.AllowHostAccess { + // token = os.Getenv("motherduck_token") + // } + // if token == "" { + // return fmt.Errorf("no motherduck token found. 
Refer to this documentation for instructions: https://docs.rilldata.com/reference/connectors/motherduck") + // } + + // t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) + + // rwConn, release, err := t.to.acquireConn(ctx, false) + // if err != nil { + // return err + // } + // defer func() { + // _ = release() + // }() + + // conn := rwConn.Connx() + + // // load motherduck extension; connect to motherduck service + // _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") + // if err != nil { + // return fmt.Errorf("failed to load motherduck extension %w", err) + // } + + // if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { + // if !strings.Contains(err.Error(), "can only be set during initialization") { + // return fmt.Errorf("failed to set motherduck token %w", err) + // } + // } + + // // ignore attach error since it might be already attached + // _, _ = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) + // userQuery := strings.TrimSpace(srcConfig.SQL) + // userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + + // // we first ingest data in a temporary table in the main db + // // and then copy it to the final table to ensure that the final table is always created using CRUD APIs + // safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table)) + // defer func() { + // // ensure temporary table is cleaned + // _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) + // if err != nil { + // t.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), zap.Error(err)) + // } + // }() + + // query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeTmpTable, userQuery) + // _, err = conn.ExecContext(ctx, query) + // if err != nil { + // return err + // } + + // // copy data from temp table to target table + // return rwConn.CreateTableAsSelect(ctx, 
sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) + return nil } diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 5b5ed4db09d..37c24c2880d 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -1,127 +1,127 @@ package duckdb -import ( - "context" - "database/sql" - "testing" - - "github.com/rilldata/rill/runtime/drivers" - "github.com/rilldata/rill/runtime/pkg/activity" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "fmt" - "time" - - _ "github.com/rilldata/rill/runtime/drivers/mysql" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" -) - -var mysqlInitStmt = ` -CREATE TABLE all_data_types_table ( - id INT AUTO_INCREMENT PRIMARY KEY, - sample_char CHAR(1), - sample_varchar VARCHAR(100), - sample_tinytext TINYTEXT, - sample_text TEXT, - sample_mediumtext MEDIUMTEXT, - sample_longtext LONGTEXT, - sample_binary BINARY(1), - sample_varbinary VARBINARY(100), - sample_tinyblob TINYBLOB, - sample_blob BLOB, - sample_mediumblob MEDIUMBLOB, - sample_longblob LONGBLOB, - sample_enum ENUM('value1', 'value2'), - sample_set SET('value1', 'value2'), - sample_bit BIT(8), - sample_tinyint TINYINT, - sample_tinyint_unsigned TINYINT UNSIGNED NOT NULL, - sample_smallint SMALLINT, - sample_smallint_unsigned SMALLINT UNSIGNED NOT NULL, - sample_mediumint MEDIUMINT, - sample_mediumint_unsigned MEDIUMINT UNSIGNED NOT NULL, - sample_int INT, - sample_int_unsigned INT UNSIGNED NOT NULL, - sample_bigint BIGINT, - sample_bigint_unsigned BIGINT UNSIGNED NOT NULL, - sample_float FLOAT, - sample_double DOUBLE, - sample_decimal DECIMAL(10,2), - sample_date DATE, - sample_datetime DATETIME, - sample_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - sample_time TIME, - sample_year YEAR, - sample_json JSON -); - 
-INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) -VALUES ('A', 'Sample Text', 'Tiny Text', 'Some Longer Text.', 'Medium Length Text', 'This is an example of really long text for the LONGTEXT column.', BINARY '1', 'Sample Binary', 'Tiny Blob Data', 'Sample Blob Data', 'Medium Blob Data', 'Long Blob Data', 'value1', 'value1,value2', b'10101010', -128, 255, -32768, 65535, -8388608, 16777215, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 123.45, 1234567890.123, 12345.67, '2023-01-01', '2023-01-01 12:00:00', CURRENT_TIMESTAMP, '12:00:00', 2023, JSON_OBJECT('key', 'value')); - -INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) -VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); -` - -func TestMySQLToDuckDBTransfer(t *testing.T) 
{ - ctx := context.Background() - container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - Started: true, - ContainerRequest: testcontainers.ContainerRequest{ - WaitingFor: wait.ForLog("mysqld: ready for connections").WithOccurrence(2).WithStartupTimeout(15 * time.Second), - Image: "mysql:8.3.0", - ExposedPorts: []string{"3306/tcp"}, - Env: map[string]string{ - "MYSQL_ROOT_PASSWORD": "mypassword", - "MYSQL_DATABASE": "mydb", - "MYSQL_USER": "myuser", - "MYSQL_PASSWORD": "mypassword", - }, - }, - }) - require.NoError(t, err) - defer container.Terminate(ctx) - - host, err := container.Host(ctx) - require.NoError(t, err) - port, err := container.MappedPort(ctx, "3306/tcp") - require.NoError(t, err) - - dsn := fmt.Sprintf("myuser:mypassword@tcp(%s:%d)/mydb?multiStatements=true", host, port.Int()) - - db, err := sql.Open("mysql", dsn) - require.NoError(t, err) - defer db.Close() - - t.Run("AllDataTypes", func(t *testing.T) { allMySQLDataTypesTest(t, db, dsn) }) -} - -func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { - ctx := context.Background() - _, err := db.ExecContext(ctx, mysqlInitStmt) - require.NoError(t, err) - - handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - require.NotNil(t, handle) - - sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - olap, _ := to.AsOLAP("") - - tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) - err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_data_types_table;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) - require.NoError(t, err) - res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) - require.NoError(t, err) - for res.Next() { - var count int - err = 
res.Rows.Scan(&count) - require.NoError(t, err) - require.Equal(t, count, 2) - } - require.NoError(t, res.Close()) - require.NoError(t, to.Close()) -} +// import ( +// "context" +// "database/sql" +// "testing" + +// "github.com/rilldata/rill/runtime/drivers" +// "github.com/rilldata/rill/runtime/pkg/activity" +// "github.com/stretchr/testify/require" +// "go.uber.org/zap" + +// "fmt" +// "time" + +// _ "github.com/rilldata/rill/runtime/drivers/mysql" +// "github.com/testcontainers/testcontainers-go" +// "github.com/testcontainers/testcontainers-go/wait" +// ) + +// var mysqlInitStmt = ` +// CREATE TABLE all_data_types_table ( +// id INT AUTO_INCREMENT PRIMARY KEY, +// sample_char CHAR(1), +// sample_varchar VARCHAR(100), +// sample_tinytext TINYTEXT, +// sample_text TEXT, +// sample_mediumtext MEDIUMTEXT, +// sample_longtext LONGTEXT, +// sample_binary BINARY(1), +// sample_varbinary VARBINARY(100), +// sample_tinyblob TINYBLOB, +// sample_blob BLOB, +// sample_mediumblob MEDIUMBLOB, +// sample_longblob LONGBLOB, +// sample_enum ENUM('value1', 'value2'), +// sample_set SET('value1', 'value2'), +// sample_bit BIT(8), +// sample_tinyint TINYINT, +// sample_tinyint_unsigned TINYINT UNSIGNED NOT NULL, +// sample_smallint SMALLINT, +// sample_smallint_unsigned SMALLINT UNSIGNED NOT NULL, +// sample_mediumint MEDIUMINT, +// sample_mediumint_unsigned MEDIUMINT UNSIGNED NOT NULL, +// sample_int INT, +// sample_int_unsigned INT UNSIGNED NOT NULL, +// sample_bigint BIGINT, +// sample_bigint_unsigned BIGINT UNSIGNED NOT NULL, +// sample_float FLOAT, +// sample_double DOUBLE, +// sample_decimal DECIMAL(10,2), +// sample_date DATE, +// sample_datetime DATETIME, +// sample_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, +// sample_time TIME, +// sample_year YEAR, +// sample_json JSON +// ); + +// INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, 
sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) +// VALUES ('A', 'Sample Text', 'Tiny Text', 'Some Longer Text.', 'Medium Length Text', 'This is an example of really long text for the LONGTEXT column.', BINARY '1', 'Sample Binary', 'Tiny Blob Data', 'Sample Blob Data', 'Medium Blob Data', 'Long Blob Data', 'value1', 'value1,value2', b'10101010', -128, 255, -32768, 65535, -8388608, 16777215, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 123.45, 1234567890.123, 12345.67, '2023-01-01', '2023-01-01 12:00:00', CURRENT_TIMESTAMP, '12:00:00', 2023, JSON_OBJECT('key', 'value')); + +// INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) +// VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +// ` + +// func TestMySQLToDuckDBTransfer(t *testing.T) { +// ctx := context.Background() +// container, err := testcontainers.GenericContainer(ctx, 
testcontainers.GenericContainerRequest{ +// Started: true, +// ContainerRequest: testcontainers.ContainerRequest{ +// WaitingFor: wait.ForLog("mysqld: ready for connections").WithOccurrence(2).WithStartupTimeout(15 * time.Second), +// Image: "mysql:8.3.0", +// ExposedPorts: []string{"3306/tcp"}, +// Env: map[string]string{ +// "MYSQL_ROOT_PASSWORD": "mypassword", +// "MYSQL_DATABASE": "mydb", +// "MYSQL_USER": "myuser", +// "MYSQL_PASSWORD": "mypassword", +// }, +// }, +// }) +// require.NoError(t, err) +// defer container.Terminate(ctx) + +// host, err := container.Host(ctx) +// require.NoError(t, err) +// port, err := container.MappedPort(ctx, "3306/tcp") +// require.NoError(t, err) + +// dsn := fmt.Sprintf("myuser:mypassword@tcp(%s:%d)/mydb?multiStatements=true", host, port.Int()) + +// db, err := sql.Open("mysql", dsn) +// require.NoError(t, err) +// defer db.Close() + +// t.Run("AllDataTypes", func(t *testing.T) { allMySQLDataTypesTest(t, db, dsn) }) +// } + +// func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { +// ctx := context.Background() +// _, err := db.ExecContext(ctx, mysqlInitStmt) +// require.NoError(t, err) + +// handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) +// require.NotNil(t, handle) + +// sqlStore, _ := handle.AsSQLStore() +// to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) +// olap, _ := to.AsOLAP("") + +// tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) +// err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_data_types_table;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) +// require.NoError(t, err) +// res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) +// require.NoError(t, err) +// for res.Next() { +// var count int +// err = 
res.Rows.Scan(&count) +// require.NoError(t, err) +// require.Equal(t, count, 2) +// } +// require.NoError(t, res.Close()) +// require.NoError(t, to.Close()) +// } diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index 7f501ef652b..998458d322b 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" // Load postgres driver _ "github.com/jackc/pgx/v5/stdlib" @@ -67,12 +68,12 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), zap.NewNop()) + handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index c2a4de73653..275029c690b 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" 
_ "modernc.org/sqlite" ) @@ -29,7 +30,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go index a9eedfd75f7..e4c6179c083 100644 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go @@ -50,97 +50,7 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map } func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter drivers.RowIterator, table string) error { - schema, err := iter.Schema(ctx) - if err != nil { - if errors.Is(err, drivers.ErrIteratorDone) { - return drivers.ErrNoRows - } - return err - } - - if total, ok := iter.Size(drivers.ProgressUnitRecord); ok { - s.logger.Debug("records to be ingested", zap.Uint64("rows", total)) - } - // we first ingest data in a temporary table in the main db - // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - tmpTable := fmt.Sprintf("__%s_tmp_sqlstore", table) - // generate create table query - qry, err := createTableQuery(schema, tmpTable) - if err != nil { - return err - } - - rwConn, release, err := s.to.acquireConn(ctx, false) - if err != nil { - return err - } - defer func() { - _ = release() - }() - conn := rwConn.Connx() - - // create table - _, err = conn.ExecContext(ctx, qry, nil) - if err != nil { - return err - } - - defer func() { - // ensure temporary table is cleaned - _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", 
safeName(tmpTable))) - if err != nil { - s.logger.Error("failed to drop temp table", zap.String("table", tmpTable), zap.Error(err)) - } - }() - - // append data using appender API - err = rawConn(conn.Conn, func(conn driver.Conn) error { - a, err := duckdb.NewAppenderFromConn(conn, "", tmpTable) - if err != nil { - return err - } - defer func() { - err = a.Close() - if err != nil { - s.logger.Error("appender closed failed", zap.Error(err)) - } - }() - - for num := 0; ; num++ { - select { - case <-ctx.Done(): - return ctx.Err() - default: - if num == 10000 { - num = 0 - if err := a.Flush(); err != nil { - return err - } - } - - row, err := iter.Next(ctx) - if err != nil { - if errors.Is(err, drivers.ErrIteratorDone) { - return nil - } - return err - } - if err := convert(row, schema); err != nil { // duckdb specific datatype conversion - return err - } - - if err := a.AppendRow(row...); err != nil { - return err - } - } - } - }) - if err != nil { - return err - } - - // copy data from temp table to target table - return rwConn.CreateTableAsSelect(ctx, table, fmt.Sprintf("SELECT * FROM %s", safeName(tmpTable)), nil) + return nil } func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { diff --git a/runtime/drivers/duckdb/transporter_test.go b/runtime/drivers/duckdb/transporter_test.go index 82103713686..4f8f79f3fd9 100644 --- a/runtime/drivers/duckdb/transporter_test.go +++ b/runtime/drivers/duckdb/transporter_test.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/fileblob" ) type mockObjectStore struct { @@ -592,7 +593,9 @@ func TestIterativeJSONIngestionWithVariableSchema(t *testing.T) { } func runOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) 
+ require.NoError(t, err) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) olap, canServe := conn.AsOLAP("") require.True(t, canServe) diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index 8611ef7f59e..929d92fd5f1 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" + "gocloud.dev/blob" "gopkg.in/yaml.v3" ) @@ -60,7 +61,7 @@ type rillYAML struct { IgnorePaths []string `yaml:"ignore_paths"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("file driver can't be shared") } diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index 21778ab21fd..4fd2e05597b 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/gcputil" "github.com/rilldata/rill/runtime/pkg/globutil" "go.uber.org/zap" + "gocloud.dev/blob" "gocloud.dev/blob/gcsblob" "gocloud.dev/gcp" ) @@ -75,7 +76,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("gcs driver can't be shared") } diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index 
9a559c38f3f..febdf4b117e 100644 --- a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -46,7 +47,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("https driver can't be shared") } diff --git a/runtime/drivers/mock/object_store/object_store.go b/runtime/drivers/mock/object_store/object_store.go index c53fb50b7fb..107add7df59 100644 --- a/runtime/drivers/mock/object_store/object_store.go +++ b/runtime/drivers/mock/object_store/object_store.go @@ -38,7 +38,7 @@ func (driver) Spec() drivers.Spec { } // Open implements drivers.Driver. 
-func (driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { cfg := &configProperties{} err := mapstructure.WeakDecode(config, cfg) if err != nil { diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index deb89a5ba32..f33b7e9026e 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -57,7 +58,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("mysql driver can't be shared") } diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 038e2f31512..02a4f19ce10 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -94,7 +95,7 @@ type configProperties struct { } // Open a connection to Apache Pinot using HTTP API. 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("pinot driver can't be shared") } diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 3ba9e0fbca8..641d7d15fbe 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -55,7 +56,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("postgres driver can't be shared") } diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index 42fd22e822f..e2e130c3976 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -109,7 +110,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, 
errors.New("redshift driver can't be shared") } diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index b401d3b801f..141e842845f 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -97,7 +98,7 @@ type ConfigProperties struct { } // Open implements drivers.Driver -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("s3 driver can't be shared") } diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index 1bc0f316a50..f85be674622 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -128,7 +129,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("salesforce driver can't be shared") } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 2129362de0f..9384c0d9691 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" 
"go.uber.org/zap" + "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -35,7 +36,7 @@ func (d driver) Spec() drivers.Spec { return spec } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("slack driver can't be shared") } diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 1beb40c484b..79c9de4a1d9 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" // Load sqlite driver _ "modernc.org/sqlite" @@ -22,7 +23,7 @@ func init() { type driver struct{} -func (d driver) Open(_ string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(_ string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { dsn, ok := config["dsn"].(string) if !ok { return nil, fmt.Errorf("require dsn to open sqlite connection") diff --git a/runtime/registry.go b/runtime/registry.go index 8feb5422796..4dd19e593f1 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -21,6 +21,7 @@ import ( "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "go.uber.org/zap/zapcore" + "gocloud.dev/blob" ) // GlobalProjectParserName is the name of the instance-global project parser resource that is created for each new instance. @@ -127,6 +128,16 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { return nil } +// DataBucket returns a prefixed bucket for the given instance. +// This bucket is used for storing data that is expected to be persisted across resets. 
+func (r *Runtime) DataBucket(instanceID string, elem ...string) *blob.Bucket { + b := blob.PrefixedBucket(r.dataBucket, instanceID) + for _, e := range elem { + b = blob.PrefixedBucket(b, e) + } + return b +} + // DataDir returns the path to a persistent data directory for the given instance. // Storage usage in the returned directory will be reported in the instance's heartbeat events. func (r *Runtime) DataDir(instanceID string, elem ...string) string { diff --git a/runtime/registry_test.go b/runtime/registry_test.go index ad2d8ce8578..7030cbe2d21 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -17,6 +17,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/fileblob" ) func TestRuntime_EditInstance(t *testing.T) { @@ -528,7 +529,10 @@ func newTestRuntime(t *testing.T) *Runtime { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender())) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + + rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender()), bkt) t.Cleanup(func() { rt.Close() }) diff --git a/runtime/runtime.go b/runtime/runtime.go index bf2601b0df2..23ef294412e 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -15,22 +15,21 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" ) var tracer = otel.Tracer("github.com/rilldata/rill/runtime") type Options struct { - MetastoreConnector string - SystemConnectors []*runtimev1.Connector - ConnectionCacheSize int - QueryCacheSizeBytes int64 - SecurityEngineCacheSize int - ControllerLogBufferCapacity int - ControllerLogBufferSizeBytes int64 - AllowHostAccess bool - DataDir string - DuckDBBackupBucket string - 
DuckDBBackupBucketCredentialsJSON string + MetastoreConnector string + SystemConnectors []*runtimev1.Connector + ConnectionCacheSize int + QueryCacheSizeBytes int64 + SecurityEngineCacheSize int + ControllerLogBufferCapacity int + ControllerLogBufferSizeBytes int64 + AllowHostAccess bool + DataDir string } type Runtime struct { @@ -43,9 +42,10 @@ type Runtime struct { connCache conncache.Cache queryCache *queryCache securityEngine *securityEngine + dataBucket *blob.Bucket } -func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { +func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client, dataBucket *blob.Bucket) (*Runtime, error) { if emailClient == nil { emailClient = email.New(email.NewNoopSender()) } @@ -57,6 +57,7 @@ func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Cl activity: ac, queryCache: newQueryCache(opts.QueryCacheSizeBytes), securityEngine: newSecurityEngine(opts.SecurityEngineCacheSize, logger), + dataBucket: dataBucket, } rt.connCache = rt.newConnectionCache() diff --git a/runtime/server/queries_test.go b/runtime/server/queries_test.go index a3a0591b292..f403bf6b3d5 100644 --- a/runtime/server/queries_test.go +++ b/runtime/server/queries_test.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestServer_InsertLimit_SELECT(t *testing.T) { @@ -132,7 +133,7 @@ func TestServer_UpdateLimit_UNION(t *testing.T) { } func prepareOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), 
memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") require.True(t, ok) diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index 0ecfe111361..07897c5b0d2 100644 --- a/runtime/testruntime/testruntime.go +++ b/runtime/testruntime/testruntime.go @@ -19,6 +19,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/fileblob" // Load database drivers for testing. _ "github.com/rilldata/rill/runtime/drivers/admin" @@ -73,7 +74,9 @@ func New(t TestingT) *runtime.Runtime { require.NoError(t, err) } - rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender())) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender()), bkt) require.NoError(t, err) t.Cleanup(func() { rt.Close() }) From 11203b414b7fe33c3577fed3bd28ec005f2cdb10 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:33:46 +0530 Subject: [PATCH 18/64] fix snowflake --- runtime/drivers/snowflake/snowflake.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index 0a8b00a0a63..8a3f65759d0 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" // Load database/sql driver _ "github.com/snowflakedb/gosnowflake" @@ -66,7 +67,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) 
(drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("snowflake driver can't be shared") } From 09424ba41bf6f5e904b18b6dfa0ec35d88df99cd Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:05:32 +0530 Subject: [PATCH 19/64] non blocking read handle updates --- runtime/pkg/rduckdb/db.go | 310 ++++++++++++++++++++++++--------- runtime/pkg/rduckdb/db_test.go | 3 +- 2 files changed, 233 insertions(+), 80 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index d1ebe95996d..83ea164c97e 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -16,6 +16,8 @@ import ( "slices" "strconv" "strings" + "sync" + "sync/atomic" "time" "github.com/XSAM/otelsql" @@ -214,16 +216,19 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } bgctx, cancel := context.WithCancel(ctx) db := &db{ - opts: opts, - localPath: opts.LocalPath, - remote: opts.Remote, - readMu: ctxsync.NewRWMutex(), - writeSem: semaphore.NewWeighted(1), - localDirty: true, - ticker: time.NewTicker(5 * time.Minute), - logger: opts.Logger, - ctx: bgctx, - cancel: cancel, + opts: opts, + localPath: opts.LocalPath, + remote: opts.Remote, + readMu: ctxsync.NewRWMutex(), + writeSem: semaphore.NewWeighted(1), + localDirty: true, + ticker: time.NewTicker(5 * time.Minute), + genCounter: make(map[int32]int32), + tableVersionCounter: make(map[string]map[string]int32), + tablVersionForGen: make(map[int32][]tableVersion), + logger: opts.Logger, + ctx: bgctx, + cancel: cancel, } // create local path @@ -239,7 +244,8 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } // create read handle - db.readHandle, err = db.openDBAndAttach(ctx, "", "", true) + var tblVersions []tableVersion + db.readHandle, tblVersions, err = 
db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -247,6 +253,12 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } return nil, err } + _, err = db.readHandle.ExecContext(ctx, "CREATE SCHEMA "+schemaName(1), nil) + if err != nil { + return nil, err + } + db.latestGen.Store(1) + db.tablVersionForGen[1] = tblVersions go db.localDBMonitor() return db, nil } @@ -267,6 +279,17 @@ type db struct { localDirty bool // ticker to peroiodically check if local db is in sync with remote ticker *time.Ticker + // latestGen is the latest generation of the db. A generation is incremented whenever a write happens. + latestGen atomic.Int32 + + // counterMu protects genCounter and tableVersionCounter + counterMu sync.Mutex + // genCounter stores how many queries are being served by a particular generation + genCounter map[int32]int32 + // tableVersionCounter stores the number of queries being served by a particular table version + tableVersionCounter map[string]map[string]int32 + // tableVersionForGen stores all table versions for a particular generation + tablVersionForGen map[int32][]tableVersion logger *slog.Logger @@ -294,7 +317,9 @@ func (d *db) Close() error { } defer d.readMu.Unlock() - return d.readHandle.Close() + err = d.readHandle.Close() + d.readHandle = nil + return err } func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() error, error) { @@ -302,14 +327,39 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro return nil, nil, err } + // acquire a connection conn, err := d.readHandle.Connx(ctx) if err != nil { d.readMu.RUnlock() return nil, nil, err } + // increment all counters + // TODO :: may be use a sempahore here. Atleast the acquire here return early. 
But the release can stll be blocked. + d.counterMu.Lock() + // use the schema for the latest generation + gen := d.latestGen.Load() + _, err = conn.ExecContext(ctx, "USE "+schemaName(gen), nil) + if err != nil { + _ = conn.Close() + d.counterMu.Unlock() + d.readMu.RUnlock() + return nil, nil, err + } + // incement generation counter + d.genCounter[gen]++ + d.counterMu.Unlock() + release := func() error { - err := conn.Close() + // lock counterMu and decrement all counters + d.counterMu.Lock() + // queries served by this generation + d.genCounter[gen]-- + if d.genCounter[gen] == 0 { + delete(d.genCounter, gen) + } + d.counterMu.Unlock() + err = conn.Close() d.readMu.RUnlock() return err } @@ -417,7 +467,9 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * err = d.reopen(ctx) if err != nil { - d.logger.Debug("create: error in reopening db", slog.String("error", err.Error())) + if !errors.Is(err, context.Canceled) { + d.logger.Error("create: error in reopening db", slog.String("error", err.Error())) + } return nil } d.localDirty = false @@ -432,6 +484,12 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con } defer d.writeSem.Release(1) + // pull latest changes from remote + err = d.pullFromRemote(ctx) + if err != nil { + return err + } + oldMeta, err := d.tableMeta(name) if err != nil { if errors.Is(err, errNotFound) { @@ -495,7 +553,9 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con // reopen db handle ignoring old name err = d.reopen(ctx) if err != nil { - d.logger.Debug("mutate: error in reopening db", slog.String("error", err.Error())) + if !errors.Is(err, context.Canceled) { + d.logger.Error("mutate: error in reopening db", slog.String("error", err.Error())) + } return nil } d.localDirty = false @@ -545,7 +605,9 @@ func (d *db) DropTable(ctx context.Context, name string) error { // reopen db handle err = d.reopen(ctx) if err != nil { - d.logger.Debug("drop: error in 
reopening db", slog.String("error", err.Error())) + if !errors.Is(err, context.Canceled) { + d.logger.Error("drop: error in reopening db", slog.String("error", err.Error())) + } return nil } d.localDirty = false @@ -633,7 +695,9 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { // reopen db handle err = d.reopen(ctx) if err != nil { - d.logger.Debug("rename: error in reopening db", slog.String("error", err.Error())) + if !errors.Is(err, context.Canceled) { + d.logger.Error("rename: error in reopening db", slog.String("error", err.Error())) + } return nil } d.localDirty = false @@ -656,11 +720,11 @@ func (d *db) localDBMonitor() { continue } err = d.pullFromRemote(d.ctx) - if err != nil { + if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling from remote", slog.String("error", err.Error())) } err = d.reopen(d.ctx) - if err != nil { + if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in reopening db", slog.String("error", err.Error())) } } @@ -668,30 +732,64 @@ func (d *db) localDBMonitor() { } func (d *db) reopen(ctx context.Context) error { - handle, err := d.openDBAndAttach(ctx, "", "", true) + conn, err := d.readHandle.Connx(ctx) if err != nil { return err } + defer conn.Close() - var oldDBHandle *sqlx.DB - err = d.readMu.Lock(ctx) + currentGen := d.latestGen.Load() + 1 + _, err = conn.ExecContext(ctx, "CREATE SCHEMA "+schemaName(currentGen)) if err != nil { return err } - // swap read handle - oldDBHandle = d.readHandle - d.readHandle = handle - d.readMu.Unlock() - // close old read handle - if oldDBHandle != nil { - err = oldDBHandle.Close() - if err != nil { - d.logger.Warn("error in closing old read handle", slog.String("error", err.Error())) - } + _, err = conn.ExecContext(ctx, "USE "+schemaName(currentGen), nil) + if err != nil { + return err + } + + // TODO :: this will pass because of IF NOT EXISTS clause in ATTACH existing files, but 
we should handle this more gracefully + tblVersions, err := d.attachDBs(ctx, conn, "") + if err != nil { + return err + } + + // update tableVersionForGen + d.tablVersionForGen[currentGen] = tblVersions + + // update latestGen + swapped := d.latestGen.Swap(currentGen) + if swapped != currentGen-1 { + d.logger.Error("reopen: generation mismatch", slog.Int("expected", int(currentGen)), slog.Int("actual", int(swapped))) } // do another scan on local data and remove old versions, deleted tables etc + // take into account the queries being served by the old generations + + // check the gens being served + gens := map[int32]any{currentGen: nil} + d.counterMu.Lock() + for g := range d.genCounter { + gens[g] = nil + } + d.counterMu.Unlock() + + // create a state of tables being served + servedTableVersions := make(map[string][]string) + + // iterate over served gens + for g := range d.tablVersionForGen { + if _, ok := gens[g]; !ok { + delete(d.tablVersionForGen, g) + continue + } + for _, tv := range d.tablVersionForGen[g] { + servedVersions := servedTableVersions[tv.Table] + servedTableVersions[tv.Table] = append(servedVersions, tv.Version) + } + } + entries, err := os.ReadDir(d.localPath) if err != nil { return err @@ -702,37 +800,62 @@ func (d *db) reopen(ctx context.Context) error { } bytes, err := os.ReadFile(filepath.Join(d.localPath, entry.Name(), "meta.json")) if err != nil { - d.logger.Debug("error in reading meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) // no meta.json, delete the directory + d.logger.Debug("error in reading meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) } meta := &tableMeta{} err = json.Unmarshal(bytes, meta) if err != nil { + // bad meta.json, delete the directory d.logger.Debug("error in unmarshalling meta.json, removing entry", slog.String("entry", entry.Name()), 
slog.String("error", err.Error())) _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) } - if meta.Deleted { - d.logger.Debug("deleting deleted table", slog.String("table", entry.Name())) - _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) - continue - } - - // remove old versions + // remove unserved versions + servedVersions, ok := servedTableVersions[meta.Name] versions, err := os.ReadDir(filepath.Join(d.localPath, entry.Name())) if err != nil { return err } + nothingServed := true for _, version := range versions { + table := tableVersion{Table: entry.Name(), Version: version.Name()} if !version.IsDir() { continue } - if version.Name() != meta.Version { - d.logger.Debug("deleting old version", slog.String("table", entry.Name()), slog.String("version", version.Name())) - _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name(), version.Name())) + if ok && slices.Contains(servedVersions, version.Name()) { + nothingServed = false + continue + } + _, err := d.readHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+safeSQLName(dbName(table.Table, table.Version)), nil) + if err != nil { + d.logger.Debug("error in detaching table", slog.String("table", table.Table), slog.String("version", table.Version), slog.String("error", err.Error())) + continue + } + err = d.deleteLocalTableFiles(table.Table, table.Version) + if err != nil { + d.logger.Debug("error in removing table", slog.String("table", table.Table), slog.String("version", table.Version), slog.String("error", err.Error())) } } + if nothingServed { + err = d.deleteLocalTableFiles(meta.Name, "") + if err != nil { + d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("error", err.Error())) + } + } + } + + // iterate over gens to delete with no gens being served + for g := range d.tablVersionForGen { + if _, ok := gens[g]; ok { + // this generation is being served + continue + } + _, err := d.readHandle.ExecContext(ctx, fmt.Sprintf("DROP SCHEMA IF EXISTS %s 
CASCADE", schemaName(g)), nil) + if err != nil { + d.logger.Debug("error in dropping schema", slog.Int("gen", int(g)), slog.String("error", err.Error())) + } } return nil } @@ -769,7 +892,7 @@ func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExis if !attachExisting { ignoreTable = table } - db, err := d.openDBAndAttach(ctx, dsn, ignoreTable, false) + db, _, err := d.openDBAndAttach(ctx, dsn, ignoreTable, false) if err != nil { return nil, nil, err } @@ -795,13 +918,13 @@ func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExis }, nil } -func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, error) { +func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, []tableVersion, error) { d.logger.Debug("open db", slog.Bool("read", read), slog.String("uri", uri)) // open the db var settings map[string]string dsn, err := url.Parse(uri) // in-memory if err != nil { - return nil, err + return nil, nil, err } if read { settings = d.opts.ReadSettings @@ -827,25 +950,25 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read return nil }) if err != nil { - return nil, err + return nil, nil, err } db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(d.opts.OtelAttributes...)) if err != nil { - return nil, fmt.Errorf("registering db stats metrics: %w", err) + return nil, nil, fmt.Errorf("registering db stats metrics: %w", err) } - err = db.PingContext(ctx) + conn, err := db.Connx(ctx) if err != nil { db.Close() - return nil, err + return nil, nil, err } - err = d.attachDBs(ctx, db, ignoreTable) + tblVersions, err := d.attachDBs(ctx, conn, ignoreTable) if err != nil { db.Close() - return nil, err + return nil, nil, err } // 2023-12-11: Hail mary for solving this issue: https://github.com/duckdblabs/rilldata/issues/6. 
@@ -867,16 +990,16 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read `) if err != nil { db.Close() - return nil, err + return nil, nil, err } - return db, nil + return db, tblVersions, nil } -func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, ignoreTable string) error { +func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) ([]tableVersion, error) { entries, err := os.ReadDir(d.localPath) if err != nil { - return err + return nil, err } tables := make([]*tableMeta, 0) @@ -889,7 +1012,7 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, ignoreTable string) err } meta, _ := d.tableMeta(entry.Name()) - if meta == nil || meta.Deleted { + if meta == nil { continue } d.logger.Debug("discovered table", slog.String("table", entry.Name()), slog.String("version", meta.Version)) @@ -912,30 +1035,36 @@ func (d *db) attachDBs(ctx context.Context, db *sqlx.DB, ignoreTable string) err return strings.Compare(a.CreatedVersion, b.CreatedVersion) }) - for _, table := range tables { - safeTable := safeSQLName(table.Name) - if table.Type == "VIEW" { - _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) - if err != nil { - return err - } - continue - } - versionPath := filepath.Join(d.localPath, table.Name, table.Version) - safeDBName := safeSQLName(dbName(table.Name)) - _, err = db.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(versionPath, "data.db")), safeDBName)) + res := make([]tableVersion, len(tables)) + for i, table := range tables { + err = d.attachTable(ctx, conn, table) if err != nil { - d.logger.Error("error in attaching db", slog.String("table", table.Name), slog.Any("error", err)) - _ = os.RemoveAll(filepath.Join(d.localPath, table.Name)) - return err + return nil, fmt.Errorf("failed to attach table %q: %w", table.Name, err) } - - _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * 
FROM %s.%s", safeTable, safeDBName, safeTable)) - if err != nil { - return err + res[i] = tableVersion{ + Table: table.Name, + Version: table.Version, } } - return nil + return res, nil +} + +func (d *db) attachTable(ctx context.Context, db *sqlx.Conn, table *tableMeta) error { + safeTable := safeSQLName(table.Name) + if table.Type == "VIEW" { + _, err := db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) + return err + } + + safeDBName := safeSQLName(dbName(table.Name, table.Version)) + _, err := db.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(d.localPath, table.Name, table.Version, "data.db")), safeDBName)) + if err != nil { + d.logger.Warn("error in attaching db", slog.String("table", table.Name), slog.Any("error", err)) + return err + } + + _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) + return err } func (d *db) tableMeta(name string) (*tableMeta, error) { @@ -951,6 +1080,9 @@ func (d *db) tableMeta(name string) (*tableMeta, error) { if err != nil { return nil, err } + if m.Deleted { + return nil, errNotFound + } return m, nil } @@ -966,6 +1098,17 @@ func (d *db) writeTableMeta(name string, meta *tableMeta) error { return nil } +// deleteLocalTableFiles delete table files for the given table name. If version is provided, only that version is deleted. 
+func (d *db) deleteLocalTableFiles(name, version string) error { + var path string + if version == "" { + path = filepath.Join(d.localPath, name) + } else { + path = filepath.Join(d.localPath, name, version) + } + return os.RemoveAll(path) +} + type tableMeta struct { Name string `json:"name"` Version string `json:"version"` @@ -1005,8 +1148,8 @@ func newVersion() string { return strconv.FormatInt(time.Now().UnixMilli(), 10) } -func dbName(name string) string { - return fmt.Sprintf("%s__data__db", name) +func dbName(table, version string) string { + return fmt.Sprintf("%s__%s__db", table, version) } type settings struct { @@ -1066,3 +1209,12 @@ func humanReadableSizeToBytes(sizeStr string) (float64, error) { return sizeFloat * multiplier, nil } + +type tableVersion struct { + Table string + Version string +} + +func schemaName(gen int32) string { + return fmt.Sprintf("main_%v", gen) +} diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index ef956f3a5d1..7b2f5994213 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -57,10 +57,11 @@ func TestDB(t *testing.T) { require.NoError(t, release()) // Add column - db.MutateTable(ctx, "test2", func(ctx context.Context, conn *sqlx.Conn) error { + err = db.MutateTable(ctx, "test2", func(ctx context.Context, conn *sqlx.Conn) error { _, err := conn.ExecContext(ctx, "ALTER TABLE test2 ADD COLUMN city TEXT") return err }) + require.NoError(t, err) // drop table err = db.DropTable(ctx, "test2") From 3b0eee74e9d1a1f0abfb6078ef3885aa2027637e Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:37:55 +0530 Subject: [PATCH 20/64] use tableMeta plus minor fix --- runtime/pkg/rduckdb/db.go | 102 ++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 59 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 83ea164c97e..3114aa43f1f 100644 --- 
a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -216,19 +216,18 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } bgctx, cancel := context.WithCancel(ctx) db := &db{ - opts: opts, - localPath: opts.LocalPath, - remote: opts.Remote, - readMu: ctxsync.NewRWMutex(), - writeSem: semaphore.NewWeighted(1), - localDirty: true, - ticker: time.NewTicker(5 * time.Minute), - genCounter: make(map[int32]int32), - tableVersionCounter: make(map[string]map[string]int32), - tablVersionForGen: make(map[int32][]tableVersion), - logger: opts.Logger, - ctx: bgctx, - cancel: cancel, + opts: opts, + localPath: opts.LocalPath, + remote: opts.Remote, + readMu: ctxsync.NewRWMutex(), + writeSem: semaphore.NewWeighted(1), + localDirty: true, + ticker: time.NewTicker(5 * time.Minute), + genCounter: make(map[int32]int32), + schemaForGen: make(map[int32][]*tableMeta), + logger: opts.Logger, + ctx: bgctx, + cancel: cancel, } // create local path @@ -244,8 +243,8 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } // create read handle - var tblVersions []tableVersion - db.readHandle, tblVersions, err = db.openDBAndAttach(ctx, "", "", true) + var schema []*tableMeta + db.readHandle, schema, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -258,7 +257,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { return nil, err } db.latestGen.Store(1) - db.tablVersionForGen[1] = tblVersions + db.schemaForGen[1] = schema go db.localDBMonitor() return db, nil } @@ -281,15 +280,13 @@ type db struct { ticker *time.Ticker // latestGen is the latest generation of the db. A generation is incremented whenever a write happens. 
latestGen atomic.Int32 + // schemaForGen stores tableMeta for all tables active in a generation + schemaForGen map[int32][]*tableMeta // counterMu protects genCounter and tableVersionCounter counterMu sync.Mutex // genCounter stores how many queries are being served by a particular generation genCounter map[int32]int32 - // tableVersionCounter stores the number of queries being served by a particular table version - tableVersionCounter map[string]map[string]int32 - // tableVersionForGen stores all table versions for a particular generation - tablVersionForGen map[int32][]tableVersion logger *slog.Logger @@ -465,7 +462,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return nil } - err = d.reopen(ctx) + err = d.pushNewGen(ctx) if err != nil { if !errors.Is(err, context.Canceled) { d.logger.Error("create: error in reopening db", slog.String("error", err.Error())) @@ -551,7 +548,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con } // reopen db handle ignoring old name - err = d.reopen(ctx) + err = d.pushNewGen(ctx) if err != nil { if !errors.Is(err, context.Canceled) { d.logger.Error("mutate: error in reopening db", slog.String("error", err.Error())) @@ -603,7 +600,7 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // reopen db handle - err = d.reopen(ctx) + err = d.pushNewGen(ctx) if err != nil { if !errors.Is(err, context.Canceled) { d.logger.Error("drop: error in reopening db", slog.String("error", err.Error())) @@ -693,7 +690,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { } // reopen db handle - err = d.reopen(ctx) + err = d.pushNewGen(ctx) if err != nil { if !errors.Is(err, context.Canceled) { d.logger.Error("rename: error in reopening db", slog.String("error", err.Error())) @@ -723,7 +720,7 @@ func (d *db) localDBMonitor() { if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling from 
remote", slog.String("error", err.Error())) } - err = d.reopen(d.ctx) + err = d.pushNewGen(d.ctx) if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in reopening db", slog.String("error", err.Error())) } @@ -731,7 +728,7 @@ func (d *db) localDBMonitor() { } } -func (d *db) reopen(ctx context.Context) error { +func (d *db) pushNewGen(ctx context.Context) error { conn, err := d.readHandle.Connx(ctx) if err != nil { return err @@ -756,7 +753,7 @@ func (d *db) reopen(ctx context.Context) error { } // update tableVersionForGen - d.tablVersionForGen[currentGen] = tblVersions + d.schemaForGen[currentGen] = tblVersions // update latestGen swapped := d.latestGen.Swap(currentGen) @@ -779,14 +776,13 @@ func (d *db) reopen(ctx context.Context) error { servedTableVersions := make(map[string][]string) // iterate over served gens - for g := range d.tablVersionForGen { + for g := range d.schemaForGen { if _, ok := gens[g]; !ok { - delete(d.tablVersionForGen, g) continue } - for _, tv := range d.tablVersionForGen[g] { - servedVersions := servedTableVersions[tv.Table] - servedTableVersions[tv.Table] = append(servedVersions, tv.Version) + for _, tv := range d.schemaForGen[g] { + servedVersions := servedTableVersions[tv.Name] + servedTableVersions[tv.Name] = append(servedVersions, tv.Version) } } @@ -818,27 +814,25 @@ func (d *db) reopen(ctx context.Context) error { if err != nil { return err } - nothingServed := true - for _, version := range versions { - table := tableVersion{Table: entry.Name(), Version: version.Name()} - if !version.IsDir() { + for _, v := range versions { + if !v.IsDir() { continue } - if ok && slices.Contains(servedVersions, version.Name()) { - nothingServed = false + version := v.Name() + if ok && slices.Contains(servedVersions, version) { continue } - _, err := d.readHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+safeSQLName(dbName(table.Table, table.Version)), nil) + _, err := d.readHandle.ExecContext(ctx, "DETACH 
DATABASE IF EXISTS "+safeSQLName(dbName(meta.Name, version)), nil) if err != nil { - d.logger.Debug("error in detaching table", slog.String("table", table.Table), slog.String("version", table.Version), slog.String("error", err.Error())) + d.logger.Debug("error in detaching table", slog.String("table", meta.Name), slog.String("version", version), slog.String("error", err.Error())) continue } - err = d.deleteLocalTableFiles(table.Table, table.Version) + err = d.deleteLocalTableFiles(meta.Name, version) if err != nil { - d.logger.Debug("error in removing table", slog.String("table", table.Table), slog.String("version", table.Version), slog.String("error", err.Error())) + d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("version", version), slog.String("error", err.Error())) } } - if nothingServed { + if len(servedVersions) == 0 { err = d.deleteLocalTableFiles(meta.Name, "") if err != nil { d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("error", err.Error())) @@ -847,11 +841,12 @@ func (d *db) reopen(ctx context.Context) error { } // iterate over gens to delete with no gens being served - for g := range d.tablVersionForGen { + for g := range d.schemaForGen { if _, ok := gens[g]; ok { // this generation is being served continue } + delete(d.schemaForGen, g) _, err := d.readHandle.ExecContext(ctx, fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName(g)), nil) if err != nil { d.logger.Debug("error in dropping schema", slog.Int("gen", int(g)), slog.String("error", err.Error())) @@ -918,7 +913,7 @@ func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExis }, nil } -func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, []tableVersion, error) { +func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, []*tableMeta, error) { d.logger.Debug("open db", slog.Bool("read", read), 
slog.String("uri", uri)) // open the db var settings map[string]string @@ -996,7 +991,7 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read return db, tblVersions, nil } -func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) ([]tableVersion, error) { +func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) ([]*tableMeta, error) { entries, err := os.ReadDir(d.localPath) if err != nil { return nil, err @@ -1034,19 +1029,13 @@ func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) } return strings.Compare(a.CreatedVersion, b.CreatedVersion) }) - - res := make([]tableVersion, len(tables)) - for i, table := range tables { + for _, table := range tables { err = d.attachTable(ctx, conn, table) if err != nil { return nil, fmt.Errorf("failed to attach table %q: %w", table.Name, err) } - res[i] = tableVersion{ - Table: table.Name, - Version: table.Version, - } } - return res, nil + return tables, nil } func (d *db) attachTable(ctx context.Context, db *sqlx.Conn, table *tableMeta) error { @@ -1210,11 +1199,6 @@ func humanReadableSizeToBytes(sizeStr string) (float64, error) { return sizeFloat * multiplier, nil } -type tableVersion struct { - Table string - Version string -} - func schemaName(gen int32) string { return fmt.Sprintf("main_%v", gen) } From 50660cefd9227a3ec2bdd0aeb7aad93c0ee7a531 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:38:37 +0530 Subject: [PATCH 21/64] small cleanups --- runtime/pkg/rduckdb/db.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 3114aa43f1f..9b902aa19b6 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -331,21 +331,22 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro return nil, nil, err } - // 
increment all counters + // increment gen counter // TODO :: may be use a sempahore here. Atleast the acquire here return early. But the release can stll be blocked. d.counterMu.Lock() // use the schema for the latest generation gen := d.latestGen.Load() + // incement generation counter + d.genCounter[gen]++ + d.counterMu.Unlock() + + // switch to the latest generation _, err = conn.ExecContext(ctx, "USE "+schemaName(gen), nil) if err != nil { _ = conn.Close() - d.counterMu.Unlock() d.readMu.RUnlock() return nil, nil, err } - // incement generation counter - d.genCounter[gen]++ - d.counterMu.Unlock() release := func() error { // lock counterMu and decrement all counters @@ -736,7 +737,7 @@ func (d *db) pushNewGen(ctx context.Context) error { defer conn.Close() currentGen := d.latestGen.Load() + 1 - _, err = conn.ExecContext(ctx, "CREATE SCHEMA "+schemaName(currentGen)) + _, err = conn.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName(currentGen)) if err != nil { return err } @@ -758,7 +759,7 @@ func (d *db) pushNewGen(ctx context.Context) error { // update latestGen swapped := d.latestGen.Swap(currentGen) if swapped != currentGen-1 { - d.logger.Error("reopen: generation mismatch", slog.Int("expected", int(currentGen)), slog.Int("actual", int(swapped))) + d.logger.Error("reopen: generation mismatch", slog.Int("expected", int(currentGen-1)), slog.Int("actual", int(swapped))) } // do another scan on local data and remove old versions, deleted tables etc @@ -810,6 +811,13 @@ func (d *db) pushNewGen(ctx context.Context) error { // remove unserved versions servedVersions, ok := servedTableVersions[meta.Name] + if len(servedVersions) == 0 { + err = d.deleteLocalTableFiles(meta.Name, "") + if err != nil { + d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("error", err.Error())) + } + continue + } versions, err := os.ReadDir(filepath.Join(d.localPath, entry.Name())) if err != nil { return err @@ -832,12 +840,6 @@ func (d *db) 
pushNewGen(ctx context.Context) error { d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("version", version), slog.String("error", err.Error())) } } - if len(servedVersions) == 0 { - err = d.deleteLocalTableFiles(meta.Name, "") - if err != nil { - d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("error", err.Error())) - } - } } // iterate over gens to delete with no gens being served From 365f48486cc49ee5e29c357c1392aa2a1498279d Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:27:42 +0530 Subject: [PATCH 22/64] use a catalog to manage table lifecyle --- runtime/pkg/rduckdb/catalog.go | 248 ++++++++++++++++++++ runtime/pkg/rduckdb/db.go | 411 +++++++++++---------------------- runtime/pkg/rduckdb/remote.go | 46 ++-- 3 files changed, 417 insertions(+), 288 deletions(-) create mode 100644 runtime/pkg/rduckdb/catalog.go diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go new file mode 100644 index 00000000000..5b9a94711de --- /dev/null +++ b/runtime/pkg/rduckdb/catalog.go @@ -0,0 +1,248 @@ +/* +Example init logic: +- Sync remote files with the local cache +- Create a catalog +- Traverse the local files and call addTableVersion for table +Example write logic: +- Call addTableVersion after adding a new table version +- Call removeTable when deleting a table +Example read logic: +- Call acquireSnapshot when starting a read +- If it doesn't already exist, create a schema for the snapshot ID with views for all the table version in the snapshot +- Call releaseSnapshot when done reading the snapshot +Example removeFunc logic: +- Detach the version +- Remove the version file +- If there are no files left in it, remove the table folder +*/ +package rduckdb + +import ( + "context" + "fmt" + + "golang.org/x/sync/semaphore" +) + +// Represents one table and its versions currently present in the local cache. 
+type table struct { + name string + deleted bool + currentVersion string + versionReferenceCounts map[string]int + versionMeta map[string]*tableMeta +} + +// Represents a snapshot of table versions. +// The table versions referenced by the snapshot are guaranteed to exist for as long as the snapshot is acquired. +type snapshot struct { + id int + referenceCount int + tables []*tableMeta + // if snapshot is ready to be served then ready will be marked true + ready bool +} + +// Represents a catalog of available table versions. +// It is thread-safe and supports acquiring a snapshot of table versions which will not be mutated or removed for as long as the snapshot is held. +type catalog struct { + sem *semaphore.Weighted + tables map[string]*table + snapshots map[int]*snapshot + currentSnapshotID int + + removeVersionFunc func(context.Context, string, string) error + removeSnapshotFunc func(context.Context, int) error +} + +// newCatalog creates a new catalog. +// The removeSnapshotFunc func will be called exactly once for each snapshot ID when it is no longer the current snapshot and is no longer held by any readers. +// The removeVersionFunc func will be called exactly once for each table version when it is no longer the current version and is no longer used by any active snapshots. 
+func newCatalog(removeVersionFunc func(context.Context, string, string) error, removeSnapshotFunc func(context.Context, int) error) *catalog { + return &catalog{ + sem: semaphore.NewWeighted(1), + tables: make(map[string]*table), + snapshots: make(map[int]*snapshot), + removeVersionFunc: removeVersionFunc, + removeSnapshotFunc: removeSnapshotFunc, + } +} + +func (c *catalog) hasTableVersion(ctx context.Context, name, version string) (bool, error) { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return false, err + } + defer c.sem.Release(1) + + t, ok := c.tables[name] + if ok && !t.deleted && t.currentVersion == version { + return true, nil + } + return false, nil +} + +// addTableVersion registers a new version of a table. +// If the table name has not been seen before, it is added to the catalog. +func (c *catalog) addTableVersion(ctx context.Context, name string, meta *tableMeta) error { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return err + } + defer c.sem.Release(1) + + t, ok := c.tables[name] + if !ok { + t = &table{ + name: name, + versionReferenceCounts: make(map[string]int), + versionMeta: make(map[string]*tableMeta), + } + c.tables[name] = t + } + + oldVersion := t.currentVersion + t.deleted = false // In case the table was deleted previously, but a snapshot still references it. + t.currentVersion = meta.Version + t.versionMeta[meta.Version] = meta + c.acquireVersion(t, t.currentVersion) + if oldVersion != "" { + _ = c.releaseVersion(ctx, t, oldVersion) + } + + c.currentSnapshotID++ + return nil +} + +// removeTable removes a table from the catalog. +// If the table is currently used by a snapshot, it will stay in the catalog but marked with deleted=true. +// When the last snapshot referencing the table is released, the table will be removed completely. 
+func (c *catalog) removeTable(ctx context.Context, name string) error { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return err + } + defer c.sem.Release(1) + + t, ok := c.tables[name] + if !ok { + return fmt.Errorf("table %q not found", name) + } + + oldVersion := t.currentVersion + t.deleted = true + t.currentVersion = "" + return c.releaseVersion(ctx, t, oldVersion) +} + +func (c *catalog) listTables(ctx context.Context) ([]*tableMeta, error) { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return nil, err + } + defer c.sem.Release(1) + + tables := make([]*tableMeta, 0) + for _, t := range c.tables { + if t.deleted { + continue + } + meta, ok := t.versionMeta[t.currentVersion] + if !ok { + return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) + } + tables = append(tables, meta) + } + return tables, nil +} + +// acquireSnapshot acquires a snapshot of the current table versions. +func (c *catalog) acquireSnapshot(ctx context.Context) (*snapshot, error) { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return nil, err + } + defer c.sem.Release(1) + + s, ok := c.snapshots[c.currentSnapshotID] + if ok { + s.referenceCount++ + return s, nil + } + // first acquire + s = &snapshot{ + id: c.currentSnapshotID, + referenceCount: 1, + tables: make([]*tableMeta, 0), + } + for _, t := range c.tables { + if t.deleted { + continue + } + + meta, ok := t.versionMeta[t.currentVersion] + if !ok { + return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) + } + s.tables = append(s.tables, meta) + c.acquireVersion(t, t.currentVersion) + } + c.snapshots[c.currentSnapshotID] = s + return s, nil +} + +// releaseSnapshot releases a snapshot of table versions. 
+func (c *catalog) releaseSnapshot(ctx context.Context, s *snapshot) error { + err := c.sem.Acquire(ctx, 1) + if err != nil { + return err + } + defer c.sem.Release(1) + + s.referenceCount-- + if s.referenceCount > 0 { + return nil + } + + for _, meta := range s.tables { + t, ok := c.tables[meta.Name] + if !ok { + return fmt.Errorf("internal error: table %q not found", meta.Name) + } + if err := c.releaseVersion(ctx, t, meta.Version); err != nil { + return err + } + } + + delete(c.snapshots, s.id) + return c.removeSnapshotFunc(ctx, s.id) +} + +// acquireVersion increments the reference count of a table version. +// It must be called while holding the catalog mutex. +func (c *catalog) acquireVersion(t *table, version string) { + referenceCount := t.versionReferenceCounts[version] + referenceCount++ + t.versionReferenceCounts[version] = referenceCount +} + +// releaseVersion decrements the reference count of a table version. +// If the reference count reaches zero and the version is no longer the current version, it is removec. 
+func (c *catalog) releaseVersion(ctx context.Context, t *table, version string) error { + referenceCount, ok := t.versionReferenceCounts[version] + if !ok { + return fmt.Errorf("version %q of table %q not found", version, t.name) + } + referenceCount-- + if referenceCount > 0 { + t.versionReferenceCounts[version] = referenceCount + return nil + } + + delete(t.versionReferenceCounts, version) + if t.deleted { + delete(c.tables, t.name) + } + + return c.removeVersionFunc(ctx, t.name, version) +} diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 9b902aa19b6..9cbfb77edae 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -16,8 +16,6 @@ import ( "slices" "strconv" "strings" - "sync" - "sync/atomic" "time" "github.com/XSAM/otelsql" @@ -214,21 +212,26 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { if err != nil { return nil, err } - bgctx, cancel := context.WithCancel(ctx) + + bgctx, cancel := context.WithCancel(context.Background()) db := &db{ - opts: opts, - localPath: opts.LocalPath, - remote: opts.Remote, - readMu: ctxsync.NewRWMutex(), - writeSem: semaphore.NewWeighted(1), - localDirty: true, - ticker: time.NewTicker(5 * time.Minute), - genCounter: make(map[int32]int32), - schemaForGen: make(map[int32][]*tableMeta), - logger: opts.Logger, - ctx: bgctx, - cancel: cancel, - } + opts: opts, + localPath: opts.LocalPath, + remote: opts.Remote, + readMu: ctxsync.NewRWMutex(), + writeSem: semaphore.NewWeighted(1), + metaSem: semaphore.NewWeighted(1), + localDirty: true, + ticker: time.NewTicker(5 * time.Minute), + logger: opts.Logger, + ctx: bgctx, + cancel: cancel, + } + // catalog + db.catalog = newCatalog( + db.removeTableVersion, + db.removeSnapshot, + ) // create local path err = os.MkdirAll(db.localPath, fs.ModePerm) @@ -243,8 +246,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } // create read handle - var schema []*tableMeta - db.readHandle, schema, err = db.openDBAndAttach(ctx, 
"", "", true) + db.dbHandle, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -252,12 +254,6 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } return nil, err } - _, err = db.readHandle.ExecContext(ctx, "CREATE SCHEMA "+schemaName(1), nil) - if err != nil { - return nil, err - } - db.latestGen.Store(1) - db.schemaForGen[1] = schema go db.localDBMonitor() return db, nil } @@ -268,25 +264,20 @@ type db struct { localPath string remote *blob.Bucket - // readHandle serves read queries - readHandle *sqlx.DB + // dbHandle serves executes meta queries and serves read queries + dbHandle *sqlx.DB // readMu controls access to readHandle readMu ctxsync.RWMutex // writeSem ensures only one write operation is allowed at a time writeSem *semaphore.Weighted + // metaSem enures only one meta operation can run on a duckb handle. + // Meta operations are attach, detach, create view queries done on the db handle + metaSem *semaphore.Weighted // localDirty is set to true when a change is committed to the remote but not yet reflected in the local db localDirty bool // ticker to peroiodically check if local db is in sync with remote - ticker *time.Ticker - // latestGen is the latest generation of the db. A generation is incremented whenever a write happens. 
- latestGen atomic.Int32 - // schemaForGen stores tableMeta for all tables active in a generation - schemaForGen map[int32][]*tableMeta - - // counterMu protects genCounter and tableVersionCounter - counterMu sync.Mutex - // genCounter stores how many queries are being served by a particular generation - genCounter map[int32]int32 + ticker *time.Ticker + catalog *catalog logger *slog.Logger @@ -314,8 +305,8 @@ func (d *db) Close() error { } defer d.readMu.Unlock() - err = d.readHandle.Close() - d.readHandle = nil + err = d.dbHandle.Close() + d.dbHandle = nil return err } @@ -325,23 +316,19 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro } // acquire a connection - conn, err := d.readHandle.Connx(ctx) + snapshot, err := d.catalog.acquireSnapshot(ctx) if err != nil { d.readMu.RUnlock() return nil, nil, err } - // increment gen counter - // TODO :: may be use a sempahore here. Atleast the acquire here return early. But the release can stll be blocked. - d.counterMu.Lock() - // use the schema for the latest generation - gen := d.latestGen.Load() - // incement generation counter - d.genCounter[gen]++ - d.counterMu.Unlock() + conn, err := d.dbHandle.Connx(ctx) + if err != nil { + d.readMu.RUnlock() + return nil, nil, err + } - // switch to the latest generation - _, err = conn.ExecContext(ctx, "USE "+schemaName(gen), nil) + err = d.prepareSnapshot(ctx, conn, snapshot) if err != nil { _ = conn.Close() d.readMu.RUnlock() @@ -349,15 +336,8 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro } release := func() error { - // lock counterMu and decrement all counters - d.counterMu.Lock() - // queries served by this generation - d.genCounter[gen]-- - if d.genCounter[gen] == 0 { - delete(d.genCounter, gen) - } - d.counterMu.Unlock() - err = conn.Close() + err = d.catalog.releaseSnapshot(ctx, snapshot) + err = errors.Join(err, conn.Close()) d.readMu.RUnlock() return err } @@ -379,6 +359,9 @@ func (d *db) 
CreateTableAsSelect(ctx context.Context, name, query string, opts * } // check if some older version exists + // We can also use catalog to get the latest version + // but we are not using it here since pullFromRemote should have already updated the catalog + // and we need meta.json contents oldMeta, _ := d.tableMeta(name) if oldMeta != nil { d.logger.Debug("old version", slog.String("version", oldMeta.Version)) @@ -393,8 +376,8 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * } var dsn string if opts.View { - newMeta.SQL = query dsn = "" + newMeta.SQL = query // special handling to ensure that if a view is recreated with the same name and schema then any views on top of this view still works if oldMeta != nil && oldMeta.Type == "VIEW" { newMeta.CreatedVersion = oldMeta.CreatedVersion @@ -463,11 +446,9 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return nil } - err = d.pushNewGen(ctx) + err = d.catalog.addTableVersion(ctx, name, newMeta) if err != nil { - if !errors.Is(err, context.Canceled) { - d.logger.Error("create: error in reopening db", slog.String("error", err.Error())) - } + d.logger.Debug("create: error in adding version", slog.String("table", name), slog.String("version", newMeta.Version), slog.String("error", err.Error())) return nil } d.localDirty = false @@ -548,12 +529,9 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con return nil } - // reopen db handle ignoring old name - err = d.pushNewGen(ctx) + err = d.catalog.addTableVersion(ctx, name, meta) if err != nil { - if !errors.Is(err, context.Canceled) { - d.logger.Error("mutate: error in reopening db", slog.String("error", err.Error())) - } + d.logger.Debug("mutate: error in adding version", slog.String("table", name), slog.String("version", meta.Version), slog.String("error", err.Error())) return nil } d.localDirty = false @@ -576,7 +554,7 @@ func (d *db) DropTable(ctx context.Context, name 
string) error { } // check if table exists - meta, err := d.tableMeta(name) + _, err = d.tableMeta(name) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("drop: Table %q not found", name) @@ -592,20 +570,9 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // no errors after this point since background goroutine will eventually sync the local db - // mark table as deleted in local - meta.Deleted = true - err = d.writeTableMeta(name, meta) - if err != nil { - d.logger.Debug("drop: error in writing table meta", slog.String("name", name), slog.String("error", err.Error())) - return nil - } - - // reopen db handle - err = d.pushNewGen(ctx) + err = d.catalog.removeTable(ctx, name) if err != nil { - if !errors.Is(err, context.Canceled) { - d.logger.Error("drop: error in reopening db", slog.String("error", err.Error())) - } + d.logger.Debug("drop: error in removing table", slog.String("name", name), slog.String("error", err.Error())) return nil } d.localDirty = false @@ -682,20 +649,15 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return nil } - // mark old table as deleted in local - oldMeta.Deleted = true - err = d.writeTableMeta(oldName, oldMeta) + // remove old table from local db + err = d.catalog.removeTable(ctx, oldName) if err != nil { - d.logger.Debug("rename: error in writing table meta", slog.String("name", oldName), slog.String("error", err.Error())) + d.logger.Debug("rename: error in removing table", slog.String("name", oldName), slog.String("error", err.Error())) return nil } - - // reopen db handle - err = d.pushNewGen(ctx) + err = d.catalog.addTableVersion(ctx, newName, meta) if err != nil { - if !errors.Is(err, context.Canceled) { - d.logger.Error("rename: error in reopening db", slog.String("error", err.Error())) - } + d.logger.Debug("rename: error in adding version", slog.String("table", newName), slog.String("version", newVersion), slog.String("error", err.Error())) return nil } 
d.localDirty = false @@ -721,142 +683,10 @@ func (d *db) localDBMonitor() { if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling from remote", slog.String("error", err.Error())) } - err = d.pushNewGen(d.ctx) - if err != nil && !errors.Is(err, context.Canceled) { - d.logger.Error("localDBMonitor: error in reopening db", slog.String("error", err.Error())) - } } } } -func (d *db) pushNewGen(ctx context.Context) error { - conn, err := d.readHandle.Connx(ctx) - if err != nil { - return err - } - defer conn.Close() - - currentGen := d.latestGen.Load() + 1 - _, err = conn.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName(currentGen)) - if err != nil { - return err - } - - _, err = conn.ExecContext(ctx, "USE "+schemaName(currentGen), nil) - if err != nil { - return err - } - - // TODO :: this will pass because of IF NOT EXISTS clause in ATTACH existing files, but we should handle this more gracefully - tblVersions, err := d.attachDBs(ctx, conn, "") - if err != nil { - return err - } - - // update tableVersionForGen - d.schemaForGen[currentGen] = tblVersions - - // update latestGen - swapped := d.latestGen.Swap(currentGen) - if swapped != currentGen-1 { - d.logger.Error("reopen: generation mismatch", slog.Int("expected", int(currentGen-1)), slog.Int("actual", int(swapped))) - } - - // do another scan on local data and remove old versions, deleted tables etc - // take into account the queries being served by the old generations - - // check the gens being served - gens := map[int32]any{currentGen: nil} - d.counterMu.Lock() - for g := range d.genCounter { - gens[g] = nil - } - d.counterMu.Unlock() - - // create a state of tables being served - servedTableVersions := make(map[string][]string) - - // iterate over served gens - for g := range d.schemaForGen { - if _, ok := gens[g]; !ok { - continue - } - for _, tv := range d.schemaForGen[g] { - servedVersions := servedTableVersions[tv.Name] - servedTableVersions[tv.Name] 
= append(servedVersions, tv.Version) - } - } - - entries, err := os.ReadDir(d.localPath) - if err != nil { - return err - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - bytes, err := os.ReadFile(filepath.Join(d.localPath, entry.Name(), "meta.json")) - if err != nil { - // no meta.json, delete the directory - d.logger.Debug("error in reading meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) - _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) - } - meta := &tableMeta{} - err = json.Unmarshal(bytes, meta) - if err != nil { - // bad meta.json, delete the directory - d.logger.Debug("error in unmarshalling meta.json, removing entry", slog.String("entry", entry.Name()), slog.String("error", err.Error())) - _ = os.RemoveAll(filepath.Join(d.localPath, entry.Name())) - } - - // remove unserved versions - servedVersions, ok := servedTableVersions[meta.Name] - if len(servedVersions) == 0 { - err = d.deleteLocalTableFiles(meta.Name, "") - if err != nil { - d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("error", err.Error())) - } - continue - } - versions, err := os.ReadDir(filepath.Join(d.localPath, entry.Name())) - if err != nil { - return err - } - for _, v := range versions { - if !v.IsDir() { - continue - } - version := v.Name() - if ok && slices.Contains(servedVersions, version) { - continue - } - _, err := d.readHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+safeSQLName(dbName(meta.Name, version)), nil) - if err != nil { - d.logger.Debug("error in detaching table", slog.String("table", meta.Name), slog.String("version", version), slog.String("error", err.Error())) - continue - } - err = d.deleteLocalTableFiles(meta.Name, version) - if err != nil { - d.logger.Debug("error in removing table", slog.String("table", meta.Name), slog.String("version", version), slog.String("error", err.Error())) - } - } - } - - // iterate over gens to delete with 
no gens being served - for g := range d.schemaForGen { - if _, ok := gens[g]; ok { - // this generation is being served - continue - } - delete(d.schemaForGen, g) - _, err := d.readHandle.ExecContext(ctx, fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName(g)), nil) - if err != nil { - d.logger.Debug("error in dropping schema", slog.Int("gen", int(g)), slog.String("error", err.Error())) - } - } - return nil -} - func (d *db) Size() int64 { var paths []string entries, err := os.ReadDir(d.localPath) @@ -889,7 +719,7 @@ func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExis if !attachExisting { ignoreTable = table } - db, _, err := d.openDBAndAttach(ctx, dsn, ignoreTable, false) + db, err := d.openDBAndAttach(ctx, dsn, ignoreTable, false) if err != nil { return nil, nil, err } @@ -915,13 +745,13 @@ func (d *db) acquireWriteConn(ctx context.Context, dsn, table string, attachExis }, nil } -func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, []*tableMeta, error) { +func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read bool) (*sqlx.DB, error) { d.logger.Debug("open db", slog.Bool("read", read), slog.String("uri", uri)) // open the db var settings map[string]string dsn, err := url.Parse(uri) // in-memory if err != nil { - return nil, nil, err + return nil, err } if read { settings = d.opts.ReadSettings @@ -947,25 +777,30 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read return nil }) if err != nil { - return nil, nil, err + return nil, err } db := sqlx.NewDb(otelsql.OpenDB(connector), "duckdb") err = otelsql.RegisterDBStatsMetrics(db.DB, otelsql.WithAttributes(d.opts.OtelAttributes...)) if err != nil { - return nil, nil, fmt.Errorf("registering db stats metrics: %w", err) + return nil, fmt.Errorf("registering db stats metrics: %w", err) } conn, err := db.Connx(ctx) if err != nil { db.Close() - return nil, nil, err + return nil, err + } + 
+ tables, err := d.catalog.listTables(ctx) + if err != nil { + return nil, err } - tblVersions, err := d.attachDBs(ctx, conn, ignoreTable) + err = d.attachTables(ctx, conn, tables, ignoreTable) if err != nil { db.Close() - return nil, nil, err + return nil, err } // 2023-12-11: Hail mary for solving this issue: https://github.com/duckdblabs/rilldata/issues/6. @@ -987,35 +822,13 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read `) if err != nil { db.Close() - return nil, nil, err - } - - return db, tblVersions, nil -} - -func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) ([]*tableMeta, error) { - entries, err := os.ReadDir(d.localPath) - if err != nil { return nil, err } - tables := make([]*tableMeta, 0) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - if entry.Name() == ignoreTable { - continue - } - - meta, _ := d.tableMeta(entry.Name()) - if meta == nil { - continue - } - d.logger.Debug("discovered table", slog.String("table", entry.Name()), slog.String("version", meta.Version)) - tables = append(tables, meta) - } + return db, nil +} +func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableMeta, ignoreTable string) error { // sort tables by created_version // this is to ensure that views/tables on which other views depend are attached first slices.SortFunc(tables, func(a, b *tableMeta) int { @@ -1032,29 +845,32 @@ func (d *db) attachDBs(ctx context.Context, conn *sqlx.Conn, ignoreTable string) return strings.Compare(a.CreatedVersion, b.CreatedVersion) }) for _, table := range tables { - err = d.attachTable(ctx, conn, table) + if table.Name == ignoreTable { + continue + } + err := d.attachTable(ctx, conn, table) if err != nil { - return nil, fmt.Errorf("failed to attach table %q: %w", table.Name, err) + return fmt.Errorf("failed to attach table %q: %w", table.Name, err) } } - return tables, nil + return nil } -func (d *db) attachTable(ctx context.Context, 
db *sqlx.Conn, table *tableMeta) error { +func (d *db) attachTable(ctx context.Context, conn *sqlx.Conn, table *tableMeta) error { safeTable := safeSQLName(table.Name) if table.Type == "VIEW" { - _, err := db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) return err } safeDBName := safeSQLName(dbName(table.Name, table.Version)) - _, err := db.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(d.localPath, table.Name, table.Version, "data.db")), safeDBName)) + _, err := conn.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(d.localPath, table.Name, table.Version, "data.db")), safeDBName)) if err != nil { d.logger.Warn("error in attaching db", slog.String("table", table.Name), slog.Any("error", err)) return err } - _, err = db.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) return err } @@ -1071,8 +887,17 @@ func (d *db) tableMeta(name string) (*tableMeta, error) { if err != nil { return nil, err } - if m.Deleted { - return nil, errNotFound + + if m.Type == "VIEW" { + return m, nil + } + // this is required because release version does not table table directory as of now + _, err = os.Stat(filepath.Join(d.localPath, name, m.Version)) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil, errNotFound + } + return nil, err } return m, nil } @@ -1100,15 +925,61 @@ func (d *db) deleteLocalTableFiles(name, version string) error { return os.RemoveAll(path) } +func (d *db) removeTableVersion(ctx context.Context, name, version string) error { + err := d.metaSem.Acquire(ctx, 1) + if err != nil { + return err + } + 
d.metaSem.Release(1) + + _, err = d.dbHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+dbName(name, version)) + if err != nil { + return err + } + return d.deleteLocalTableFiles(name, version) +} + +func (d *db) prepareSnapshot(ctx context.Context, conn *sqlx.Conn, s *snapshot) error { + err := d.metaSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.metaSem.Release(1) + + if s.ready { + return nil + } + + _, err = conn.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName(s.id)) + if err != nil { + return err + } + + _, err = conn.ExecContext(ctx, "USE "+schemaName(s.id)) + if err != nil { + return err + } + + return d.attachTables(ctx, conn, s.tables, "") +} + +func (d *db) removeSnapshot(ctx context.Context, id int) error { + err := d.metaSem.Acquire(ctx, 1) + if err != nil { + return err + } + defer d.metaSem.Release(1) + + _, err = d.dbHandle.Exec(fmt.Sprintf("DROP SCHEMA %s CASCADE", schemaName(id))) + return err +} + type tableMeta struct { Name string `json:"name"` Version string `json:"version"` CreatedVersion string `json:"created_version"` Type string `json:"type"` // either TABLE or VIEW SQL string `json:"sql"` // populated for views - // Deleted is set to true if the table is deleted. - // This is only used for local tables since local copy can only be removed when db handle has been reattached. 
- Deleted bool `json:"deleted"` } func renameTable(ctx context.Context, dbFile, old, newName string) error { @@ -1201,6 +1072,6 @@ func humanReadableSizeToBytes(sizeStr string) (float64, error) { return sizeFloat * multiplier, nil } -func schemaName(gen int32) string { +func schemaName(gen int) string { return fmt.Sprintf("main_%v", gen) } diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index c3ba52d0b93..f6738a8a929 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -27,7 +27,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { } d.logger.Debug("syncing from remote") // Create an errgroup for background downloads with limited concurrency. - g, ctx := errgroup.WithContext(ctx) + g, gctx := errgroup.WithContext(ctx) g.SetLimit(8) objects := d.remote.List(&blob.ListOptions{ @@ -39,7 +39,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { // Stop the loop if the ctx was cancelled var stop bool select { - case <-ctx.Done(): + case <-gctx.Done(): stop = true default: // don't break @@ -48,7 +48,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { break // can't use break inside the select } - obj, err := objects.Next(ctx) + obj, err := objects.Next(gctx) if err != nil { if errors.Is(err, io.EOF) { break @@ -64,8 +64,8 @@ func (d *db) pullFromRemote(ctx context.Context) error { // get version of the table var b []byte - err = retry(ctx, func() error { - res, err := d.remote.ReadAll(ctx, path.Join(table, "meta.json")) + err = retry(gctx, func() error { + res, err := d.remote.ReadAll(gctx, path.Join(table, "meta.json")) if err != nil { return err } @@ -87,13 +87,23 @@ func (d *db) pullFromRemote(ctx context.Context) error { continue } - // check with current version - meta, _ := d.tableMeta(table) - if meta != nil && meta.Version == backedUpMeta.Version { + inSync, err := d.catalog.hasTableVersion(gctx, table, backedUpMeta.Version) + if err != nil { + return err + } + if inSync { 
d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) continue } tblMetas[table] = backedUpMeta + + // check with local meta + meta, _ := d.tableMeta(table) + if meta != nil && meta.Version == backedUpMeta.Version { + d.logger.Debug("SyncWithObjectStorage: local table is not present in catalog", slog.String("table", table)) + tblMetas[table] = backedUpMeta + continue + } if err := os.MkdirAll(filepath.Join(d.localPath, table, backedUpMeta.Version), os.ModePerm); err != nil { return err } @@ -101,7 +111,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { tblIter := d.remote.List(&blob.ListOptions{Prefix: path.Join(table, backedUpMeta.Version)}) // download all objects in the table and current version for { - obj, err := tblIter.Next(ctx) + obj, err := tblIter.Next(gctx) if err != nil { if errors.Is(err, io.EOF) { break @@ -109,14 +119,14 @@ func (d *db) pullFromRemote(ctx context.Context) error { return err } g.Go(func() error { - return retry(ctx, func() error { + return retry(gctx, func() error { file, err := os.Create(filepath.Join(d.localPath, obj.Key)) if err != nil { return err } defer file.Close() - rdr, err := d.remote.NewReader(ctx, obj.Key, nil) + rdr, err := d.remote.NewReader(gctx, obj.Key, nil) if err != nil { return err } @@ -141,6 +151,10 @@ func (d *db) pullFromRemote(ctx context.Context) error { if err != nil { return err } + err = d.catalog.addTableVersion(ctx, table, meta) + if err != nil { + return err + } } // mark tables that are not in remote for delete later @@ -155,14 +169,10 @@ func (d *db) pullFromRemote(ctx context.Context) error { if _, ok := tblMetas[entry.Name()]; ok { continue } - // get current meta - meta, _ := d.tableMeta(entry.Name()) - if meta == nil { - // cleanup ?? 
- continue + err = d.catalog.removeTable(ctx, entry.Name()) + if err != nil { + return err } - meta.Deleted = true - _ = d.writeTableMeta(entry.Name(), meta) } return nil } From 2c3b4851c04be52b37edf0802f2d349604ed95a4 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:51:23 +0530 Subject: [PATCH 23/64] use catalog to check if table exists --- runtime/pkg/rduckdb/catalog.go | 15 ++++++++++----- runtime/pkg/rduckdb/db.go | 10 +++++----- runtime/pkg/rduckdb/remote.go | 12 +++++------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 5b9a94711de..76c5bf4de54 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -68,18 +68,22 @@ func newCatalog(removeVersionFunc func(context.Context, string, string) error, r } } -func (c *catalog) hasTableVersion(ctx context.Context, name, version string) (bool, error) { +func (c *catalog) tableMeta(ctx context.Context, name string) (*tableMeta, error) { err := c.sem.Acquire(ctx, 1) if err != nil { - return false, err + return nil, err } defer c.sem.Release(1) t, ok := c.tables[name] - if ok && !t.deleted && t.currentVersion == version { - return true, nil + if !ok || t.deleted { + return nil, errNotFound + } + meta, ok := t.versionMeta[t.currentVersion] + if !ok { + return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) } - return false, nil + return meta, nil } // addTableVersion registers a new version of a table. @@ -135,6 +139,7 @@ func (c *catalog) removeTable(ctx context.Context, name string) error { return c.releaseVersion(ctx, t, oldVersion) } +// listTables returns tableMeta for all active tables present in the catalog. 
func (c *catalog) listTables(ctx context.Context) ([]*tableMeta, error) { err := c.sem.Acquire(ctx, 1) if err != nil { diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 9cbfb77edae..89aab0378df 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -362,7 +362,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * // We can also use catalog to get the latest version // but we are not using it here since pullFromRemote should have already updated the catalog // and we need meta.json contents - oldMeta, _ := d.tableMeta(name) + oldMeta, _ := d.catalog.tableMeta(ctx, name) if oldMeta != nil { d.logger.Debug("old version", slog.String("version", oldMeta.Version)) } @@ -469,7 +469,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con return err } - oldMeta, err := d.tableMeta(name) + oldMeta, err := d.catalog.tableMeta(ctx, name) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("mutate: Table %q not found", name) @@ -554,7 +554,7 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // check if table exists - _, err = d.tableMeta(name) + _, err = d.catalog.tableMeta(ctx, name) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("drop: Table %q not found", name) @@ -596,7 +596,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: unable to pull from remote: %w", err) } - oldMeta, err := d.tableMeta(oldName) + oldMeta, err := d.catalog.tableMeta(ctx, oldName) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("rename: Table %q not found", oldName) @@ -703,7 +703,7 @@ func (d *db) Size() int64 { if strings.HasPrefix(entry.Name(), "__rill_tmp_") { continue } - meta, _ := d.tableMeta(entry.Name()) + meta, _ := d.catalog.tableMeta(context.Background(), entry.Name()) if meta != nil { paths = append(paths, filepath.Join(d.localPath, entry.Name(), 
meta.Version, "data.db")) } diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index f6738a8a929..3874f925da7 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -87,18 +87,16 @@ func (d *db) pullFromRemote(ctx context.Context) error { continue } - inSync, err := d.catalog.hasTableVersion(gctx, table, backedUpMeta.Version) - if err != nil { - return err - } - if inSync { + // check if table in catalog is already upto date + meta, _ := d.catalog.tableMeta(gctx, table) + if meta != nil && meta.Version == backedUpMeta.Version { d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) continue } tblMetas[table] = backedUpMeta - // check with local meta - meta, _ := d.tableMeta(table) + // check if table is locally present but not added to catalog yet + meta, _ = d.tableMeta(table) if meta != nil && meta.Version == backedUpMeta.Version { d.logger.Debug("SyncWithObjectStorage: local table is not present in catalog", slog.String("table", table)) tblMetas[table] = backedUpMeta From c369e09634893b6141709b7455bb5b7c16f3aa4b Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 25 Nov 2024 20:25:33 +0530 Subject: [PATCH 24/64] add concurrent access unit tests --- runtime/pkg/rduckdb/catalog.go | 2 +- runtime/pkg/rduckdb/db_test.go | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 76c5bf4de54..8a681d52eb6 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -245,7 +245,7 @@ func (c *catalog) releaseVersion(ctx context.Context, t *table, version string) } delete(t.versionReferenceCounts, version) - if t.deleted { + if t.deleted && len(t.versionReferenceCounts) == 0 { delete(c.tables, t.name) } diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 
7b2f5994213..02ac7cb4c86 100644
--- a/runtime/pkg/rduckdb/db_test.go
+++ b/runtime/pkg/rduckdb/db_test.go
@@ -225,6 +225,49 @@ func TestResetLocal(t *testing.T) {
 	verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}})
 }
 
+func TestConcurrentReads(t *testing.T) {
+	testDB, _, _ := prepareDB(t)
+	ctx := context.Background()
+
+	// create table
+	err := testDB.CreateTableAsSelect(ctx, "pest", "SELECT 2 AS id, 'USA' AS country", &CreateTableOptions{})
+	require.NoError(t, err)
+
+	// create test table
+	err = testDB.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{})
+	require.NoError(t, err)
+
+	// acquire connection
+	conn1, release1, err1 := testDB.AcquireReadConnection(ctx)
+	require.NoError(t, err1)
+
+	// replace with a view
+	err = testDB.CreateTableAsSelect(ctx, "test", "SELECT * FROM pest", &CreateTableOptions{View: true})
+	require.NoError(t, err)
+
+	// acquire connection
+	conn2, release2, err2 := testDB.AcquireReadConnection(ctx)
+	require.NoError(t, err2)
+
+	// drop table
+	require.NoError(t, testDB.DropTable(ctx, "test"))
+
+	// verify both tables are still accessible
+	verifyTableForConn(t, conn1, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}})
+	require.NoError(t, release1())
+	verifyTableForConn(t, conn2, "SELECT id, country FROM test", []testData{{ID: 2, Country: "USA"}})
+	require.NoError(t, release2())
+
+	// acquire connection to see that table is now dropped
+	conn3, release3, err3 := testDB.AcquireReadConnection(ctx)
+	require.NoError(t, err3)
+	var id int
+	var country string
+	err = conn3.QueryRowContext(ctx, "SELECT id, country FROM test").Scan(&id, &country)
+	require.Error(t, err)
+	require.NoError(t, release3())
+}
+
 func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) {
 	localDir = t.TempDir()
 	ctx := context.Background()
@@ -258,6 +301,14 @@ func verifyTable(t *testing.T, db DB, query string, data []testData) {
 	require.Equal(t, data, 
scannedData) } +func verifyTableForConn(t *testing.T, conn *sqlx.Conn, query string, data []testData) { + ctx := context.Background() + var scannedData []testData + err := conn.SelectContext(ctx, &scannedData, query) + require.NoError(t, err) + require.Equal(t, data, scannedData) +} + type testData struct { ID int `db:"id"` Country string `db:"country"` From 1f59235c6f798d97de26cb39d54fa89560bc4649 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:33:52 +0530 Subject: [PATCH 25/64] minor tweaks --- runtime/pkg/rduckdb/db.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 89aab0378df..fceabb4ffd9 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -359,9 +359,6 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * } // check if some older version exists - // We can also use catalog to get the latest version - // but we are not using it here since pullFromRemote should have already updated the catalog - // and we need meta.json contents oldMeta, _ := d.catalog.tableMeta(ctx, name) if oldMeta != nil { d.logger.Debug("old version", slog.String("version", oldMeta.Version)) @@ -891,7 +888,7 @@ func (d *db) tableMeta(name string) (*tableMeta, error) { if m.Type == "VIEW" { return m, nil } - // this is required because release version does not table table directory as of now + // this is required because release version does not delete table directory as of now _, err = os.Stat(filepath.Join(d.localPath, name, m.Version)) if err != nil { if errors.Is(err, fs.ErrNotExist) { @@ -930,7 +927,7 @@ func (d *db) removeTableVersion(ctx context.Context, name, version string) error if err != nil { return err } - d.metaSem.Release(1) + defer d.metaSem.Release(1) _, err = d.dbHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+dbName(name, version)) if err != nil { From 
a39074640eaefa25d0275c5789be87ab5faed79d Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:04:53 +0530 Subject: [PATCH 26/64] data bucket for persisting data to gcs --- cli/cmd/runtime/install_duckdb_extensions.go | 3 +- cli/cmd/runtime/start.go | 28 ++++++++++- cli/pkg/cmdutil/project.go | 3 +- cli/pkg/local/app.go | 8 +++- runtime/compilers/rillv1/parser_test.go | 47 ++++--------------- runtime/connection_cache.go | 8 +++- runtime/drivers/admin/admin.go | 3 +- runtime/drivers/athena/athena.go | 3 +- runtime/drivers/azure/azure.go | 3 +- runtime/drivers/bigquery/bigquery.go | 3 +- runtime/drivers/clickhouse/clickhouse.go | 3 +- .../clickhouse/information_schema_test.go | 3 +- runtime/drivers/clickhouse/olap_test.go | 5 +- runtime/drivers/drivers.go | 7 +-- runtime/drivers/drivers_test.go | 3 +- runtime/drivers/druid/druid.go | 3 +- runtime/drivers/druid/druid_test.go | 3 +- runtime/drivers/druid/sql_driver_test.go | 11 +++-- runtime/drivers/duckdb/config_test.go | 5 +- runtime/drivers/duckdb/duckdb.go | 3 +- runtime/drivers/duckdb/duckdb_test.go | 7 +-- runtime/drivers/duckdb/olap_crud_test.go | 27 ++++++----- runtime/drivers/duckdb/olap_test.go | 7 +-- .../transporter_duckDB_to_duckDB_test.go | 5 +- .../transporter_mysql_to_duckDB_test.go | 5 +- .../transporter_postgres_to_duckDB_test.go | 5 +- .../transporter_sqlite_to_duckDB_test.go | 3 +- runtime/drivers/duckdb/transporter_test.go | 3 +- runtime/drivers/file/file.go | 3 +- runtime/drivers/gcs/gcs.go | 3 +- runtime/drivers/https/https.go | 3 +- .../drivers/mock/object_store/object_store.go | 2 +- runtime/drivers/mysql/mysql.go | 3 +- runtime/drivers/pinot/pinot.go | 3 +- runtime/drivers/postgres/postgres.go | 3 +- runtime/drivers/redshift/redshift.go | 3 +- runtime/drivers/s3/s3.go | 3 +- runtime/drivers/salesforce/salesforce.go | 3 +- runtime/drivers/slack/slack.go | 3 +- runtime/drivers/snowflake/snowflake.go | 3 +- 
runtime/drivers/sqlite/sqlite.go | 3 +- runtime/registry.go | 11 +++++ runtime/runtime.go | 5 +- runtime/server/queries_test.go | 3 +- runtime/testruntime/testruntime.go | 5 +- 45 files changed, 171 insertions(+), 108 deletions(-) diff --git a/cli/cmd/runtime/install_duckdb_extensions.go b/cli/cmd/runtime/install_duckdb_extensions.go index e7a45dc3804..9834f032dfc 100644 --- a/cli/cmd/runtime/install_duckdb_extensions.go +++ b/cli/cmd/runtime/install_duckdb_extensions.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/spf13/cobra" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) // InstallDuckDBExtensionsCmd adds a CLI command that forces DuckDB to install all required extensions. @@ -17,7 +18,7 @@ func InstallDuckDBExtensionsCmd(ch *cmdutil.Helper) *cobra.Command { Use: "install-duckdb-extensions", RunE: func(cmd *cobra.Command, args []string) error { cfg := map[string]any{"dsn": ":memory:"} // In-memory - h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), zap.NewNop()) + h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) if err != nil { return fmt.Errorf("failed to open ephemeral duckdb: %w", err) } diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index ea79ea64020..d98f7299aa0 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -23,6 +23,9 @@ import ( "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" + "gocloud.dev/blob/gcsblob" + "gocloud.dev/gcp" + "golang.org/x/oauth2/google" "golang.org/x/sync/errgroup" // Load connectors and reconcilers for runtime @@ -85,6 +88,9 @@ type Config struct { // DataDir stores data for all instances like duckdb file, temporary downloaded file etc. 
// The data for each instance is stored in a child directory named instance_id
 	DataDir string `split_words:"true"`
+	// DataBucket is the name of the GCS bucket where DuckDB backups are stored
+	DataBucket                string `split_words:"true"`
+	DataBucketCredentialsJSON string `split_words:"true"`
 	// Sink type of activity client: noop (or empty string), kafka
 	ActivitySinkType string `default:"" split_words:"true"`
 	// Kafka brokers of an activity client's sink
@@ -198,6 +204,17 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command {
 			// Create ctx that cancels on termination signals
 			ctx := graceful.WithCancelOnTerminate(context.Background())
 
+			// Init dataBucket
+			client, err := newClient(ctx, conf.DataBucketCredentialsJSON)
+			if err != nil {
+				logger.Fatal("error: could not create GCP client", zap.Error(err))
+			}
+
+			bucket, err := gcsblob.OpenBucket(ctx, client, conf.DataBucket, nil)
+			if err != nil {
+				logger.Fatal("error: could not open data bucket", zap.String("bucket", conf.DataBucket), zap.Error(err))
+			}
+
 			// Init runtime
 			opts := &runtime.Options{
 				ConnectionCacheSize: conf.ConnectionCacheSize,
@@ -216,7 +233,7 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command {
 				},
 			},
 		}
-		rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient)
+		rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient, bucket)
 		if err != nil {
 			logger.Fatal("error: could not create runtime", zap.Error(err))
 		}
@@ -267,3 +284,12 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command {
 	}
 	return startCmd
 }
+
+func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) {
+	creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform")
+	if err != nil {
+		return nil, fmt.Errorf("failed to create credentials: %w", err)
+	}
+	// the token source returned from credentials works for all kinds of credentials like serviceAccountKey, credentialsKey etc.
+ return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) +} diff --git a/cli/pkg/cmdutil/project.go b/cli/pkg/cmdutil/project.go index b1a3293edc4..e80829d6b60 100644 --- a/cli/pkg/cmdutil/project.go +++ b/cli/pkg/cmdutil/project.go @@ -6,6 +6,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob/memblob" // Ensure file driver is loaded _ "github.com/rilldata/rill/runtime/drivers/file" @@ -14,7 +15,7 @@ import ( // RepoForProjectPath creates an ad-hoc drivers.RepoStore for a local project file path func RepoForProjectPath(path string) (drivers.RepoStore, string, error) { instanceID := "default" - repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), zap.NewNop()) + repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) if err != nil { return nil, "", err } diff --git a/cli/pkg/local/app.go b/cli/pkg/local/app.go index 1099fe22c4e..9e26813ae4e 100644 --- a/cli/pkg/local/app.go +++ b/cli/pkg/local/app.go @@ -31,6 +31,7 @@ import ( "go.uber.org/zap" "go.uber.org/zap/buffer" "go.uber.org/zap/zapcore" + "gocloud.dev/blob/fileblob" "golang.org/x/sync/errgroup" "gopkg.in/natefinch/lumberjack.v2" ) @@ -156,7 +157,10 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { // if err != nil { // return nil, fmt.Errorf("failed to create email sender: %w", err) // } - + bkt, err := fileblob.OpenBucket(filepath.Join(dbDirPath, "remote"), &fileblob.Options{CreateDir: true}) + if err != nil { + return nil, err + } rtOpts := &runtime.Options{ ConnectionCacheSize: 100, MetastoreConnector: "metastore", @@ -168,7 +172,7 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := runtime.New(ctx, 
rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender)) + rt, err := runtime.New(ctx, rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender), bkt) if err != nil { return nil, err } diff --git a/runtime/compilers/rillv1/parser_test.go b/runtime/compilers/rillv1/parser_test.go index 7c616c07606..16d7bad7aa4 100644 --- a/runtime/compilers/rillv1/parser_test.go +++ b/runtime/compilers/rillv1/parser_test.go @@ -2,7 +2,6 @@ package rillv1 import ( "context" - "encoding/json" "fmt" "maps" "reflect" @@ -15,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" "google.golang.org/protobuf/types/known/structpb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -156,9 +156,6 @@ dimensions: measures: - name: b expression: count(*) - format_d3: "0,0" - format_d3_locale: - currency: ["£", ""] first_day_of_week: 7 first_month_of_year: 3 `, @@ -280,13 +277,7 @@ schema: default {Name: "a", Column: "a"}, }, Measures: []*runtimev1.MetricsViewSpec_MeasureV2{ - { - Name: "b", - Expression: "count(*)", - Type: runtimev1.MetricsViewSpec_MEASURE_TYPE_SIMPLE, - FormatD3: "0,0", - FormatD3Locale: must(structpb.NewStruct(map[string]any{"currency": []any{"£", ""}})), - }, + {Name: "b", Expression: "count(*)", Type: runtimev1.MetricsViewSpec_MEASURE_TYPE_SIMPLE}, }, FirstDayOfWeek: 7, FirstMonthOfYear: 3, @@ -314,10 +305,7 @@ schema: default }, }, DefaultPreset: &runtimev1.ExplorePreset{ - DimensionsSelector: &runtimev1.FieldSelector{Selector: &runtimev1.FieldSelector_All{All: true}}, - MeasuresSelector: &runtimev1.FieldSelector{Selector: &runtimev1.FieldSelector_All{All: true}}, - TimeRange: "P4W", - ComparisonMode: runtimev1.ExploreComparisonMode_EXPLORE_COMPARISON_MODE_NONE, + TimeRange: "P4W", }, }, }, @@ -1599,7 +1587,7 @@ theme: } func TestComponentsAndCanvas(t *testing.T) { - vegaLiteSpec := normalizeJSON(t, ` + vegaLiteSpec := ` { "$schema": 
"https://vega.github.io/schema/vega-lite/v5.json", "description": "A simple bar chart with embedded data.", @@ -1611,7 +1599,7 @@ func TestComponentsAndCanvas(t *testing.T) { "x": {"field": "time", "type": "nominal", "axis": {"labelAngle": 0}}, "y": {"field": "total_sales", "type": "quantitative"} } - }`) + }` ctx := context.Background() repo := makeRepo(t, map[string]string{ `rill.yaml`: ``, @@ -1621,8 +1609,7 @@ data: api: MetricsViewAggregation args: metrics_view: foo -vega_lite: > - %s +vega_lite: |%s `, vegaLiteSpec), `components/c2.yaml`: fmt.Sprintf(` type: component @@ -1630,8 +1617,7 @@ data: api: MetricsViewAggregation args: metrics_view: bar -vega_lite: > - %s +vega_lite: |%s `, vegaLiteSpec), `components/c3.yaml`: ` type: component @@ -1712,7 +1698,7 @@ items: Items: []*runtimev1.CanvasItem{ {Component: "c1"}, {Component: "c2", Width: asPtr(uint32(1)), Height: asPtr(uint32(2))}, - {Component: "d1--component-2", DefinedInCanvas: true}, + {Component: "d1--component-2"}, }, }, }, @@ -2035,15 +2021,10 @@ func requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc require.Equal(t, want.SourceSpec, got.SourceSpec, "for resource %q", want.Name) require.Equal(t, want.ModelSpec, got.ModelSpec, "for resource %q", want.Name) require.Equal(t, want.MetricsViewSpec, got.MetricsViewSpec, "for resource %q", want.Name) - require.Equal(t, want.ExploreSpec, got.ExploreSpec, "for resource %q", want.Name) require.Equal(t, want.MigrationSpec, got.MigrationSpec, "for resource %q", want.Name) + require.Equal(t, want.ThemeSpec, got.ThemeSpec, "for resource %q", want.Name) require.True(t, reflect.DeepEqual(want.ReportSpec, got.ReportSpec), "for resource %q", want.Name) require.True(t, reflect.DeepEqual(want.AlertSpec, got.AlertSpec), "for resource %q", want.Name) - require.Equal(t, want.ThemeSpec, got.ThemeSpec, "for resource %q", want.Name) - require.Equal(t, want.ComponentSpec, got.ComponentSpec, "for resource %q", want.Name) - require.Equal(t, 
want.CanvasSpec, got.CanvasSpec, "for resource %q", want.Name) - require.Equal(t, want.APISpec, got.APISpec, "for resource %q", want.Name) - require.Equal(t, want.ConnectorSpec, got.ConnectorSpec, "for resource %q", want.Name) delete(gotResources, got.Name) found = true @@ -2057,7 +2038,7 @@ func requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc func makeRepo(t testing.TB, files map[string]string) drivers.RepoStore { root := t.TempDir() - handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), zap.NewNop()) + handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) repo, ok := handle.AsRepoStore("") @@ -2085,11 +2066,3 @@ func deleteRepo(t testing.TB, repo drivers.RepoStore, files ...string) { func asPtr[T any](val T) *T { return &val } - -func normalizeJSON(t *testing.T, s string) string { - var v interface{} - require.NoError(t, json.Unmarshal([]byte(s), &v)) - b, err := json.Marshal(v) - require.NoError(t, err) - return string(b) -} diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 72e271f404c..3bec04da536 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/exp/maps" ) @@ -93,6 +94,7 @@ func (r *Runtime) evictInstanceConnections(instanceID string) { func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig) (drivers.Handle, error) { logger := r.Logger activityClient := r.activity + var dataBucket *blob.Bucket if cfg.instanceID != "" { // Not shared across multiple instances inst, err := r.Instance(ctx, cfg.instanceID) if err != nil { @@ -108,9 +110,13 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig if 
activityClient != nil { activityClient = activityClient.With(activityDims...) } + + dataBucket = r.DataBucket(cfg.instanceID) + } else { + dataBucket = r.DataBucket("__global__") } - handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, logger) + handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, dataBucket, logger) if err == nil && ctx.Err() != nil { err = fmt.Errorf("timed out while opening driver %q", cfg.driver) } diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index 93cd1be6ef0..9ac4dc46e66 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -23,6 +23,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/ctxsync" "go.opentelemetry.io/otel" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/sync/singleflight" "gopkg.in/yaml.v3" ) @@ -63,7 +64,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("admin driver can't be shared") } diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index bb687306076..50bbd73636d 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -85,7 +86,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client 
*activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("athena driver can't be shared") } diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index 30bf08daaba..0282b0cbc63 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -82,7 +83,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("azure driver can't be shared") } diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index ea555731faa..86644749f10 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/gcputil" "go.uber.org/zap" + "gocloud.dev/blob" "google.golang.org/api/option" ) @@ -77,7 +78,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("bigquery driver can't be shared") } diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index 7e81e1a2f1d..cd0db9cd045 100644 --- 
a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -16,6 +16,7 @@ import ( "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -121,7 +122,7 @@ type configProperties struct { // Open connects to Clickhouse using std API. // Connection string format : https://github.com/ClickHouse/clickhouse-go?tab=readme-ov-file#dsn -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("clickhouse driver can't be shared") } diff --git a/runtime/drivers/clickhouse/information_schema_test.go b/runtime/drivers/clickhouse/information_schema_test.go index 074a9047fc0..62b40198917 100644 --- a/runtime/drivers/clickhouse/information_schema_test.go +++ b/runtime/drivers/clickhouse/information_schema_test.go @@ -12,6 +12,7 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/clickhouse" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestInformationSchema(t *testing.T) { @@ -37,7 +38,7 @@ func TestInformationSchema(t *testing.T) { port, err := clickHouseContainer.MappedPort(ctx, "9000/tcp") require.NoError(t, err) - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) prepareConn(t, conn) t.Run("testInformationSchemaAll", func(t 
*testing.T) { testInformationSchemaAll(t, conn) }) diff --git a/runtime/drivers/clickhouse/olap_test.go b/runtime/drivers/clickhouse/olap_test.go index 4b0ac79080b..d78b29a3bbb 100644 --- a/runtime/drivers/clickhouse/olap_test.go +++ b/runtime/drivers/clickhouse/olap_test.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/testruntime" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestClickhouseCrudOps(t *testing.T) { @@ -24,7 +25,7 @@ func TestClickhouseCrudOps(t *testing.T) { } func testClickhouseSingleHost(t *testing.T, dsn string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) defer conn.Close() prepareConn(t, conn) @@ -41,7 +42,7 @@ func testClickhouseSingleHost(t *testing.T, dsn string) { } func testClickhouseCluster(t *testing.T, dsn, cluster string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) defer conn.Close() diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 14f0c988bb8..6d3afb3018b 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) // ErrNotFound indicates the resource wasn't found. @@ -35,13 +36,13 @@ func Register(name string, driver Driver) { // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. 
// If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. -func Open(driver, instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, client, logger) + conn, err := d.Open(instanceID, config, client, data, logger) if err != nil { return nil, err } @@ -56,7 +57,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. - Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. 
HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/drivers_test.go b/runtime/drivers/drivers_test.go index f6bb9fd3d3e..8091cc0aa18 100644 --- a/runtime/drivers/drivers_test.go +++ b/runtime/drivers/drivers_test.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" _ "github.com/rilldata/rill/runtime/drivers/duckdb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -31,7 +32,7 @@ func TestAll(t *testing.T) { for _, withDriver := range matrix { err := withDriver(t, func(driver, instanceID string, cfg map[string]any) { // Open - conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NotNil(t, conn) diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index 2148ea93951..0df7dbf9125 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -14,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" // Load Druid database/sql driver _ "github.com/rilldata/rill/runtime/drivers/druid/druidsqldriver" @@ -101,7 +102,7 @@ type configProperties struct { // Opens a connection to Apache Druid using HTTP API. // Note that the Druid connection string must have the form "http://user:password@host:port/druid/v2/sql". 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("druid driver can't be shared") } diff --git a/runtime/drivers/druid/druid_test.go b/runtime/drivers/druid/druid_test.go index 6e981bf40af..f32c8add77c 100644 --- a/runtime/drivers/druid/druid_test.go +++ b/runtime/drivers/druid/druid_test.go @@ -15,6 +15,7 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) const testTable = "test_data" @@ -108,7 +109,7 @@ func TestDruid(t *testing.T) { require.NoError(t, err) dd := &driver{} - conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), zap.NewNop()) + conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/druid/sql_driver_test.go b/runtime/drivers/druid/sql_driver_test.go index 07036e30dac..71f85066360 100644 --- a/runtime/drivers/druid/sql_driver_test.go +++ b/runtime/drivers/druid/sql_driver_test.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/stretchr/testify/require" + "gocloud.dev/blob/memblob" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/pbutil" @@ -19,7 +20,7 @@ import ( */ func Ignore_TestDriver_types(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, 
activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -56,7 +57,7 @@ func Ignore_TestDriver_types(t *testing.T) { func Ignore_TestDriver_array_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -82,7 +83,7 @@ func Ignore_TestDriver_array_type(t *testing.T) { func Ignore_TestDriver_json_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -107,7 +108,7 @@ func Ignore_TestDriver_json_type(t *testing.T) { func Ignore_TestDriver_multiple_rows(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -143,7 +144,7 @@ func Ignore_TestDriver_multiple_rows(t *testing.T) { func Ignore_TestDriver_error(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), 
zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 7cb2822e2c2..1bc7cd7eb10 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -11,6 +11,7 @@ import ( activity "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestConfig(t *testing.T) { @@ -93,7 +94,7 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) config := conn.(*connection).config require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) @@ -110,7 +111,7 @@ func Test_specialCharInPath(t *testing.T) { func TestOverrides(t *testing.T) { cfgMap := map[string]any{"path": "duck.db", "memory_limit_gb": "4", "cpu": "2", "max_memory_gb_override": "2", "threads_override": "10", "external_table_storage": false} - handle, err := Driver{}.Open("default", cfgMap, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", cfgMap, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 142bb64a3da..bcdb675cc6c 
100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -30,6 +30,7 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" + "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -135,7 +136,7 @@ type Driver struct { name string } -func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("duckdb driver can't be shared") } diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 64f251ae92d..8149351aa71 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestOpenDrop(t *testing.T) { @@ -19,7 +20,7 @@ func TestOpenDrop(t *testing.T) { walpath := path + ".wal" dsn := path - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -43,7 +44,7 @@ func TestNoFatalErr(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), 
zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -105,7 +106,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 352d46e3673..0effa6b6ca2 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -14,20 +14,21 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") require.NoError(t, normalConn.Migrate(context.Background())) dbPath = filepath.Join(temp, "default", "view.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), 
zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -100,7 +101,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -145,7 +146,7 @@ func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -174,7 +175,7 @@ func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -203,7 +204,7 @@ func Test_connection_RenameTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", 
map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -229,7 +230,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "default", "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -258,7 +259,7 @@ func Test_connection_AddTableColumn(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -287,7 +288,7 @@ func Test_connection_AddTableColumn(t *testing.T) { } func Test_connection_RenameToExistingTableOld(t *testing.T) { - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) 
c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -316,7 +317,7 @@ func Test_connection_CastEnum(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -361,7 +362,7 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") @@ -398,7 +399,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on cloud dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -421,7 +422,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on local dbPath = filepath.Join(temp, "local.db") 
- handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) c = handle.(*connection) require.NoError(t, c.Migrate(context.Background())) diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 1678a762c71..ee2ba123425 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" "golang.org/x/sync/errgroup" ) @@ -212,7 +213,7 @@ func TestClose(t *testing.T) { } func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -248,11 +249,11 @@ func Test_safeSQLString(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NoError(t, conn.Close()) - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": 
false}, activity.NewNoopClient(), zap.NewNop()) + conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 3962c46cfef..68997ad4436 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -10,11 +10,12 @@ import ( activity "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestDuckDBToDuckDBTransfer(t *testing.T) { tempDir := t.TempDir() - conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -31,7 +32,7 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { require.NoError(t, err) require.NoError(t, conn.Close()) - to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), zap.NewNop()) + to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ = to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 7971d379508..b88b05e23e3 100644 --- 
a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" "fmt" "time" @@ -102,12 +103,12 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { _, err := db.ExecContext(ctx, mysqlInitStmt) require.NoError(t, err) - handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) + handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index a2d148ae4d1..279abf58eff 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" // Load postgres driver _ "github.com/jackc/pgx/v5/stdlib" @@ -67,12 +68,12 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), zap.NewNop()) + handle, err := drivers.Open("postgres", "default", 
map[string]any{"database_url": dbURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index 4aac3f3810f..b532213339e 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" _ "modernc.org/sqlite" ) @@ -29,7 +30,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_test.go b/runtime/drivers/duckdb/transporter_test.go index 82103713686..cfe61619bf7 100644 --- a/runtime/drivers/duckdb/transporter_test.go +++ b/runtime/drivers/duckdb/transporter_test.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) type mockObjectStore struct { @@ -592,7 +593,7 @@ func TestIterativeJSONIngestionWithVariableSchema(t *testing.T) { } func runOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err 
:= drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, canServe := conn.AsOLAP("") require.True(t, canServe) diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index 8611ef7f59e..929d92fd5f1 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" + "gocloud.dev/blob" "gopkg.in/yaml.v3" ) @@ -60,7 +61,7 @@ type rillYAML struct { IgnorePaths []string `yaml:"ignore_paths"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("file driver can't be shared") } diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index 21778ab21fd..4fd2e05597b 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/gcputil" "github.com/rilldata/rill/runtime/pkg/globutil" "go.uber.org/zap" + "gocloud.dev/blob" "gocloud.dev/blob/gcsblob" "gocloud.dev/gcp" ) @@ -75,7 +76,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, 
errors.New("gcs driver can't be shared") } diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index 9a559c38f3f..febdf4b117e 100644 --- a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -46,7 +47,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("https driver can't be shared") } diff --git a/runtime/drivers/mock/object_store/object_store.go b/runtime/drivers/mock/object_store/object_store.go index c53fb50b7fb..107add7df59 100644 --- a/runtime/drivers/mock/object_store/object_store.go +++ b/runtime/drivers/mock/object_store/object_store.go @@ -38,7 +38,7 @@ func (driver) Spec() drivers.Spec { } // Open implements drivers.Driver. 
-func (driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { cfg := &configProperties{} err := mapstructure.WeakDecode(config, cfg) if err != nil { diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index deb89a5ba32..f33b7e9026e 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -57,7 +58,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("mysql driver can't be shared") } diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 038e2f31512..02a4f19ce10 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -13,6 +13,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -94,7 +95,7 @@ type configProperties struct { } // Open a connection to Apache Pinot using HTTP API. 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("pinot driver can't be shared") } diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 3ba9e0fbca8..641d7d15fbe 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -55,7 +56,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("postgres driver can't be shared") } diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index 42fd22e822f..e2e130c3976 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -109,7 +110,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, 
errors.New("redshift driver can't be shared") } diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index b401d3b801f..141e842845f 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -97,7 +98,7 @@ type ConfigProperties struct { } // Open implements drivers.Driver -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("s3 driver can't be shared") } diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index 1bc0f316a50..f85be674622 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" ) func init() { @@ -128,7 +129,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("salesforce driver can't be shared") } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 848d303999f..3356b7bacf8 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -9,6 +9,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" 
"go.uber.org/zap" + "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -35,7 +36,7 @@ func (d driver) Spec() drivers.Spec { return spec } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("slack driver can't be shared") } diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index 0a8b00a0a63..8a3f65759d0 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" // Load database/sql driver _ "github.com/snowflakedb/gosnowflake" @@ -66,7 +67,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("snowflake driver can't be shared") } diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 1beb40c484b..79c9de4a1d9 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" + "gocloud.dev/blob" // Load sqlite driver _ "modernc.org/sqlite" @@ -22,7 +23,7 @@ func init() { type driver struct{} -func (d driver) Open(_ string, config map[string]any, client *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) 
Open(_ string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { dsn, ok := config["dsn"].(string) if !ok { return nil, fmt.Errorf("require dsn to open sqlite connection") diff --git a/runtime/registry.go b/runtime/registry.go index 8feb5422796..4dd19e593f1 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -21,6 +21,7 @@ import ( "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "go.uber.org/zap/zapcore" + "gocloud.dev/blob" ) // GlobalProjectParserName is the name of the instance-global project parser resource that is created for each new instance. @@ -127,6 +128,16 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { return nil } +// DataBucket returns a prefixed bucket for the given instance. +// This bucket is used for storing data that is expected to be persisted across resets. +func (r *Runtime) DataBucket(instanceID string, elem ...string) *blob.Bucket { + b := blob.PrefixedBucket(r.dataBucket, instanceID) + for _, e := range elem { + b = blob.PrefixedBucket(b, e) + } + return b +} + // DataDir returns the path to a persistent data directory for the given instance. // Storage usage in the returned directory will be reported in the instance's heartbeat events. 
func (r *Runtime) DataDir(instanceID string, elem ...string) string { diff --git a/runtime/runtime.go b/runtime/runtime.go index 80b1356af26..23ef294412e 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" + "gocloud.dev/blob" ) var tracer = otel.Tracer("github.com/rilldata/rill/runtime") @@ -41,9 +42,10 @@ type Runtime struct { connCache conncache.Cache queryCache *queryCache securityEngine *securityEngine + dataBucket *blob.Bucket } -func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { +func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client, dataBucket *blob.Bucket) (*Runtime, error) { if emailClient == nil { emailClient = email.New(email.NewNoopSender()) } @@ -55,6 +57,7 @@ func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Cl activity: ac, queryCache: newQueryCache(opts.QueryCacheSizeBytes), securityEngine: newSecurityEngine(opts.SecurityEngineCacheSize, logger), + dataBucket: dataBucket, } rt.connCache = rt.newConnectionCache() diff --git a/runtime/server/queries_test.go b/runtime/server/queries_test.go index a3a0591b292..f403bf6b3d5 100644 --- a/runtime/server/queries_test.go +++ b/runtime/server/queries_test.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestServer_InsertLimit_SELECT(t *testing.T) { @@ -132,7 +133,7 @@ func TestServer_UpdateLimit_UNION(t *testing.T) { } func prepareOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": 
":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") require.True(t, ok) diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index 0ecfe111361..07897c5b0d2 100644 --- a/runtime/testruntime/testruntime.go +++ b/runtime/testruntime/testruntime.go @@ -19,6 +19,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/fileblob" // Load database drivers for testing. _ "github.com/rilldata/rill/runtime/drivers/admin" @@ -73,7 +74,9 @@ func New(t TestingT) *runtime.Runtime { require.NoError(t, err) } - rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender())) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender()), bkt) require.NoError(t, err) t.Cleanup(func() { rt.Close() }) From 532525240eeeddd133117a8e47006dc9d929ff9f Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:20:19 +0530 Subject: [PATCH 27/64] test fix --- runtime/registry_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/registry_test.go b/runtime/registry_test.go index 01189f06229..cc94b22a9ba 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -16,6 +16,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/memblob" ) func TestRuntime_EditInstance(t *testing.T) { @@ -527,7 +528,7 @@ func newTestRuntime(t *testing.T) *Runtime { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := New(context.Background(), opts, zap.NewNop(), 
activity.NewNoopClient(), email.New(email.NewNoopSender())) + rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender()), memblob.OpenBucket(nil)) t.Cleanup(func() { rt.Close() }) From 953247d626301b79284ae62bf179f98fa76a40c9 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:41:38 +0530 Subject: [PATCH 28/64] also prefix with driver --- cli/cmd/runtime/start.go | 6 +-- runtime/connection_cache.go | 4 +- runtime/drivers/duckdb/olap_crud_test.go | 49 ++++++++++++++++++------ 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index d98f7299aa0..f32e20bd3c5 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -88,7 +88,7 @@ type Config struct { // DataDir stores data for all instances like duckdb file, temporary downloaded file etc. // The data for each instance is stored in a child directory named instance_id DataDir string `split_words:"true"` - // DataBucket is the name of the GCS bucket where DuckDB backups are stored + // DataBucket is a common GCS bucket to store data for all instances. The data is expected to be persisted across resets. 
DataBucket string `split_words:"true"` DataBucketCredentialsJSON string `split_words:"true"` // Sink type of activity client: noop (or empty string), kafka @@ -207,12 +207,12 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { // Init dataBucket client, err := newClient(ctx, conf.DataBucketCredentialsJSON) if err != nil { - logger.Fatal("error: could not create GCP client", zap.Error(err)) + logger.Fatal("could not create GCP client", zap.Error(err)) } bucket, err := gcsblob.OpenBucket(ctx, client, conf.DataBucket, nil) if err != nil { - logger.Fatal("failed to open bucket %q, %w", zap.String("bucket", conf.DataBucket), zap.Error(err)) + logger.Fatal("failed to open bucket %q: %w", zap.String("bucket", conf.DataBucket), zap.Error(err)) } // Init runtime diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 3bec04da536..bb1878340a3 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -111,9 +111,9 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig activityClient = activityClient.With(activityDims...) 
} - dataBucket = r.DataBucket(cfg.instanceID) + dataBucket = r.DataBucket(cfg.instanceID, cfg.driver) } else { - dataBucket = r.DataBucket("__global__") + dataBucket = r.DataBucket("__shared__", cfg.driver) } handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, dataBucket, logger) diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 0effa6b6ca2..d8bf19f6090 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -14,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" + "gocloud.dev/blob/fileblob" "gocloud.dev/blob/memblob" ) @@ -21,14 +22,18 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") require.NoError(t, normalConn.Migrate(context.Background())) dbPath = filepath.Join(temp, "default", "view.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err = fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) viewConnection := 
handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -101,7 +106,9 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -146,7 +153,9 @@ func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -175,7 +184,9 @@ func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, 
c.Migrate(context.Background())) @@ -204,7 +215,9 @@ func Test_connection_RenameTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -230,7 +243,9 @@ func Test_connection_RenameToExistingTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "default", "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -259,7 +274,9 @@ func Test_connection_AddTableColumn(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -288,7 +305,9 @@ 
func Test_connection_AddTableColumn(t *testing.T) { } func Test_connection_RenameToExistingTableOld(t *testing.T) { - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -317,7 +336,9 @@ func Test_connection_CastEnum(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -362,7 +383,9 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") @@ -399,7 
+422,9 @@ func Test_connection_ChangingOrder(t *testing.T) { // on cloud dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + bkt, err := fileblob.OpenBucket(t.TempDir(), nil) + require.NoError(t, err) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), bkt, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) From b10cfabcf7236bead6344db16b786167fed2d1e4 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:48:34 +0530 Subject: [PATCH 29/64] merge with main --- runtime/compilers/rillv1/parser_test.go | 44 ++++++++++++++++++++----- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/runtime/compilers/rillv1/parser_test.go b/runtime/compilers/rillv1/parser_test.go index 16d7bad7aa4..540b3233d44 100644 --- a/runtime/compilers/rillv1/parser_test.go +++ b/runtime/compilers/rillv1/parser_test.go @@ -2,6 +2,7 @@ package rillv1 import ( "context" + "encoding/json" "fmt" "maps" "reflect" @@ -156,6 +157,9 @@ dimensions: measures: - name: b expression: count(*) + format_d3: "0,0" + format_d3_locale: + currency: ["£", ""] first_day_of_week: 7 first_month_of_year: 3 `, @@ -277,7 +281,13 @@ schema: default {Name: "a", Column: "a"}, }, Measures: []*runtimev1.MetricsViewSpec_MeasureV2{ - {Name: "b", Expression: "count(*)", Type: runtimev1.MetricsViewSpec_MEASURE_TYPE_SIMPLE}, + { + Name: "b", + Expression: "count(*)", + Type: runtimev1.MetricsViewSpec_MEASURE_TYPE_SIMPLE, + FormatD3: "0,0", + FormatD3Locale: must(structpb.NewStruct(map[string]any{"currency": []any{"£", ""}})), + }, }, FirstDayOfWeek: 7, FirstMonthOfYear: 3, @@ -305,7 +315,10 @@ schema: 
default }, }, DefaultPreset: &runtimev1.ExplorePreset{ - TimeRange: "P4W", + DimensionsSelector: &runtimev1.FieldSelector{Selector: &runtimev1.FieldSelector_All{All: true}}, + MeasuresSelector: &runtimev1.FieldSelector{Selector: &runtimev1.FieldSelector_All{All: true}}, + TimeRange: "P4W", + ComparisonMode: runtimev1.ExploreComparisonMode_EXPLORE_COMPARISON_MODE_NONE, }, }, }, @@ -1587,7 +1600,7 @@ theme: } func TestComponentsAndCanvas(t *testing.T) { - vegaLiteSpec := ` + vegaLiteSpec := normalizeJSON(t, ` { "$schema": "https://vega.github.io/schema/vega-lite/v5.json", "description": "A simple bar chart with embedded data.", @@ -1599,7 +1612,7 @@ func TestComponentsAndCanvas(t *testing.T) { "x": {"field": "time", "type": "nominal", "axis": {"labelAngle": 0}}, "y": {"field": "total_sales", "type": "quantitative"} } - }` + }`) ctx := context.Background() repo := makeRepo(t, map[string]string{ `rill.yaml`: ``, @@ -1609,7 +1622,8 @@ data: api: MetricsViewAggregation args: metrics_view: foo -vega_lite: |%s +vega_lite: > + %s `, vegaLiteSpec), `components/c2.yaml`: fmt.Sprintf(` type: component @@ -1617,7 +1631,8 @@ data: api: MetricsViewAggregation args: metrics_view: bar -vega_lite: |%s +vega_lite: > + %s `, vegaLiteSpec), `components/c3.yaml`: ` type: component @@ -1698,7 +1713,7 @@ items: Items: []*runtimev1.CanvasItem{ {Component: "c1"}, {Component: "c2", Width: asPtr(uint32(1)), Height: asPtr(uint32(2))}, - {Component: "d1--component-2"}, + {Component: "d1--component-2", DefinedInCanvas: true}, }, }, }, @@ -2021,10 +2036,15 @@ func requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc require.Equal(t, want.SourceSpec, got.SourceSpec, "for resource %q", want.Name) require.Equal(t, want.ModelSpec, got.ModelSpec, "for resource %q", want.Name) require.Equal(t, want.MetricsViewSpec, got.MetricsViewSpec, "for resource %q", want.Name) + require.Equal(t, want.ExploreSpec, got.ExploreSpec, "for resource %q", want.Name) require.Equal(t, 
want.MigrationSpec, got.MigrationSpec, "for resource %q", want.Name) - require.Equal(t, want.ThemeSpec, got.ThemeSpec, "for resource %q", want.Name) require.True(t, reflect.DeepEqual(want.ReportSpec, got.ReportSpec), "for resource %q", want.Name) require.True(t, reflect.DeepEqual(want.AlertSpec, got.AlertSpec), "for resource %q", want.Name) + require.Equal(t, want.ThemeSpec, got.ThemeSpec, "for resource %q", want.Name) + require.Equal(t, want.ComponentSpec, got.ComponentSpec, "for resource %q", want.Name) + require.Equal(t, want.CanvasSpec, got.CanvasSpec, "for resource %q", want.Name) + require.Equal(t, want.APISpec, got.APISpec, "for resource %q", want.Name) + require.Equal(t, want.ConnectorSpec, got.ConnectorSpec, "for resource %q", want.Name) delete(gotResources, got.Name) found = true @@ -2066,3 +2086,11 @@ func deleteRepo(t testing.TB, repo drivers.RepoStore, files ...string) { func asPtr[T any](val T) *T { return &val } + +func normalizeJSON(t *testing.T, s string) string { + var v interface{} + require.NoError(t, json.Unmarshal([]byte(s), &v)) + b, err := json.Marshal(v) + require.NoError(t, err) + return string(b) +} From 5585e3b95e4d4322ce58373f3caaca38b11132d0 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:46:29 +0530 Subject: [PATCH 30/64] close bucket plus directory prefix --- runtime/registry.go | 1 + runtime/runtime.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/registry.go b/runtime/registry.go index 4dd19e593f1..b16799287c7 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -135,6 +135,7 @@ func (r *Runtime) DataBucket(instanceID string, elem ...string) *blob.Bucket { for _, e := range elem { b = blob.PrefixedBucket(b, e) } + b = blob.PrefixedBucket(b, "/") return b } diff --git a/runtime/runtime.go b/runtime/runtime.go index 23ef294412e..40fe427332a 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -91,7 +91,8 @@ func (r 
*Runtime) Close() error { r.registryCache.close(ctx) err1 := r.queryCache.close() err2 := r.connCache.Close(ctx) // Also closes metastore // TODO: Propagate ctx cancellation - return errors.Join(err1, err2) + err3 := r.dataBucket.Close() + return errors.Join(err1, err2, err3) } func (r *Runtime) ResolveSecurity(instanceID string, claims *SecurityClaims, res *runtimev1.Resource) (*ResolvedSecurity, error) { From d3bfbb65ada6da54781047e725277bd02cb00de0 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 26 Nov 2024 14:34:02 +0530 Subject: [PATCH 31/64] bucket is closed when prefixed so need to open new data bucket all times --- cli/cmd/runtime/install_duckdb_extensions.go | 3 +- cli/cmd/runtime/start.go | 30 ++--------- cli/pkg/cmdutil/project.go | 3 +- cli/pkg/local/app.go | 7 +-- runtime/compilers/rillv1/parser_test.go | 3 +- runtime/connection_cache.go | 8 +-- runtime/drivers/admin/admin.go | 3 +- runtime/drivers/athena/athena.go | 3 +- runtime/drivers/azure/azure.go | 3 +- runtime/drivers/bigquery/bigquery.go | 3 +- runtime/drivers/clickhouse/clickhouse.go | 3 +- .../clickhouse/information_schema_test.go | 3 +- runtime/drivers/clickhouse/olap_test.go | 5 +- runtime/drivers/drivers.go | 10 ++-- runtime/drivers/drivers_test.go | 3 +- runtime/drivers/druid/druid.go | 3 +- runtime/drivers/druid/druid_test.go | 3 +- runtime/drivers/druid/sql_driver_test.go | 11 ++-- runtime/drivers/duckdb/config_test.go | 5 +- runtime/drivers/duckdb/duckdb.go | 3 +- runtime/drivers/duckdb/duckdb_test.go | 7 ++- runtime/drivers/duckdb/olap_crud_test.go | 52 +++++-------------- runtime/drivers/duckdb/olap_test.go | 7 ++- .../transporter_duckDB_to_duckDB_test.go | 5 +- .../transporter_mysql_to_duckDB_test.go | 5 +- .../transporter_postgres_to_duckDB_test.go | 5 +- .../transporter_sqlite_to_duckDB_test.go | 3 +- runtime/drivers/duckdb/transporter_test.go | 3 +- runtime/drivers/file/file.go | 3 +- runtime/drivers/gcs/gcs.go | 3 +- 
runtime/drivers/https/https.go | 3 +- .../drivers/mock/object_store/object_store.go | 2 +- runtime/drivers/mysql/mysql.go | 3 +- runtime/drivers/pinot/pinot.go | 3 +- runtime/drivers/postgres/postgres.go | 3 +- runtime/drivers/redshift/redshift.go | 3 +- runtime/drivers/s3/s3.go | 3 +- runtime/drivers/salesforce/salesforce.go | 3 +- runtime/drivers/slack/slack.go | 3 +- runtime/drivers/snowflake/snowflake.go | 3 +- runtime/drivers/sqlite/sqlite.go | 3 +- runtime/registry.go | 35 +++++++++++-- runtime/registry_test.go | 3 +- runtime/runtime.go | 10 ++-- runtime/server/queries_test.go | 3 +- runtime/testruntime/testruntime.go | 5 +- 46 files changed, 112 insertions(+), 184 deletions(-) diff --git a/cli/cmd/runtime/install_duckdb_extensions.go b/cli/cmd/runtime/install_duckdb_extensions.go index 9834f032dfc..226476e6864 100644 --- a/cli/cmd/runtime/install_duckdb_extensions.go +++ b/cli/cmd/runtime/install_duckdb_extensions.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/spf13/cobra" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) // InstallDuckDBExtensionsCmd adds a CLI command that forces DuckDB to install all required extensions. 
@@ -18,7 +17,7 @@ func InstallDuckDBExtensionsCmd(ch *cmdutil.Helper) *cobra.Command { Use: "install-duckdb-extensions", RunE: func(cmd *cobra.Command, args []string) error { cfg := map[string]any{"dsn": ":memory:"} // In-memory - h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) if err != nil { return fmt.Errorf("failed to open ephemeral duckdb: %w", err) } diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index f32e20bd3c5..9b4638451b4 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -23,9 +23,6 @@ import ( "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" - "gocloud.dev/blob/gcsblob" - "gocloud.dev/gcp" - "golang.org/x/oauth2/google" "golang.org/x/sync/errgroup" // Load connectors and reconcilers for runtime @@ -88,7 +85,7 @@ type Config struct { // DataDir stores data for all instances like duckdb file, temporary downloaded file etc. // The data for each instance is stored in a child directory named instance_id DataDir string `split_words:"true"` - // DataBucket is a common GCS bucket to store data for all instances. The data is expected to be persisted across resets. + // DataBucket is a common GCS bucket to store data for all instances. This data is expected to be persisted across resets. 
DataBucket string `split_words:"true"` DataBucketCredentialsJSON string `split_words:"true"` // Sink type of activity client: noop (or empty string), kafka @@ -203,18 +200,6 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { // Create ctx that cancels on termination signals ctx := graceful.WithCancelOnTerminate(context.Background()) - - // Init dataBucket - client, err := newClient(ctx, conf.DataBucketCredentialsJSON) - if err != nil { - logger.Fatal("could not create GCP client", zap.Error(err)) - } - - bucket, err := gcsblob.OpenBucket(ctx, client, conf.DataBucket, nil) - if err != nil { - logger.Fatal("failed to open bucket %q: %w", zap.String("bucket", conf.DataBucket), zap.Error(err)) - } - // Init runtime opts := &runtime.Options{ ConnectionCacheSize: conf.ConnectionCacheSize, @@ -225,6 +210,8 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, AllowHostAccess: conf.AllowHostAccess, DataDir: conf.DataDir, + DataBucket: conf.DataBucket, + DataBucketCredentialsJSON: conf.DataBucketCredentialsJSON, SystemConnectors: []*runtimev1.Connector{ { Type: conf.MetastoreDriver, @@ -233,7 +220,7 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { }, }, } - rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient, bucket) + rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient) if err != nil { logger.Fatal("error: could not create runtime", zap.Error(err)) } @@ -284,12 +271,3 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { } return startCmd } - -func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) { - creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") - if err != nil { - return nil, fmt.Errorf("failed to create credentials: %w", err) - } - // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
- return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) -} diff --git a/cli/pkg/cmdutil/project.go b/cli/pkg/cmdutil/project.go index e80829d6b60..703acac170f 100644 --- a/cli/pkg/cmdutil/project.go +++ b/cli/pkg/cmdutil/project.go @@ -6,7 +6,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob/memblob" // Ensure file driver is loaded _ "github.com/rilldata/rill/runtime/drivers/file" @@ -15,7 +14,7 @@ import ( // RepoForProjectPath creates an ad-hoc drivers.RepoStore for a local project file path func RepoForProjectPath(path string) (drivers.RepoStore, string, error) { instanceID := "default" - repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) if err != nil { return nil, "", err } diff --git a/cli/pkg/local/app.go b/cli/pkg/local/app.go index 9e26813ae4e..6aee9ab3d96 100644 --- a/cli/pkg/local/app.go +++ b/cli/pkg/local/app.go @@ -31,7 +31,6 @@ import ( "go.uber.org/zap" "go.uber.org/zap/buffer" "go.uber.org/zap/zapcore" - "gocloud.dev/blob/fileblob" "golang.org/x/sync/errgroup" "gopkg.in/natefinch/lumberjack.v2" ) @@ -157,10 +156,6 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { // if err != nil { // return nil, fmt.Errorf("failed to create email sender: %w", err) // } - bkt, err := fileblob.OpenBucket(filepath.Join(dbDirPath, "remote"), &fileblob.Options{CreateDir: true}) - if err != nil { - return nil, err - } rtOpts := &runtime.Options{ ConnectionCacheSize: 100, MetastoreConnector: "metastore", @@ -172,7 +167,7 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err 
:= runtime.New(ctx, rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender), bkt) + rt, err := runtime.New(ctx, rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender)) if err != nil { return nil, err } diff --git a/runtime/compilers/rillv1/parser_test.go b/runtime/compilers/rillv1/parser_test.go index 540b3233d44..f332fbb33d5 100644 --- a/runtime/compilers/rillv1/parser_test.go +++ b/runtime/compilers/rillv1/parser_test.go @@ -15,7 +15,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" "google.golang.org/protobuf/types/known/structpb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -2058,7 +2057,7 @@ func requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc func makeRepo(t testing.TB, files map[string]string) drivers.RepoStore { root := t.TempDir() - handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) repo, ok := handle.AsRepoStore("") diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index bb1878340a3..5f0f39f0eab 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" - "gocloud.dev/blob" "golang.org/x/exp/maps" ) @@ -94,7 +93,6 @@ func (r *Runtime) evictInstanceConnections(instanceID string) { func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig) (drivers.Handle, error) { logger := r.Logger activityClient := r.activity - var dataBucket *blob.Bucket if cfg.instanceID != "" { // Not shared across multiple instances inst, err := r.Instance(ctx, cfg.instanceID) if err != nil { @@ -110,13 
+108,9 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig if activityClient != nil { activityClient = activityClient.With(activityDims...) } - - dataBucket = r.DataBucket(cfg.instanceID, cfg.driver) - } else { - dataBucket = r.DataBucket("__shared__", cfg.driver) } - handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, dataBucket, logger) + handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, r.DataBucket, logger) if err == nil && ctx.Err() != nil { err = fmt.Errorf("timed out while opening driver %q", cfg.driver) } diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index 9ac4dc46e66..f420284f514 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -23,7 +23,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/ctxsync" "go.opentelemetry.io/otel" "go.uber.org/zap" - "gocloud.dev/blob" "golang.org/x/sync/singleflight" "gopkg.in/yaml.v3" ) @@ -64,7 +63,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("admin driver can't be shared") } diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index 50bbd73636d..1aa94ac5221 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -86,7 +85,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, 
config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("athena driver can't be shared") } diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index 0282b0cbc63..d3104019cb7 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -9,7 +9,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -83,7 +82,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("azure driver can't be shared") } diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index 86644749f10..5467784485a 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -11,7 +11,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/gcputil" "go.uber.org/zap" - "gocloud.dev/blob" "google.golang.org/api/option" ) @@ -78,7 +77,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) 
(drivers.Handle, error) { if instanceID == "" { return nil, errors.New("bigquery driver can't be shared") } diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index cd0db9cd045..0ab4b6b77ae 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -16,7 +16,6 @@ import ( "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" "go.uber.org/zap" - "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -122,7 +121,7 @@ type configProperties struct { // Open connects to Clickhouse using std API. // Connection string format : https://github.com/ClickHouse/clickhouse-go?tab=readme-ov-file#dsn -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("clickhouse driver can't be shared") } diff --git a/runtime/drivers/clickhouse/information_schema_test.go b/runtime/drivers/clickhouse/information_schema_test.go index 62b40198917..285f2ffc970 100644 --- a/runtime/drivers/clickhouse/information_schema_test.go +++ b/runtime/drivers/clickhouse/information_schema_test.go @@ -12,7 +12,6 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/clickhouse" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestInformationSchema(t *testing.T) { @@ -38,7 +37,7 @@ func TestInformationSchema(t *testing.T) { port, err := clickHouseContainer.MappedPort(ctx, "9000/tcp") require.NoError(t, err) - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, 
err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) prepareConn(t, conn) t.Run("testInformationSchemaAll", func(t *testing.T) { testInformationSchemaAll(t, conn) }) diff --git a/runtime/drivers/clickhouse/olap_test.go b/runtime/drivers/clickhouse/olap_test.go index d78b29a3bbb..4c65432270f 100644 --- a/runtime/drivers/clickhouse/olap_test.go +++ b/runtime/drivers/clickhouse/olap_test.go @@ -10,7 +10,6 @@ import ( "github.com/rilldata/rill/runtime/testruntime" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestClickhouseCrudOps(t *testing.T) { @@ -25,7 +24,7 @@ func TestClickhouseCrudOps(t *testing.T) { } func testClickhouseSingleHost(t *testing.T, dsn string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) defer conn.Close() prepareConn(t, conn) @@ -42,7 +41,7 @@ func testClickhouseSingleHost(t *testing.T, dsn string) { } func testClickhouseCluster(t *testing.T, dsn, cluster string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) defer conn.Close() diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 6d3afb3018b..4e5b9da5ee7 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -33,16 +33,20 @@ func Register(name string, 
driver Driver) { Drivers[name] = driver } +type OpenDataBucketFn func(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) + +var OpenNilDataBucket OpenDataBucketFn = func(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) { return nil, nil } + // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. // If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. -func Open(driver, instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, client *activity.Client, fn OpenDataBucketFn, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, client, data, logger) + conn, err := d.Open(instanceID, config, client, fn, logger) if err != nil { return nil, err } @@ -57,7 +61,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. - Open(instanceID string, config map[string]any, client *activity.Client, data *blob.Bucket, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, client *activity.Client, fn OpenDataBucketFn, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. 
HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/drivers_test.go b/runtime/drivers/drivers_test.go index 8091cc0aa18..f3bb177be5b 100644 --- a/runtime/drivers/drivers_test.go +++ b/runtime/drivers/drivers_test.go @@ -9,7 +9,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" _ "github.com/rilldata/rill/runtime/drivers/duckdb" _ "github.com/rilldata/rill/runtime/drivers/file" @@ -32,7 +31,7 @@ func TestAll(t *testing.T) { for _, withDriver := range matrix { err := withDriver(t, func(driver, instanceID string, cfg map[string]any) { // Open - conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) require.NotNil(t, conn) diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index 0df7dbf9125..c94e11c8d56 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -14,7 +14,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" - "gocloud.dev/blob" // Load Druid database/sql driver _ "github.com/rilldata/rill/runtime/drivers/druid/druidsqldriver" @@ -102,7 +101,7 @@ type configProperties struct { // Opens a connection to Apache Druid using HTTP API. // Note that the Druid connection string must have the form "http://user:password@host:port/druid/v2/sql". 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("druid driver can't be shared") } diff --git a/runtime/drivers/druid/druid_test.go b/runtime/drivers/druid/druid_test.go index f32c8add77c..45fecf51cd5 100644 --- a/runtime/drivers/druid/druid_test.go +++ b/runtime/drivers/druid/druid_test.go @@ -15,7 +15,6 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) const testTable = "test_data" @@ -109,7 +108,7 @@ func TestDruid(t *testing.T) { require.NoError(t, err) dd := &driver{} - conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/druid/sql_driver_test.go b/runtime/drivers/druid/sql_driver_test.go index 71f85066360..2bae2c25c4a 100644 --- a/runtime/drivers/druid/sql_driver_test.go +++ b/runtime/drivers/druid/sql_driver_test.go @@ -7,7 +7,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/stretchr/testify/require" - "gocloud.dev/blob/memblob" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/pbutil" @@ -20,7 +19,7 @@ import ( */ func Ignore_TestDriver_types(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := driver.Open("default", 
map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -57,7 +56,7 @@ func Ignore_TestDriver_types(t *testing.T) { func Ignore_TestDriver_array_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -83,7 +82,7 @@ func Ignore_TestDriver_array_type(t *testing.T) { func Ignore_TestDriver_json_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -108,7 +107,7 @@ func Ignore_TestDriver_json_type(t *testing.T) { func Ignore_TestDriver_multiple_rows(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -144,7 +143,7 @@ func Ignore_TestDriver_multiple_rows(t *testing.T) { func Ignore_TestDriver_error(t *testing.T) { driver := 
&driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 1bc7cd7eb10..20f719bd71d 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -11,7 +11,6 @@ import ( activity "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestConfig(t *testing.T) { @@ -94,7 +93,7 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) config := conn.(*connection).config require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) @@ -111,7 +110,7 @@ func Test_specialCharInPath(t *testing.T) { func TestOverrides(t *testing.T) { cfgMap := map[string]any{"path": "duck.db", "memory_limit_gb": "4", "cpu": "2", "max_memory_gb_override": "2", "threads_override": "10", "external_table_storage": false} - handle, err := Driver{}.Open("default", cfgMap, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := Driver{}.Open("default", cfgMap, 
activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index bcdb675cc6c..8f29927968c 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -30,7 +30,6 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" - "gocloud.dev/blob" "golang.org/x/sync/semaphore" ) @@ -136,7 +135,7 @@ type Driver struct { name string } -func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("duckdb driver can't be shared") } diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 8149351aa71..1bbb09b5c44 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -12,7 +12,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestOpenDrop(t *testing.T) { @@ -20,7 +19,7 @@ func TestOpenDrop(t *testing.T) { walpath := path + ".wal" dsn := path - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -44,7 +43,7 @@ func TestNoFatalErr(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", 
map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -106,7 +105,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index d8bf19f6090..a1079348a31 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -14,26 +14,20 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/fileblob" - "gocloud.dev/blob/memblob" ) func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) normalConn := 
handle.(*connection) normalConn.AsOLAP("default") require.NoError(t, normalConn.Migrate(context.Background())) dbPath = filepath.Join(temp, "default", "view.db") - bkt, err = fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -106,9 +100,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -153,9 +145,7 @@ func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -184,9 +174,7 @@ 
func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -215,9 +203,7 @@ func Test_connection_RenameTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -243,9 +229,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "default", "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -274,9 +258,7 @@ func Test_connection_AddTableColumn(t *testing.T) { 
os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -305,9 +287,7 @@ func Test_connection_AddTableColumn(t *testing.T) { } func Test_connection_RenameToExistingTableOld(t *testing.T) { - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -336,9 +316,7 @@ func Test_connection_CastEnum(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -383,9 +361,7 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, 
os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") @@ -422,9 +398,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on cloud dbPath := filepath.Join(temp, "view.db") - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), bkt, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -447,7 +421,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on local dbPath = filepath.Join(temp, "local.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) c = handle.(*connection) require.NoError(t, c.Migrate(context.Background())) diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 
ee2ba123425..5067a17ca9d 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -15,7 +15,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" "golang.org/x/sync/errgroup" ) @@ -213,7 +212,7 @@ func TestClose(t *testing.T) { } func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -249,11 +248,11 @@ func Test_safeSQLString(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) require.NoError(t, conn.Close()) - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 68997ad4436..1b094a3583b 100644 --- 
a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -10,12 +10,11 @@ import ( activity "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestDuckDBToDuckDBTransfer(t *testing.T) { tempDir := t.TempDir() - conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -32,7 +31,7 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { require.NoError(t, err) require.NoError(t, conn.Close()) - to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, _ = to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index b88b05e23e3..a77ec2e13b9 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -9,7 +9,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" "fmt" "time" @@ -103,12 +102,12 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { _, err := 
db.ExecContext(ctx, mysqlInitStmt) require.NoError(t, err) - handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index 279abf58eff..e9c11772cef 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -10,7 +10,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" // Load postgres driver _ "github.com/jackc/pgx/v5/stdlib" @@ -68,12 +67,12 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), 
zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index b532213339e..6eab17831aa 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -11,7 +11,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" _ "modernc.org/sqlite" ) @@ -30,7 +29,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_test.go b/runtime/drivers/duckdb/transporter_test.go index cfe61619bf7..329bdeb1138 100644 --- a/runtime/drivers/duckdb/transporter_test.go +++ b/runtime/drivers/duckdb/transporter_test.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) type mockObjectStore struct { @@ -593,7 +592,7 @@ func TestIterativeJSONIngestionWithVariableSchema(t *testing.T) { } func runOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, 
activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, canServe := conn.AsOLAP("") require.True(t, canServe) diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index 929d92fd5f1..8407d1653c3 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" - "gocloud.dev/blob" "gopkg.in/yaml.v3" ) @@ -61,7 +60,7 @@ type rillYAML struct { IgnorePaths []string `yaml:"ignore_paths"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("file driver can't be shared") } diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index 4fd2e05597b..f38680831b6 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/gcputil" "github.com/rilldata/rill/runtime/pkg/globutil" "go.uber.org/zap" - "gocloud.dev/blob" "gocloud.dev/blob/gcsblob" "gocloud.dev/gcp" ) @@ -76,7 +75,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("gcs driver can't be shared") } diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index febdf4b117e..be30ad3b178 100644 --- 
a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -47,7 +46,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("https driver can't be shared") } diff --git a/runtime/drivers/mock/object_store/object_store.go b/runtime/drivers/mock/object_store/object_store.go index 107add7df59..ffda83b54c5 100644 --- a/runtime/drivers/mock/object_store/object_store.go +++ b/runtime/drivers/mock/object_store/object_store.go @@ -38,7 +38,7 @@ func (driver) Spec() drivers.Spec { } // Open implements drivers.Driver. 
-func (driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { cfg := &configProperties{} err := mapstructure.WeakDecode(config, cfg) if err != nil { diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index f33b7e9026e..3cb2ce36061 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -7,7 +7,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -58,7 +57,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("mysql driver can't be shared") } diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 02a4f19ce10..6d9bf2f3e22 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -95,7 +94,7 @@ type configProperties struct { } // Open a connection to Apache Pinot using HTTP API. 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("pinot driver can't be shared") } diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 641d7d15fbe..69471163b72 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -7,7 +7,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -56,7 +55,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("postgres driver can't be shared") } diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index e2e130c3976..ebc82e508e7 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -110,7 +109,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn 
drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("redshift driver can't be shared") } diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index 141e842845f..6dd27d1b881 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -10,7 +10,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -98,7 +97,7 @@ type ConfigProperties struct { } // Open implements drivers.Driver -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("s3 driver can't be shared") } diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index f85be674622..3f4eb52ae6c 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { @@ -129,7 +128,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("salesforce driver can't be shared") } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 3356b7bacf8..81ff63ea8fd 100644 --- a/runtime/drivers/slack/slack.go +++ 
b/runtime/drivers/slack/slack.go @@ -9,7 +9,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" ) var spec = drivers.Spec{ @@ -36,7 +35,7 @@ func (d driver) Spec() drivers.Spec { return spec } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("slack driver can't be shared") } diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index 8a3f65759d0..c0439bf297b 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" // Load database/sql driver _ "github.com/snowflakedb/gosnowflake" @@ -67,7 +66,7 @@ type configProperties struct { TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("snowflake driver can't be shared") } diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 79c9de4a1d9..a626493501b 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -10,7 +10,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "go.uber.org/zap" - "gocloud.dev/blob" // Load sqlite driver _ 
"modernc.org/sqlite" @@ -23,7 +22,7 @@ func init() { type driver struct{} -func (d driver) Open(_ string, config map[string]any, client *activity.Client, _ *blob.Bucket, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(_ string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { dsn, ok := config["dsn"].(string) if !ok { return nil, fmt.Errorf("require dsn to open sqlite connection") diff --git a/runtime/registry.go b/runtime/registry.go index b16799287c7..e09175e0fc7 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -22,6 +22,9 @@ import ( "go.uber.org/zap" "go.uber.org/zap/zapcore" "gocloud.dev/blob" + "gocloud.dev/blob/gcsblob" + "gocloud.dev/gcp" + "golang.org/x/oauth2/google" ) // GlobalProjectParserName is the name of the instance-global project parser resource that is created for each new instance. @@ -130,13 +133,26 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { // DataBucket returns a prefixed bucket for the given instance. // This bucket is used for storing data that is expected to be persisted across resets. 
-func (r *Runtime) DataBucket(instanceID string, elem ...string) *blob.Bucket { - b := blob.PrefixedBucket(r.dataBucket, instanceID) +func (r *Runtime) DataBucket(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) { + if r.opts.DataBucket == "" { + return nil, nil + } + // Init dataBucket + client, err := newClient(ctx, r.opts.DataBucketCredentialsJSON) + if err != nil { + return nil, fmt.Errorf("could not create GCP client: %w", err) + } + + bucket, err := gcsblob.OpenBucket(ctx, client, r.opts.DataBucket, nil) + if err != nil { + return nil, fmt.Errorf("failed to open bucket %q: %w", r.opts.DataBucket, err) + } + prefix := instanceID + "/" for _, e := range elem { - b = blob.PrefixedBucket(b, e) + prefix = prefix + e + "/" } - b = blob.PrefixedBucket(b, "/") - return b + b := blob.PrefixedBucket(bucket, prefix) + return b, nil } // DataDir returns the path to a persistent data directory for the given instance. @@ -608,6 +624,15 @@ func (r *registryCache) updateProjectConfig(iwc *instanceWithController) error { return r.rt.UpdateInstanceWithRillYAML(iwc.ctx, iwc.instanceID, p, false) } +func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) { + creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") + if err != nil { + return nil, fmt.Errorf("failed to create credentials: %w", err) + } + // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
+ return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) +} + func sizeOfDir(path string) int64 { var size int64 _ = fs.WalkDir(os.DirFS(path), ".", func(path string, d fs.DirEntry, err error) error { diff --git a/runtime/registry_test.go b/runtime/registry_test.go index cc94b22a9ba..01189f06229 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -16,7 +16,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestRuntime_EditInstance(t *testing.T) { @@ -528,7 +527,7 @@ func newTestRuntime(t *testing.T) *Runtime { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender()), memblob.OpenBucket(nil)) + rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender())) t.Cleanup(func() { rt.Close() }) diff --git a/runtime/runtime.go b/runtime/runtime.go index 40fe427332a..e16a17efd5b 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -15,7 +15,6 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" - "gocloud.dev/blob" ) var tracer = otel.Tracer("github.com/rilldata/rill/runtime") @@ -30,6 +29,8 @@ type Options struct { ControllerLogBufferSizeBytes int64 AllowHostAccess bool DataDir string + DataBucket string + DataBucketCredentialsJSON string } type Runtime struct { @@ -42,10 +43,9 @@ type Runtime struct { connCache conncache.Cache queryCache *queryCache securityEngine *securityEngine - dataBucket *blob.Bucket } -func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client, dataBucket *blob.Bucket) (*Runtime, error) { +func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client) 
(*Runtime, error) { if emailClient == nil { emailClient = email.New(email.NewNoopSender()) } @@ -57,7 +57,6 @@ func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Cl activity: ac, queryCache: newQueryCache(opts.QueryCacheSizeBytes), securityEngine: newSecurityEngine(opts.SecurityEngineCacheSize, logger), - dataBucket: dataBucket, } rt.connCache = rt.newConnectionCache() @@ -91,8 +90,7 @@ func (r *Runtime) Close() error { r.registryCache.close(ctx) err1 := r.queryCache.close() err2 := r.connCache.Close(ctx) // Also closes metastore // TODO: Propagate ctx cancellation - err3 := r.dataBucket.Close() - return errors.Join(err1, err2, err3) + return errors.Join(err1, err2) } func (r *Runtime) ResolveSecurity(instanceID string, claims *SecurityClaims, res *runtimev1.Resource) (*ResolvedSecurity, error) { diff --git a/runtime/server/queries_test.go b/runtime/server/queries_test.go index f403bf6b3d5..81eada4a7f9 100644 --- a/runtime/server/queries_test.go +++ b/runtime/server/queries_test.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" ) func TestServer_InsertLimit_SELECT(t *testing.T) { @@ -133,7 +132,7 @@ func TestServer_UpdateLimit_UNION(t *testing.T) { } func prepareOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") require.True(t, ok) diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index 07897c5b0d2..0ecfe111361 100644 --- a/runtime/testruntime/testruntime.go +++ 
b/runtime/testruntime/testruntime.go @@ -19,7 +19,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/email" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/fileblob" // Load database drivers for testing. _ "github.com/rilldata/rill/runtime/drivers/admin" @@ -74,9 +73,7 @@ func New(t TestingT) *runtime.Runtime { require.NoError(t, err) } - bkt, err := fileblob.OpenBucket(t.TempDir(), nil) - require.NoError(t, err) - rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender()), bkt) + rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender())) require.NoError(t, err) t.Cleanup(func() { rt.Close() }) From f51a2b1db233542641e167f02f0d9c55fcf8751b Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:13:57 +0530 Subject: [PATCH 32/64] Update runtime/pkg/rduckdb/db.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Benjamin Egelund-Müller --- runtime/pkg/rduckdb/db.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index fceabb4ffd9..3c792785291 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -28,7 +28,7 @@ import ( "golang.org/x/sync/semaphore" ) -var errNotFound = errors.New("not found") +var errNotFound = errors.New("rduckdb: not found") type DB interface { // Close closes the database. 
From 5732e6ec963b3733d0349ff27432e814dbf2e489 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 29 Nov 2024 19:53:40 +0530 Subject: [PATCH 33/64] remove ctx cancellation from catalog --- runtime/pkg/rduckdb/catalog.go | 127 ++++++---------- runtime/pkg/rduckdb/db.go | 261 ++++++++++++++------------------- runtime/pkg/rduckdb/db_test.go | 63 ++++++++ runtime/pkg/rduckdb/remote.go | 72 ++++----- 4 files changed, 250 insertions(+), 273 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 8a681d52eb6..190a88f26f2 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -1,27 +1,9 @@ -/* -Example init logic: -- Sync remote files with the local cache -- Create a catalog -- Traverse the local files and call addTableVersion for table -Example write logic: -- Call addTableVersion after adding a new table version -- Call removeTable when deleting a table -Example read logic: -- Call acquireSnapshot when starting a read -- If it doesn't already exist, create a schema for the snapshot ID with views for all the table version in the snapshot -- Call releaseSnapshot when done reading the snapshot -Example removeFunc logic: -- Detach the version -- Remove the version file -- If there are no files left in it, remove the table folder -*/ package rduckdb import ( - "context" "fmt" - - "golang.org/x/sync/semaphore" + "log/slog" + "sync" ) // Represents one table and its versions currently present in the local cache. @@ -46,34 +28,33 @@ type snapshot struct { // Represents a catalog of available table versions. // It is thread-safe and supports acquiring a snapshot of table versions which will not be mutated or removed for as long as the snapshot is held. 
type catalog struct { - sem *semaphore.Weighted + lock sync.Mutex tables map[string]*table snapshots map[int]*snapshot currentSnapshotID int - removeVersionFunc func(context.Context, string, string) error - removeSnapshotFunc func(context.Context, int) error + removeVersionFunc func(string, string) + removeSnapshotFunc func(int) + + logger *slog.Logger } // newCatalog creates a new catalog. // The removeSnapshotFunc func will be called exactly once for each snapshot ID when it is no longer the current snapshot and is no longer held by any readers. // The removeVersionFunc func will be called exactly once for each table version when it is no longer the current version and is no longer used by any active snapshots. -func newCatalog(removeVersionFunc func(context.Context, string, string) error, removeSnapshotFunc func(context.Context, int) error) *catalog { +func newCatalog(removeVersionFunc func(string, string), removeSnapshotFunc func(int), logger *slog.Logger) *catalog { return &catalog{ - sem: semaphore.NewWeighted(1), tables: make(map[string]*table), snapshots: make(map[int]*snapshot), removeVersionFunc: removeVersionFunc, removeSnapshotFunc: removeSnapshotFunc, + logger: logger, } } -func (c *catalog) tableMeta(ctx context.Context, name string) (*tableMeta, error) { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return nil, err - } - defer c.sem.Release(1) +func (c *catalog) tableMeta(name string) (*tableMeta, error) { + c.lock.Lock() + defer c.lock.Unlock() t, ok := c.tables[name] if !ok || t.deleted { @@ -88,12 +69,9 @@ func (c *catalog) tableMeta(ctx context.Context, name string) (*tableMeta, error // addTableVersion registers a new version of a table. // If the table name has not been seen before, it is added to the catalog. 
-func (c *catalog) addTableVersion(ctx context.Context, name string, meta *tableMeta) error { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return err - } - defer c.sem.Release(1) +func (c *catalog) addTableVersion(name string, meta *tableMeta) { + c.lock.Lock() + defer c.lock.Unlock() t, ok := c.tables[name] if !ok { @@ -111,41 +89,34 @@ func (c *catalog) addTableVersion(ctx context.Context, name string, meta *tableM t.versionMeta[meta.Version] = meta c.acquireVersion(t, t.currentVersion) if oldVersion != "" { - _ = c.releaseVersion(ctx, t, oldVersion) + c.releaseVersion(t, oldVersion) } - c.currentSnapshotID++ - return nil } // removeTable removes a table from the catalog. // If the table is currently used by a snapshot, it will stay in the catalog but marked with deleted=true. // When the last snapshot referencing the table is released, the table will be removed completely. -func (c *catalog) removeTable(ctx context.Context, name string) error { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return err - } - defer c.sem.Release(1) +func (c *catalog) removeTable(name string) { + c.lock.Lock() + defer c.lock.Unlock() t, ok := c.tables[name] if !ok { - return fmt.Errorf("table %q not found", name) + c.logger.Debug("table not found in rduckdb catalog", slog.String("name", name)) } oldVersion := t.currentVersion t.deleted = true t.currentVersion = "" - return c.releaseVersion(ctx, t, oldVersion) + c.currentSnapshotID++ + c.releaseVersion(t, oldVersion) } // listTables returns tableMeta for all active tables present in the catalog. 
-func (c *catalog) listTables(ctx context.Context) ([]*tableMeta, error) { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return nil, err - } - defer c.sem.Release(1) +func (c *catalog) listTables() []*tableMeta { + c.lock.Lock() + defer c.lock.Unlock() tables := make([]*tableMeta, 0) for _, t := range c.tables { @@ -154,25 +125,22 @@ func (c *catalog) listTables(ctx context.Context) ([]*tableMeta, error) { } meta, ok := t.versionMeta[t.currentVersion] if !ok { - return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) + c.logger.Error("internal error: meta for table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } tables = append(tables, meta) } - return tables, nil + return tables } // acquireSnapshot acquires a snapshot of the current table versions. -func (c *catalog) acquireSnapshot(ctx context.Context) (*snapshot, error) { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return nil, err - } - defer c.sem.Release(1) +func (c *catalog) acquireSnapshot() *snapshot { + c.lock.Lock() + defer c.lock.Unlock() s, ok := c.snapshots[c.currentSnapshotID] if ok { s.referenceCount++ - return s, nil + return s } // first acquire s = &snapshot{ @@ -187,40 +155,34 @@ func (c *catalog) acquireSnapshot(ctx context.Context) (*snapshot, error) { meta, ok := t.versionMeta[t.currentVersion] if !ok { - return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) + c.logger.Error("internal error: meta for table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } s.tables = append(s.tables, meta) c.acquireVersion(t, t.currentVersion) } c.snapshots[c.currentSnapshotID] = s - return s, nil + return s } // releaseSnapshot releases a snapshot of table versions. 
-func (c *catalog) releaseSnapshot(ctx context.Context, s *snapshot) error { - err := c.sem.Acquire(ctx, 1) - if err != nil { - return err - } - defer c.sem.Release(1) +func (c *catalog) releaseSnapshot(s *snapshot) { + c.lock.Lock() + defer c.lock.Unlock() s.referenceCount-- if s.referenceCount > 0 { - return nil + return } for _, meta := range s.tables { t, ok := c.tables[meta.Name] if !ok { - return fmt.Errorf("internal error: table %q not found", meta.Name) - } - if err := c.releaseVersion(ctx, t, meta.Version); err != nil { - return err + c.logger.Error("internal error: table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } + c.releaseVersion(t, meta.Version) } - delete(c.snapshots, s.id) - return c.removeSnapshotFunc(ctx, s.id) + c.removeSnapshotFunc(s.id) } // acquireVersion increments the reference count of a table version. @@ -233,21 +195,20 @@ func (c *catalog) acquireVersion(t *table, version string) { // releaseVersion decrements the reference count of a table version. // If the reference count reaches zero and the version is no longer the current version, it is removec. 
-func (c *catalog) releaseVersion(ctx context.Context, t *table, version string) error { +func (c *catalog) releaseVersion(t *table, version string) { referenceCount, ok := t.versionReferenceCounts[version] if !ok { - return fmt.Errorf("version %q of table %q not found", version, t.name) + c.logger.Error("internal error: version of table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } referenceCount-- if referenceCount > 0 { t.versionReferenceCounts[version] = referenceCount - return nil + return } delete(t.versionReferenceCounts, version) if t.deleted && len(t.versionReferenceCounts) == 0 { delete(c.tables, t.name) } - - return c.removeVersionFunc(ctx, t.name, version) + c.removeVersionFunc(t.name, version) } diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 3c792785291..cc7bcecd73d 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -22,7 +22,6 @@ import ( "github.com/jmoiron/sqlx" "github.com/marcboeker/go-duckdb" "github.com/mitchellh/mapstructure" - "github.com/rilldata/rill/runtime/pkg/ctxsync" "go.opentelemetry.io/otel/attribute" "gocloud.dev/blob" "golang.org/x/sync/semaphore" @@ -76,7 +75,6 @@ type DBOptions struct { OtelAttributes []attribute.KeyValue } -// TODO :: revisit this logic func (d *DBOptions) ValidateSettings() error { read := &settings{} err := mapstructure.Decode(d.ReadSettings, read) @@ -218,25 +216,38 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { opts: opts, localPath: opts.LocalPath, remote: opts.Remote, - readMu: ctxsync.NewRWMutex(), writeSem: semaphore.NewWeighted(1), metaSem: semaphore.NewWeighted(1), localDirty: true, - ticker: time.NewTicker(5 * time.Minute), logger: opts.Logger, ctx: bgctx, cancel: cancel, } // catalog db.catalog = newCatalog( - db.removeTableVersion, - db.removeSnapshot, + func(name, version string) { + go func() { + err = db.removeTableVersion(bgctx, name, version) + if err != nil && 
!errors.Is(err, context.Canceled) { + db.logger.Error("error in removing table version", slog.String("name", name), slog.String("version", version), slog.String("error", err.Error())) + } + }() + }, + func(i int) { + go func() { + err = db.removeSnapshot(bgctx, i) + if err != nil && !errors.Is(err, context.Canceled) { + db.logger.Error("error in removing snapshot", slog.Int("id", i), slog.String("error", err.Error())) + } + }() + }, + opts.Logger, ) // create local path err = os.MkdirAll(db.localPath, fs.ModePerm) if err != nil { - return nil, fmt.Errorf("unable to create read path: %w", err) + return nil, fmt.Errorf("unable to create local path: %w", err) } // sync local data @@ -245,7 +256,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { return nil, err } - // create read handle + // create db handle db.dbHandle, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { @@ -266,8 +277,6 @@ type db struct { // dbHandle serves executes meta queries and serves read queries dbHandle *sqlx.DB - // readMu controls access to readHandle - readMu ctxsync.RWMutex // writeSem ensures only one write operation is allowed at a time writeSem *semaphore.Weighted // metaSem enures only one meta operation can run on a duckb handle. 
@@ -275,9 +284,7 @@ type db struct { metaSem *semaphore.Weighted // localDirty is set to true when a change is committed to the remote but not yet reflected in the local db localDirty bool - // ticker to peroiodically check if local db is in sync with remote - ticker *time.Ticker - catalog *catalog + catalog *catalog logger *slog.Logger @@ -291,7 +298,6 @@ var _ DB = &db{} func (d *db) Close() error { // close background operations d.cancel() - d.ticker.Stop() ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -299,47 +305,26 @@ func (d *db) Close() error { _ = d.writeSem.Acquire(ctx, 1) defer d.writeSem.Release(1) - err := d.readMu.Lock(ctx) - if err != nil { - return err - } - defer d.readMu.Unlock() - - err = d.dbHandle.Close() - d.dbHandle = nil - return err + return d.dbHandle.Close() } func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() error, error) { - if err := d.readMu.RLock(ctx); err != nil { - return nil, nil, err - } - - // acquire a connection - snapshot, err := d.catalog.acquireSnapshot(ctx) - if err != nil { - d.readMu.RUnlock() - return nil, nil, err - } + snapshot := d.catalog.acquireSnapshot() conn, err := d.dbHandle.Connx(ctx) if err != nil { - d.readMu.RUnlock() return nil, nil, err } err = d.prepareSnapshot(ctx, conn, snapshot) if err != nil { _ = conn.Close() - d.readMu.RUnlock() return nil, nil, err } release := func() error { - err = d.catalog.releaseSnapshot(ctx, snapshot) - err = errors.Join(err, conn.Close()) - d.readMu.RUnlock() - return err + d.catalog.releaseSnapshot(snapshot) + return conn.Close() } return conn, release, nil } @@ -359,38 +344,23 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * } // check if some older version exists - oldMeta, _ := d.catalog.tableMeta(ctx, name) + oldMeta, _ := d.catalog.tableMeta(name) if oldMeta != nil { - d.logger.Debug("old version", slog.String("version", oldMeta.Version)) + d.logger.Debug("old 
version", slog.String("table", name), slog.String("version", oldMeta.Version)) } // create new version directory newVersion := newVersion() newMeta := &tableMeta{ - Name: name, - Version: newVersion, - CreatedVersion: newVersion, + Name: name, + Version: newVersion, } - var dsn string - if opts.View { - dsn = "" - newMeta.SQL = query - // special handling to ensure that if a view is recreated with the same name and schema then any views on top of this view still works - if oldMeta != nil && oldMeta.Type == "VIEW" { - newMeta.CreatedVersion = oldMeta.CreatedVersion - } - err = os.MkdirAll(filepath.Join(d.localPath, name), fs.ModePerm) - if err != nil { - return fmt.Errorf("create: unable to create dir %q: %w", name, err) - } - } else { - newVersionDir := filepath.Join(d.localPath, name, newVersion) - err = os.MkdirAll(newVersionDir, fs.ModePerm) - if err != nil { - return fmt.Errorf("create: unable to create dir %q: %w", name, err) - } - dsn = filepath.Join(newVersionDir, "data.db") + + err = d.initLocalTable(name, newVersion) + if err != nil { + return fmt.Errorf("create: unable to create dir %q: %w", name, err) } + dsn := d.localDBPath(name, newVersion) // need to attach existing table so that any views dependent on this table are correctly attached conn, release, err := d.acquireWriteConn(ctx, dsn, name, true) @@ -405,11 +375,10 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * var typ string if opts.View { typ = "VIEW" - newMeta.Type = "VIEW" } else { typ = "TABLE" - newMeta.Type = "TABLE" } + newMeta.Type = typ if opts.InitSQL != "" { _, err = conn.ExecContext(ctx, opts.InitSQL, nil) if err != nil { @@ -422,13 +391,12 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return fmt.Errorf("create: create %s %q failed: %w", typ, name, err) } - // close write handle before syncing read so that temp files or wal files are removed + // close write handle before syncing local so that temp files or wal 
files are removed err = release() if err != nil { return err } - d.localDirty = true // update remote data and metadata if err := d.pushToRemote(ctx, name, oldMeta, newMeta); err != nil { return fmt.Errorf("create: replicate failed: %w", err) @@ -443,11 +411,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * return nil } - err = d.catalog.addTableVersion(ctx, name, newMeta) - if err != nil { - d.logger.Debug("create: error in adding version", slog.String("table", name), slog.String("version", newMeta.Version), slog.String("error", err.Error())) - return nil - } + d.catalog.addTableVersion(name, newMeta) d.localDirty = false return nil } @@ -466,7 +430,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con return err } - oldMeta, err := d.catalog.tableMeta(ctx, name) + oldMeta, err := d.catalog.tableMeta(name) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("mutate: Table %q not found", name) @@ -476,20 +440,14 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con // create new version directory newVersion := newVersion() - newVersionDir := filepath.Join(d.localPath, name, newVersion) - err = os.MkdirAll(newVersionDir, fs.ModePerm) - if err != nil { - return fmt.Errorf("mutate: unable to create dir %q: %w", name, err) - } - - err = copyDir(newVersionDir, filepath.Join(d.localPath, name, oldMeta.Version)) + err = copyDir(d.localTableDir(name, newVersion), d.localTableDir(name, oldMeta.Version)) if err != nil { return fmt.Errorf("mutate: copy table failed: %w", err) } // acquire write connection // need to ignore attaching table since it is already present in the db file - conn, release, err := d.acquireWriteConn(ctx, filepath.Join(newVersionDir, "data.db"), name, false) + conn, release, err := d.acquireWriteConn(ctx, d.localDBPath(name, newVersion), name, false) if err != nil { return err } @@ -505,13 +463,10 @@ func (d *db) MutateTable(ctx 
context.Context, name string, mutateFn func(ctx con if err != nil { return fmt.Errorf("mutate: failed to close connection: %w", err) } - d.localDirty = true meta := &tableMeta{ - Name: name, - Version: newVersion, - CreatedVersion: oldMeta.CreatedVersion, - Type: oldMeta.Type, - SQL: oldMeta.SQL, + Name: name, + Version: newVersion, + Type: oldMeta.Type, } err = d.pushToRemote(ctx, name, oldMeta, meta) if err != nil { @@ -526,11 +481,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con return nil } - err = d.catalog.addTableVersion(ctx, name, meta) - if err != nil { - d.logger.Debug("mutate: error in adding version", slog.String("table", name), slog.String("version", meta.Version), slog.String("error", err.Error())) - return nil - } + d.catalog.addTableVersion(name, meta) d.localDirty = false return nil } @@ -551,7 +502,7 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // check if table exists - _, err = d.catalog.tableMeta(ctx, name) + _, err = d.catalog.tableMeta(name) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("drop: Table %q not found", name) @@ -567,11 +518,7 @@ func (d *db) DropTable(ctx context.Context, name string) error { } // no errors after this point since background goroutine will eventually sync the local db - err = d.catalog.removeTable(ctx, name) - if err != nil { - d.logger.Debug("drop: error in removing table", slog.String("name", name), slog.String("error", err.Error())) - return nil - } + d.catalog.removeTable(name) d.localDirty = false return nil } @@ -593,7 +540,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { return fmt.Errorf("rename: unable to pull from remote: %w", err) } - oldMeta, err := d.catalog.tableMeta(ctx, oldName) + oldMeta, err := d.catalog.tableMeta(oldName) if err != nil { if errors.Is(err, errNotFound) { return fmt.Errorf("rename: Table %q not found", oldName) @@ -603,25 +550,22 @@ func (d *db) RenameTable(ctx 
context.Context, oldName, newName string) error { // copy the old table to new table newVersion := newVersion() - err = copyDir(filepath.Join(d.localPath, newName, newVersion), filepath.Join(d.localPath, oldName, oldMeta.Version)) + err = copyDir(d.localTableDir(newName, newVersion), d.localTableDir(oldName, oldMeta.Version)) if err != nil { return fmt.Errorf("rename: copy table failed: %w", err) } // rename the underlying table - err = renameTable(ctx, filepath.Join(d.localPath, newName, newVersion, "data.db"), oldName, newName) + err = renameTable(ctx, d.localDBPath(newName, newVersion), oldName, newName) if err != nil { return fmt.Errorf("rename: rename table failed: %w", err) } - d.localDirty = true // sync the new table and new version meta := &tableMeta{ - Name: newName, - Version: newVersion, - CreatedVersion: newVersion, - Type: oldMeta.Type, - SQL: oldMeta.SQL, + Name: newName, + Version: newVersion, + Type: oldMeta.Type, } if err := d.pushToRemote(ctx, newName, oldMeta, meta); err != nil { return fmt.Errorf("rename: unable to replicate new table: %w", err) @@ -647,32 +591,27 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { } // remove old table from local db - err = d.catalog.removeTable(ctx, oldName) - if err != nil { - d.logger.Debug("rename: error in removing table", slog.String("name", oldName), slog.String("error", err.Error())) - return nil - } - err = d.catalog.addTableVersion(ctx, newName, meta) - if err != nil { - d.logger.Debug("rename: error in adding version", slog.String("table", newName), slog.String("version", newVersion), slog.String("error", err.Error())) - return nil - } + d.catalog.removeTable(oldName) + d.catalog.addTableVersion(newName, meta) d.localDirty = false return nil } func (d *db) localDBMonitor() { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() for { select { case <-d.ctx.Done(): return - case <-d.ticker.C: + case <-ticker.C: err := d.writeSem.Acquire(d.ctx, 1) if err != nil 
&& !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in acquiring write sem", slog.String("error", err.Error())) continue } if !d.localDirty { + d.writeSem.Release(1) // all good continue } @@ -680,6 +619,7 @@ func (d *db) localDBMonitor() { if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling from remote", slog.String("error", err.Error())) } + d.writeSem.Release(1) } } } @@ -700,9 +640,9 @@ func (d *db) Size() int64 { if strings.HasPrefix(entry.Name(), "__rill_tmp_") { continue } - meta, _ := d.catalog.tableMeta(context.Background(), entry.Name()) + meta, _ := d.catalog.tableMeta(entry.Name()) if meta != nil { - paths = append(paths, filepath.Join(d.localPath, entry.Name(), meta.Version, "data.db")) + paths = append(paths, d.localDBPath(meta.Name, meta.Version)) } } return fileSize(paths) @@ -746,7 +686,7 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read d.logger.Debug("open db", slog.Bool("read", read), slog.String("uri", uri)) // open the db var settings map[string]string - dsn, err := url.Parse(uri) // in-memory + dsn, err := url.Parse(uri) if err != nil { return nil, err } @@ -789,11 +729,7 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read return nil, err } - tables, err := d.catalog.listTables(ctx) - if err != nil { - return nil, err - } - + tables := d.catalog.listTables() err = d.attachTables(ctx, conn, tables, ignoreTable) if err != nil { db.Close() @@ -839,7 +775,8 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM if b.Type == "TABLE" { return 1 } - return strings.Compare(a.CreatedVersion, b.CreatedVersion) + // any order for views + return 0 }) for _, table := range tables { if table.Name == ignoreTable { @@ -855,24 +792,25 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM func (d *db) attachTable(ctx context.Context, conn *sqlx.Conn, table 
*tableMeta) error { safeTable := safeSQLName(table.Name) - if table.Type == "VIEW" { - _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) - return err - } - safeDBName := safeSQLName(dbName(table.Name, table.Version)) - _, err := conn.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(filepath.Join(d.localPath, table.Name, table.Version, "data.db")), safeDBName)) + _, err := conn.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(d.localDBPath(table.Name, table.Version)), safeDBName)) if err != nil { - d.logger.Warn("error in attaching db", slog.String("table", table.Name), slog.Any("error", err)) + d.logger.Warn("error in attaching db", slog.String("table", table.Name), slog.String("version", table.Version), slog.Any("error", err)) return err } _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) - return err + if err != nil && !errors.Is(err, context.Canceled) && table.Type == "VIEW" { + // create a view that returns an error on querying + // may be the view is incompatible with the underlying data due to schema changes + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT error('View %s is incompatible with the underlying data: %s')", safeTable, safeTable, strings.ReplaceAll(err.Error(), "'", "''"))) + return err + } + return nil } func (d *db) tableMeta(name string) (*tableMeta, error) { - contents, err := os.ReadFile(filepath.Join(d.localPath, name, "meta.json")) + contents, err := os.ReadFile(d.localMetaPath(name)) if err != nil { if errors.Is(err, fs.ErrNotExist) { return nil, errNotFound @@ -885,11 +823,8 @@ func (d *db) tableMeta(name string) (*tableMeta, error) { return nil, err } - if m.Type == "VIEW" { - return m, nil - } // this is required because release version does not delete table directory as of now - _, err = 
os.Stat(filepath.Join(d.localPath, name, m.Version)) + _, err = os.Stat(d.localTableDir(name, m.Version)) if err != nil { if errors.Is(err, fs.ErrNotExist) { return nil, errNotFound @@ -904,24 +839,42 @@ func (d *db) writeTableMeta(name string, meta *tableMeta) error { if err != nil { return fmt.Errorf("create: marshal meta failed: %w", err) } - err = os.WriteFile(filepath.Join(d.localPath, name, "meta.json"), metaBytes, fs.ModePerm) + err = os.WriteFile(d.localMetaPath(name), metaBytes, fs.ModePerm) if err != nil { return fmt.Errorf("create: write meta failed: %w", err) } return nil } -// deleteLocalTableFiles delete table files for the given table name. If version is provided, only that version is deleted. -func (d *db) deleteLocalTableFiles(name, version string) error { +func (d *db) localTableDir(name, version string) string { var path string if version == "" { path = filepath.Join(d.localPath, name) } else { path = filepath.Join(d.localPath, name, version) } - return os.RemoveAll(path) + return path +} + +func (d *db) localMetaPath(table string) string { + return filepath.Join(d.localPath, table, "meta.json") +} + +func (d *db) localDBPath(table, version string) string { + return filepath.Join(d.localPath, table, version, "data.db") +} + +// initLocalTable creates a directory for the table in the local path. +// If version is provided, a version directory is also created. +func (d *db) initLocalTable(name, version string) error { + err := os.MkdirAll(d.localTableDir(name, version), fs.ModePerm) + if err != nil { + return fmt.Errorf("create: unable to create dir %q: %w", name, err) + } + return nil } +// removeTableVersion removes the table version from the catalog and deletes the local table files. 
func (d *db) removeTableVersion(ctx context.Context, name, version string) error { err := d.metaSem.Acquire(ctx, 1) if err != nil { @@ -936,6 +889,11 @@ func (d *db) removeTableVersion(ctx context.Context, name, version string) error return d.deleteLocalTableFiles(name, version) } +// deleteLocalTableFiles delete table files for the given table name. If version is provided, only that version is deleted. +func (d *db) deleteLocalTableFiles(name, version string) error { + return os.RemoveAll(d.localTableDir(name, version)) +} + func (d *db) prepareSnapshot(ctx context.Context, conn *sqlx.Conn, s *snapshot) error { err := d.metaSem.Acquire(ctx, 1) if err != nil { @@ -957,7 +915,12 @@ func (d *db) prepareSnapshot(ctx context.Context, conn *sqlx.Conn, s *snapshot) return err } - return d.attachTables(ctx, conn, s.tables, "") + err = d.attachTables(ctx, conn, s.tables, "") + if err != nil { + return err + } + s.ready = true + return nil } func (d *db) removeSnapshot(ctx context.Context, id int) error { @@ -972,11 +935,9 @@ func (d *db) removeSnapshot(ctx context.Context, id int) error { } type tableMeta struct { - Name string `json:"name"` - Version string `json:"version"` - CreatedVersion string `json:"created_version"` - Type string `json:"type"` // either TABLE or VIEW - SQL string `json:"sql"` // populated for views + Name string `json:"name"` + Version string `json:"version"` + Type string `json:"type"` // either TABLE or VIEW } func renameTable(ctx context.Context, dbFile, old, newName string) error { diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 02ac7cb4c86..d50b34c65c6 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -268,6 +268,69 @@ func TestConcurrentReads(t *testing.T) { require.NoError(t, release3()) } +func TestInconsistentSchema(t *testing.T) { + testDB, _, _ := prepareDB(t) + ctx := context.Background() + + // create table + err := testDB.CreateTableAsSelect(ctx, "test", "SELECT 2 AS 
id, 'USA' AS country", &CreateTableOptions{}) + require.NoError(t, err) + + // create view + err = testDB.CreateTableAsSelect(ctx, "test_view", "SELECT id, country FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, testDB, "SELECT * FROM test_view", []testData{{ID: 2, Country: "USA"}}) + + // replace underlying table + err = testDB.CreateTableAsSelect(ctx, "test", "SELECT 20 AS id, 'USB' AS city", &CreateTableOptions{}) + require.NoError(t, err) + + conn, release, err := testDB.AcquireReadConnection(ctx) + require.NoError(t, err) + defer release() + + var ( + id int + country string + ) + err = conn.QueryRowxContext(ctx, "SELECT * FROM test_view").Scan(&id, &country) + require.Error(t, err) + + // but querying from table should work + err = conn.QueryRowxContext(ctx, "SELECT * FROM test").Scan(&id, &country) + require.NoError(t, err) + require.Equal(t, 20, id) + require.Equal(t, "USB", country) +} + +func TestViews(t *testing.T) { + testDB, _, _ := prepareDB(t) + ctx := context.Background() + + // create view + err := testDB.CreateTableAsSelect(ctx, "parent_view", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{View: true}) + require.NoError(t, err) + + // create dependent view + err = testDB.CreateTableAsSelect(ctx, "child_view", "SELECT * FROM parent_view", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, testDB, "SELECT id, country FROM child_view", []testData{{ID: 1, Country: "India"}}) + + // replace parent view + err = testDB.CreateTableAsSelect(ctx, "parent_view", "SELECT 2 AS id, 'USA' AS country", &CreateTableOptions{View: true}) + require.NoError(t, err) + verifyTable(t, testDB, "SELECT id, country FROM child_view", []testData{{ID: 2, Country: "USA"}}) + + // rename child view + err = testDB.RenameTable(ctx, "child_view", "child_view2") + require.NoError(t, err) + verifyTable(t, testDB, "SELECT id, country FROM child_view2", []testData{{ID: 2, Country: "USA"}}) + + // old child view 
does not exist + err = testDB.DropTable(ctx, "child_view") + require.Error(t, err) +} + func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) { localDir = t.TempDir() ctx := context.Background() diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 3874f925da7..a0ba65addc3 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -88,7 +88,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { } // check if table in catalog is already upto date - meta, _ := d.catalog.tableMeta(gctx, table) + meta, _ := d.catalog.tableMeta(table) if meta != nil && meta.Version == backedUpMeta.Version { d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) continue @@ -149,10 +149,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { if err != nil { return err } - err = d.catalog.addTableVersion(ctx, table, meta) - if err != nil { - return err - } + d.catalog.addTableVersion(table, meta) } // mark tables that are not in remote for delete later @@ -167,10 +164,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { if _, ok := tblMetas[entry.Name()]; ok { continue } - err = d.catalog.removeTable(ctx, entry.Name()) - if err != nil { - return err - } + d.catalog.removeTable(entry.Name()) } return nil } @@ -178,42 +172,41 @@ func (d *db) pullFromRemote(ctx context.Context) error { // pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. 
func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { - if meta.Type == "TABLE" { - // for views no db files exists, the SQL is stored in meta.json - localPath := filepath.Join(d.localPath, table, meta.Version) - entries, err := os.ReadDir(localPath) + // for views no db files exists, the SQL is stored in meta.json + localPath := filepath.Join(d.localPath, table, meta.Version) + entries, err := os.ReadDir(localPath) + if err != nil { + return err + } + + for _, entry := range entries { + d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) + // no directory should exist as of now + if entry.IsDir() { + d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) + continue + } + + wr, err := os.Open(filepath.Join(localPath, entry.Name())) if err != nil { return err } - for _, entry := range entries { - d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) - // no directory should exist as of now - if entry.IsDir() { - d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) - continue - } - - wr, err := os.Open(filepath.Join(localPath, entry.Name())) - if err != nil { - return err - } - - // upload to cloud storage - err = retry(ctx, func() error { - return d.remote.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ - ContentType: "application/octet-stream", - }) + // upload to cloud storage + err = retry(ctx, func() error { + return d.remote.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ + ContentType: "application/octet-stream", }) - _ = wr.Close() - if err != nil { - return err - } + }) + _ = wr.Close() + if err != nil { + return err } } // update table meta // todo :: also use etag to avoid concurrent writer 
conflicts + d.localDirty = true m, err := json.Marshal(meta) if err != nil { return fmt.Errorf("failed to marshal table metadata: %w", err) @@ -222,7 +215,7 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl return d.remote.WriteAll(ctx, path.Join(table, "meta.json"), m, nil) }) if err != nil { - d.logger.Error("failed to update meta.json in remote", slog.Any("error", err)) + d.logger.Error("failed to update meta.json in remote", slog.String("table", table), slog.Any("error", err)) } // success -- remove old version @@ -244,12 +237,11 @@ func (d *db) deleteRemote(ctx context.Context, table, version string) error { if version != "" { prefix = path.Join(table, version) + "/" } else { - // deleting the entire table prefix = table + "/" // delete meta.json first err := retry(ctx, func() error { return d.remote.Delete(ctx, "meta.json") }) if err != nil && gcerrors.Code(err) != gcerrors.NotFound { - d.logger.Error("failed to delete meta.json in remote", slog.Any("error", err)) + d.logger.Error("failed to delete meta.json in remote", slog.String("table", table), slog.Any("error", err)) return err } } @@ -263,11 +255,11 @@ func (d *db) deleteRemote(ctx context.Context, table, version string) error { if errors.Is(err, io.EOF) { break } - d.logger.Debug("failed to list object", slog.Any("error", err)) + d.logger.Debug("failed to list object", slog.String("table", table), slog.Any("error", err)) } err = retry(ctx, func() error { return d.remote.Delete(ctx, obj.Key) }) if err != nil { - d.logger.Debug("failed to delete object", slog.String("object", obj.Key), slog.Any("error", err)) + d.logger.Debug("failed to delete object", slog.String("table", table), slog.String("object", obj.Key), slog.Any("error", err)) } } return nil From 154ed025bd562958541694bdbcb8e5db63f690f5 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 29 Nov 2024 20:40:50 +0530 Subject: [PATCH 34/64] close fix --- 
runtime/pkg/rduckdb/catalog.go | 8 ++++---- runtime/pkg/rduckdb/db.go | 7 ------- runtime/pkg/rduckdb/remote.go | 1 - 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 190a88f26f2..558f761aa43 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -125,7 +125,7 @@ func (c *catalog) listTables() []*tableMeta { } meta, ok := t.versionMeta[t.currentVersion] if !ok { - c.logger.Error("internal error: meta for table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + c.logger.Error("internal error: meta for table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } tables = append(tables, meta) } @@ -155,7 +155,7 @@ func (c *catalog) acquireSnapshot() *snapshot { meta, ok := t.versionMeta[t.currentVersion] if !ok { - c.logger.Error("internal error: meta for table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + c.logger.Error("internal error: meta for table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } s.tables = append(s.tables, meta) c.acquireVersion(t, t.currentVersion) @@ -177,7 +177,7 @@ func (c *catalog) releaseSnapshot(s *snapshot) { for _, meta := range s.tables { t, ok := c.tables[meta.Name] if !ok { - c.logger.Error("internal error: table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + c.logger.Error("internal error: table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } c.releaseVersion(t, meta.Version) } @@ -198,7 +198,7 @@ func (c *catalog) acquireVersion(t *table, version string) { func (c *catalog) releaseVersion(t *table, version string) { referenceCount, ok := t.versionReferenceCounts[version] if !ok { - c.logger.Error("internal error: version of 
table not found in rduckdb catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + c.logger.Error("internal error: version of table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) } referenceCount-- if referenceCount > 0 { diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index cc7bcecd73d..f9c20744898 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -298,13 +298,6 @@ var _ DB = &db{} func (d *db) Close() error { // close background operations d.cancel() - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - _ = d.writeSem.Acquire(ctx, 1) - defer d.writeSem.Release(1) - return d.dbHandle.Close() } diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index a0ba65addc3..0a9a9657e02 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -172,7 +172,6 @@ func (d *db) pullFromRemote(ctx context.Context) error { // pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. 
func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { - // for views no db files exists, the SQL is stored in meta.json localPath := filepath.Join(d.localPath, table, meta.Version) entries, err := os.ReadDir(localPath) if err != nil { From e3cceadb19c238f8c28c56012f8c999c7898540c Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 29 Nov 2024 20:57:04 +0530 Subject: [PATCH 35/64] small renames --- runtime/pkg/rduckdb/catalog.go | 26 +++++++++++++------------- runtime/pkg/rduckdb/db.go | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 558f761aa43..395312547a3 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -28,7 +28,7 @@ type snapshot struct { // Represents a catalog of available table versions. // It is thread-safe and supports acquiring a snapshot of table versions which will not be mutated or removed for as long as the snapshot is held. type catalog struct { - lock sync.Mutex + mu sync.Mutex tables map[string]*table snapshots map[int]*snapshot currentSnapshotID int @@ -53,8 +53,8 @@ func newCatalog(removeVersionFunc func(string, string), removeSnapshotFunc func( } func (c *catalog) tableMeta(name string) (*tableMeta, error) { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() t, ok := c.tables[name] if !ok || t.deleted { @@ -70,8 +70,8 @@ func (c *catalog) tableMeta(name string) (*tableMeta, error) { // addTableVersion registers a new version of a table. // If the table name has not been seen before, it is added to the catalog. 
func (c *catalog) addTableVersion(name string, meta *tableMeta) { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() t, ok := c.tables[name] if !ok { @@ -98,8 +98,8 @@ func (c *catalog) addTableVersion(name string, meta *tableMeta) { // If the table is currently used by a snapshot, it will stay in the catalog but marked with deleted=true. // When the last snapshot referencing the table is released, the table will be removed completely. func (c *catalog) removeTable(name string) { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() t, ok := c.tables[name] if !ok { @@ -115,8 +115,8 @@ func (c *catalog) removeTable(name string) { // listTables returns tableMeta for all active tables present in the catalog. func (c *catalog) listTables() []*tableMeta { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() tables := make([]*tableMeta, 0) for _, t := range c.tables { @@ -134,8 +134,8 @@ func (c *catalog) listTables() []*tableMeta { // acquireSnapshot acquires a snapshot of the current table versions. func (c *catalog) acquireSnapshot() *snapshot { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() s, ok := c.snapshots[c.currentSnapshotID] if ok { @@ -166,8 +166,8 @@ func (c *catalog) acquireSnapshot() *snapshot { // releaseSnapshot releases a snapshot of table versions. 
func (c *catalog) releaseSnapshot(s *snapshot) { - c.lock.Lock() - defer c.lock.Unlock() + c.mu.Lock() + defer c.mu.Unlock() s.referenceCount-- if s.referenceCount > 0 { diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index f9c20744898..623c1e4832b 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -323,7 +323,7 @@ func (d *db) AcquireReadConnection(ctx context.Context) (*sqlx.Conn, func() erro } func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts *CreateTableOptions) error { - d.logger.Debug("create table", slog.String("name", name), slog.Bool("view", opts.View)) + d.logger.Debug("create: create table", slog.String("name", name), slog.Bool("view", opts.View)) err := d.writeSem.Acquire(ctx, 1) if err != nil { return err @@ -339,7 +339,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * // check if some older version exists oldMeta, _ := d.catalog.tableMeta(name) if oldMeta != nil { - d.logger.Debug("old version", slog.String("table", name), slog.String("version", oldMeta.Version)) + d.logger.Debug("create: old version", slog.String("table", name), slog.String("version", oldMeta.Version)) } // create new version directory @@ -394,7 +394,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * if err := d.pushToRemote(ctx, name, oldMeta, newMeta); err != nil { return fmt.Errorf("create: replicate failed: %w", err) } - d.logger.Debug("remote table updated", slog.String("name", name)) + d.logger.Debug("create: remote table updated", slog.String("name", name)) // no errors after this point since background goroutine will eventually sync the local db // update local metadata From 86aaf2a1186467461a6e01cb04c1e492140ce4f3 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 29 Nov 2024 23:08:01 +0530 Subject: [PATCH 36/64] view fix --- runtime/pkg/rduckdb/db.go | 142 
+++++++++++++++++++++++---------- runtime/pkg/rduckdb/db_test.go | 14 +++- runtime/pkg/rduckdb/remote.go | 50 ++++++------ 3 files changed, 139 insertions(+), 67 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 623c1e4832b..38a70617ac5 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -345,15 +345,25 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * // create new version directory newVersion := newVersion() newMeta := &tableMeta{ - Name: name, - Version: newVersion, + Name: name, + Version: newVersion, + CreatedVersion: newVersion, } - - err = d.initLocalTable(name, newVersion) - if err != nil { - return fmt.Errorf("create: unable to create dir %q: %w", name, err) + var dsn string + if opts.View { + dsn = "" + newMeta.SQL = query + err = d.initLocalTable(name, "") + if err != nil { + return fmt.Errorf("create: unable to create dir %q: %w", name, err) + } + } else { + err = d.initLocalTable(name, newVersion) + if err != nil { + return fmt.Errorf("create: unable to create dir %q: %w", name, err) + } + dsn = d.localDBPath(name, newVersion) } - dsn := d.localDBPath(name, newVersion) // need to attach existing table so that any views dependent on this table are correctly attached conn, release, err := d.acquireWriteConn(ctx, dsn, name, true) @@ -457,9 +467,11 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con return fmt.Errorf("mutate: failed to close connection: %w", err) } meta := &tableMeta{ - Name: name, - Version: newVersion, - Type: oldMeta.Type, + Name: name, + Version: newVersion, + CreatedVersion: oldMeta.CreatedVersion, + Type: oldMeta.Type, + SQL: oldMeta.SQL, } err = d.pushToRemote(ctx, name, oldMeta, meta) if err != nil { @@ -543,22 +555,31 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { // copy the old table to new table newVersion := newVersion() - err = copyDir(d.localTableDir(newName, 
newVersion), d.localTableDir(oldName, oldMeta.Version)) - if err != nil { - return fmt.Errorf("rename: copy table failed: %w", err) - } + if oldMeta.Type == "TABLE" { + err = copyDir(d.localTableDir(newName, newVersion), d.localTableDir(oldName, oldMeta.Version)) + if err != nil { + return fmt.Errorf("rename: copy table failed: %w", err) + } - // rename the underlying table - err = renameTable(ctx, d.localDBPath(newName, newVersion), oldName, newName) - if err != nil { - return fmt.Errorf("rename: rename table failed: %w", err) + // rename the underlying table + err = renameTable(ctx, d.localDBPath(newName, newVersion), oldName, newName) + if err != nil { + return fmt.Errorf("rename: rename table failed: %w", err) + } + } else { + err = copyDir(d.localTableDir(newName, ""), d.localTableDir(oldName, "")) + if err != nil { + return fmt.Errorf("rename: copy view failed: %w", err) + } } // sync the new table and new version meta := &tableMeta{ - Name: newName, - Version: newVersion, - Type: oldMeta.Type, + Name: newName, + Version: newVersion, + CreatedVersion: newVersion, + Type: oldMeta.Type, + SQL: oldMeta.SQL, } if err := d.pushToRemote(ctx, newName, oldMeta, meta); err != nil { return fmt.Errorf("rename: unable to replicate new table: %w", err) @@ -769,35 +790,72 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM return 1 } // any order for views - return 0 + return strings.Compare(a.CreatedVersion, b.CreatedVersion) }) + + var failedViews []*tableMeta + // attach database files for _, table := range tables { if table.Name == ignoreTable { continue } - err := d.attachTable(ctx, conn, table) + safeTable := safeSQLName(table.Name) + if table.Type == "VIEW" { + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) + if err != nil { + if errors.Is(err, context.Canceled) { + return err + } + failedViews = append(failedViews, table) + } + continue + } + safeDBName := 
safeSQLName(dbName(table.Name, table.Version)) + _, err := conn.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(d.localDBPath(table.Name, table.Version)), safeDBName)) if err != nil { return fmt.Errorf("failed to attach table %q: %w", table.Name, err) } + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) + if err != nil { + return err + } } - return nil -} -func (d *db) attachTable(ctx context.Context, conn *sqlx.Conn, table *tableMeta) error { - safeTable := safeSQLName(table.Name) - safeDBName := safeSQLName(dbName(table.Name, table.Version)) - _, err := conn.ExecContext(ctx, fmt.Sprintf("ATTACH IF NOT EXISTS %s AS %s (READ_ONLY)", safeSQLString(d.localDBPath(table.Name, table.Version)), safeDBName)) - if err != nil { - d.logger.Warn("error in attaching db", slog.String("table", table.Name), slog.String("version", table.Version), slog.Any("error", err)) - return err - } + // retry creating views + for len(failedViews) > 0 { + allViewsFailed := true + size := len(failedViews) + for i := 0; i < size; i++ { + table := failedViews[0] + failedViews = failedViews[1:] + safeTable := safeSQLName(table.Name) + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) + if err != nil { + if errors.Is(err, context.Canceled) { + return err + } + failedViews = append(failedViews, table) + continue + } + // successfully created view + allViewsFailed = false + } + if !allViewsFailed { + // at least one view should always be created unless there is a circular dependency which is not allowed + continue + } - _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT * FROM %s.%s", safeTable, safeDBName, safeTable)) - if err != nil && !errors.Is(err, context.Canceled) && table.Type == "VIEW" { - // create a view that returns an error on querying + // create views that return error on querying // may 
be the view is incompatible with the underlying data due to schema changes - _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT error('View %s is incompatible with the underlying data: %s')", safeTable, safeTable, strings.ReplaceAll(err.Error(), "'", "''"))) - return err + for i := 0; i < len(failedViews); i++ { + table := failedViews[i] + safeTable := safeSQLName(table.Name) + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT error('View %s is incompatible with the underlying data')", safeTable, safeTable)) + if err != nil { + return err + } + } + break } return nil } @@ -928,9 +986,11 @@ func (d *db) removeSnapshot(ctx context.Context, id int) error { } type tableMeta struct { - Name string `json:"name"` - Version string `json:"version"` - Type string `json:"type"` // either TABLE or VIEW + Name string `json:"name"` + Version string `json:"version"` + CreatedVersion string `json:"created_version"` + Type string `json:"type"` // either TABLE or VIEW + SQL string `json:"sql"` // populated for views } func renameTable(ctx context.Context, dbFile, old, newName string) error { diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index d50b34c65c6..71722e39879 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -2,6 +2,7 @@ package rduckdb import ( "context" + "fmt" "io" "log/slog" "os" @@ -322,13 +323,22 @@ func TestViews(t *testing.T) { verifyTable(t, testDB, "SELECT id, country FROM child_view", []testData{{ID: 2, Country: "USA"}}) // rename child view - err = testDB.RenameTable(ctx, "child_view", "child_view2") + err = testDB.RenameTable(ctx, "child_view", "view0") require.NoError(t, err) - verifyTable(t, testDB, "SELECT id, country FROM child_view2", []testData{{ID: 2, Country: "USA"}}) + verifyTable(t, testDB, "SELECT id, country FROM view0", []testData{{ID: 2, Country: "USA"}}) // old child view does not exist err = testDB.DropTable(ctx, 
"child_view") require.Error(t, err) + + // create a chain of views + for i := 1; i <= 10; i++ { + err = testDB.CreateTableAsSelect(ctx, fmt.Sprintf("view%d", i), fmt.Sprintf("SELECT * FROM view%d", i-1), &CreateTableOptions{View: true}) + require.NoError(t, err) + } + verifyTable(t, testDB, "SELECT id, country FROM view10", []testData{{ID: 2, Country: "USA"}}) + + require.NoError(t, testDB.Close()) } func prepareDB(t *testing.T) (db DB, localDir, remoteDir string) { diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 0a9a9657e02..73e8ae6eb56 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -102,7 +102,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { tblMetas[table] = backedUpMeta continue } - if err := os.MkdirAll(filepath.Join(d.localPath, table, backedUpMeta.Version), os.ModePerm); err != nil { + if err := d.initLocalTable(table, backedUpMeta.Version); err != nil { return err } @@ -172,34 +172,36 @@ func (d *db) pullFromRemote(ctx context.Context) error { // pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. 
func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { - localPath := filepath.Join(d.localPath, table, meta.Version) - entries, err := os.ReadDir(localPath) - if err != nil { - return err - } - - for _, entry := range entries { - d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) - // no directory should exist as of now - if entry.IsDir() { - d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) - continue - } - - wr, err := os.Open(filepath.Join(localPath, entry.Name())) + if meta.Type == "TABLE" { + localPath := d.localTableDir(table, meta.Version) + entries, err := os.ReadDir(localPath) if err != nil { return err } - // upload to cloud storage - err = retry(ctx, func() error { - return d.remote.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ - ContentType: "application/octet-stream", + for _, entry := range entries { + d.logger.Debug("replicating file", slog.String("file", entry.Name()), slog.String("path", localPath)) + // no directory should exist as of now + if entry.IsDir() { + d.logger.Debug("found directory in path which should not exist", slog.String("file", entry.Name()), slog.String("path", localPath)) + continue + } + + wr, err := os.Open(filepath.Join(localPath, entry.Name())) + if err != nil { + return err + } + + // upload to cloud storage + err = retry(ctx, func() error { + return d.remote.Upload(ctx, path.Join(table, meta.Version, entry.Name()), wr, &blob.WriterOptions{ + ContentType: "application/octet-stream", + }) }) - }) - _ = wr.Close() - if err != nil { - return err + _ = wr.Close() + if err != nil { + return err + } } } From 5fcc93498beafc6e24612dbfec9a4edbab409a75 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:38:42 +0530 Subject: [PATCH 37/64] add a storage client 
and remove preset data_dir --- cli/cmd/runtime/install_duckdb_extensions.go | 4 +- cli/cmd/runtime/start.go | 16 +- cli/pkg/cmdutil/project.go | 4 +- cli/pkg/local/app.go | 8 +- runtime/compilers/rillv1/parser_test.go | 3 +- runtime/connection_cache.go | 2 +- runtime/connections.go | 5 - runtime/drivers/admin/admin.go | 11 +- runtime/drivers/athena/athena.go | 3 +- runtime/drivers/azure/azure.go | 14 +- runtime/drivers/azure/object_store.go | 2 +- runtime/drivers/bigquery/bigquery.go | 14 +- runtime/drivers/bigquery/warehouse.go | 2 +- runtime/drivers/clickhouse/clickhouse.go | 12 +- .../clickhouse/information_schema_test.go | 3 +- runtime/drivers/clickhouse/olap_test.go | 5 +- runtime/drivers/drivers.go | 7 +- runtime/drivers/drivers_test.go | 3 +- runtime/drivers/druid/druid.go | 3 +- runtime/drivers/druid/druid_test.go | 3 +- runtime/drivers/druid/sql_driver_test.go | 11 +- runtime/drivers/duckdb/config.go | 3 +- runtime/drivers/duckdb/config_test.go | 33 ++-- runtime/drivers/duckdb/duckdb.go | 7 +- runtime/drivers/duckdb/duckdb_test.go | 7 +- runtime/drivers/duckdb/olap_crud_test.go | 27 ++-- runtime/drivers/duckdb/olap_test.go | 7 +- .../transporter_duckDB_to_duckDB_test.go | 5 +- .../transporter_mysql_to_duckDB_test.go | 5 +- .../transporter_postgres_to_duckDB_test.go | 5 +- .../transporter_sqlite_to_duckDB_test.go | 3 +- runtime/drivers/duckdb/transporter_test.go | 3 +- runtime/drivers/file/file.go | 3 +- runtime/drivers/gcs/gcs.go | 14 +- runtime/drivers/gcs/object_store.go | 2 +- runtime/drivers/https/https.go | 3 +- .../drivers/mock/object_store/object_store.go | 3 +- runtime/drivers/mysql/mysql.go | 3 +- runtime/drivers/pinot/pinot.go | 3 +- runtime/drivers/postgres/postgres.go | 3 +- runtime/drivers/redshift/redshift.go | 3 +- runtime/drivers/s3/object_store.go | 2 +- runtime/drivers/s3/s3.go | 9 +- runtime/drivers/salesforce/salesforce.go | 3 +- runtime/drivers/slack/slack.go | 3 +- runtime/drivers/snowflake/snowflake.go | 6 +- 
runtime/drivers/snowflake/sql_store.go | 2 +- runtime/drivers/sqlite/sqlite.go | 3 +- runtime/metricsview/executor_export.go | 3 +- runtime/registry.go | 64 +------- runtime/registry_test.go | 12 +- runtime/runtime.go | 8 +- runtime/server/queries_test.go | 3 +- runtime/storage/storage.go | 150 ++++++++++++++++++ runtime/testruntime/testruntime.go | 4 +- 55 files changed, 349 insertions(+), 200 deletions(-) create mode 100644 runtime/storage/storage.go diff --git a/cli/cmd/runtime/install_duckdb_extensions.go b/cli/cmd/runtime/install_duckdb_extensions.go index 226476e6864..53403ec5d6a 100644 --- a/cli/cmd/runtime/install_duckdb_extensions.go +++ b/cli/cmd/runtime/install_duckdb_extensions.go @@ -2,10 +2,12 @@ package runtime import ( "fmt" + "os" "github.com/rilldata/rill/cli/pkg/cmdutil" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/spf13/cobra" "go.uber.org/zap" ) @@ -17,7 +19,7 @@ func InstallDuckDBExtensionsCmd(ch *cmdutil.Helper) *cobra.Command { Use: "install-duckdb-extensions", RunE: func(cmd *cobra.Command, args []string) error { cfg := map[string]any{"dsn": ":memory:"} // In-memory - h, err := drivers.Open("duckdb", "default", cfg, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + h, err := drivers.Open("duckdb", "default", cfg, storage.MustNew(os.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) if err != nil { return fmt.Errorf("failed to open ephemeral duckdb: %w", err) } diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index 9b4638451b4..83fcdee0227 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -20,6 +20,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "github.com/rilldata/rill/runtime/pkg/ratelimit" "github.com/rilldata/rill/runtime/server" + "github.com/rilldata/rill/runtime/storage" "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -198,6 
+199,16 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { activityClient = activityClient.WithIsDev() } + // storage client + bucketConfig := map[string]interface{}{ + "bucket": conf.DataBucket, + "google_application_credentials": conf.DataBucketCredentialsJSON, + } + storage, err := storage.New(conf.DataDir, bucketConfig) + if err != nil { + logger.Fatal("error: could not create storage client", zap.Error(err)) + } + // Create ctx that cancels on termination signals ctx := graceful.WithCancelOnTerminate(context.Background()) // Init runtime @@ -209,9 +220,6 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { ControllerLogBufferCapacity: conf.LogBufferCapacity, ControllerLogBufferSizeBytes: conf.LogBufferSizeBytes, AllowHostAccess: conf.AllowHostAccess, - DataDir: conf.DataDir, - DataBucket: conf.DataBucket, - DataBucketCredentialsJSON: conf.DataBucketCredentialsJSON, SystemConnectors: []*runtimev1.Connector{ { Type: conf.MetastoreDriver, @@ -220,7 +228,7 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { }, }, } - rt, err := runtime.New(ctx, opts, logger, activityClient, emailClient) + rt, err := runtime.New(ctx, opts, logger, storage, activityClient, emailClient) if err != nil { logger.Fatal("error: could not create runtime", zap.Error(err)) } diff --git a/cli/pkg/cmdutil/project.go b/cli/pkg/cmdutil/project.go index 703acac170f..8c864bc420d 100644 --- a/cli/pkg/cmdutil/project.go +++ b/cli/pkg/cmdutil/project.go @@ -2,9 +2,11 @@ package cmdutil import ( "context" + "os" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" // Ensure file driver is loaded @@ -14,7 +16,7 @@ import ( // RepoForProjectPath creates an ad-hoc drivers.RepoStore for a local project file path func RepoForProjectPath(path string) (drivers.RepoStore, string, error) { instanceID := "default" - repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, 
activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + repoHandle, err := drivers.Open("file", instanceID, map[string]any{"dsn": path}, storage.MustNew(os.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) if err != nil { return nil, "", err } diff --git a/cli/pkg/local/app.go b/cli/pkg/local/app.go index 6aee9ab3d96..93df78ad80b 100644 --- a/cli/pkg/local/app.go +++ b/cli/pkg/local/app.go @@ -27,6 +27,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "github.com/rilldata/rill/runtime/pkg/ratelimit" runtimeserver "github.com/rilldata/rill/runtime/server" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" "go.uber.org/zap/buffer" @@ -161,13 +162,16 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { MetastoreConnector: "metastore", QueryCacheSizeBytes: int64(datasize.MB * 100), AllowHostAccess: true, - DataDir: dbDirPath, SystemConnectors: systemConnectors, SecurityEngineCacheSize: 1000, ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := runtime.New(ctx, rtOpts, logger, opts.Ch.Telemetry(ctx), email.New(sender)) + st, err := storage.New(dbDirPath, nil) + if err != nil { + return nil, err + } + rt, err := runtime.New(ctx, rtOpts, logger, st, opts.Ch.Telemetry(ctx), email.New(sender)) if err != nil { return nil, err } diff --git a/runtime/compilers/rillv1/parser_test.go b/runtime/compilers/rillv1/parser_test.go index f332fbb33d5..9014167a0de 100644 --- a/runtime/compilers/rillv1/parser_test.go +++ b/runtime/compilers/rillv1/parser_test.go @@ -13,6 +13,7 @@ import ( runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" "google.golang.org/protobuf/types/known/structpb" @@ -2057,7 +2058,7 @@ func 
requireResourcesAndErrors(t testing.TB, p *Parser, wantResources []*Resourc func makeRepo(t testing.TB, files map[string]string) drivers.RepoStore { root := t.TempDir() - handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := drivers.Open("file", "default", map[string]any{"dsn": root}, storage.MustNew(root, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) repo, ok := handle.AsRepoStore("") diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 5f0f39f0eab..25f98ffdaa7 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -110,7 +110,7 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig } } - handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, activityClient, r.DataBucket, logger) + handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, r.storage.WithPrefix(cfg.instanceID), activityClient, logger) if err == nil && ctx.Err() != nil { err = fmt.Errorf("timed out while opening driver %q", cfg.driver) } diff --git a/runtime/connections.go b/runtime/connections.go index bfad2e9848a..eff10e90e14 100644 --- a/runtime/connections.go +++ b/runtime/connections.go @@ -279,11 +279,6 @@ func (r *Runtime) ConnectorConfig(ctx context.Context, instanceID, name string) // Apply built-in system-wide config res.setPreset("allow_host_access", strconv.FormatBool(r.opts.AllowHostAccess), true) - // data_dir stores persistent data - res.setPreset("data_dir", r.DataDir(instanceID, name), true) - // temp_dir stores temporary data. The logic that creates any temporary file here should also delete them. - // The contents will also be deleted on runtime restarts. 
- res.setPreset("temp_dir", r.TempDir(instanceID), true) // Done return res, nil diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index f420284f514..5fb8046a8cc 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -21,6 +21,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/archive" "github.com/rilldata/rill/runtime/pkg/ctxsync" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel" "go.uber.org/zap" "golang.org/x/sync/singleflight" @@ -60,10 +61,9 @@ type configProperties struct { AccessToken string `mapstructure:"access_token"` ProjectID string `mapstructure:"project_id"` Branch string `mapstructure:"branch"` - TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, ac *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("admin driver can't be shared") } @@ -105,6 +105,7 @@ func (d driver) TertiarySourceConnectors(ctx context.Context, src map[string]any type Handle struct { config *configProperties logger *zap.Logger + storage *storage.Client admin *client.Client repoMu ctxsync.RWMutex repoSF *singleflight.Group @@ -391,7 +392,7 @@ func (h *Handle) checkHandshake(ctx context.Context) error { } if h.repoPath == "" { - h.repoPath, err = os.MkdirTemp(h.config.TempDir, "admin_driver_repo") + h.repoPath, err = os.MkdirTemp(h.storage.TempDir(), "admin_driver_repo") if err != nil { return err } @@ -577,7 +578,7 @@ func (h *Handle) stashVirtual() error { return nil } - dst, err := generateTmpPath(h.config.TempDir, "admin_driver_virtual_stash", "") + dst, err := generateTmpPath(h.storage.TempDir(), "admin_driver_virtual_stash", "") if err != nil { return 
fmt.Errorf("stash virtual: %w", err) } @@ -622,7 +623,7 @@ func (h *Handle) download() error { defer cancel() // generate a temporary file to copy repo tar directory - downloadDst, err := generateTmpPath(h.config.TempDir, "admin_driver_zipped_repo", ".tar.gz") + downloadDst, err := generateTmpPath(h.storage.TempDir(), "admin_driver_zipped_repo", ".tar.gz") if err != nil { return fmt.Errorf("download: %w", err) } diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index 1aa94ac5221..ba25961b363 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -7,6 +7,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -85,7 +86,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("athena driver can't be shared") } diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index d3104019cb7..1bc65d7489e 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -8,6 +8,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -79,10 +80,9 @@ type configProperties struct { SASToken string `mapstructure:"azure_storage_sas_token"` ConnectionString string `mapstructure:"azure_storage_connection_string"` AllowHostAccess bool `mapstructure:"allow_host_access"` - TempDir string 
`mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("azure driver can't be shared") } @@ -94,8 +94,9 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. } conn := &Connection{ - config: conf, - logger: logger, + config: conf, + storage: storage, + logger: logger, } return conn, nil } @@ -129,8 +130,9 @@ func (d driver) TertiarySourceConnectors(ctx context.Context, src map[string]any } type Connection struct { - config *configProperties - logger *zap.Logger + config *configProperties + storage *storage.Client + logger *zap.Logger } var _ drivers.Handle = &Connection{} diff --git a/runtime/drivers/azure/object_store.go b/runtime/drivers/azure/object_store.go index d6dcae5d9ca..8e0b85cc961 100644 --- a/runtime/drivers/azure/object_store.go +++ b/runtime/drivers/azure/object_store.go @@ -125,7 +125,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d ExtractPolicy: conf.extractPolicy, BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize == "-1", - TempDir: c.config.TempDir, + TempDir: c.storage.TempDir(), } iter, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index 5467784485a..5f379b36173 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/gcputil" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" "google.golang.org/api/option" ) @@ -74,10 
+75,9 @@ type driver struct{} type configProperties struct { SecretJSON string `mapstructure:"google_application_credentials"` AllowHostAccess bool `mapstructure:"allow_host_access"` - TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("bigquery driver can't be shared") } @@ -89,8 +89,9 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. } conn := &Connection{ - config: conf, - logger: logger, + config: conf, + storage: storage, + logger: logger, } return conn, nil } @@ -109,8 +110,9 @@ func (d driver) TertiarySourceConnectors(ctx context.Context, src map[string]any } type Connection struct { - config *configProperties - logger *zap.Logger + config *configProperties + storage *storage.Client + logger *zap.Logger } var _ drivers.Handle = &Connection{} diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go index d864871439a..35c0c6d0329 100644 --- a/runtime/drivers/bigquery/warehouse.go +++ b/runtime/drivers/bigquery/warehouse.go @@ -146,7 +146,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (dr c.logger.Debug("query took", zap.Duration("duration", time.Since(now)), observability.ZapCtx(ctx)) } - tempDir, err := os.MkdirTemp(c.config.TempDir, "bigquery") + tempDir, err := os.MkdirTemp(c.storage.TempDir(), "bigquery") if err != nil { return nil, err } diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index 0ab4b6b77ae..6a7a492168d 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -13,6 +13,7 @@ import ( 
"github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/priorityqueue" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" "go.uber.org/zap" @@ -112,16 +113,13 @@ type configProperties struct { // SettingsOverride override the default settings used in queries. One use case is to disable settings and set `readonly = 1` when using read-only user. SettingsOverride string `mapstructure:"settings_override"` // EmbedPort is the port to run Clickhouse locally (0 is random port). - EmbedPort int `mapstructure:"embed_port"` - // DataDir is the path to directory where db files will be created. - DataDir string `mapstructure:"data_dir"` - TempDir string `mapstructure:"temp_dir"` - CanScaleToZero bool `mapstructure:"can_scale_to_zero"` + EmbedPort int `mapstructure:"embed_port"` + CanScaleToZero bool `mapstructure:"can_scale_to_zero"` } // Open connects to Clickhouse using std API. // Connection string format : https://github.com/ClickHouse/clickhouse-go?tab=readme-ov-file#dsn -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("clickhouse driver can't be shared") } @@ -175,7 +173,7 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. 
} } else { // run clickhouse locally - embed = newEmbedClickHouse(conf.EmbedPort, conf.DataDir, conf.TempDir, logger) + embed = newEmbedClickHouse(conf.EmbedPort, storage.DataDir(), storage.TempDir(), logger) opts, err = embed.start() if err != nil { return nil, err diff --git a/runtime/drivers/clickhouse/information_schema_test.go b/runtime/drivers/clickhouse/information_schema_test.go index 285f2ffc970..7bd10e032ff 100644 --- a/runtime/drivers/clickhouse/information_schema_test.go +++ b/runtime/drivers/clickhouse/information_schema_test.go @@ -8,6 +8,7 @@ import ( runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/clickhouse" @@ -37,7 +38,7 @@ func TestInformationSchema(t *testing.T) { port, err := clickHouseContainer.MappedPort(ctx, "9000/tcp") require.NoError(t, err) - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": fmt.Sprintf("clickhouse://clickhouse:clickhouse@%v:%v", host, port.Port())}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) prepareConn(t, conn) t.Run("testInformationSchemaAll", func(t *testing.T) { testInformationSchemaAll(t, conn) }) diff --git a/runtime/drivers/clickhouse/olap_test.go b/runtime/drivers/clickhouse/olap_test.go index 4c65432270f..d05e2767431 100644 --- a/runtime/drivers/clickhouse/olap_test.go +++ b/runtime/drivers/clickhouse/olap_test.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + 
"github.com/rilldata/rill/runtime/storage" "github.com/rilldata/rill/runtime/testruntime" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -24,7 +25,7 @@ func TestClickhouseCrudOps(t *testing.T) { } func testClickhouseSingleHost(t *testing.T, dsn string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) defer conn.Close() prepareConn(t, conn) @@ -41,7 +42,7 @@ func testClickhouseSingleHost(t *testing.T, dsn string) { } func testClickhouseCluster(t *testing.T, dsn, cluster string) { - conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open("clickhouse", "default", map[string]any{"dsn": dsn, "cluster": cluster}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) defer conn.Close() diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 4e5b9da5ee7..21e65ded226 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" "gocloud.dev/blob" ) @@ -40,13 +41,13 @@ var OpenNilDataBucket OpenDataBucketFn = func(ctx context.Context, instanceID st // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. // If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. 
-func Open(driver, instanceID string, config map[string]any, client *activity.Client, fn OpenDataBucketFn, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, storage *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, client, fn, logger) + conn, err := d.Open(instanceID, config, storage, activity, logger) if err != nil { return nil, err } @@ -61,7 +62,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. - Open(instanceID string, config map[string]any, client *activity.Client, fn OpenDataBucketFn, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, storage *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. 
HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/drivers_test.go b/runtime/drivers/drivers_test.go index f3bb177be5b..f54c7191492 100644 --- a/runtime/drivers/drivers_test.go +++ b/runtime/drivers/drivers_test.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/admin/pkg/pgtestcontainer" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -31,7 +32,7 @@ func TestAll(t *testing.T) { for _, withDriver := range matrix { err := withDriver(t, func(driver, instanceID string, cfg map[string]any) { // Open - conn, err := drivers.Open(driver, instanceID, cfg, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open(driver, instanceID, cfg, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) require.NotNil(t, conn) diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index c94e11c8d56..a943e23ea63 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -12,6 +12,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" @@ -101,7 +102,7 @@ type configProperties struct { // Opens a connection to Apache Druid using HTTP API. // Note that the Druid connection string must have the form "http://user:password@host:port/druid/v2/sql". 
-func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("druid driver can't be shared") } diff --git a/runtime/drivers/druid/druid_test.go b/runtime/drivers/druid/druid_test.go index 45fecf51cd5..635d4f2279e 100644 --- a/runtime/drivers/druid/druid_test.go +++ b/runtime/drivers/druid/druid_test.go @@ -11,6 +11,7 @@ import ( runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" @@ -108,7 +109,7 @@ func TestDruid(t *testing.T) { require.NoError(t, err) dd := &driver{} - conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := dd.Open("default", map[string]any{"dsn": druidAPIURL}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/druid/sql_driver_test.go b/runtime/drivers/druid/sql_driver_test.go index 2bae2c25c4a..ff9dbf57bd7 100644 --- a/runtime/drivers/druid/sql_driver_test.go +++ b/runtime/drivers/druid/sql_driver_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/rilldata/rill/runtime/drivers" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "github.com/rilldata/rill/runtime/pkg/activity" @@ -19,7 +20,7 @@ import ( */ func Ignore_TestDriver_types(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, 
"dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -56,7 +57,7 @@ func Ignore_TestDriver_types(t *testing.T) { func Ignore_TestDriver_array_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -82,7 +83,7 @@ func Ignore_TestDriver_array_type(t *testing.T) { func Ignore_TestDriver_json_type(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -107,7 +108,7 @@ func Ignore_TestDriver_json_type(t *testing.T) { func Ignore_TestDriver_multiple_rows(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), 
zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -143,7 +144,7 @@ func Ignore_TestDriver_multiple_rows(t *testing.T) { func Ignore_TestDriver_error(t *testing.T) { driver := &driver{} - handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := driver.Open("default", map[string]any{"pool_size": 2, "dsn": "http://localhost:8888/druid/v2/sql"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index c9b8220e7f3..90f52b088a2 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -53,9 +53,10 @@ type config struct { LogQueries bool `mapstructure:"log_queries"` } -func newConfig(cfgMap map[string]any) (*config, error) { +func newConfig(cfgMap map[string]any, dataDir string) (*config, error) { cfg := &config{ ExtTableStorage: true, + DataDir: dataDir, } err := mapstructure.WeakDecode(cfgMap, cfg) if err != nil { diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 20f719bd71d..d3caa218107 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -9,77 +9,78 @@ import ( "github.com/rilldata/rill/runtime/drivers" activity "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) func TestConfig(t *testing.T) { - cfg, err := newConfig(map[string]any{}) + cfg, err := newConfig(map[string]any{}, "") require.NoError(t, err) require.Equal(t, "?custom_user_agent=rill", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": ":memory:?memory_limit=2GB"}) + cfg, err = newConfig(map[string]any{"dsn": 
":memory:?memory_limit=2GB"}, "") require.NoError(t, err) require.Equal(t, "?custom_user_agent=rill&memory_limit=2GB", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": "", "memory_limit_gb": "1", "cpu": 2}) + cfg, err = newConfig(map[string]any{"dsn": "", "memory_limit_gb": "1", "cpu": 2}, "") require.NoError(t, err) require.Equal(t, "?custom_user_agent=rill&max_memory=1GB&threads=2", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) require.Equal(t, true, cfg.ExtTableStorage) - cfg, err = newConfig(map[string]any{"data_dir": "path/to"}) + cfg, err = newConfig(map[string]any{}, "path/to") require.NoError(t, err) require.Equal(t, "path/to/main.db?custom_user_agent=rill", cfg.DSN) require.Equal(t, "path/to/main.db", cfg.DBFilePath) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"data_dir": "path/to", "pool_size": 10}) + cfg, err = newConfig(map[string]any{"pool_size": 10}, "path/to") require.NoError(t, err) require.Equal(t, "path/to/main.db?custom_user_agent=rill", cfg.DSN) require.Equal(t, "path/to/main.db", cfg.DBFilePath) require.Equal(t, 10, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"data_dir": "path/to", "pool_size": "10"}) + cfg, err = newConfig(map[string]any{"pool_size": "10"}, "path/to") require.NoError(t, err) require.Equal(t, 10, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"data_dir": "path/to", "dsn": "?rill_pool_size=4", "pool_size": "10"}) + cfg, err = newConfig(map[string]any{"dsn": "?rill_pool_size=4", "pool_size": "10"}, "path/to") require.NoError(t, err) require.Equal(t, 4, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}) + cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}, "path/to") require.NoError(t, err) require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) require.Equal(t, "path/to/duck.db", cfg.DBFilePath) require.Equal(t, 10, cfg.PoolSize) - cfg, err = 
newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}) + cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}, "path/to") require.NoError(t, err) require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) require.Equal(t, 10, cfg.PoolSize) require.Equal(t, "path/to/duck.db", cfg.DBFilePath) - _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}) + _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}, "path/to") require.Error(t, err) - cfg, err = newConfig(map[string]any{"dsn": "duck.db"}) + cfg, err = newConfig(map[string]any{"dsn": "duck.db"}, "path/to") require.NoError(t, err) require.Equal(t, "duck.db", cfg.DBFilePath) - cfg, err = newConfig(map[string]any{"dsn": "duck.db?rill_pool_size=10"}) + cfg, err = newConfig(map[string]any{"dsn": "duck.db?rill_pool_size=10"}, "path/to") require.NoError(t, err) require.Equal(t, "duck.db", cfg.DBFilePath) - cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}) + cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}, "path/to") require.NoError(t, err) require.Equal(t, "duck.db", cfg.DBFilePath) require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=8GB&threads=2", cfg.DSN) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}) + cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}, "path/to") require.NoError(t, err) require.Equal(t, "duck.db", cfg.DBFilePath) require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) @@ -93,7 +94,7 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", 
"external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) config := conn.(*connection).config require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) @@ -110,7 +111,7 @@ func Test_specialCharInPath(t *testing.T) { func TestOverrides(t *testing.T) { cfgMap := map[string]any{"path": "duck.db", "memory_limit_gb": "4", "cpu": "2", "max_memory_gb_override": "2", "threads_override": "10", "external_table_storage": false} - handle, err := Driver{}.Open("default", cfgMap, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", cfgMap, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 8f29927968c..a3bb6109401 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -27,6 +27,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/duckdbsql" "github.com/rilldata/rill/runtime/pkg/observability" "github.com/rilldata/rill/runtime/pkg/priorityqueue" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" @@ -135,7 +136,7 @@ type Driver struct { name string } -func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d Driver) Open(instanceID string, cfgMap map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("duckdb driver 
can't be shared") } @@ -145,7 +146,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie logger.Warn("failed to install embedded DuckDB extensions, let DuckDB download them", zap.Error(err)) } - cfg, err := newConfig(cfgMap) + cfg, err := newConfig(cfgMap, storage.DataDir()) if err != nil { return nil, err } @@ -245,7 +246,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie } func (d Driver) Drop(cfgMap map[string]any, logger *zap.Logger) error { - cfg, err := newConfig(cfgMap) + cfg, err := newConfig(cfgMap, "") if err != nil { return err } diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 1bbb09b5c44..6a26e7f6cb2 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -10,6 +10,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -19,7 +20,7 @@ func TestOpenDrop(t *testing.T) { walpath := path + ".wal" dsn := path - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -43,7 +44,7 @@ func TestNoFatalErr(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, 
storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -105,7 +106,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { dsn := filepath.Join(t.TempDir(), "tmp.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index a1079348a31..7ebf25827f0 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -20,14 +21,14 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") require.NoError(t, normalConn.Migrate(context.Background())) dbPath = filepath.Join(temp, "default", "view.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": 
true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -100,7 +101,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -145,7 +146,7 @@ func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -174,7 +175,7 @@ func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), 
activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -203,7 +204,7 @@ func Test_connection_RenameTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -229,7 +230,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "default", "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -258,7 +259,7 @@ func Test_connection_AddTableColumn(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -287,7 +288,7 @@ func Test_connection_AddTableColumn(t *testing.T) { 
} func Test_connection_RenameToExistingTableOld(t *testing.T) { - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -316,7 +317,7 @@ func Test_connection_CastEnum(t *testing.T) { os.Mkdir(temp, fs.ModePerm) dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -361,7 +362,7 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") @@ -398,7 +399,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on cloud dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, 
"external_table_storage": true, "allow_host_access": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -421,7 +422,7 @@ func Test_connection_ChangingOrder(t *testing.T) { // on local dbPath = filepath.Join(temp, "local.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c = handle.(*connection) require.NoError(t, c.Migrate(context.Background())) diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 5067a17ca9d..38d7fa3a3af 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -212,7 +213,7 @@ func TestClose(t *testing.T) { } func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, 
storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -248,11 +249,11 @@ func Test_safeSQLString(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) require.NoError(t, conn.Close()) - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 1b094a3583b..27fc71672d8 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -8,13 +8,14 @@ import ( "github.com/rilldata/rill/runtime/drivers" activity "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) func TestDuckDBToDuckDBTransfer(t *testing.T) { tempDir := t.TempDir() - conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), 
"external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") @@ -31,7 +32,7 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { require.NoError(t, err) require.NoError(t, conn.Close()) - to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ = to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index a77ec2e13b9..e011b17d3f7 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -7,6 +7,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -102,12 +103,12 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { _, err := db.ExecContext(ctx, mysqlInitStmt) require.NoError(t, err) - handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + to, err := drivers.Open("duckdb", "default", 
map[string]any{"dsn": ":memory:"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index e9c11772cef..e59934d7e58 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -8,6 +8,7 @@ import ( "github.com/rilldata/rill/admin/pkg/pgtestcontainer" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -67,12 +68,12 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) require.NotNil(t, handle) sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index 6eab17831aa..b99a382a965 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -9,6 +9,7 @@ import ( 
"github.com/rilldata/rill/runtime/drivers" _ "github.com/rilldata/rill/runtime/drivers/sqlite" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" _ "modernc.org/sqlite" @@ -29,7 +30,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_test.go b/runtime/drivers/duckdb/transporter_test.go index 329bdeb1138..83edd09fcf2 100644 --- a/runtime/drivers/duckdb/transporter_test.go +++ b/runtime/drivers/duckdb/transporter_test.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/drivers/duckdb" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -592,7 +593,7 @@ func TestIterativeJSONIngestionWithVariableSchema(t *testing.T) { } func runOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, canServe := conn.AsOLAP("") require.True(t, canServe) diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index 8407d1653c3..dad38fb311b 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -12,6 
+12,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" "gopkg.in/yaml.v3" ) @@ -60,7 +61,7 @@ type rillYAML struct { IgnorePaths []string `yaml:"ignore_paths"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("file driver can't be shared") } diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index f38680831b6..c03989b01e1 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/gcputil" "github.com/rilldata/rill/runtime/pkg/globutil" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" "gocloud.dev/blob/gcsblob" "gocloud.dev/gcp" @@ -72,10 +73,9 @@ type driver struct{} type configProperties struct { SecretJSON string `mapstructure:"google_application_credentials"` AllowHostAccess bool `mapstructure:"allow_host_access"` - TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("gcs driver can't be shared") } @@ -87,8 +87,9 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. 
} conn := &Connection{ - config: conf, - logger: logger, + config: conf, + storage: storage, + logger: logger, } return conn, nil } @@ -166,8 +167,9 @@ func parseSourceProperties(props map[string]any) (*sourceProperties, error) { } type Connection struct { - config *configProperties - logger *zap.Logger + config *configProperties + storage *storage.Client + logger *zap.Logger } var _ drivers.Handle = &Connection{} diff --git a/runtime/drivers/gcs/object_store.go b/runtime/drivers/gcs/object_store.go index 47d295ac99f..28437da94fa 100644 --- a/runtime/drivers/gcs/object_store.go +++ b/runtime/drivers/gcs/object_store.go @@ -79,7 +79,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d ExtractPolicy: conf.extractPolicy, BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize == "-1", - TempDir: c.config.TempDir, + TempDir: c.storage.TempDir(), } iter, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index be30ad3b178..b7548a6ff11 100644 --- a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/fileutil" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -46,7 +47,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("https driver can't be shared") } diff --git a/runtime/drivers/mock/object_store/object_store.go b/runtime/drivers/mock/object_store/object_store.go index 
ffda83b54c5..2a4ca00c91d 100644 --- a/runtime/drivers/mock/object_store/object_store.go +++ b/runtime/drivers/mock/object_store/object_store.go @@ -11,6 +11,7 @@ import ( rillblob "github.com/rilldata/rill/runtime/drivers/blob" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/globutil" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" "gocloud.dev/blob" @@ -38,7 +39,7 @@ func (driver) Spec() drivers.Spec { } // Open implements drivers.Driver. -func (driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { cfg := &configProperties{} err := mapstructure.WeakDecode(config, cfg) if err != nil { diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index 3cb2ce36061..739ca592a33 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -6,6 +6,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -57,7 +58,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("mysql driver can't be shared") } diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 6d9bf2f3e22..519eca500a7 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -11,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" 
"github.com/rilldata/rill/runtime/drivers/pinot/sqldriver" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" ) @@ -94,7 +95,7 @@ type configProperties struct { } // Open a connection to Apache Pinot using HTTP API. -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("pinot driver can't be shared") } diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 69471163b72..4bcf837d5dc 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -6,6 +6,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -55,7 +56,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("postgres driver can't be shared") } diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index ebc82e508e7..b79613bd607 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -7,6 +7,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" 
) @@ -109,7 +110,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("redshift driver can't be shared") } diff --git a/runtime/drivers/s3/object_store.go b/runtime/drivers/s3/object_store.go index 7d17ba5e99a..021f2804c45 100644 --- a/runtime/drivers/s3/object_store.go +++ b/runtime/drivers/s3/object_store.go @@ -136,7 +136,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, src map[string]any) (dri BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize == "-1", RetainFiles: c.config.RetainFiles, - TempDir: c.config.TempDir, + TempDir: c.storage.TempDir(), } it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index 6dd27d1b881..039633583d3 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -9,6 +9,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -93,11 +94,10 @@ type ConfigProperties struct { SessionToken string `mapstructure:"aws_access_token"` AllowHostAccess bool `mapstructure:"allow_host_access"` RetainFiles bool `mapstructure:"retain_files"` - TempDir string `mapstructure:"temp_dir"` } // Open implements drivers.Driver -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac 
*activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("s3 driver can't be shared") } @@ -145,8 +145,9 @@ func (d driver) TertiarySourceConnectors(ctx context.Context, src map[string]any type Connection struct { // config is input configs passed to driver.Open - config *ConfigProperties - logger *zap.Logger + config *ConfigProperties + storage *storage.Client + logger *zap.Logger } var _ drivers.Handle = &Connection{} diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index 3f4eb52ae6c..7d3d2280968 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -7,6 +7,7 @@ import ( force "github.com/ForceCLI/force/lib" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -128,7 +129,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("salesforce driver can't be shared") } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 81ff63ea8fd..2d755cd2b3a 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -8,6 +8,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" ) @@ -35,7 +36,7 @@ func (d driver) Spec() drivers.Spec { return spec } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger 
*zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("slack driver can't be shared") } diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index c0439bf297b..1ddd0a835d1 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -7,6 +7,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" // Load database/sql driver @@ -63,10 +64,9 @@ type driver struct{} type configProperties struct { DSN string `mapstructure:"dsn"` ParallelFetchLimit int `mapstructure:"parallel_fetch_limit"` - TempDir string `mapstructure:"temp_dir"` } -func (d driver) Open(instanceID string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("snowflake driver can't be shared") } @@ -80,6 +80,7 @@ func (d driver) Open(instanceID string, config map[string]any, client *activity. 
// actual db connection is opened during query return &connection{ configProperties: conf, + storage: storage, logger: logger, }, nil } @@ -98,6 +99,7 @@ func (d driver) TertiarySourceConnectors(ctx context.Context, src map[string]any type connection struct { configProperties *configProperties + storage *storage.Client logger *zap.Logger } diff --git a/runtime/drivers/snowflake/sql_store.go b/runtime/drivers/snowflake/sql_store.go index d8e4564729e..136eaddf32a 100644 --- a/runtime/drivers/snowflake/sql_store.go +++ b/runtime/drivers/snowflake/sql_store.go @@ -86,7 +86,7 @@ func (c *connection) QueryAsFiles(ctx context.Context, props map[string]any) (dr return nil, drivers.ErrNoRows } - tempDir, err := os.MkdirTemp(c.configProperties.TempDir, "snowflake") + tempDir, err := os.MkdirTemp(c.storage.TempDir(), "snowflake") if err != nil { return nil, err } diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index a626493501b..7d490c551fd 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -9,6 +9,7 @@ import ( "github.com/jmoiron/sqlx" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" // Load sqlite driver @@ -22,7 +23,7 @@ func init() { type driver struct{} -func (d driver) Open(_ string, config map[string]any, client *activity.Client, fn drivers.OpenDataBucketFn, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(_ string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { dsn, ok := config["dsn"].(string) if !ok { return nil, fmt.Errorf("require dsn to open sqlite connection") diff --git a/runtime/metricsview/executor_export.go b/runtime/metricsview/executor_export.go index e462e641aac..37d6a52d45a 100644 --- a/runtime/metricsview/executor_export.go +++ b/runtime/metricsview/executor_export.go @@ -4,7 +4,6 @@ import ( 
"context" "fmt" "os" - "path/filepath" "github.com/rilldata/rill/runtime/drivers" ) @@ -24,7 +23,7 @@ func (e *Executor) executeExport(ctx context.Context, format drivers.FileFormat, return "", err } name = format.Filename(name) - path := filepath.Join(e.rt.TempDir(e.instanceID), name) + path := e.rt.TempDir(e.instanceID, name) ic, ir, err := e.rt.AcquireHandle(ctx, e.instanceID, inputConnector) if err != nil { diff --git a/runtime/registry.go b/runtime/registry.go index e09175e0fc7..e0d3d189898 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -6,7 +6,6 @@ import ( "fmt" "io/fs" "os" - "path/filepath" "sync" "time" @@ -21,10 +20,6 @@ import ( "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "go.uber.org/zap/zapcore" - "gocloud.dev/blob" - "gocloud.dev/blob/gcsblob" - "gocloud.dev/gcp" - "golang.org/x/oauth2/google" ) // GlobalProjectParserName is the name of the instance-global project parser resource that is created for each new instance. @@ -113,7 +108,7 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { // Wait for the controller to stop and the connection cache to be evicted <-completed - if err := os.RemoveAll(filepath.Join(r.opts.DataDir, instanceID)); err != nil { + if err := r.storage.RemoveInstance(instanceID); err != nil { r.Logger.Error("could not drop instance data directory", zap.Error(err), zap.String("instance_id", instanceID), observability.ZapCtx(ctx)) } @@ -131,35 +126,10 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { return nil } -// DataBucket returns a prefixed bucket for the given instance. -// This bucket is used for storing data that is expected to be persisted across resets. 
-func (r *Runtime) DataBucket(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) { - if r.opts.DataBucket == "" { - return nil, nil - } - // Init dataBucket - client, err := newClient(ctx, r.opts.DataBucketCredentialsJSON) - if err != nil { - return nil, fmt.Errorf("could not create GCP client: %w", err) - } - - bucket, err := gcsblob.OpenBucket(ctx, client, r.opts.DataBucket, nil) - if err != nil { - return nil, fmt.Errorf("failed to open bucket %q: %w", r.opts.DataBucket, err) - } - prefix := instanceID + "/" - for _, e := range elem { - prefix = prefix + e + "/" - } - b := blob.PrefixedBucket(bucket, prefix) - return b, nil -} - // DataDir returns the path to a persistent data directory for the given instance. // Storage usage in the returned directory will be reported in the instance's heartbeat events. func (r *Runtime) DataDir(instanceID string, elem ...string) string { - elem = append([]string{r.opts.DataDir, instanceID}, elem...) - return filepath.Join(elem...) + return r.storage.WithPrefix(instanceID).DataDir(elem...) } // TempDir returns the path to a temporary directory for the given instance. @@ -167,8 +137,7 @@ func (r *Runtime) DataDir(instanceID string, elem ...string) string { // The TempDir may be cleared after restarts. // Storage usage in the returned directory will be reported in the instance's heartbeat events. func (r *Runtime) TempDir(instanceID string, elem ...string) string { - elem = append([]string{r.opts.DataDir, instanceID, "tmp"}, elem...) - return filepath.Join(elem...) + return r.storage.WithPrefix(instanceID).TempDir(elem...) } // registryCache caches all the runtime's instances and manages the life-cycle of their controllers. 
@@ -356,19 +325,9 @@ func (r *registryCache) add(inst *drivers.Instance) error { instance: inst, } r.instances[inst.ID] = iwc - if r.rt.opts.DataDir != "" { - if err := os.Mkdir(filepath.Join(r.rt.opts.DataDir, inst.ID), os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return err - } - - // also recreate instance's tmp directory - tmpDir := filepath.Join(r.rt.opts.DataDir, inst.ID, "tmp") - if err := os.RemoveAll(tmpDir); err != nil { - r.logger.Warn("failed to remove tmp directory", zap.String("instance_id", inst.ID), zap.Error(err)) - } - if err := os.Mkdir(tmpDir, os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return err - } + err := r.rt.storage.AddInstance(inst.ID) + if err != nil { + return err } // Setup the logger to duplicate logs to a) the Zap logger, b) an in-memory buffer that exposes the logs over the API @@ -589,7 +548,7 @@ func (r *registryCache) emitHeartbeats() { } func (r *registryCache) emitHeartbeatForInstance(inst *drivers.Instance) { - dataDir := filepath.Join(r.rt.opts.DataDir, inst.ID) + dataDir := r.rt.storage.WithPrefix(inst.ID).DataDir() // Add instance annotations as attributes to pass organization id, project id, etc. attrs := instanceAnnotationsToAttribs(inst) @@ -624,15 +583,6 @@ func (r *registryCache) updateProjectConfig(iwc *instanceWithController) error { return r.rt.UpdateInstanceWithRillYAML(iwc.ctx, iwc.instanceID, p, false) } -func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) { - creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") - if err != nil { - return nil, fmt.Errorf("failed to create credentials: %w", err) - } - // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
- return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) -} - func sizeOfDir(path string) int64 { var size int64 _ = fs.WalkDir(os.DirFS(path), ".", func(path string, d fs.DirEntry, err error) error { diff --git a/runtime/registry_test.go b/runtime/registry_test.go index 01189f06229..c376695ac7f 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -14,6 +14,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/email" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -379,7 +380,6 @@ func TestRuntime_EditInstance(t *testing.T) { func TestRuntime_DeleteInstance(t *testing.T) { repodsn := t.TempDir() rt := newTestRuntime(t) - rt.opts.DataDir = t.TempDir() tests := []struct { name string wantErr bool @@ -404,7 +404,7 @@ func TestRuntime_DeleteInstance(t *testing.T) { { Type: "duckdb", Name: "duckdb", - Config: map[string]string{"data_dir": rt.opts.DataDir}, + Config: map[string]string{}, }, }, } @@ -455,7 +455,6 @@ func TestRuntime_DeleteInstance_DropCorrupted(t *testing.T) { // Prepare ctx := context.Background() rt := newTestRuntime(t) - rt.opts.DataDir = t.TempDir() // Create instance inst := &drivers.Instance{ Environment: "test", @@ -471,14 +470,14 @@ func TestRuntime_DeleteInstance_DropCorrupted(t *testing.T) { { Type: "duckdb", Name: "duckdb", - Config: map[string]string{"data_dir": rt.opts.DataDir}, + Config: map[string]string{}, }, }, } err := rt.CreateInstance(context.Background(), inst) require.NoError(t, err) - dbpath := filepath.Join(rt.opts.DataDir, inst.ID, "duckdb", "main.db") + dbpath := filepath.Join(rt.storage.DataDir(), inst.ID, "duckdb", "main.db") // Put some data into it to create a .db file on disk olap, release, err := rt.OLAP(ctx, inst.ID, "") @@ -527,7 +526,8 @@ func newTestRuntime(t *testing.T) *Runtime { ControllerLogBufferCapacity: 10000, 
ControllerLogBufferSizeBytes: int64(datasize.MB * 16), } - rt, err := New(context.Background(), opts, zap.NewNop(), activity.NewNoopClient(), email.New(email.NewNoopSender())) + + rt, err := New(context.Background(), opts, zap.NewNop(), storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), email.New(email.NewNoopSender())) t.Cleanup(func() { rt.Close() }) diff --git a/runtime/runtime.go b/runtime/runtime.go index e16a17efd5b..247f6684b97 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -12,6 +12,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/conncache" "github.com/rilldata/rill/runtime/pkg/email" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" @@ -28,15 +29,13 @@ type Options struct { ControllerLogBufferCapacity int ControllerLogBufferSizeBytes int64 AllowHostAccess bool - DataDir string - DataBucket string - DataBucketCredentialsJSON string } type Runtime struct { Email *email.Client opts *Options Logger *zap.Logger + storage *storage.Client activity *activity.Client metastore drivers.Handle registryCache *registryCache @@ -45,7 +44,7 @@ type Runtime struct { securityEngine *securityEngine } -func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { +func New(ctx context.Context, opts *Options, logger *zap.Logger, storage *storage.Client, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { if emailClient == nil { emailClient = email.New(email.NewNoopSender()) } @@ -54,6 +53,7 @@ func New(ctx context.Context, opts *Options, logger *zap.Logger, ac *activity.Cl Email: emailClient, opts: opts, Logger: logger, + storage: storage, activity: ac, queryCache: newQueryCache(opts.QueryCacheSizeBytes), securityEngine: newSecurityEngine(opts.SecurityEngineCacheSize, logger), diff --git a/runtime/server/queries_test.go 
b/runtime/server/queries_test.go index 81eada4a7f9..6fdc548ce77 100644 --- a/runtime/server/queries_test.go +++ b/runtime/server/queries_test.go @@ -6,6 +6,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -132,7 +133,7 @@ func TestServer_UpdateLimit_UNION(t *testing.T) { } func prepareOLAPStore(t *testing.T) drivers.OLAPStore { - conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, activity.NewNoopClient(), drivers.OpenNilDataBucket, zap.NewNop()) + conn, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") require.True(t, ok) diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go new file mode 100644 index 00000000000..ff125cdff8f --- /dev/null +++ b/runtime/storage/storage.go @@ -0,0 +1,150 @@ +package storage + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/mitchellh/mapstructure" + "github.com/rilldata/rill/runtime/pkg/gcputil" + "gocloud.dev/blob" + "gocloud.dev/blob/gcsblob" + "gocloud.dev/gcp" +) + +type Client struct { + dataDirPath string + bucketConfig *gcsBucketConfig + instanceID string +} + +type gcsBucketConfig struct { + Bucket string `mapstructure:"bucket"` + SecretJSON string `mapstructure:"google_application_credentials"` + AllowHostAccess bool `mapstructure:"allow_host_access"` +} + +func New(dataDir string, bucketCfg map[string]any) (*Client, error) { + c := &Client{ + dataDirPath: dataDir, + } + + if len(bucketCfg) != 0 { + gcsBucketConfig := &gcsBucketConfig{} + err := mapstructure.WeakDecode(bucketCfg, gcsBucketConfig) + if err != nil { + return nil, err + } + 
c.bucketConfig = gcsBucketConfig + } + return c, nil +} + +func MustNew(dataDir string, bucketCfg map[string]any) *Client { + c := &Client{ + dataDirPath: dataDir, + } + + if len(bucketCfg) != 0 { + gcsBucketConfig := &gcsBucketConfig{} + if err := mapstructure.WeakDecode(bucketCfg, gcsBucketConfig); err != nil { + panic(err) + } + c.bucketConfig = gcsBucketConfig + } + return c +} + +func (c *Client) AddInstance(instanceID string) error { + err := os.Mkdir(filepath.Join(c.dataDirPath, instanceID), os.ModePerm) + if err != nil && !errors.Is(err, fs.ErrExist) { + return fmt.Errorf("could not create instance directory: %w", err) + } + + // recreate instance's tmp directory + tmpDir := filepath.Join(c.dataDirPath, instanceID, "tmp") + if err := os.RemoveAll(tmpDir); err != nil { + return fmt.Errorf("could not remove instance tmp directory: %w", err) + } + if err := os.Mkdir(tmpDir, os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { + return err + } + + return nil +} + +func (c *Client) RemoveInstance(instanceID string) error { + err := os.RemoveAll(filepath.Join(c.dataDirPath, instanceID)) + if err != nil { + return fmt.Errorf("could not remove instance directory: %w", err) + } + return nil +} + +func (c *Client) WithPrefix(prefix string) *Client { + c.instanceID = prefix + return c +} + +func (c *Client) DataDir(elem ...string) string { + paths := []string{c.dataDirPath} + if c.instanceID != "" { + paths = append(paths, c.instanceID) + } + paths = append(paths, elem...) + return filepath.Join(paths...) +} + +func (c *Client) TempDir(elem ...string) string { + paths := []string{c.dataDirPath} + if c.instanceID != "" { + paths = append(paths, c.instanceID) + } + paths = append(paths, "tmp") + paths = append(paths, elem...) + return filepath.Join(paths...) 
+} + +func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, bool, error) { + if c.bucketConfig == nil { + return nil, false, nil + } + // Init dataBucket + client, err := c.newGCPClient(ctx) + if err != nil { + return nil, false, fmt.Errorf("could not create GCP client: %w", err) + } + + bucket, err := gcsblob.OpenBucket(ctx, client, c.bucketConfig.Bucket, nil) + if err != nil { + return nil, false, fmt.Errorf("failed to open bucket %q: %w", c.bucketConfig.Bucket, err) + } + var prefix string + if c.instanceID != "" { + prefix = c.instanceID + "/" + } + for _, e := range elem { + prefix = prefix + e + "/" + } + if prefix == "" { + return bucket, true, nil + } + return blob.PrefixedBucket(bucket, prefix), true, nil +} + +func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { + creds, err := gcputil.Credentials(ctx, c.bucketConfig.SecretJSON, c.bucketConfig.AllowHostAccess) + if err != nil { + if !errors.Is(err, gcputil.ErrNoCredentials) { + return nil, err + } + + // no credentials set, we try with a anonymous client in case user is trying to access public buckets + return gcp.NewAnonymousHTTPClient(gcp.DefaultTransport()), nil + } + // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. 
+ return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) +} diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index 0ecfe111361..f0b601a9151 100644 --- a/runtime/testruntime/testruntime.go +++ b/runtime/testruntime/testruntime.go @@ -17,6 +17,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/pkg/email" + "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -63,7 +64,6 @@ func New(t TestingT) *runtime.Runtime { ControllerLogBufferCapacity: 10000, ControllerLogBufferSizeBytes: int64(datasize.MB * 16), AllowHostAccess: true, - DataDir: t.TempDir(), } logger := zap.NewNop() @@ -73,7 +73,7 @@ func New(t TestingT) *runtime.Runtime { require.NoError(t, err) } - rt, err := runtime.New(context.Background(), opts, logger, activity.NewNoopClient(), email.New(email.NewTestSender())) + rt, err := runtime.New(context.Background(), opts, logger, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), email.New(email.NewTestSender())) require.NoError(t, err) t.Cleanup(func() { rt.Close() }) From 539b481fd34b3d4810974acb64cfd4aac6d55f45 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:06:06 +0530 Subject: [PATCH 38/64] lint fixes --- runtime/drivers/admin/admin.go | 2 +- runtime/drivers/athena/athena.go | 2 +- runtime/drivers/azure/azure.go | 4 ++-- runtime/drivers/bigquery/bigquery.go | 4 ++-- runtime/drivers/clickhouse/clickhouse.go | 4 ++-- runtime/drivers/drivers.go | 6 +++--- runtime/drivers/druid/druid.go | 2 +- runtime/drivers/duckdb/duckdb.go | 4 ++-- runtime/drivers/file/file.go | 2 +- runtime/drivers/gcs/gcs.go | 4 ++-- runtime/drivers/https/https.go | 2 +- runtime/drivers/mysql/mysql.go | 2 +- runtime/drivers/pinot/pinot.go | 2 +- runtime/drivers/postgres/postgres.go | 2 +- 
runtime/drivers/redshift/redshift.go | 2 +- runtime/drivers/s3/s3.go | 2 +- runtime/drivers/salesforce/salesforce.go | 2 +- runtime/drivers/slack/slack.go | 2 +- runtime/drivers/snowflake/snowflake.go | 4 ++-- runtime/runtime.go | 4 ++-- 20 files changed, 29 insertions(+), 29 deletions(-) diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index 5fb8046a8cc..a4191560d99 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -63,7 +63,7 @@ type configProperties struct { Branch string `mapstructure:"branch"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("admin driver can't be shared") } diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index ba25961b363..166018e09d2 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -86,7 +86,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("athena driver can't be shared") } diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index 1bc65d7489e..e561d57ed36 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -82,7 +82,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config 
map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("azure driver can't be shared") } @@ -95,7 +95,7 @@ func (d driver) Open(instanceID string, config map[string]any, storage *storage. conn := &Connection{ config: conf, - storage: storage, + storage: st, logger: logger, } return conn, nil diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index 5f379b36173..0bb4bad68e6 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -77,7 +77,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("bigquery driver can't be shared") } @@ -90,7 +90,7 @@ func (d driver) Open(instanceID string, config map[string]any, storage *storage. conn := &Connection{ config: conf, - storage: storage, + storage: st, logger: logger, } return conn, nil diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index 6a7a492168d..a0c5c517110 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -119,7 +119,7 @@ type configProperties struct { // Open connects to Clickhouse using std API. 
// Connection string format : https://github.com/ClickHouse/clickhouse-go?tab=readme-ov-file#dsn -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("clickhouse driver can't be shared") } @@ -173,7 +173,7 @@ func (d driver) Open(instanceID string, config map[string]any, storage *storage. } } else { // run clickhouse locally - embed = newEmbedClickHouse(conf.EmbedPort, storage.DataDir(), storage.TempDir(), logger) + embed = newEmbedClickHouse(conf.EmbedPort, st.DataDir(), st.TempDir(), logger) opts, err = embed.start() if err != nil { return nil, err diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 21e65ded226..cf8543080a2 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -41,13 +41,13 @@ var OpenNilDataBucket OpenDataBucketFn = func(ctx context.Context, instanceID st // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. // If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. 
-func Open(driver, instanceID string, config map[string]any, storage *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, st *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, storage, activity, logger) + conn, err := d.Open(instanceID, config, st, activity, logger) if err != nil { return nil, err } @@ -62,7 +62,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. - Open(instanceID string, config map[string]any, storage *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, st *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index a943e23ea63..11687815392 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -102,7 +102,7 @@ type configProperties struct { // Opens a connection to Apache Druid using HTTP API. // Note that the Druid connection string must have the form "http://user:password@host:port/druid/v2/sql". 
-func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("druid driver can't be shared") } diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index a3bb6109401..2eb0a91cb39 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -136,7 +136,7 @@ type Driver struct { name string } -func (d Driver) Open(instanceID string, cfgMap map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("duckdb driver can't be shared") } @@ -146,7 +146,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, storage *storage. 
logger.Warn("failed to install embedded DuckDB extensions, let DuckDB download them", zap.Error(err)) } - cfg, err := newConfig(cfgMap, storage.DataDir()) + cfg, err := newConfig(cfgMap, st.DataDir("duckdb")) if err != nil { return nil, err } diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index dad38fb311b..c1c7c9a4ee6 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -61,7 +61,7 @@ type rillYAML struct { IgnorePaths []string `yaml:"ignore_paths"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("file driver can't be shared") } diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index c03989b01e1..cf1e5cfd8ae 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -75,7 +75,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("gcs driver can't be shared") } @@ -88,7 +88,7 @@ func (d driver) Open(instanceID string, config map[string]any, storage *storage. 
conn := &Connection{ config: conf, - storage: storage, + storage: st, logger: logger, } return conn, nil diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index b7548a6ff11..24775d1de57 100644 --- a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -47,7 +47,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("https driver can't be shared") } diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index 739ca592a33..5dc2a0a627b 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -58,7 +58,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("mysql driver can't be shared") } diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 519eca500a7..a5afd5e1dcd 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -95,7 +95,7 @@ type configProperties struct { } // Open a connection to Apache Pinot using HTTP API. 
-func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("pinot driver can't be shared") } diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 4bcf837d5dc..2dc83d613ee 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -56,7 +56,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("postgres driver can't be shared") } diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index b79613bd607..6ef9e77467f 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -110,7 +110,7 @@ type configProperties struct { AllowHostAccess bool `mapstructure:"allow_host_access"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("redshift driver can't be shared") } diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index 039633583d3..a2122816a9c 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -97,7 +97,7 @@ type ConfigProperties struct { } // Open implements 
drivers.Driver -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("s3 driver can't be shared") } diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index 7d3d2280968..36171336129 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -129,7 +129,7 @@ var spec = drivers.Spec{ type driver struct{} -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("salesforce driver can't be shared") } diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 2d755cd2b3a..04490477dc3 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -36,7 +36,7 @@ func (d driver) Spec() drivers.Spec { return spec } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, fmt.Errorf("slack driver can't be shared") } diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index 1ddd0a835d1..b86d938e870 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -66,7 +66,7 @@ type configProperties struct { 
ParallelFetchLimit int `mapstructure:"parallel_fetch_limit"` } -func (d driver) Open(instanceID string, config map[string]any, storage *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { +func (d driver) Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (drivers.Handle, error) { if instanceID == "" { return nil, errors.New("snowflake driver can't be shared") } @@ -80,7 +80,7 @@ func (d driver) Open(instanceID string, config map[string]any, storage *storage. // actual db connection is opened during query return &connection{ configProperties: conf, - storage: storage, + storage: st, logger: logger, }, nil } diff --git a/runtime/runtime.go b/runtime/runtime.go index 247f6684b97..a4a78055397 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -44,7 +44,7 @@ type Runtime struct { securityEngine *securityEngine } -func New(ctx context.Context, opts *Options, logger *zap.Logger, storage *storage.Client, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { +func New(ctx context.Context, opts *Options, logger *zap.Logger, st *storage.Client, ac *activity.Client, emailClient *email.Client) (*Runtime, error) { if emailClient == nil { emailClient = email.New(email.NewNoopSender()) } @@ -53,7 +53,7 @@ func New(ctx context.Context, opts *Options, logger *zap.Logger, storage *storag Email: emailClient, opts: opts, Logger: logger, - storage: storage, + storage: st, activity: ac, queryCache: newQueryCache(opts.QueryCacheSizeBytes), securityEngine: newSecurityEngine(opts.SecurityEngineCacheSize, logger), From 3f86e55e06b68e8af4b3e6ee715986ff6b2a625b Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:02:19 +0530 Subject: [PATCH 39/64] small refactor --- runtime/drivers/drivers.go | 11 +-- runtime/drivers/duckdb/duckdb.go | 22 ----- runtime/drivers/duckdb/duckdb_test.go | 24 ------ 
runtime/drivers/slack/slack.go | 4 - runtime/registry.go | 5 +- runtime/storage/storage.go | 117 +++++++++++++------------- 6 files changed, 65 insertions(+), 118 deletions(-) diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index cf8543080a2..e1b4944c800 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -8,7 +8,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" - "gocloud.dev/blob" ) // ErrNotFound indicates the resource wasn't found. @@ -34,20 +33,16 @@ func Register(name string, driver Driver) { Drivers[name] = driver } -type OpenDataBucketFn func(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) - -var OpenNilDataBucket OpenDataBucketFn = func(ctx context.Context, instanceID string, elem ...string) (*blob.Bucket, error) { return nil, nil } - // Open opens a new connection. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. // If instanceID is not empty, the connection is considered instance-specific and its As...() functions will only be invoked with the same instance ID. -func Open(driver, instanceID string, config map[string]any, st *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) { +func Open(driver, instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (Handle, error) { d, ok := Drivers[driver] if !ok { return nil, fmt.Errorf("unknown driver: %s", driver) } - conn, err := d.Open(instanceID, config, st, activity, logger) + conn, err := d.Open(instanceID, config, st, ac, logger) if err != nil { return nil, err } @@ -62,7 +57,7 @@ type Driver interface { // Open opens a new handle. // If instanceID is empty, the connection is considered shared and its As...() functions may be invoked with different instance IDs. 
- Open(instanceID string, config map[string]any, st *storage.Client, activity *activity.Client, logger *zap.Logger) (Handle, error) + Open(instanceID string, config map[string]any, st *storage.Client, ac *activity.Client, logger *zap.Logger) (Handle, error) // HasAnonymousSourceAccess returns true if the driver can access the data identified by srcProps without any additional configuration. HasAnonymousSourceAccess(ctx context.Context, srcProps map[string]any, logger *zap.Logger) (bool, error) diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 2eb0a91cb39..0630c8873da 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -245,28 +245,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien return c, nil } -func (d Driver) Drop(cfgMap map[string]any, logger *zap.Logger) error { - cfg, err := newConfig(cfgMap, "") - if err != nil { - return err - } - if cfg.DBStoragePath != "" { - return os.RemoveAll(cfg.DBStoragePath) - } - if cfg.DBFilePath != "" { - err = os.Remove(cfg.DBFilePath) - if err != nil && !os.IsNotExist(err) { - return err - } - // Hacky approach to remove the wal file - _ = os.Remove(cfg.DBFilePath + ".wal") - // also temove the temp dir - _ = os.RemoveAll(cfg.DBFilePath + ".tmp") - } - - return nil -} - func (d Driver) Spec() drivers.Spec { if d.name == "motherduck" { return motherduckSpec diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 6a26e7f6cb2..6b8510b89f4 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -15,30 +15,6 @@ import ( "go.uber.org/zap" ) -func TestOpenDrop(t *testing.T) { - path := filepath.Join(t.TempDir(), "tmp.db") - walpath := path + ".wal" - dsn := path - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), 
zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - err = olap.Exec(context.Background(), &drivers.Statement{Query: "CREATE TABLE foo (bar INTEGER)"}) - require.NoError(t, err) - - err = handle.Close() - require.NoError(t, err) - require.FileExists(t, path) - - err = Driver{}.Drop(map[string]any{"path": dsn}, zap.NewNop()) - require.NoError(t, err) - require.NoFileExists(t, path) - require.NoFileExists(t, walpath) -} - func TestNoFatalErr(t *testing.T) { // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 04490477dc3..046fd2b8f20 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -54,10 +54,6 @@ func (d driver) Open(instanceID string, config map[string]any, st *storage.Clien return conn, nil } -func (d driver) Drop(config map[string]any, logger *zap.Logger) error { - return nil -} - func (d driver) HasAnonymousSourceAccess(ctx context.Context, props map[string]any, logger *zap.Logger) (bool, error) { return false, fmt.Errorf("not implemented") } diff --git a/runtime/registry.go b/runtime/registry.go index e0d3d189898..61f60e655eb 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ -16,6 +16,7 @@ import ( "github.com/rilldata/rill/runtime/pkg/logbuffer" "github.com/rilldata/rill/runtime/pkg/logutil" "github.com/rilldata/rill/runtime/pkg/observability" + "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" @@ -108,7 +109,7 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { // Wait for the controller to stop and the connection cache to be evicted <-completed - if err := r.storage.RemoveInstance(instanceID); err != nil { + if err := storage.RemoveInstance(r.storage, inst.ID); err != nil { r.Logger.Error("could not drop instance data directory", 
zap.Error(err), zap.String("instance_id", instanceID), observability.ZapCtx(ctx)) } @@ -325,7 +326,7 @@ func (r *registryCache) add(inst *drivers.Instance) error { instance: inst, } r.instances[inst.ID] = iwc - err := r.rt.storage.AddInstance(inst.ID) + err := storage.AddInstance(r.rt.storage, inst.ID) if err != nil { return err } diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index ff125cdff8f..992c1d3aa5c 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -18,13 +18,7 @@ import ( type Client struct { dataDirPath string bucketConfig *gcsBucketConfig - instanceID string -} - -type gcsBucketConfig struct { - Bucket string `mapstructure:"bucket"` - SecretJSON string `mapstructure:"google_application_credentials"` - AllowHostAccess bool `mapstructure:"allow_host_access"` + prefixes []string } func New(dataDir string, bucketCfg map[string]any) (*Client, error) { @@ -44,55 +38,27 @@ func New(dataDir string, bucketCfg map[string]any) (*Client, error) { } func MustNew(dataDir string, bucketCfg map[string]any) *Client { - c := &Client{ - dataDirPath: dataDir, - } - - if len(bucketCfg) != 0 { - gcsBucketConfig := &gcsBucketConfig{} - if err := mapstructure.WeakDecode(bucketCfg, gcsBucketConfig); err != nil { - panic(err) - } - c.bucketConfig = gcsBucketConfig - } - return c -} - -func (c *Client) AddInstance(instanceID string) error { - err := os.Mkdir(filepath.Join(c.dataDirPath, instanceID), os.ModePerm) - if err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("could not create instance directory: %w", err) - } - - // recreate instance's tmp directory - tmpDir := filepath.Join(c.dataDirPath, instanceID, "tmp") - if err := os.RemoveAll(tmpDir); err != nil { - return fmt.Errorf("could not remove instance tmp directory: %w", err) - } - if err := os.Mkdir(tmpDir, os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return err - } - - return nil -} - -func (c *Client) RemoveInstance(instanceID string) error { 
- err := os.RemoveAll(filepath.Join(c.dataDirPath, instanceID)) + c, err := New(dataDir, bucketCfg) if err != nil { - return fmt.Errorf("could not remove instance directory: %w", err) + panic(err) } - return nil + return c } func (c *Client) WithPrefix(prefix string) *Client { - c.instanceID = prefix - return c + newClient := &Client{ + dataDirPath: c.dataDirPath, + bucketConfig: c.bucketConfig, + } + newClient.prefixes = append(newClient.prefixes, c.prefixes...) + newClient.prefixes = append(newClient.prefixes, prefix) + return newClient } func (c *Client) DataDir(elem ...string) string { paths := []string{c.dataDirPath} - if c.instanceID != "" { - paths = append(paths, c.instanceID) + if c.prefixes != nil { + paths = append(paths, c.prefixes...) } paths = append(paths, elem...) return filepath.Join(paths...) @@ -100,8 +66,8 @@ func (c *Client) DataDir(elem ...string) string { func (c *Client) TempDir(elem ...string) string { paths := []string{c.dataDirPath} - if c.instanceID != "" { - paths = append(paths, c.instanceID) + if c.prefixes != nil { + paths = append(paths, c.prefixes...) } paths = append(paths, "tmp") paths = append(paths, elem...) 
@@ -123,8 +89,8 @@ func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, return nil, false, fmt.Errorf("failed to open bucket %q: %w", c.bucketConfig.Bucket, err) } var prefix string - if c.instanceID != "" { - prefix = c.instanceID + "/" + for _, p := range c.prefixes { + prefix = prefix + p + "/" } for _, e := range elem { prefix = prefix + e + "/" @@ -135,16 +101,51 @@ func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, return blob.PrefixedBucket(bucket, prefix), true, nil } -func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { - creds, err := gcputil.Credentials(ctx, c.bucketConfig.SecretJSON, c.bucketConfig.AllowHostAccess) +func AddInstance(c *Client, instanceID string) error { + if c.prefixes != nil { + return fmt.Errorf("storage: should not call AddInstance with prefixed client") + } + + c = c.WithPrefix(instanceID) + err := os.Mkdir(c.DataDir(), os.ModePerm) + if err != nil && !errors.Is(err, fs.ErrExist) { + return fmt.Errorf("could not create instance directory: %w", err) + } + + // recreate instance's tmp directory + tmpDir := c.TempDir() + if err := os.RemoveAll(tmpDir); err != nil { + return fmt.Errorf("could not remove instance tmp directory: %w", err) + } + if err := os.Mkdir(tmpDir, os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { + return err + } + + return nil +} + +func RemoveInstance(c *Client, instanceID string) error { + if c.prefixes != nil { + return fmt.Errorf("storage: should not call RemoveInstance with prefixed client") + } + + err := os.RemoveAll(c.DataDir()) if err != nil { - if !errors.Is(err, gcputil.ErrNoCredentials) { - return nil, err - } + return fmt.Errorf("could not remove instance directory: %w", err) + } + return nil +} - // no credentials set, we try with a anonymous client in case user is trying to access public buckets - return gcp.NewAnonymousHTTPClient(gcp.DefaultTransport()), nil +func (c *Client) newGCPClient(ctx context.Context) 
(*gcp.HTTPClient, error) { + creds, err := gcputil.Credentials(ctx, c.bucketConfig.SecretJSON, false) + if err != nil { + return nil, err } // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) } + +type gcsBucketConfig struct { + Bucket string `mapstructure:"bucket"` + SecretJSON string `mapstructure:"google_application_credentials"` +} From 1907725b97d87e218739969534942dfb823dcbda Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:46:33 +0530 Subject: [PATCH 40/64] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Benjamin Egelund-Müller --- runtime/storage/storage.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index 992c1d3aa5c..29606c71e84 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -75,7 +75,7 @@ func (c *Client) TempDir(elem ...string) string { } func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, bool, error) { - if c.bucketConfig == nil { + if len(c.bucketConfig) == 0 { return nil, false, nil } // Init dataBucket @@ -147,5 +147,5 @@ func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { type gcsBucketConfig struct { Bucket string `mapstructure:"bucket"` - SecretJSON string `mapstructure:"google_application_credentials"` + GoogleApplicationCredentialsJSON string `mapstructure:"google_application_credentials_json"` } From ea8040c0ba1bc883d927c9ccf1cde8566c044592 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:53:18 +0530 Subject: [PATCH 41/64] name in connection cache --- runtime/connection_cache.go | 15 +++++++-------- 
runtime/connections.go | 14 ++++++++++++-- runtime/drivers/duckdb/duckdb.go | 2 +- runtime/storage/storage.go | 4 ++-- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 25f98ffdaa7..140d18bab8f 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -27,6 +27,7 @@ var ( type cachedConnectionConfig struct { instanceID string // Empty if connection is shared + name string driver string config map[string]any } @@ -66,13 +67,7 @@ func (r *Runtime) newConnectionCache() conncache.Cache { // getConnection returns a cached connection for the given driver configuration. // If instanceID is empty, the connection is considered shared (see drivers.Open for details). -func (r *Runtime) getConnection(ctx context.Context, instanceID, driver string, config map[string]any) (drivers.Handle, func(), error) { - cfg := cachedConnectionConfig{ - instanceID: instanceID, - driver: driver, - config: config, - } - +func (r *Runtime) getConnection(ctx context.Context, cfg cachedConnectionConfig) (drivers.Handle, func(), error) { handle, release, err := r.connCache.Acquire(ctx, cfg) if err != nil { return nil, nil, err @@ -110,7 +105,7 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig } } - handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, r.storage.WithPrefix(cfg.instanceID), activityClient, logger) + handle, err := drivers.Open(cfg.driver, cfg.instanceID, cfg.config, r.storage.WithPrefix(cfg.instanceID, cfg.name), activityClient, logger) if err == nil && ctx.Err() != nil { err = fmt.Errorf("timed out while opening driver %q", cfg.driver) } @@ -132,7 +127,11 @@ func (r *Runtime) openAndMigrate(ctx context.Context, cfg cachedConnectionConfig func generateKey(cfg cachedConnectionConfig) string { sb := strings.Builder{} sb.WriteString(cfg.instanceID) // Empty if cfg.shared + sb.WriteString(":") + sb.WriteString(cfg.name) + sb.WriteString(":") 
sb.WriteString(cfg.driver) + sb.WriteString(":") keys := maps.Keys(cfg.config) slices.Sort(keys) for _, key := range keys { diff --git a/runtime/connections.go b/runtime/connections.go index eff10e90e14..a9e2d3f84ce 100644 --- a/runtime/connections.go +++ b/runtime/connections.go @@ -24,7 +24,12 @@ func (r *Runtime) AcquireSystemHandle(ctx context.Context, connector string) (dr cfg[strings.ToLower(k)] = v } cfg["allow_host_access"] = r.opts.AllowHostAccess - return r.getConnection(ctx, "", c.Type, cfg) + return r.getConnection(ctx, cachedConnectionConfig{ + instanceID: "", + name: connector, + driver: c.Type, + config: cfg, + }) } } return nil, nil, fmt.Errorf("connector %s doesn't exist", connector) @@ -41,7 +46,12 @@ func (r *Runtime) AcquireHandle(ctx context.Context, instanceID, connector strin // So we take this moment to make sure the ctx gets checked for cancellation at least every once in a while. return nil, nil, ctx.Err() } - return r.getConnection(ctx, instanceID, cfg.Driver, cfg.Resolve()) + return r.getConnection(ctx, cachedConnectionConfig{ + instanceID: instanceID, + name: connector, + driver: cfg.Driver, + config: cfg.Resolve(), + }) } func (r *Runtime) Repo(ctx context.Context, instanceID string) (drivers.RepoStore, func(), error) { diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 0630c8873da..9ea47f8d6b5 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -146,7 +146,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien logger.Warn("failed to install embedded DuckDB extensions, let DuckDB download them", zap.Error(err)) } - cfg, err := newConfig(cfgMap, st.DataDir("duckdb")) + cfg, err := newConfig(cfgMap, st.DataDir()) if err != nil { return nil, err } diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index 29606c71e84..27e4b69410b 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -45,13 +45,13 @@ 
func MustNew(dataDir string, bucketCfg map[string]any) *Client { return c } -func (c *Client) WithPrefix(prefix string) *Client { +func (c *Client) WithPrefix(prefix ...string) *Client { newClient := &Client{ dataDirPath: c.dataDirPath, bucketConfig: c.bucketConfig, } newClient.prefixes = append(newClient.prefixes, c.prefixes...) - newClient.prefixes = append(newClient.prefixes, prefix) + newClient.prefixes = append(newClient.prefixes, prefix...) return newClient } From e4b216f693bc1297598076552dc81e52abe19ba7 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:56:37 +0530 Subject: [PATCH 42/64] fix build errors --- cli/cmd/runtime/start.go | 4 ++-- runtime/storage/storage.go | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index 83fcdee0227..14f89086644 100644 --- a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -201,8 +201,8 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { // storage client bucketConfig := map[string]interface{}{ - "bucket": conf.DataBucket, - "google_application_credentials": conf.DataBucketCredentialsJSON, + "bucket": conf.DataBucket, + "google_application_credentials_json": conf.DataBucketCredentialsJSON, } storage, err := storage.New(conf.DataDir, bucketConfig) if err != nil { diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index 27e4b69410b..641e07e78bc 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -75,7 +75,7 @@ func (c *Client) TempDir(elem ...string) string { } func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, bool, error) { - if len(c.bucketConfig) == 0 { + if c.bucketConfig == nil { return nil, false, nil } // Init dataBucket @@ -137,7 +137,7 @@ func RemoveInstance(c *Client, instanceID string) error { } func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { - creds, err := 
gcputil.Credentials(ctx, c.bucketConfig.SecretJSON, false) + creds, err := gcputil.Credentials(ctx, c.bucketConfig.GoogleApplicationCredentialsJSON, false) if err != nil { return nil, err } @@ -146,6 +146,6 @@ func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { } type gcsBucketConfig struct { - Bucket string `mapstructure:"bucket"` + Bucket string `mapstructure:"bucket"` GoogleApplicationCredentialsJSON string `mapstructure:"google_application_credentials_json"` } From 25665376921ebf16b8b77bd1b068a675ac51b54f Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 2 Dec 2024 18:30:57 +0530 Subject: [PATCH 43/64] transporters fixed --- runtime/drivers/admin/admin.go | 5 - runtime/drivers/athena/athena.go | 5 - runtime/drivers/azure/azure.go | 5 - runtime/drivers/bigquery/bigquery.go | 5 - runtime/drivers/clickhouse/clickhouse.go | 6 - runtime/drivers/drivers.go | 4 - runtime/drivers/druid/druid.go | 6 - runtime/drivers/duckdb/config_test.go | 12 +- runtime/drivers/duckdb/duckdb.go | 50 +-- runtime/drivers/duckdb/duckdb_test.go | 186 ---------- runtime/drivers/duckdb/information_schema.go | 4 +- .../drivers/duckdb/information_schema_test.go | 14 +- .../duckdb/model_executor_warehouse_self.go | 2 +- runtime/drivers/duckdb/olap_crud_test.go | 8 +- runtime/drivers/duckdb/olap_test.go | 52 +-- .../duckdb/transporter_duckDB_to_duckDB.go | 130 ++++--- .../transporter_motherduck_to_duckDB.go | 114 ++---- .../transporter_mysql_to_duckDB_test.go | 248 +++++++------ .../transporter_postgres_to_duckDB_test.go | 11 +- .../transporter_sqlite_to_duckDB_test.go | 2 +- .../duckdb/transporter_sqlstore_to_duckDB.go | 146 -------- .../duckdb/transporter_warehouse_to_duckDB.go | 2 +- runtime/drivers/duckdb/utils.go | 20 -- runtime/drivers/file/file.go | 5 - runtime/drivers/gcs/gcs.go | 5 - runtime/drivers/https/https.go | 5 - .../drivers/mock/object_store/object_store.go | 5 - runtime/drivers/mysql/mysql.go | 5 - 
runtime/drivers/mysql/parser.go | 314 ---------------- runtime/drivers/mysql/sql_store.go | 186 ---------- runtime/drivers/pinot/pinot.go | 4 - runtime/drivers/postgres/parser.go | 339 ------------------ runtime/drivers/postgres/postgres.go | 5 - runtime/drivers/postgres/sql_store.go | 253 ------------- runtime/drivers/redshift/redshift.go | 5 - runtime/drivers/s3/s3.go | 5 - runtime/drivers/salesforce/salesforce.go | 5 - runtime/drivers/slack/slack.go | 4 - runtime/drivers/snowflake/snowflake.go | 5 - .../snowflake/{sql_store.go => warehouse.go} | 0 runtime/drivers/sql_store.go | 34 -- runtime/drivers/sqlite/sqlite.go | 5 - runtime/drivers/warehouse.go | 3 + runtime/pkg/rduckdb/db.go | 21 +- 44 files changed, 289 insertions(+), 1961 deletions(-) delete mode 100644 runtime/drivers/duckdb/duckdb_test.go delete mode 100644 runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go delete mode 100644 runtime/drivers/mysql/parser.go delete mode 100644 runtime/drivers/mysql/sql_store.go delete mode 100644 runtime/drivers/postgres/parser.go delete mode 100644 runtime/drivers/postgres/sql_store.go rename runtime/drivers/snowflake/{sql_store.go => warehouse.go} (100%) delete mode 100644 runtime/drivers/sql_store.go diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index 9ac4dc46e66..7d12e06a2a1 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -236,11 +236,6 @@ func (h *Handle) AsTransporter(from, to drivers.Handle) (drivers.Transporter, bo return nil, false } -// AsSQLStore implements drivers.Handle. -func (h *Handle) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Handle. 
func (h *Handle) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go index 50bbd73636d..1ab20626e4f 100644 --- a/runtime/drivers/athena/athena.go +++ b/runtime/drivers/athena/athena.go @@ -214,11 +214,6 @@ func (c *Connection) AsWarehouse() (drivers.Warehouse, bool) { return c, true } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Handle. func (c *Connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/azure/azure.go b/runtime/drivers/azure/azure.go index 0282b0cbc63..6a442400cd6 100644 --- a/runtime/drivers/azure/azure.go +++ b/runtime/drivers/azure/azure.go @@ -227,11 +227,6 @@ func (c *Connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *Connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index 86644749f10..1bf38c52380 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -183,11 +183,6 @@ func (c *Connection) AsObjectStore() (drivers.ObjectStore, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsModelExecutor implements drivers.Handle. 
func (c *Connection) AsModelExecutor(instanceID string, opts *drivers.ModelExecutorOptions) (drivers.ModelExecutor, bool) { return nil, false diff --git a/runtime/drivers/clickhouse/clickhouse.go b/runtime/drivers/clickhouse/clickhouse.go index fa8bef6edad..c6638640420 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -373,12 +373,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -// Use OLAPStore instead. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/drivers.go b/runtime/drivers/drivers.go index 6d3afb3018b..8cceeec0c76 100644 --- a/runtime/drivers/drivers.go +++ b/runtime/drivers/drivers.go @@ -107,10 +107,6 @@ type Handle interface { // An AI service enables an instance to request prompt-based text inference. AsAI(instanceID string) (AIService, bool) - // AsSQLStore returns a SQLStore if the driver can serve as such, otherwise returns false. - // A SQL store represents a service that can execute SQL statements and return the resulting rows. - AsSQLStore() (SQLStore, bool) - // AsOLAP returns an OLAPStore if the driver can serve as such, otherwise returns false. // An OLAP store is used to serve interactive, low-latency, analytical queries. // NOTE: We should consider merging the OLAPStore and SQLStore interfaces. diff --git a/runtime/drivers/druid/druid.go b/runtime/drivers/druid/druid.go index 2e02543cecd..e38097e4900 100644 --- a/runtime/drivers/druid/druid.go +++ b/runtime/drivers/druid/druid.go @@ -254,12 +254,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -// Use OLAPStore instead. 
-func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 6ef2ae04e92..649561d1215 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -17,8 +17,8 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "", "memory_limit_gb": "1", "cpu": 2}) require.NoError(t, err) - require.Equal(t, "1", cfg.readSettings()["threads"]) - require.Equal(t, "1", cfg.readSettings()["threads"]) + require.Equal(t, "2", cfg.readSettings()["threads"]) + require.Equal(t, "", cfg.writeSettings()["threads"]) require.Equal(t, 2, cfg.PoolSize) cfg, err = newConfig(map[string]any{"data_dir": "path/to"}) @@ -41,9 +41,9 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}) require.NoError(t, err) - require.Equal(t, "1", cfg.readSettings()["threads"]) - require.Equal(t, "1", cfg.writeSettings()["threads"]) - require.Equal(t, "4", cfg.readSettings()["max_memory"]) - require.Equal(t, "4", cfg.writeSettings()["max_memory"]) + require.Equal(t, "2", cfg.readSettings()["threads"]) + require.Equal(t, "", cfg.writeSettings()["threads"]) + require.Equal(t, "8GB", cfg.readSettings()["max_memory"]) + require.Equal(t, "", cfg.writeSettings()["max_memory"]) require.Equal(t, 2, cfg.PoolSize) } diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index ab3fb92a0b7..c7292ffd7d7 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -178,28 +178,17 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, ac *activity.Clie }, connectionsInUse)) // Open the DB - err = c.reopenDB(ctx, false) + err = c.reopenDB(ctx) if err != 
nil { // Check for another process currently accessing the DB if strings.Contains(err.Error(), "Could not set lock on file") { return nil, fmt.Errorf("failed to open database (is Rill already running?): %w", err) } - - c.logger.Debug("Resetting .db file because it was created with an older, incompatible version of Rill") - // reopen connection again - if err := c.reopenDB(ctx, true); err != nil { - return nil, err + // Return nice error for old macOS versions + if strings.Contains(err.Error(), "Symbol not found") { + fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") + os.Exit(1) } - } - - // Return nice error for old macOS versions - _, release, err := c.db.AcquireReadConnection(context.Background()) - if err != nil && strings.Contains(err.Error(), "Symbol not found") { - fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") - os.Exit(1) - } else if err == nil { - _ = release() - } else { return nil, err } @@ -373,12 +362,6 @@ func (c *connection) AsObjectStore() (drivers.ObjectStore, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -// Use OLAPStore instead. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsModelExecutor implements drivers.Handle. 
func (c *connection) AsModelExecutor(instanceID string, opts *drivers.ModelExecutorOptions) (drivers.ModelExecutor, bool) { if opts.InputHandle == c && opts.OutputHandle == c { @@ -416,13 +399,15 @@ func (c *connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter olap, _ := to.(*connection) if c == to { if from == to { - return newDuckDBToDuckDB(c, c.logger), true + return newDuckDBToDuckDB(c, "duckdb", c.logger), true } - if from.Driver() == "motherduck" { + switch from.Driver() { + case "motherduck": return newMotherduckToDuckDB(from, olap, c.logger), true - } - if store, ok := from.AsSQLStore(); ok { - return newSQLStoreToDuckDB(store, olap, c.logger), true + case "postgres": + return newDuckDBToDuckDB(c, "postgres", c.logger), true + case "mysql": + return newDuckDBToDuckDB(c, "mysql", c.logger), true } if store, ok := from.AsWarehouse(); ok { return NewWarehouseToDuckDB(store, olap, c.logger), true @@ -452,7 +437,7 @@ func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, er } // reopenDB opens the DuckDB handle anew. If c.db is already set, it closes the existing handle first. -func (c *connection) reopenDB(ctx context.Context, clean bool) error { +func (c *connection) reopenDB(ctx context.Context) error { // If c.db is already open, close it first if c.db != nil { err := c.db.Close() @@ -634,7 +619,7 @@ func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, c.dbConnCount-- if c.dbConnCount == 0 && c.dbReopen { c.dbReopen = false - err = c.reopenDB(ctx, false) + err = c.reopenDB(ctx) if err == nil { c.logger.Debug("reopened DuckDB successfully") } else { @@ -711,10 +696,3 @@ func (c *connection) periodicallyCheckConnDurations(d time.Duration) { } } } - -// fatalInternalError logs a critical internal error and exits the process. -// This is used for errors that are completely unrecoverable. -// Ideally, we should refactor to cleanup/reopen/rebuild so that we don't need this. 
-func (c *connection) fatalInternalError(err error) { - c.logger.Fatal("duckdb: critical internal error", zap.Error(err)) -} diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go deleted file mode 100644 index b55f319de28..00000000000 --- a/runtime/drivers/duckdb/duckdb_test.go +++ /dev/null @@ -1,186 +0,0 @@ -package duckdb - -import ( - "context" - "database/sql" - "path/filepath" - "sync" - "testing" - "time" - - "github.com/rilldata/rill/runtime/drivers" - "github.com/rilldata/rill/runtime/pkg/activity" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - "gocloud.dev/blob/memblob" -) - -func TestNoFatalErr(t *testing.T) { - // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 - - dsn := filepath.Join(t.TempDir(), "tmp.db") - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - qry := ` - CREATE TABLE a( - a1 VARCHAR, - ); - - CREATE TABLE b( - b1 VARCHAR, - b2 TIMESTAMP, - b3 TIMESTAMP, - b4 VARCHAR, - b5 VARCHAR, - b6 VARCHAR, - b7 TIMESTAMP, - b8 TIMESTAMP, - b9 VARCHAR, - b10 VARCHAR, - b11 VARCHAR, - b12 VARCHAR, - b13 VARCHAR, - b14 VARCHAR, - ); - - INSERT INTO b VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); - - CREATE TABLE c( - c1 VARCHAR, - ); - - CREATE TABLE d( - d1 VARCHAR, - d2 VARCHAR, - ); - - SELECT * - FROM a - LEFT JOIN b ON b.b14 = a.a1 - LEFT JOIN c ON b.b13 = c.c1 - LEFT JOIN d ON b.b12 = d.d1 - WHERE d.d2 IN (''); - ` - - err = olap.Exec(context.Background(), &drivers.Statement{Query: qry}) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{Query: "SELECT * FROM a"}) - require.NoError(t, err) - - err = handle.Close() - require.NoError(t, err) -} - -func 
TestNoFatalErrConcurrent(t *testing.T) { - // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 - - dsn := filepath.Join(t.TempDir(), "tmp.db") - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - qry := ` - CREATE TABLE a( - a1 VARCHAR, - ); - - CREATE TABLE b( - b1 VARCHAR, - b2 TIMESTAMP, - b3 TIMESTAMP, - b4 VARCHAR, - b5 VARCHAR, - b6 VARCHAR, - b7 TIMESTAMP, - b8 TIMESTAMP, - b9 VARCHAR, - b10 VARCHAR, - b11 VARCHAR, - b12 VARCHAR, - b13 VARCHAR, - b14 VARCHAR, - ); - - INSERT INTO b VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); - - CREATE TABLE c( - c1 VARCHAR, - ); - - CREATE TABLE d( - d1 VARCHAR, - d2 VARCHAR, - ); - ` - err = olap.Exec(context.Background(), &drivers.Statement{Query: qry}) - require.NoError(t, err) - - wg := sync.WaitGroup{} - - // Func 1 acquires conn immediately, runs query after 500ms. - // It should fail with an internal error. - wg.Add(1) - var err1 error - go func() { - qry := ` - SELECT * - FROM a - LEFT JOIN b ON b.b14 = a.a1 - LEFT JOIN c ON b.b13 = c.c1 - LEFT JOIN d ON b.b12 = d.d1 - WHERE d.d2 IN (''); - ` - err1 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { - time.Sleep(500 * time.Millisecond) - return olap.Exec(ctx, &drivers.Statement{Query: qry}) - }) - wg.Done() - }() - - // Func 2 acquires conn immediately, runs query after 1000ms - // It should fail with a fatal error, because the DB has been invalidated by the previous query. 
- wg.Add(1) - var err2 error - go func() { - qry := `SELECT * FROM a;` - err2 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { - time.Sleep(1000 * time.Millisecond) - return olap.Exec(ctx, &drivers.Statement{Query: qry}) - }) - wg.Done() - }() - - // Func 3 acquires conn after 250ms and runs query immediately. It will be enqueued (because the OLAP conns limit is pool_size-1 = 2). - // By the time it's dequeued, the DB will have been invalidated, and it will wait for the reopen before returning a conn. So the query should succeed. - wg.Add(1) - var err3 error - go func() { - time.Sleep(250 * time.Millisecond) - qry := `SELECT * FROM a;` - err3 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { - return olap.Exec(ctx, &drivers.Statement{Query: qry}) - }) - wg.Done() - }() - - wg.Wait() - - require.NoError(t, err1) - require.NoError(t, err2) - require.NoError(t, err3) - - err = olap.Exec(context.Background(), &drivers.Statement{Query: "SELECT * FROM a"}) - require.NoError(t, err) - - err = handle.Close() - require.NoError(t, err) -} diff --git a/runtime/drivers/duckdb/information_schema.go b/runtime/drivers/duckdb/information_schema.go index 9e1168ec2e1..ee199f1aaf2 100644 --- a/runtime/drivers/duckdb/information_schema.go +++ b/runtime/drivers/duckdb/information_schema.go @@ -43,7 +43,7 @@ func (i informationSchema) All(ctx context.Context, like string) ([]*drivers.Tab array_agg(c.is_nullable = 'YES' order by c.ordinal_position) as "column_nullable" from information_schema.tables t join information_schema.columns c on t.table_schema = c.table_schema and t.table_name = c.table_name - where database = current_database() and t.table_schema = 'main' + where database = current_database() and t.table_schema = current_schema() %s group by 1, 2, 3, 4 order by 1, 2, 3, 4 @@ -81,7 +81,7 @@ func (i informationSchema) Lookup(ctx context.Context, db, 
schema, name string) array_agg(c.is_nullable = 'YES' order by c.ordinal_position) as "column_nullable" from information_schema.tables t join information_schema.columns c on t.table_schema = c.table_schema and t.table_name = c.table_name - where database = current_database() and t.table_schema = 'main' and lower(t.table_name) = lower(?) + where database = current_database() and t.table_schema = current_schema() and lower(t.table_name) = lower(?) group by 1, 2, 3, 4 order by 1, 2, 3, 4 ` diff --git a/runtime/drivers/duckdb/information_schema_test.go b/runtime/drivers/duckdb/information_schema_test.go index 42e6eaa1036..4ebcaf42697 100644 --- a/runtime/drivers/duckdb/information_schema_test.go +++ b/runtime/drivers/duckdb/information_schema_test.go @@ -13,9 +13,7 @@ func TestInformationSchemaAll(t *testing.T) { conn := prepareConn(t) olap, _ := conn.AsOLAP("") - err := olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE VIEW model as (select 1, 2, 3)", - }) + err := olap.CreateTableAsSelect(context.Background(), "model", true, "select 1, 2, 3", nil) require.NoError(t, err) tables, err := olap.InformationSchema().All(context.Background(), "") @@ -39,9 +37,7 @@ func TestInformationSchemaAllLike(t *testing.T) { conn := prepareConn(t) olap, _ := conn.AsOLAP("") - err := olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE VIEW model as (select 1, 2, 3)", - }) + err := olap.CreateTableAsSelect(context.Background(), "model", true, "select 1, 2, 3", nil) require.NoError(t, err) tables, err := olap.InformationSchema().All(context.Background(), "%odel") @@ -49,7 +45,7 @@ func TestInformationSchemaAllLike(t *testing.T) { require.Equal(t, 1, len(tables)) require.Equal(t, "model", tables[0].Name) - tables, err = olap.InformationSchema().All(context.Background(), "%main.model%") + tables, err = olap.InformationSchema().All(context.Background(), "%model%") require.NoError(t, err) require.Equal(t, 1, len(tables)) require.Equal(t, "model", 
tables[0].Name) @@ -60,9 +56,7 @@ func TestInformationSchemaLookup(t *testing.T) { olap, _ := conn.AsOLAP("") ctx := context.Background() - err := olap.Exec(ctx, &drivers.Statement{ - Query: "CREATE VIEW model as (select 1, 2, 3)", - }) + err := olap.CreateTableAsSelect(context.Background(), "model", true, "select 1, 2, 3", nil) require.NoError(t, err) table, err := olap.InformationSchema().Lookup(ctx, "", "", "foo") diff --git a/runtime/drivers/duckdb/model_executor_warehouse_self.go b/runtime/drivers/duckdb/model_executor_warehouse_self.go index ecccbc78850..55a762c86ed 100644 --- a/runtime/drivers/duckdb/model_executor_warehouse_self.go +++ b/runtime/drivers/duckdb/model_executor_warehouse_self.go @@ -114,7 +114,7 @@ func (e *warehouseToSelfExecutor) queryAndInsert(ctx context.Context, opts *driv files, err := iter.Next() if err != nil { // TODO: Why is this not just one error? - if errors.Is(err, io.EOF) || errors.Is(err, drivers.ErrNoRows) || errors.Is(err, drivers.ErrIteratorDone) { + if errors.Is(err, io.EOF) || errors.Is(err, drivers.ErrNoRows) { break } return err diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 5c9e83d9ebf..4bd981ba9e9 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -87,7 +87,7 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { require.NoError(t, res.Scan(&count)) require.Equal(t, 1, count) require.NoError(t, res.Close()) - contents, err := os.ReadFile(filepath.Join(temp, "default", "read", tt.name, "version.txt")) + contents, err := os.ReadFile(filepath.Join(temp, "default", tt.name, "version.txt")) require.NoError(t, err) version, err := strconv.ParseInt(string(contents), 10, 64) require.NoError(t, err) @@ -112,7 +112,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select 'hello'", nil) require.NoError(t, err) - 
dirs, err := os.ReadDir(filepath.Join(temp, "read", "test-select-multiple")) + dirs, err := os.ReadDir(filepath.Join(temp, "test-select-multiple")) require.NoError(t, err) names := make([]string, 0) for _, dir := range dirs { @@ -122,7 +122,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select fail query", nil) require.Error(t, err) - dirs, err = os.ReadDir(filepath.Join(temp, "read", "test-select-multiple")) + dirs, err = os.ReadDir(filepath.Join(temp, "test-select-multiple")) require.NoError(t, err) newNames := make([]string, 0) for _, dir := range dirs { @@ -157,7 +157,7 @@ func Test_connection_DropTable(t *testing.T) { err = c.DropTable(context.Background(), "test-drop", true) require.NoError(t, err) - _, err = os.ReadDir(filepath.Join(temp, "read", "test-drop")) + _, err = os.ReadDir(filepath.Join(temp, "test-drop")) require.True(t, os.IsNotExist(err)) res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) FROM information_schema.tables WHERE table_name='test-drop' AND table_type='VIEW'"}) diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index ee2ba123425..1b855284ed5 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -2,7 +2,6 @@ package duckdb import ( "context" - "fmt" "io/fs" "os" "path/filepath" @@ -12,6 +11,7 @@ import ( "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/pkg/rduckdb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -212,56 +212,32 @@ func TestClose(t *testing.T) { require.Greater(t, x, 0) } -func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, 
activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) - require.NoError(t, err) - - olap, ok := conn.AsOLAP("") - require.True(t, ok) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE TABLE foo(bar VARCHAR, baz INTEGER)", - }) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", - }) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE TABLE bar(bar VARCHAR, baz INTEGER)", - }) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "INSERT INTO bar VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", - }) - require.NoError(t, err) - - return conn -} - func Test_safeSQLString(t *testing.T) { + conn := prepareConn(t) tempDir := t.TempDir() path := filepath.Join(tempDir, "let's t@st \"weird\" dirs") err := os.Mkdir(path, fs.ModePerm) require.NoError(t, err) - dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + // dbFile := filepath.Join(path, "st@g3's.db") + err = conn.db.CreateTableAsSelect(context.Background(), "foo", "SELECT 'a' AS bar, 1 AS baz", &rduckdb.CreateTableOptions{ + // InitSQL: fmt.Sprintf("ATTACH %s", safeSQLString(dbFile)), + }) require.NoError(t, err) - require.NoError(t, conn.Close()) +} - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) +func prepareConn(t *testing.T) *connection { + conn, err := Driver{}.Open("default", map[string]any{"data_dir": t.TempDir(), "pool_size": 4}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") require.True(t, ok) - err = olap.Exec(context.Background(), 
&drivers.Statement{Query: fmt.Sprintf("ATTACH '%s'", dbFile)}) - require.Error(t, err) + ctx := context.Background() + err = olap.CreateTableAsSelect(ctx, "foo", false, "SELECT 'a' AS bar, 1 AS baz UNION ALL SELECT 'a', 2 UNION ALL SELECT 'b', 3 UNION ALL SELECT 'c', 4", nil) + require.NoError(t, err) - err = olap.Exec(context.Background(), &drivers.Statement{Query: fmt.Sprintf("ATTACH %s", safeSQLString(dbFile))}) + err = olap.CreateTableAsSelect(ctx, "bar", false, "SELECT 'a' AS bar, 1 AS baz UNION ALL SELECT 'a', 2 UNION ALL SELECT 'b', 3 UNION ALL SELECT 'c', 4", nil) require.NoError(t, err) + return conn.(*connection) } diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index aba534b2d6e..99964c785d2 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -7,21 +7,25 @@ import ( "net/url" "strings" + "github.com/jmoiron/sqlx" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/duckdbsql" "github.com/rilldata/rill/runtime/pkg/fileutil" + "github.com/rilldata/rill/runtime/pkg/rduckdb" "go.uber.org/zap" ) type duckDBToDuckDB struct { - to *connection - logger *zap.Logger + to *connection + logger *zap.Logger + database string // mysql, postgres, duckdb } -func newDuckDBToDuckDB(c *connection, logger *zap.Logger) drivers.Transporter { +func newDuckDBToDuckDB(c *connection, db string, logger *zap.Logger) drivers.Transporter { return &duckDBToDuckDB{ - to: c, - logger: logger, + to: c, + logger: logger, + database: db, } } @@ -41,13 +45,13 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) if srcCfg.Database != "" { // query to be run against an external DB - if !strings.HasPrefix(srcCfg.Database, "md:") { + if t.database == "duckdb" { srcCfg.Database, err = 
fileutil.ResolveLocalPath(srcCfg.Database, opts.RepoRoot, opts.AllowHostAccess) if err != nil { return err } } - // return t.transferFromExternalDB(ctx, srcCfg, sinkCfg) + return t.transferFromExternalDB(ctx, srcCfg, sinkCfg) } // We can't just pass the SQL statement to DuckDB outright. @@ -113,66 +117,58 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s return t.to.CreateTableAsSelect(ctx, sinkCfg.Table, false, srcCfg.SQL, nil) } -// func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { -// t.to.db.CreateTableAsSelect(ctx, sinkProps.Table, ) - -// var localDB, localSchema string -// err = conn.QueryRowContext(ctx, "SELECT current_database(),current_schema()").Scan(&localDB, &localSchema) -// if err != nil { -// return err -// } - -// // duckdb considers everything before first . as db name -// // alternative solution can be to query `show databases()` before and after to identify db name -// dbName, _, _ := strings.Cut(filepath.Base(srcProps.Database), ".") -// if dbName == "main" { -// return fmt.Errorf("`main` is a reserved db name") -// } - -// if _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeSQLName(dbName))); err != nil { -// return fmt.Errorf("failed to attach db %q: %w", srcProps.Database, err) -// } - -// defer func() { -// _, err = conn.ExecContext(context.Background(), fmt.Sprintf("DETACH %s", safeSQLName(dbName))) -// }() - -// if _, err := conn.ExecContext(ctx, fmt.Sprintf("USE %s;", safeName(dbName))); err != nil { -// return err -// } - -// defer func() { -// _, err = conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))) -// if err != nil { -// t.logger.Error("failed to switch back to original database", zap.Error(err)) -// } -// }() - -// userQuery := strings.TrimSpace(srcProps.SQL) -// userQuery, _ = strings.CutSuffix(userQuery, 
";") // trim trailing semi colon -// safeTempTable := safeName(fmt.Sprintf("%s_tmp_", sinkProps.Table)) -// defer func() { -// // ensure temporary table is cleaned -// _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTempTable)) -// if err != nil { -// t.logger.Error("failed to drop temp table", zap.String("table", safeTempTable), zap.Error(err)) -// } -// }() - -// query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) -// _, err = conn.ExecContext(ctx, query) -// // first revert to original database -// if _, switchErr := conn.ExecContext(context.Background(), fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))); switchErr != nil { -// t.to.fatalInternalError(fmt.Errorf("failed to switch back to original database: %w", err)) -// } -// // check for the original error -// if err != nil { -// return fmt.Errorf("failed to create table: %w", err) -// } - -// // create permanent table from temp table using crud API -// return rwConn.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), nil) -// } +func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { + var attachSQL string + safeDBName := safeName(sinkProps.Table + "_external_db_") + safeTempTable := safeName(sinkProps.Table + "__temp__") + switch t.database { + case "mysql": + attachSQL = fmt.Sprintf("ATTACH %s AS %s (TYPE mysql)", safeSQLString(srcProps.Database), safeDBName) + case "postgres": + attachSQL = fmt.Sprintf("ATTACH %s AS %s (TYPE postgres)", safeSQLString(srcProps.Database), safeDBName) + case "duckdb": + attachSQL = fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeDBName) + default: + return fmt.Errorf("internal error: unsupported external database: %s", t.database) + } + beforeCreateFn := func(ctx context.Context, conn *sqlx.Conn) 
error { + _, err := conn.ExecContext(ctx, attachSQL) + if err != nil { + return err + } + + var localDB, localSchema string + err = conn.QueryRowxContext(ctx, "SELECT current_database(),current_schema();").Scan(&localDB, &localSchema) + if err != nil { + return err + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("USE %s;", safeDBName)) + if err != nil { + return err + } + + userQuery := strings.TrimSpace(srcProps.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + query := fmt.Sprintf("CREATE OR REPLACE TABLE %s.%s.%s AS (%s\n);", safeName(localDB), safeName(localSchema), safeTempTable, userQuery) + _, err = conn.ExecContext(ctx, query) + // first revert back to localdb + if err != nil { + return err + } + // revert to localdb and schema before returning + _, err = conn.ExecContext(ctx, fmt.Sprintf("USE %s.%s;", safeName(localDB), safeName(localSchema))) + return err + } + afterCreateFn := func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTempTable)) + return err + } + return t.to.db.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), &rduckdb.CreateTableOptions{ + BeforeCreateFn: beforeCreateFn, + AfterCreateFn: afterCreateFn, + }) +} // rewriteLocalPaths rewrites a DuckDB SQL statement such that relative paths become absolute paths relative to the basePath, // and if allowHostAccess is false, returns an error if any of the paths resolve to a path outside of the basePath. 
diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index 8f69048331d..42ecf653453 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -2,8 +2,12 @@ package duckdb import ( "context" + "fmt" + "os" + "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" + "github.com/rilldata/rill/runtime/pkg/rduckdb" "go.uber.org/zap" ) @@ -35,86 +39,40 @@ func newMotherduckToDuckDB(from drivers.Handle, to *connection, logger *zap.Logg } func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOptions) error { - // srcConfig := &mdSrcProps{} - // err := mapstructure.WeakDecode(srcProps, srcConfig) - // if err != nil { - // return err - // } - // if srcConfig.SQL == "" { - // return fmt.Errorf("property \"sql\" is mandatory for connector \"motherduck\"") - // } - - // sinkCfg, err := parseSinkProperties(sinkProps) - // if err != nil { - // return err - // } - - // mdConfig := &mdConfigProps{} - // err = mapstructure.WeakDecode(t.from.Config(), mdConfig) - // if err != nil { - // return err - // } - - // // get token - // var token string - // if srcConfig.Token != "" { - // token = srcConfig.Token - // } else if mdConfig.Token != "" { - // token = mdConfig.Token - // } else if mdConfig.AllowHostAccess { - // token = os.Getenv("motherduck_token") - // } - // if token == "" { - // return fmt.Errorf("no motherduck token found. 
Refer to this documentation for instructions: https://docs.rilldata.com/reference/connectors/motherduck") - // } - - // t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) - - // rwConn, release, err := t.to.acquireConn(ctx, false) - // if err != nil { - // return err - // } - // defer func() { - // _ = release() - // }() - - // conn := rwConn.Connx() - - // // load motherduck extension; connect to motherduck service - // _, err = conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") - // if err != nil { - // return fmt.Errorf("failed to load motherduck extension %w", err) - // } - - // if _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)); err != nil { - // if !strings.Contains(err.Error(), "can only be set during initialization") { - // return fmt.Errorf("failed to set motherduck token %w", err) - // } - // } + srcConfig := &mdSrcProps{} + err := mapstructure.WeakDecode(srcProps, srcConfig) + if err != nil { + return err + } + if srcConfig.SQL == "" { + return fmt.Errorf("property \"sql\" is mandatory for connector \"motherduck\"") + } - // // ignore attach error since it might be already attached - // _, _ = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) - // userQuery := strings.TrimSpace(srcConfig.SQL) - // userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + sinkCfg, err := parseSinkProperties(sinkProps) + if err != nil { + return err + } - // // we first ingest data in a temporary table in the main db - // // and then copy it to the final table to ensure that the final table is always created using CRUD APIs - // safeTmpTable := safeName(fmt.Sprintf("__%s_tmp_motherduck", sinkCfg.Table)) - // defer func() { - // // ensure temporary table is cleaned - // _, err := conn.ExecContext(context.Background(), fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTmpTable)) - // if err != nil { - // t.logger.Error("failed to drop temp table", zap.String("table", safeTmpTable), 
zap.Error(err)) - // } - // }() + mdConfig := &mdConfigProps{} + err = mapstructure.WeakDecode(t.from.Config(), mdConfig) + if err != nil { + return err + } - // query := fmt.Sprintf("CREATE OR REPLACE TABLE %s AS (%s\n);", safeTmpTable, userQuery) - // _, err = conn.ExecContext(ctx, query) - // if err != nil { - // return err - // } + // get token + var token string + if srcConfig.Token != "" { + token = srcConfig.Token + } else if mdConfig.Token != "" { + token = mdConfig.Token + } else if mdConfig.AllowHostAccess { + token = os.Getenv("motherduck_token") + } + if token == "" { + return fmt.Errorf("no motherduck token found. Refer to this documentation for instructions: https://docs.rilldata.com/reference/connectors/motherduck") + } - // // copy data from temp table to target table - // return rwConn.CreateTableAsSelect(ctx, sinkCfg.Table, fmt.Sprintf("SELECT * FROM %s", safeTmpTable), nil) - return nil + return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, srcConfig.SQL, &rduckdb.CreateTableOptions{ + // InitSQL: fmt.Sprintf("INSTALL 'motherduck'; LOAD 'motherduck'; SET motherduck_token='%s'; ATTACH '%s'", token, srcConfig.DSN), + }) } diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 37c24c2880d..9bccec0f5dd 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -1,127 +1,125 @@ package duckdb -// import ( -// "context" -// "database/sql" -// "testing" - -// "github.com/rilldata/rill/runtime/drivers" -// "github.com/rilldata/rill/runtime/pkg/activity" -// "github.com/stretchr/testify/require" -// "go.uber.org/zap" - -// "fmt" -// "time" - -// _ "github.com/rilldata/rill/runtime/drivers/mysql" -// "github.com/testcontainers/testcontainers-go" -// "github.com/testcontainers/testcontainers-go/wait" -// ) - -// var mysqlInitStmt = ` -// CREATE TABLE all_data_types_table ( -// id INT 
AUTO_INCREMENT PRIMARY KEY, -// sample_char CHAR(1), -// sample_varchar VARCHAR(100), -// sample_tinytext TINYTEXT, -// sample_text TEXT, -// sample_mediumtext MEDIUMTEXT, -// sample_longtext LONGTEXT, -// sample_binary BINARY(1), -// sample_varbinary VARBINARY(100), -// sample_tinyblob TINYBLOB, -// sample_blob BLOB, -// sample_mediumblob MEDIUMBLOB, -// sample_longblob LONGBLOB, -// sample_enum ENUM('value1', 'value2'), -// sample_set SET('value1', 'value2'), -// sample_bit BIT(8), -// sample_tinyint TINYINT, -// sample_tinyint_unsigned TINYINT UNSIGNED NOT NULL, -// sample_smallint SMALLINT, -// sample_smallint_unsigned SMALLINT UNSIGNED NOT NULL, -// sample_mediumint MEDIUMINT, -// sample_mediumint_unsigned MEDIUMINT UNSIGNED NOT NULL, -// sample_int INT, -// sample_int_unsigned INT UNSIGNED NOT NULL, -// sample_bigint BIGINT, -// sample_bigint_unsigned BIGINT UNSIGNED NOT NULL, -// sample_float FLOAT, -// sample_double DOUBLE, -// sample_decimal DECIMAL(10,2), -// sample_date DATE, -// sample_datetime DATETIME, -// sample_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -// sample_time TIME, -// sample_year YEAR, -// sample_json JSON -// ); - -// INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) -// VALUES ('A', 'Sample Text', 'Tiny Text', 'Some Longer Text.', 'Medium Length Text', 'This is an example of really long text for the LONGTEXT column.', BINARY '1', 'Sample Binary', 'Tiny Blob Data', 
'Sample Blob Data', 'Medium Blob Data', 'Long Blob Data', 'value1', 'value1,value2', b'10101010', -128, 255, -32768, 65535, -8388608, 16777215, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 123.45, 1234567890.123, 12345.67, '2023-01-01', '2023-01-01 12:00:00', CURRENT_TIMESTAMP, '12:00:00', 2023, JSON_OBJECT('key', 'value')); - -// INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) -// VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); -// ` - -// func TestMySQLToDuckDBTransfer(t *testing.T) { -// ctx := context.Background() -// container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ -// Started: true, -// ContainerRequest: testcontainers.ContainerRequest{ -// WaitingFor: wait.ForLog("mysqld: ready for connections").WithOccurrence(2).WithStartupTimeout(15 * time.Second), -// Image: "mysql:8.3.0", -// ExposedPorts: []string{"3306/tcp"}, -// Env: map[string]string{ -// "MYSQL_ROOT_PASSWORD": "mypassword", -// "MYSQL_DATABASE": "mydb", -// "MYSQL_USER": "myuser", -// "MYSQL_PASSWORD": "mypassword", -// }, -// }, -// }) -// require.NoError(t, err) -// defer container.Terminate(ctx) - -// host, err := container.Host(ctx) -// require.NoError(t, err) -// port, err := container.MappedPort(ctx, "3306/tcp") -// require.NoError(t, 
err) - -// dsn := fmt.Sprintf("myuser:mypassword@tcp(%s:%d)/mydb?multiStatements=true", host, port.Int()) - -// db, err := sql.Open("mysql", dsn) -// require.NoError(t, err) -// defer db.Close() - -// t.Run("AllDataTypes", func(t *testing.T) { allMySQLDataTypesTest(t, db, dsn) }) -// } - -// func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { -// ctx := context.Background() -// _, err := db.ExecContext(ctx, mysqlInitStmt) -// require.NoError(t, err) - -// handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, activity.NewNoopClient(), zap.NewNop()) -// require.NoError(t, err) -// require.NotNil(t, handle) - -// sqlStore, _ := handle.AsSQLStore() -// to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), zap.NewNop()) -// require.NoError(t, err) -// olap, _ := to.AsOLAP("") - -// tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) -// err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_data_types_table;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) -// require.NoError(t, err) -// res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) -// require.NoError(t, err) -// for res.Next() { -// var count int -// err = res.Rows.Scan(&count) -// require.NoError(t, err) -// require.Equal(t, count, 2) -// } -// require.NoError(t, res.Close()) -// require.NoError(t, to.Close()) -// } +import ( + "context" + "database/sql" + "testing" + + "github.com/rilldata/rill/runtime/drivers" + "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "gocloud.dev/blob/memblob" + + "fmt" + "time" + + _ "github.com/rilldata/rill/runtime/drivers/mysql" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" +) + +var mysqlInitStmt = ` +CREATE TABLE all_data_types_table ( + id INT AUTO_INCREMENT PRIMARY KEY, + sample_char 
CHAR(1), + sample_varchar VARCHAR(100), + sample_tinytext TINYTEXT, + sample_text TEXT, + sample_mediumtext MEDIUMTEXT, + sample_longtext LONGTEXT, + sample_binary BINARY(1), + sample_varbinary VARBINARY(100), + sample_tinyblob TINYBLOB, + sample_blob BLOB, + sample_mediumblob MEDIUMBLOB, + sample_longblob LONGBLOB, + sample_enum ENUM('value1', 'value2'), + sample_set SET('value1', 'value2'), + sample_bit BIT(8), + sample_tinyint TINYINT, + sample_tinyint_unsigned TINYINT UNSIGNED NOT NULL, + sample_smallint SMALLINT, + sample_smallint_unsigned SMALLINT UNSIGNED NOT NULL, + sample_mediumint MEDIUMINT, + sample_mediumint_unsigned MEDIUMINT UNSIGNED NOT NULL, + sample_int INT, + sample_int_unsigned INT UNSIGNED NOT NULL, + sample_bigint BIGINT, + sample_bigint_unsigned BIGINT UNSIGNED NOT NULL, + sample_float FLOAT, + sample_double DOUBLE, + sample_decimal DECIMAL(10,2), + sample_date DATE, + sample_datetime DATETIME, + sample_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + sample_time TIME, + sample_year YEAR, + sample_json JSON +); + +INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) +VALUES ('A', 'Sample Text', 'Tiny Text', 'Some Longer Text.', 'Medium Length Text', 'This is an example of really long text for the LONGTEXT column.', BINARY '1', 'Sample Binary', 'Tiny Blob Data', 'Sample Blob Data', 'Medium Blob Data', 'Long Blob Data', 'value1', 'value1,value2', b'10101010', -128, 255, -32768, 
65535, -8388608, 16777215, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 123.45, 1234567890.123, 12345.67, '2023-01-01', '2023-01-01 12:00:00', CURRENT_TIMESTAMP, '12:00:00', 2023, JSON_OBJECT('key', 'value')); + +INSERT INTO all_data_types_table (sample_char, sample_varchar, sample_tinytext, sample_text, sample_mediumtext, sample_longtext, sample_binary, sample_varbinary, sample_tinyblob, sample_blob, sample_mediumblob, sample_longblob, sample_enum, sample_set, sample_bit, sample_tinyint, sample_tinyint_unsigned, sample_smallint, sample_smallint_unsigned, sample_mediumint, sample_mediumint_unsigned, sample_int, sample_int_unsigned, sample_bigint, sample_bigint_unsigned, sample_float, sample_double, sample_decimal, sample_date, sample_datetime, sample_timestamp, sample_time, sample_year, sample_json) +VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +` + +func TestMySQLToDuckDBTransfer(t *testing.T) { + ctx := context.Background() + container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + Started: true, + ContainerRequest: testcontainers.ContainerRequest{ + WaitingFor: wait.ForLog("mysqld: ready for connections").WithOccurrence(2).WithStartupTimeout(15 * time.Second), + Image: "mysql:8.3.0", + ExposedPorts: []string{"3306/tcp"}, + Env: map[string]string{ + "MYSQL_ROOT_PASSWORD": "mypassword", + "MYSQL_DATABASE": "mydb", + "MYSQL_USER": "myuser", + "MYSQL_PASSWORD": "mypassword", + }, + }, + }) + require.NoError(t, err) + defer container.Terminate(ctx) + + host, err := container.Host(ctx) + require.NoError(t, err) + port, err := container.MappedPort(ctx, "3306/tcp") + require.NoError(t, err) + + dsn := fmt.Sprintf("myuser:mypassword@tcp(%s:%d)/mydb?multiStatements=true", host, port.Int()) + + db, err := sql.Open("mysql", dsn) + require.NoError(t, err) + 
defer db.Close() + + t.Run("AllDataTypes", func(t *testing.T) { + allMySQLDataTypesTest(t, db, fmt.Sprintf("host=%s user=myuser password=mypassword port=%v database=mydb", host, port.Int())) + }) +} + +func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { + ctx := context.Background() + _, err := db.ExecContext(ctx, mysqlInitStmt) + require.NoError(t, err) + + to, err := drivers.Open("duckdb", "default", map[string]any{"data_dir": t.TempDir()}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + require.NoError(t, err) + olap, _ := to.AsOLAP("") + + tr := newDuckDBToDuckDB(to.(*connection), "mysql", zap.NewNop()) + err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_data_types_table;", "db": dsn}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) + require.NoError(t, err) + res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) + require.NoError(t, err) + for res.Next() { + var count int + err = res.Rows.Scan(&count) + require.NoError(t, err) + require.Equal(t, count, 2) + } + require.NoError(t, res.Close()) + require.NoError(t, to.Close()) +} diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index 998458d322b..8eb26badcfe 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -68,17 +68,12 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) - require.NoError(t, err) - require.NotNil(t, handle) - - sqlStore, _ := handle.AsSQLStore() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), 
memblob.OpenBucket(nil), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"data_dir": t.TempDir()}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") - tr := newSQLStoreToDuckDB(sqlStore, to.(*connection), zap.NewNop()) - err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_datatypes;"}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) + tr := newDuckDBToDuckDB(to.(*connection), "postgres", zap.NewNop()) + err = tr.Transfer(ctx, map[string]any{"sql": "select * from all_datatypes;", "db": dbURL}, map[string]any{"table": "sink"}, &drivers.TransferOptions{}) require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "select count(*) from sink"}) require.NoError(t, err) diff --git a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go index 275029c690b..dc0ebaaff3a 100644 --- a/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_sqlite_to_duckDB_test.go @@ -30,7 +30,7 @@ func Test_sqliteToDuckDB_Transfer(t *testing.T) { require.NoError(t, err) db.Close() - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{"data_dir": t.TempDir()}, activity.NewNoopClient(), memblob.OpenBucket(nil), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go deleted file mode 100644 index e4c6179c083..00000000000 --- a/runtime/drivers/duckdb/transporter_sqlstore_to_duckDB.go +++ /dev/null @@ -1,146 +0,0 @@ -package duckdb - -import ( - "context" - "database/sql/driver" - "errors" - "fmt" - - "github.com/marcboeker/go-duckdb" - runtimev1 
"github.com/rilldata/rill/proto/gen/rill/runtime/v1" - "github.com/rilldata/rill/runtime/drivers" - "go.uber.org/zap" -) - -type sqlStoreToDuckDB struct { - to *connection - from drivers.SQLStore - logger *zap.Logger -} - -var _ drivers.Transporter = &sqlStoreToDuckDB{} - -func newSQLStoreToDuckDB(from drivers.SQLStore, c *connection, logger *zap.Logger) drivers.Transporter { - return &sqlStoreToDuckDB{ - to: c, - from: from, - logger: logger, - } -} - -func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[string]any, opts *drivers.TransferOptions) (transferErr error) { - sinkCfg, err := parseSinkProperties(sinkProps) - if err != nil { - return err - } - - s.logger = s.logger.With(zap.String("source", sinkCfg.Table)) - - rowIter, err := s.from.Query(ctx, srcProps) - if err != nil { - return err - } - defer func() { - err := rowIter.Close() - if err != nil && !errors.Is(err, ctx.Err()) { - s.logger.Error("error in closing row iterator", zap.Error(err)) - } - }() - return s.transferFromRowIterator(ctx, rowIter, sinkCfg.Table) -} - -func (s *sqlStoreToDuckDB) transferFromRowIterator(ctx context.Context, iter drivers.RowIterator, table string) error { - return nil -} - -func createTableQuery(schema *runtimev1.StructType, name string) (string, error) { - query := fmt.Sprintf("CREATE OR REPLACE TABLE %s(", safeName(name)) - for i, s := range schema.Fields { - i++ - duckDBType, err := pbTypeToDuckDB(s.Type) - if err != nil { - return "", err - } - query += fmt.Sprintf("%s %s", safeName(s.Name), duckDBType) - if i != len(schema.Fields) { - query += "," - } - } - query += ")" - return query, nil -} - -func convert(row []driver.Value, schema *runtimev1.StructType) error { - for i, v := range row { - if v == nil { - continue - } - if schema.Fields[i].Type.Code == runtimev1.Type_CODE_UUID { - val, ok := v.([16]byte) - if !ok { - return fmt.Errorf("unknown type for UUID field %s: %T", schema.Fields[i].Name, v) - } - var uuid duckdb.UUID - 
copy(uuid[:], val[:]) - row[i] = uuid - } - } - return nil -} - -func pbTypeToDuckDB(t *runtimev1.Type) (string, error) { - code := t.Code - switch code { - case runtimev1.Type_CODE_UNSPECIFIED: - return "", fmt.Errorf("unspecified code") - case runtimev1.Type_CODE_BOOL: - return "BOOLEAN", nil - case runtimev1.Type_CODE_INT8: - return "TINYINT", nil - case runtimev1.Type_CODE_INT16: - return "SMALLINT", nil - case runtimev1.Type_CODE_INT32: - return "INTEGER", nil - case runtimev1.Type_CODE_INT64: - return "BIGINT", nil - case runtimev1.Type_CODE_INT128: - return "HUGEINT", nil - case runtimev1.Type_CODE_UINT8: - return "UTINYINT", nil - case runtimev1.Type_CODE_UINT16: - return "USMALLINT", nil - case runtimev1.Type_CODE_UINT32: - return "UINTEGER", nil - case runtimev1.Type_CODE_UINT64: - return "UBIGINT", nil - case runtimev1.Type_CODE_FLOAT32: - return "FLOAT", nil - case runtimev1.Type_CODE_FLOAT64: - return "DOUBLE", nil - case runtimev1.Type_CODE_TIMESTAMP: - return "TIMESTAMP", nil - case runtimev1.Type_CODE_DATE: - return "DATE", nil - case runtimev1.Type_CODE_TIME: - return "TIME", nil - case runtimev1.Type_CODE_STRING: - return "VARCHAR", nil - case runtimev1.Type_CODE_BYTES: - return "BLOB", nil - case runtimev1.Type_CODE_ARRAY: - return "", fmt.Errorf("array is not supported") - case runtimev1.Type_CODE_STRUCT: - return "", fmt.Errorf("struct is not supported") - case runtimev1.Type_CODE_MAP: - return "", fmt.Errorf("map is not supported") - case runtimev1.Type_CODE_DECIMAL: - return "DECIMAL", nil - case runtimev1.Type_CODE_JSON: - // keeping type as json but appending varchar using the appender API causes duckdb invalid vector error intermittently - return "VARCHAR", nil - case runtimev1.Type_CODE_UUID: - return "UUID", nil - default: - return "", fmt.Errorf("unknown type_code %s", code) - } -} diff --git a/runtime/drivers/duckdb/transporter_warehouse_to_duckDB.go b/runtime/drivers/duckdb/transporter_warehouse_to_duckDB.go index 
24d9f70d518..3f7c3af836d 100644 --- a/runtime/drivers/duckdb/transporter_warehouse_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_warehouse_to_duckDB.go @@ -19,7 +19,7 @@ type warehouseToDuckDB struct { logger *zap.Logger } -var _ drivers.Transporter = &sqlStoreToDuckDB{} +var _ drivers.Transporter = &warehouseToDuckDB{} func NewWarehouseToDuckDB(from drivers.Warehouse, to drivers.OLAPStore, logger *zap.Logger) drivers.Transporter { return &warehouseToDuckDB{ diff --git a/runtime/drivers/duckdb/utils.go b/runtime/drivers/duckdb/utils.go index 7377e58faa0..907b1faa1c9 100644 --- a/runtime/drivers/duckdb/utils.go +++ b/runtime/drivers/duckdb/utils.go @@ -1,8 +1,6 @@ package duckdb import ( - "database/sql" - "database/sql/driver" "fmt" "os" "path/filepath" @@ -12,24 +10,6 @@ import ( "github.com/rilldata/rill/runtime/drivers" ) -// rawConn is similar to *sql.Conn.Raw, but additionally unwraps otelsql (which we use for instrumentation). -func rawConn(conn *sql.Conn, f func(driver.Conn) error) error { - return conn.Raw(func(raw any) error { - // For details, see: https://github.com/XSAM/otelsql/issues/98 - if c, ok := raw.(interface{ Raw() driver.Conn }); ok { - raw = c.Raw() - } - - // This is currently guaranteed, but adding check to be safe - driverConn, ok := raw.(driver.Conn) - if !ok { - return fmt.Errorf("internal: did not obtain a driver.Conn") - } - - return f(driverConn) - }) -} - type sinkProperties struct { Table string `mapstructure:"table"` } diff --git a/runtime/drivers/file/file.go b/runtime/drivers/file/file.go index 929d92fd5f1..29583624f3d 100644 --- a/runtime/drivers/file/file.go +++ b/runtime/drivers/file/file.go @@ -238,11 +238,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. 
func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/gcs/gcs.go b/runtime/drivers/gcs/gcs.go index 4fd2e05597b..7b0351d3d90 100644 --- a/runtime/drivers/gcs/gcs.go +++ b/runtime/drivers/gcs/gcs.go @@ -264,11 +264,6 @@ func (c *Connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *Connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/https/https.go b/runtime/drivers/https/https.go index febdf4b117e..486a89d776f 100644 --- a/runtime/drivers/https/https.go +++ b/runtime/drivers/https/https.go @@ -187,11 +187,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/mock/object_store/object_store.go b/runtime/drivers/mock/object_store/object_store.go index 107add7df59..2567f558579 100644 --- a/runtime/drivers/mock/object_store/object_store.go +++ b/runtime/drivers/mock/object_store/object_store.go @@ -160,11 +160,6 @@ func (h *handle) AsFileStore() (drivers.FileStore, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (h *handle) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsWarehouse implements drivers.Handle. 
func (h *handle) AsWarehouse() (drivers.Warehouse, bool) { return nil, false diff --git a/runtime/drivers/mysql/mysql.go b/runtime/drivers/mysql/mysql.go index f33b7e9026e..5f8e3f27879 100644 --- a/runtime/drivers/mysql/mysql.go +++ b/runtime/drivers/mysql/mysql.go @@ -174,11 +174,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return c, true -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/mysql/parser.go b/runtime/drivers/mysql/parser.go deleted file mode 100644 index 32922c4a7f8..00000000000 --- a/runtime/drivers/mysql/parser.go +++ /dev/null @@ -1,314 +0,0 @@ -package mysql - -import ( - "database/sql" - "fmt" - "reflect" - "strings" - - runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" -) - -type mapper interface { - runtimeType(st reflect.Type) (*runtimev1.Type, error) - // dest returns a pointer to a destination value that can be used in Rows.Scan - dest(st reflect.Type) (any, error) - // value dereferences a pointer created by dest - value(p any) (any, error) -} - -// refer https://github.com/go-sql-driver/mysql/blob/master/fields.go for base types -func getDBTypeNameToMapperMap() map[string]mapper { - m := make(map[string]mapper) - - bit := bitMapper{} - numeric := numericMapper{} - char := charMapper{} - bytes := byteMapper{} - date := dateMapper{} - json := jsonMapper{} - - // bit - m["BIT"] = bit - - // numeric - m["TINYINT"] = numeric - m["SMALLINT"] = numeric - m["MEDIUMINT"] = numeric - m["INT"] = numeric - m["UNSIGNED TINYINT"] = numeric - m["UNSIGNED SMALLINT"] = numeric - m["UNSIGNED INT"] = numeric - m["UNSIGNED BIGINT"] = numeric - m["BIGINT"] = numeric - m["DOUBLE"] = numeric - m["FLOAT"] = numeric - // MySQL stores DECIMAL value in 
binary format - // It might be stored as string without losing precision - m["DECIMAL"] = char - - // string - m["CHAR"] = char - m["LONGTEXT"] = char - m["MEDIUMTEXT"] = char - m["TEXT"] = char - m["TINYTEXT"] = char - m["VARCHAR"] = char - - // binary - m["BINARY"] = bytes - m["TINYBLOB"] = bytes - m["BLOB"] = bytes - m["LONGBLOB"] = bytes - m["MEDIUMBLOB"] = bytes - m["VARBINARY"] = bytes - - // date and time - m["DATE"] = date - m["DATETIME"] = date - m["TIMESTAMP"] = date - m["YEAR"] = numeric - // TIME is scanned as bytes and can be converted to string - m["TIME"] = char - - // json - m["JSON"] = json - - return m -} - -var ( - scanTypeFloat32 = reflect.TypeOf(float32(0)) - scanTypeFloat64 = reflect.TypeOf(float64(0)) - scanTypeInt8 = reflect.TypeOf(int8(0)) - scanTypeInt16 = reflect.TypeOf(int16(0)) - scanTypeInt32 = reflect.TypeOf(int32(0)) - scanTypeInt64 = reflect.TypeOf(int64(0)) - scanTypeNullFloat = reflect.TypeOf(sql.NullFloat64{}) - scanTypeNullInt = reflect.TypeOf(sql.NullInt64{}) - scanTypeUint8 = reflect.TypeOf(uint8(0)) - scanTypeUint16 = reflect.TypeOf(uint16(0)) - scanTypeUint32 = reflect.TypeOf(uint32(0)) - scanTypeUint64 = reflect.TypeOf(uint64(0)) -) - -type numericMapper struct{} - -func (m numericMapper) runtimeType(st reflect.Type) (*runtimev1.Type, error) { - switch st { - case scanTypeInt8: - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT8}, nil - case scanTypeInt16: - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT16}, nil - case scanTypeInt32: - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT32}, nil - case scanTypeInt64: - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT64}, nil - case scanTypeUint8: - return &runtimev1.Type{Code: runtimev1.Type_CODE_UINT8}, nil - case scanTypeUint16: - return &runtimev1.Type{Code: runtimev1.Type_CODE_UINT16}, nil - case scanTypeUint32: - return &runtimev1.Type{Code: runtimev1.Type_CODE_UINT32}, nil - case scanTypeUint64: - return &runtimev1.Type{Code: runtimev1.Type_CODE_UINT64}, 
nil - case scanTypeNullInt: - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT64}, nil - case scanTypeFloat32: - return &runtimev1.Type{Code: runtimev1.Type_CODE_FLOAT32}, nil - case scanTypeFloat64: - return &runtimev1.Type{Code: runtimev1.Type_CODE_FLOAT64}, nil - case scanTypeNullFloat: - return &runtimev1.Type{Code: runtimev1.Type_CODE_FLOAT64}, nil - default: - return nil, fmt.Errorf("numericMapper: unsupported scan type %v", st.Name()) - } -} - -func (m numericMapper) dest(st reflect.Type) (any, error) { - switch st { - case scanTypeInt8: - return new(int8), nil - case scanTypeInt16: - return new(int16), nil - case scanTypeInt32: - return new(int32), nil - case scanTypeInt64: - return new(int64), nil - case scanTypeUint8: - return new(uint8), nil - case scanTypeUint16: - return new(uint16), nil - case scanTypeUint32: - return new(uint32), nil - case scanTypeUint64: - return new(uint64), nil - case scanTypeNullInt: - return new(sql.NullInt64), nil - case scanTypeFloat32: - return new(float32), nil - case scanTypeFloat64: - return new(float64), nil - case scanTypeNullFloat: - return new(sql.NullFloat64), nil - default: - return nil, fmt.Errorf("numericMapper: unsupported scan type %v", st.Name()) - } -} - -func (m numericMapper) value(p any) (any, error) { - switch v := p.(type) { - case *int8: - return *v, nil - case *int16: - return *v, nil - case *int32: - return *v, nil - case *int64: - return *v, nil - case *uint8: - return *v, nil - case *uint16: - return *v, nil - case *uint32: - return *v, nil - case *uint64: - return *v, nil - case *sql.NullInt64: - vl, err := v.Value() - if err != nil { - return nil, err - } - return vl, nil - case *float32: - return *v, nil - case *float64: - return *v, nil - case *sql.NullFloat64: - vl, err := v.Value() - if err != nil { - return nil, err - } - return vl, nil - default: - return nil, fmt.Errorf("numericMapper: unsupported value type %v", p) - } -} - -type bitMapper struct{} - -func (m bitMapper) 
runtimeType(reflect.Type) (*runtimev1.Type, error) { - return &runtimev1.Type{Code: runtimev1.Type_CODE_STRING}, nil -} - -func (m bitMapper) dest(reflect.Type) (any, error) { - return &[]byte{}, nil -} - -func (m bitMapper) value(p any) (any, error) { - switch bs := p.(type) { - case *[]byte: - if *bs == nil { - return nil, nil - } - str := strings.Builder{} - for _, b := range *bs { - str.WriteString(fmt.Sprintf("%08b ", b)) - } - s := str.String()[:len(*bs)] - return s, nil - default: - return nil, fmt.Errorf("bitMapper: unsupported value type %v", bs) - } -} - -type charMapper struct{} - -func (m charMapper) runtimeType(reflect.Type) (*runtimev1.Type, error) { - return &runtimev1.Type{Code: runtimev1.Type_CODE_STRING}, nil -} - -func (m charMapper) dest(reflect.Type) (any, error) { - return new(sql.NullString), nil -} - -func (m charMapper) value(p any) (any, error) { - switch v := p.(type) { - case *sql.NullString: - vl, err := v.Value() - if err != nil { - return nil, err - } - return vl, nil - default: - return nil, fmt.Errorf("charMapper: unsupported value type %v", v) - } -} - -type byteMapper struct{} - -func (m byteMapper) runtimeType(reflect.Type) (*runtimev1.Type, error) { - return &runtimev1.Type{Code: runtimev1.Type_CODE_BYTES}, nil -} - -func (m byteMapper) dest(reflect.Type) (any, error) { - return &[]byte{}, nil -} - -func (m byteMapper) value(p any) (any, error) { - switch v := p.(type) { - case *[]byte: - if *v == nil { - return nil, nil - } - return *v, nil - default: - return nil, fmt.Errorf("byteMapper: unsupported value type %v", v) - } -} - -type dateMapper struct{} - -func (m dateMapper) runtimeType(reflect.Type) (*runtimev1.Type, error) { - return &runtimev1.Type{Code: runtimev1.Type_CODE_TIMESTAMP}, nil -} - -func (m dateMapper) dest(reflect.Type) (any, error) { - return new(sql.NullTime), nil -} - -func (m dateMapper) value(p any) (any, error) { - switch v := p.(type) { - case *sql.NullTime: - vl, err := v.Value() - if err != nil { - 
return nil, err - } - return vl, nil - default: - return nil, fmt.Errorf("dateMapper: unsupported value type %v", v) - } -} - -type jsonMapper struct{} - -func (m jsonMapper) runtimeType(reflect.Type) (*runtimev1.Type, error) { - return &runtimev1.Type{Code: runtimev1.Type_CODE_JSON}, nil -} - -func (m jsonMapper) dest(reflect.Type) (any, error) { - return new(sql.NullString), nil -} - -func (m jsonMapper) value(p any) (any, error) { - switch v := p.(type) { - case *sql.NullString: - vl, err := v.Value() - if err != nil { - return nil, err - } - return vl, nil - default: - return nil, fmt.Errorf("jsonMapper: unsupported value type %v", v) - } -} diff --git a/runtime/drivers/mysql/sql_store.go b/runtime/drivers/mysql/sql_store.go deleted file mode 100644 index 35d6d0ef9a0..00000000000 --- a/runtime/drivers/mysql/sql_store.go +++ /dev/null @@ -1,186 +0,0 @@ -package mysql - -import ( - "context" - "database/sql" - sqldriver "database/sql/driver" - "errors" - "fmt" - - "github.com/go-sql-driver/mysql" - "github.com/mitchellh/mapstructure" - runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" - "github.com/rilldata/rill/runtime/drivers" -) - -// Query implements drivers.SQLStore -func (c *connection) Query(ctx context.Context, props map[string]any) (drivers.RowIterator, error) { - srcProps, err := parseSourceProperties(props) - if err != nil { - return nil, err - } - - var dsn string - if srcProps.DSN != "" { // get from src properties - dsn = srcProps.DSN - } else if url, ok := c.config["dsn"].(string); ok && url != "" { // get from driver configs - dsn = url - } else { - return nil, fmt.Errorf("the property 'dsn' is required for MySQL. Provide 'dsn' in the YAML properties or pass '--env connector.mysql.dsn=...' 
to 'rill start'") - } - - conf, err := mysql.ParseDSN(dsn) - if err != nil { - return nil, err - } - conf.ParseTime = true // if set to false, time is scanned as an array rather than as time.Time - - db, err := sql.Open("mysql", conf.FormatDSN()) - if err != nil { - return nil, err - } - - // Validate DSN data: - err = db.Ping() - if err != nil { - db.Close() - return nil, err - } - - rows, err := db.QueryContext(ctx, srcProps.SQL) - if err != nil { - return nil, err - } - - iter := &rowIterator{ - db: db, - rows: rows, - } - - if err := iter.setSchema(); err != nil { - iter.Close() - return nil, err - } - return iter, nil -} - -type rowIterator struct { - db *sql.DB - rows *sql.Rows - - schema *runtimev1.StructType - row []sqldriver.Value - fieldMappers []mapper - fieldDests []any // Destinations are used while scanning rows - columnTypes []*sql.ColumnType -} - -// Close implements drivers.RowIterator. -func (r *rowIterator) Close() error { - r.rows.Close() - r.db.Close() - return nil -} - -// Next implements drivers.RowIterator. -func (r *rowIterator) Next(ctx context.Context) ([]sqldriver.Value, error) { - var err error - if !r.rows.Next() { - err := r.rows.Err() - if err == nil { - return nil, drivers.ErrIteratorDone - } - if errors.Is(err, sql.ErrNoRows) { - return nil, drivers.ErrNoRows - } - return nil, err - } - - // Scan expects destinations to be pointers - for i := range r.fieldDests { - r.fieldDests[i], err = r.fieldMappers[i].dest(r.columnTypes[i].ScanType()) - if err != nil { - return nil, err - } - } - - if err := r.rows.Scan(r.fieldDests...); err != nil { - return nil, err - } - - for i := range r.schema.Fields { - // Dereference destinations and fill the row - r.row[i], err = r.fieldMappers[i].value(r.fieldDests[i]) - if err != nil { - return nil, err - } - } - return r.row, nil -} - -// Schema implements drivers.RowIterator. 
-func (r *rowIterator) Schema(ctx context.Context) (*runtimev1.StructType, error) { - return r.schema, nil -} - -// Size implements drivers.RowIterator. -func (r *rowIterator) Size(unit drivers.ProgressUnit) (uint64, bool) { - return 0, false -} - -var _ drivers.RowIterator = &rowIterator{} - -func (r *rowIterator) setSchema() error { - cts, err := r.rows.ColumnTypes() - if err != nil { - return err - } - - mappers := make([]mapper, len(cts)) - fields := make([]*runtimev1.StructType_Field, len(cts)) - dbTypeNameToMapperMap := getDBTypeNameToMapperMap() - - for i, ct := range cts { - mapper, ok := dbTypeNameToMapperMap[ct.DatabaseTypeName()] - if !ok { - return fmt.Errorf("datatype %q is not supported", ct.DatabaseTypeName()) - } - mappers[i] = mapper - runtimeType, err := mapper.runtimeType(ct.ScanType()) - if err != nil { - return err - } - fields[i] = &runtimev1.StructType_Field{ - Name: ct.Name(), - Type: runtimeType, - } - } - - r.schema = &runtimev1.StructType{Fields: fields} - r.row = make([]sqldriver.Value, len(r.schema.Fields)) - r.fieldMappers = mappers - r.fieldDests = make([]any, len(r.schema.Fields)) - r.columnTypes, err = r.rows.ColumnTypes() - if err != nil { - return err - } - - return nil -} - -type sourceProperties struct { - SQL string `mapstructure:"sql"` - DSN string `mapstructure:"dsn"` -} - -func parseSourceProperties(props map[string]any) (*sourceProperties, error) { - conf := &sourceProperties{} - err := mapstructure.Decode(props, conf) - if err != nil { - return nil, err - } - if conf.SQL == "" { - return nil, fmt.Errorf("property 'sql' is mandatory for connector \"mysql\"") - } - return conf, err -} diff --git a/runtime/drivers/pinot/pinot.go b/runtime/drivers/pinot/pinot.go index 02a4f19ce10..a5fef24a060 100644 --- a/runtime/drivers/pinot/pinot.go +++ b/runtime/drivers/pinot/pinot.go @@ -265,10 +265,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -func (c *connection) AsSQLStore() (drivers.SQLStore, 
bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/postgres/parser.go b/runtime/drivers/postgres/parser.go deleted file mode 100644 index c3dbe76724b..00000000000 --- a/runtime/drivers/postgres/parser.go +++ /dev/null @@ -1,339 +0,0 @@ -package postgres - -import ( - "encoding/json" - "fmt" - "strings" - "time" - - "github.com/jackc/pgx/v5/pgtype" - runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" -) - -type mapper interface { - runtimeType() *runtimev1.Type - value(pgxVal any) (any, error) -} - -func register(oidToMapperMap map[string]mapper, typ string, m mapper) { - oidToMapperMap[typ] = m - // array of base type - oidToMapperMap[fmt.Sprintf("_%s", typ)] = &arrayMapper{baseMapper: m} -} - -// refer https://github.com/jackc/pgx/blob/master/pgtype/pgtype_default.go for base types -func getOidToMapperMap() map[string]mapper { - m := make(map[string]mapper) - register(m, "bit", &bitMapper{}) - register(m, "bool", &boolMapper{}) - register(m, "bpchar", &charMapper{}) - register(m, "bytea", &byteMapper{}) - register(m, "char", &charMapper{}) - register(m, "date", &dateMapper{}) - register(m, "float4", &float32Mapper{}) - register(m, "float8", &float64Mapper{}) - register(m, "int2", &int16Mapper{}) - register(m, "int4", &int32Mapper{}) - register(m, "int8", &int64Mapper{}) - register(m, "numeric", &numericMapper{}) - register(m, "text", &charMapper{}) - register(m, "time", &timeMapper{}) - register(m, "timestamp", &timeStampMapper{}) - register(m, "timestamptz", &timeStampMapper{}) - register(m, "uuid", &uuidMapper{}) - register(m, "varbit", &bitMapper{}) - register(m, "varchar", &charMapper{}) - register(m, "json", &jsonMapper{}) - register(m, "jsonb", &jsonMapper{}) - return m -} - -type bitMapper struct{} - -func (m *bitMapper) runtimeType() *runtimev1.Type { - // use bitstring 
once appender supports it - return &runtimev1.Type{Code: runtimev1.Type_CODE_STRING} -} - -func (m *bitMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case pgtype.Bits: - str := strings.Builder{} - for _, n := range b.Bytes { - str.WriteString(fmt.Sprintf("%08b ", n)) - } - return str.String()[:b.Len], nil - default: - return nil, fmt.Errorf("bitMapper: unsupported type %v", b) - } -} - -type boolMapper struct{} - -func (m *boolMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_BOOL} -} - -func (m *boolMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case bool: - return b, nil - default: - return nil, fmt.Errorf("boolMapper: unsupported type %v", b) - } -} - -type charMapper struct{} - -func (m *charMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_STRING} -} - -func (m *charMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case string: - return b, nil - default: - return nil, fmt.Errorf("charMapper: unsupported type %v", b) - } -} - -type byteMapper struct{} - -func (m *byteMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_BYTES} -} - -func (m *byteMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case []byte: - return b, nil - default: - return nil, fmt.Errorf("byteMapper: unsupported type %v", b) - } -} - -type dateMapper struct{} - -func (m *dateMapper) runtimeType() *runtimev1.Type { - // Use runtimev1.Type_CODE_DATE once DATE is supported by DuckDB appender - return &runtimev1.Type{Code: runtimev1.Type_CODE_TIMESTAMP} -} - -func (m *dateMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case time.Time: - return b, nil - default: - return nil, fmt.Errorf("dateMapper: unsupported type %v", b) - } -} - -type float32Mapper struct{} - -func (m *float32Mapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: 
runtimev1.Type_CODE_FLOAT32} -} - -func (m *float32Mapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case float32: - return b, nil - default: - return nil, fmt.Errorf("float32Mapper: unsupported type %v", b) - } -} - -type float64Mapper struct{} - -func (m *float64Mapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_FLOAT64} -} - -func (m *float64Mapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case float64: - return b, nil - default: - return nil, fmt.Errorf("float64Mapper: unsupported type %v", b) - } -} - -type int16Mapper struct{} - -func (m *int16Mapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT16} -} - -func (m *int16Mapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case int16: - return b, nil - default: - return nil, fmt.Errorf("int16Mapper: unsupported type %v", b) - } -} - -type int32Mapper struct{} - -func (m *int32Mapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT32} -} - -func (m *int32Mapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case int32: - return b, nil - default: - return nil, fmt.Errorf("int32Mapper: unsupported type %v", b) - } -} - -type int64Mapper struct{} - -func (m *int64Mapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_INT64} -} - -func (m *int64Mapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case int64: - return b, nil - default: - return nil, fmt.Errorf("int64Mapper: unsupported type %v", b) - } -} - -type timeMapper struct{} - -func (m *timeMapper) runtimeType() *runtimev1.Type { - // Use runtimev1.Type_CODE_TIME once DATE is supported by DuckDB appender - return &runtimev1.Type{Code: runtimev1.Type_CODE_TIMESTAMP} -} - -func (m *timeMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case pgtype.Time: - midnight := 
time.Date(time.Now().Year(), time.Now().Month(), time.Now().Day(), 0, 0, 0, 0, time.UTC) - duration := time.Duration(b.Microseconds) * time.Microsecond - midnight = midnight.Add(duration) - return midnight, nil - default: - return nil, fmt.Errorf("timeMapper: unsupported type %v", b) - } -} - -type timeStampMapper struct{} - -func (m *timeStampMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_TIMESTAMP} -} - -func (m *timeStampMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case time.Time: - return b, nil - default: - return nil, fmt.Errorf("timeStampMapper: unsupported type %v", b) - } -} - -type uuidMapper struct{} - -func (m *uuidMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_UUID} -} - -func (m *uuidMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case [16]byte: - return b, nil - default: - return nil, fmt.Errorf("uuidMapper: unsupported type %v", b) - } -} - -type numericMapper struct{} - -func (m *numericMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_STRING} -} - -func (m *numericMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case pgtype.NumericValuer: - f, err := b.NumericValue() - if err != nil { - return nil, err - } - bytes, err := f.MarshalJSON() - if err != nil { - return nil, err - } - return string(bytes), nil - case pgtype.Float64Valuer: - f, err := b.Float64Value() - if err != nil { - return nil, err - } - return fmt.Sprint(f.Float64), nil - case pgtype.Int64Valuer: - f, err := b.Int64Value() - if err != nil { - return nil, err - } - return fmt.Sprint(f.Int64), nil - default: - return nil, fmt.Errorf("numericMapper: unsupported type %v", b) - } -} - -type jsonMapper struct{} - -func (m *jsonMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_JSON} -} - -func (m *jsonMapper) value(pgxVal any) (any, error) { - 
switch b := pgxVal.(type) { - case []byte: - return string(b), nil - case map[string]any: - enc, err := json.Marshal(b) - if err != nil { - return nil, err - } - return string(enc), nil - default: - return nil, fmt.Errorf("jsonMapper: unsupported type %v", b) - } -} - -type arrayMapper struct { - baseMapper mapper -} - -func (m *arrayMapper) runtimeType() *runtimev1.Type { - return &runtimev1.Type{Code: runtimev1.Type_CODE_JSON} -} - -func (m *arrayMapper) value(pgxVal any) (any, error) { - switch b := pgxVal.(type) { - case []interface{}: - arr := make([]any, len(b)) - for i, val := range b { - res, err := m.baseMapper.value(val) - if err != nil { - return nil, err - } - arr[i] = res - } - enc, err := json.Marshal(arr) - if err != nil { - return nil, err - } - return string(enc), nil - default: - return nil, fmt.Errorf("arrayMapper: unsupported type %v", b) - } -} diff --git a/runtime/drivers/postgres/postgres.go b/runtime/drivers/postgres/postgres.go index 641d7d15fbe..00279715fa5 100644 --- a/runtime/drivers/postgres/postgres.go +++ b/runtime/drivers/postgres/postgres.go @@ -172,11 +172,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return c, true -} - // AsNotifier implements drivers.Connection. 
func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/postgres/sql_store.go b/runtime/drivers/postgres/sql_store.go deleted file mode 100644 index d0412df8906..00000000000 --- a/runtime/drivers/postgres/sql_store.go +++ /dev/null @@ -1,253 +0,0 @@ -package postgres - -import ( - "context" - "database/sql" - sqldriver "database/sql/driver" - "errors" - "fmt" - "strings" - "time" - - "github.com/jackc/pgx/v5" - "github.com/jackc/pgx/v5/pgtype" - "github.com/jackc/pgx/v5/pgxpool" - "github.com/mitchellh/mapstructure" - runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" - "github.com/rilldata/rill/runtime/drivers" -) - -// Query implements drivers.SQLStore -func (c *connection) Query(ctx context.Context, props map[string]any) (drivers.RowIterator, error) { - srcProps, err := parseSourceProperties(props) - if err != nil { - return nil, err - } - - var dsn string - if srcProps.DatabaseURL != "" { // get from src properties - dsn = srcProps.DatabaseURL - } else if url, ok := c.config["database_url"].(string); ok && url != "" { // get from driver configs - dsn = url - } else { - return nil, fmt.Errorf("the property 'database_url' is required for Postgres. Provide 'database_url' in the YAML properties or pass '--env connector.postgres.database_url=...' to 'rill start'") - } - - config, err := pgxpool.ParseConfig(dsn) - if err != nil { - return nil, err - } - // disable prepared statements which is not supported by some postgres providers like pgedge cloud for non admin users. - // prepared statements are also not supported by proxies like pgbouncer. - // The limiatation of not using prepared statements is not a problem for us as we don't support parameters in source queries. 
- config.ConnConfig.DefaultQueryExecMode = pgx.QueryExecModeSimpleProtocol - - pool, err := pgxpool.NewWithConfig(ctx, config) - if err != nil { - return nil, err - } - - conn, err := pool.Acquire(ctx) - if err != nil { - pool.Close() - return nil, err - } - - res, err := conn.Query(ctx, srcProps.SQL) - if err != nil { - conn.Release() - pool.Close() - return nil, err - } - - iter := &rowIterator{ - conn: conn, - rows: res, - pool: pool, - } - - if err := iter.setSchema(ctx); err != nil { - iter.Close() - return nil, err - } - return iter, nil -} - -type rowIterator struct { - conn *pgxpool.Conn - rows pgx.Rows - pool *pgxpool.Pool - schema *runtimev1.StructType - - row []sqldriver.Value - fieldMappers []mapper -} - -// Close implements drivers.RowIterator. -func (r *rowIterator) Close() error { - r.rows.Close() - r.conn.Release() - r.pool.Close() - return r.rows.Err() -} - -// Next implements drivers.RowIterator. -func (r *rowIterator) Next(ctx context.Context) ([]sqldriver.Value, error) { - if !r.rows.Next() { - err := r.rows.Err() - if err == nil { - return nil, drivers.ErrIteratorDone - } - if errors.Is(err, sql.ErrNoRows) { - return nil, drivers.ErrNoRows - } - return nil, err - } - - vals, err := r.rows.Values() - if err != nil { - return nil, err - } - - for i := range r.schema.Fields { - if vals[i] == nil { - r.row[i] = nil - continue - } - mapper := r.fieldMappers[i] - r.row[i], err = mapper.value(vals[i]) - if err != nil { - return nil, err - } - } - - return r.row, nil -} - -// Schema implements drivers.RowIterator. -func (r *rowIterator) Schema(ctx context.Context) (*runtimev1.StructType, error) { - return r.schema, nil -} - -// Size implements drivers.RowIterator. 
-func (r *rowIterator) Size(unit drivers.ProgressUnit) (uint64, bool) { - return 0, false -} - -var _ drivers.RowIterator = &rowIterator{} - -func (r *rowIterator) setSchema(ctx context.Context) error { - fds := r.rows.FieldDescriptions() - conn := r.rows.Conn() - if conn == nil { - // not possible but keeping it for graceful failures - return fmt.Errorf("nil pgx conn") - } - - mappers := make([]mapper, len(fds)) - fields := make([]*runtimev1.StructType_Field, len(fds)) - typeMap := conn.TypeMap() - oidToMapperMap := getOidToMapperMap() - - var newConn *pgxpool.Conn - defer func() { - if newConn != nil { - newConn.Release() - } - }() - for i, fd := range fds { - dt := columnTypeDatabaseTypeName(typeMap, fds[i].DataTypeOID) - if dt == "" { - var err error - if newConn == nil { - newConn, err = r.acquireConn(ctx) - if err != nil { - return err - } - } - dt, err = r.registerIfEnum(ctx, newConn.Conn(), oidToMapperMap, fds[i].DataTypeOID) - if err != nil { - return err - } - } - mapper, ok := oidToMapperMap[dt] - if !ok { - return fmt.Errorf("datatype %q is not supported", dt) - } - mappers[i] = mapper - fields[i] = &runtimev1.StructType_Field{ - Name: fd.Name, - Type: mapper.runtimeType(), - } - } - - r.schema = &runtimev1.StructType{Fields: fields} - r.fieldMappers = mappers - r.row = make([]sqldriver.Value, len(r.schema.Fields)) - return nil -} - -func (r *rowIterator) registerIfEnum(ctx context.Context, conn *pgx.Conn, oidToMapperMap map[string]mapper, oid uint32) (string, error) { - // custom datatypes are not supported - // but it is possible to support enum with this approach - var isEnum bool - var typName string - err := conn.QueryRow(ctx, "SELECT typtype = 'e' AS isEnum, typname FROM pg_type WHERE oid = $1", oid).Scan(&isEnum, &typName) - if err != nil { - return "", err - } - - if !isEnum { - return "", fmt.Errorf("custom datatypes are not supported") - } - - dataType, err := conn.LoadType(ctx, typName) - if err != nil { - return "", err - } - - 
r.rows.Conn().TypeMap().RegisterType(dataType) - oidToMapperMap[typName] = &charMapper{} - register(oidToMapperMap, typName, &charMapper{}) - return typName, nil -} - -func (r *rowIterator) acquireConn(ctx context.Context) (*pgxpool.Conn, error) { - // acquire another connection - ctxWithTimeOut, cancel := context.WithTimeout(ctx, time.Minute) - defer cancel() - - conn, err := r.pool.Acquire(ctxWithTimeOut) - if err != nil { - if errors.Is(err, context.DeadlineExceeded) { - return nil, fmt.Errorf("postgres connector require 2 connections. Set `max_connections` to atleast 2") - } - return nil, err - } - return conn, nil -} - -// columnTypeDatabaseTypeName returns the database system type name. If the name is unknown the OID is returned. -func columnTypeDatabaseTypeName(typeMap *pgtype.Map, datatypeOID uint32) string { - if dt, ok := typeMap.TypeForOID(datatypeOID); ok { - return strings.ToLower(dt.Name) - } - return "" -} - -type sourceProperties struct { - SQL string `mapstructure:"sql"` - DatabaseURL string `mapstructure:"database_url"` -} - -func parseSourceProperties(props map[string]any) (*sourceProperties, error) { - conf := &sourceProperties{} - err := mapstructure.Decode(props, conf) - if err != nil { - return nil, err - } - if conf.SQL == "" { - return nil, fmt.Errorf("property 'sql' is mandatory for connector \"postgres\"") - } - return conf, err -} diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index e2e130c3976..1372a73aca4 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -233,11 +233,6 @@ func (c *Connection) AsWarehouse() (drivers.Warehouse, bool) { return c, true } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsAI implements drivers.Handle. 
func (c *Connection) AsAI(instanceID string) (drivers.AIService, bool) { return nil, false diff --git a/runtime/drivers/s3/s3.go b/runtime/drivers/s3/s3.go index 141e842845f..ce3cc7942ba 100644 --- a/runtime/drivers/s3/s3.go +++ b/runtime/drivers/s3/s3.go @@ -247,11 +247,6 @@ func (c *Connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *Connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/salesforce/salesforce.go b/runtime/drivers/salesforce/salesforce.go index f85be674622..17530254256 100644 --- a/runtime/drivers/salesforce/salesforce.go +++ b/runtime/drivers/salesforce/salesforce.go @@ -247,11 +247,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return c, true } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. 
func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/slack/slack.go b/runtime/drivers/slack/slack.go index 9384c0d9691..47740ead9dc 100644 --- a/runtime/drivers/slack/slack.go +++ b/runtime/drivers/slack/slack.go @@ -115,10 +115,6 @@ func (h *handle) AsAI(instanceID string) (drivers.AIService, bool) { return nil, false } -func (h *handle) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - func (h *handle) AsOLAP(instanceID string) (drivers.OLAPStore, bool) { return nil, false } diff --git a/runtime/drivers/snowflake/snowflake.go b/runtime/drivers/snowflake/snowflake.go index 8a3f65759d0..f9970284541 100644 --- a/runtime/drivers/snowflake/snowflake.go +++ b/runtime/drivers/snowflake/snowflake.go @@ -202,11 +202,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return c, true } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. 
func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/snowflake/sql_store.go b/runtime/drivers/snowflake/warehouse.go similarity index 100% rename from runtime/drivers/snowflake/sql_store.go rename to runtime/drivers/snowflake/warehouse.go diff --git a/runtime/drivers/sql_store.go b/runtime/drivers/sql_store.go deleted file mode 100644 index 072304d850a..00000000000 --- a/runtime/drivers/sql_store.go +++ /dev/null @@ -1,34 +0,0 @@ -package drivers - -import ( - "context" - "database/sql/driver" - "errors" - - runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" -) - -var ErrIteratorDone = errors.New("empty iterator") - -var ErrNoRows = errors.New("no rows found for the query") - -// SQLStore is implemented by drivers capable of running sql queries and generating an iterator to consume results. -// In future the results can be produced in other formats like arrow as well. -// May be call it DataWarehouse to differentiate from OLAP or postgres? -type SQLStore interface { - // Query returns driver.RowIterator to iterate over results row by row - Query(ctx context.Context, props map[string]any) (RowIterator, error) -} - -// RowIterator returns an iterator to iterate over result of a sql query -type RowIterator interface { - // Schema of the underlying data - Schema(ctx context.Context) (*runtimev1.StructType, error) - // Next fetches next row - Next(ctx context.Context) ([]driver.Value, error) - // Close closes the iterator and frees resources - Close() error - // Size returns total size of data downloaded in unit. 
- // Returns 0,false if not able to compute size in given unit - Size(unit ProgressUnit) (uint64, bool) -} diff --git a/runtime/drivers/sqlite/sqlite.go b/runtime/drivers/sqlite/sqlite.go index 79c9de4a1d9..ee581deaaa8 100644 --- a/runtime/drivers/sqlite/sqlite.go +++ b/runtime/drivers/sqlite/sqlite.go @@ -181,11 +181,6 @@ func (c *connection) AsWarehouse() (drivers.Warehouse, bool) { return nil, false } -// AsSQLStore implements drivers.Connection. -func (c *connection) AsSQLStore() (drivers.SQLStore, bool) { - return nil, false -} - // AsNotifier implements drivers.Connection. func (c *connection) AsNotifier(properties map[string]any) (drivers.Notifier, error) { return nil, drivers.ErrNotNotifier diff --git a/runtime/drivers/warehouse.go b/runtime/drivers/warehouse.go index a9fa7a963bc..5dd8fbcc6f8 100644 --- a/runtime/drivers/warehouse.go +++ b/runtime/drivers/warehouse.go @@ -2,8 +2,11 @@ package drivers import ( "context" + "errors" ) +var ErrNoRows = errors.New("no rows found for the query") + type Warehouse interface { // QueryAsFiles downloads results into files and returns an iterator to iterate over them QueryAsFiles(ctx context.Context, props map[string]any) (FileIterator, error) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index fceabb4ffd9..4f9660e877c 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -201,8 +201,10 @@ func (d *DBOptions) ValidateSettings() error { type CreateTableOptions struct { // View specifies whether the created table is a view. View bool - // InitSQL is the SQL to run before creating the table. - InitSQL string + // If BeforeCreateFn is set, it will be executed before the create query is executed. + BeforeCreateFn func(ctx context.Context, conn *sqlx.Conn) error + // If AfterCreateFn is set, it will be executed after the create query is executed. + AfterCreateFn func(ctx context.Context, conn *sqlx.Conn) error } // NewDB creates a new DB instance. 
@@ -236,7 +238,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { // create local path err = os.MkdirAll(db.localPath, fs.ModePerm) if err != nil { - return nil, fmt.Errorf("unable to create read path: %w", err) + return nil, fmt.Errorf("unable to create local path: %w", err) } // sync local data @@ -306,7 +308,6 @@ func (d *db) Close() error { defer d.readMu.Unlock() err = d.dbHandle.Close() - d.dbHandle = nil return err } @@ -410,10 +411,10 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * typ = "TABLE" newMeta.Type = "TABLE" } - if opts.InitSQL != "" { - _, err = conn.ExecContext(ctx, opts.InitSQL, nil) + if opts.BeforeCreateFn != nil { + err = opts.BeforeCreateFn(ctx, conn) if err != nil { - return fmt.Errorf("create: init sql failed: %w", err) + return fmt.Errorf("create: BeforeCreateFn returned error: %w", err) } } // ingest data @@ -421,6 +422,12 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * if err != nil { return fmt.Errorf("create: create %s %q failed: %w", typ, name, err) } + if opts.AfterCreateFn != nil { + err = opts.AfterCreateFn(ctx, conn) + if err != nil { + return fmt.Errorf("create: AfterCreateFn returned error: %w", err) + } + } // close write handle before syncing read so that temp files or wal files are removed err = release() From 2ec478037d9ec4bad908426ef753c39e2a98e04a Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:27:12 +0530 Subject: [PATCH 44/64] rename fix --- runtime/pkg/rduckdb/db.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 38a70617ac5..4d9e1302de7 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -199,8 +199,10 @@ func (d *DBOptions) ValidateSettings() error { type CreateTableOptions struct { // View specifies whether the created table is a view. 
View bool - // InitSQL is the SQL to run before creating the table. - InitSQL string + // If BeforeCreateFn is set, it will be executed before the create query is executed. + BeforeCreateFn func(ctx context.Context, conn *sqlx.Conn) error + // If AfterCreateFn is set, it will be executed after the create query is executed. + AfterCreateFn func(ctx context.Context, conn *sqlx.Conn) error } // NewDB creates a new DB instance. @@ -382,10 +384,10 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * typ = "TABLE" } newMeta.Type = typ - if opts.InitSQL != "" { - _, err = conn.ExecContext(ctx, opts.InitSQL, nil) + if opts.BeforeCreateFn != nil { + err = opts.BeforeCreateFn(ctx, conn) if err != nil { - return fmt.Errorf("create: init sql failed: %w", err) + return fmt.Errorf("create: BeforeCreateFn returned error: %w", err) } } // ingest data @@ -393,6 +395,12 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * if err != nil { return fmt.Errorf("create: create %s %q failed: %w", typ, name, err) } + if opts.AfterCreateFn != nil { + err = opts.AfterCreateFn(ctx, conn) + if err != nil { + return fmt.Errorf("create: AfterCreateFn returned error: %w", err) + } + } // close write handle before syncing local so that temp files or wal files are removed err = release() @@ -1013,7 +1021,7 @@ func renameTable(ctx context.Context, dbFile, old, newName string) error { typ = "TABLE" } - _, err = db.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, old, newName)) + _, err = db.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(old), safeSQLName(newName))) return err } From ebfd0ba8e4d13a32c40d475a0ee7200b754cb4cd Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 12:37:24 +0530 Subject: [PATCH 45/64] dsn fix --- runtime/pkg/rduckdb/db.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff 
--git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 4d9e1302de7..f95590816dd 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -721,8 +721,9 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read for k, v := range settings { query.Set(k, v) } - dsn.RawQuery = query.Encode() - connector, err := duckdb.NewConnector(dsn.String(), func(execer driver.ExecerContext) error { + // Rebuild DuckDB DSN (which should be "path?key=val&...") + // this is required since spaces and other special characters are valid in db file path but invalid and hence encoded in URL + connector, err := duckdb.NewConnector(generateDSN(dsn.Path, query.Encode()), func(execer driver.ExecerContext) error { for _, qry := range d.opts.InitQueries { _, err := execer.ExecContext(context.Background(), qry, nil) if err != nil && strings.Contains(err.Error(), "Failed to download extension") { @@ -1008,20 +1009,14 @@ func renameTable(ctx context.Context, dbFile, old, newName string) error { } defer db.Close() - var isView bool - err = db.QueryRowContext(ctx, "SELECT lower(table_type) = 'view' FROM INFORMATION_SCHEMA.TABLES WHERE table_name = ?", old).Scan(&isView) + // TODO :: create temporary views when attaching tables to write connection to avoid left views in .db file + // In that case this will not be required. 
+ _, err = db.ExecContext(ctx, fmt.Sprintf("DROP VIEW IF EXISTS %s", safeSQLName(newName))) if err != nil { return err } - var typ string - if isView { - typ = "VIEW" - } else { - typ = "TABLE" - } - - _, err = db.ExecContext(ctx, fmt.Sprintf("ALTER %s %s RENAME TO %s", typ, safeSQLName(old), safeSQLName(newName))) + _, err = db.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s RENAME TO %s", safeSQLName(old), safeSQLName(newName))) return err } @@ -1094,3 +1089,10 @@ func humanReadableSizeToBytes(sizeStr string) (float64, error) { func schemaName(gen int) string { return fmt.Sprintf("main_%v", gen) } + +func generateDSN(path, encodedQuery string) string { + if encodedQuery == "" { + return path + } + return path + "?" + encodedQuery +} From e6d30cde5fb9401aadedcc96bf95b52b29c98f75 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:59:55 +0530 Subject: [PATCH 46/64] write should acquire snapshot --- runtime/pkg/rduckdb/catalog.go | 16 ++++++++++++++++ runtime/pkg/rduckdb/db.go | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 395312547a3..7cfc63df339 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -91,7 +91,12 @@ func (c *catalog) addTableVersion(name string, meta *tableMeta) { if oldVersion != "" { c.releaseVersion(t, oldVersion) } + c.currentSnapshotID++ + c.acquireSnapshotUnsafe() + if c.currentSnapshotID > 1 { + c.releaseSnapshotUnsafe(c.snapshots[c.currentSnapshotID-1]) + } } // removeTable removes a table from the catalog. 
@@ -109,8 +114,13 @@ func (c *catalog) removeTable(name string) { oldVersion := t.currentVersion t.deleted = true t.currentVersion = "" + c.currentSnapshotID++ + c.acquireSnapshotUnsafe() c.releaseVersion(t, oldVersion) + if c.currentSnapshotID > 1 { + c.releaseSnapshotUnsafe(c.snapshots[c.currentSnapshotID-1]) + } } // listTables returns tableMeta for all active tables present in the catalog. @@ -136,7 +146,10 @@ func (c *catalog) listTables() []*tableMeta { func (c *catalog) acquireSnapshot() *snapshot { c.mu.Lock() defer c.mu.Unlock() + return c.acquireSnapshotUnsafe() +} +func (c *catalog) acquireSnapshotUnsafe() *snapshot { s, ok := c.snapshots[c.currentSnapshotID] if ok { s.referenceCount++ @@ -168,7 +181,10 @@ func (c *catalog) acquireSnapshot() *snapshot { func (c *catalog) releaseSnapshot(s *snapshot) { c.mu.Lock() defer c.mu.Unlock() + c.releaseSnapshotUnsafe(s) +} +func (c *catalog) releaseSnapshotUnsafe(s *snapshot) { s.referenceCount-- if s.referenceCount > 0 { return diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index f95590816dd..a433a203ec1 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -962,7 +962,8 @@ func (d *db) prepareSnapshot(ctx context.Context, conn *sqlx.Conn, s *snapshot) defer d.metaSem.Release(1) if s.ready { - return nil + _, err = conn.ExecContext(ctx, "USE "+schemaName(s.id)) + return err } _, err = conn.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName(s.id)) From b462a96996db166a0f0c3ad16c2ae956ec0e4325 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:44:28 +0530 Subject: [PATCH 47/64] missing withprefix --- runtime/storage/storage.go | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index 641e07e78bc..aadc5953cf9 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -129,6 +129,7 @@ func RemoveInstance(c *Client, instanceID 
string) error { return fmt.Errorf("storage: should not call RemoveInstance with prefixed client") } + c = c.WithPrefix(instanceID) err := os.RemoveAll(c.DataDir()) if err != nil { return fmt.Errorf("could not remove instance directory: %w", err) From ccc57299160b87d672901afc1b4d8a18872b2e2b Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:07:02 +0530 Subject: [PATCH 48/64] storage APIs also create directories --- runtime/drivers/admin/admin.go | 14 ++- runtime/drivers/azure/object_store.go | 7 +- runtime/drivers/bigquery/warehouse.go | 3 +- runtime/drivers/clickhouse/clickhouse.go | 11 ++- runtime/drivers/duckdb/duckdb.go | 7 +- runtime/drivers/gcs/object_store.go | 7 +- runtime/drivers/s3/object_store.go | 6 +- runtime/drivers/snowflake/sql_store.go | 2 +- runtime/metricsview/executor_export.go | 13 ++- runtime/reconcilers/model.go | 9 +- runtime/registry.go | 23 +++-- runtime/registry_test.go | 4 +- runtime/resolvers/glob.go | 6 +- runtime/storage/storage.go | 112 ++++++++++++++--------- 14 files changed, 147 insertions(+), 77 deletions(-) diff --git a/runtime/drivers/admin/admin.go b/runtime/drivers/admin/admin.go index a4191560d99..5d44ce06dd6 100644 --- a/runtime/drivers/admin/admin.go +++ b/runtime/drivers/admin/admin.go @@ -392,7 +392,7 @@ func (h *Handle) checkHandshake(ctx context.Context) error { } if h.repoPath == "" { - h.repoPath, err = os.MkdirTemp(h.storage.TempDir(), "admin_driver_repo") + h.repoPath, err = h.storage.RandomTempDir("admin_driver_repo") if err != nil { return err } @@ -578,7 +578,11 @@ func (h *Handle) stashVirtual() error { return nil } - dst, err := generateTmpPath(h.storage.TempDir(), "admin_driver_virtual_stash", "") + tempPath, err := h.storage.TempDir() + if err != nil { + return fmt.Errorf("stash virtual: %w", err) + } + dst, err := generateTmpPath(tempPath, "admin_driver_virtual_stash", "") if err != nil { return fmt.Errorf("stash virtual: %w", err) } @@ 
-623,7 +627,11 @@ func (h *Handle) download() error { defer cancel() // generate a temporary file to copy repo tar directory - downloadDst, err := generateTmpPath(h.storage.TempDir(), "admin_driver_zipped_repo", ".tar.gz") + tempPath, err := h.storage.TempDir() + if err != nil { + return fmt.Errorf("download: %w", err) + } + downloadDst, err := generateTmpPath(tempPath, "admin_driver_zipped_repo", ".tar.gz") if err != nil { return fmt.Errorf("download: %w", err) } diff --git a/runtime/drivers/azure/object_store.go b/runtime/drivers/azure/object_store.go index 8e0b85cc961..fe52f7f3a90 100644 --- a/runtime/drivers/azure/object_store.go +++ b/runtime/drivers/azure/object_store.go @@ -115,6 +115,11 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d } } + tempDir, err := c.storage.TempDir() + if err != nil { + return nil, err + } + // prepare fetch configs opts := rillblob.Options{ GlobMaxTotalSize: conf.GlobMaxTotalSize, @@ -125,7 +130,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d ExtractPolicy: conf.extractPolicy, BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize == "-1", - TempDir: c.storage.TempDir(), + TempDir: tempDir, } iter, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go index 35c0c6d0329..51c65f5c8f0 100644 --- a/runtime/drivers/bigquery/warehouse.go +++ b/runtime/drivers/bigquery/warehouse.go @@ -146,11 +146,10 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (dr c.logger.Debug("query took", zap.Duration("duration", time.Since(now)), observability.ZapCtx(ctx)) } - tempDir, err := os.MkdirTemp(c.storage.TempDir(), "bigquery") + tempDir, err := c.storage.RandomTempDir("bigquery-*") if err != nil { return nil, err } - return &fileIterator{ client: client, bqIter: it, diff --git a/runtime/drivers/clickhouse/clickhouse.go 
b/runtime/drivers/clickhouse/clickhouse.go index a0c5c517110..193dbf6411a 100644 --- a/runtime/drivers/clickhouse/clickhouse.go +++ b/runtime/drivers/clickhouse/clickhouse.go @@ -173,7 +173,16 @@ func (d driver) Open(instanceID string, config map[string]any, st *storage.Clien } } else { // run clickhouse locally - embed = newEmbedClickHouse(conf.EmbedPort, st.DataDir(), st.TempDir(), logger) + dataDir, err := st.DataDir(instanceID) + if err != nil { + return nil, err + } + tempDir, err := st.TempDir(instanceID) + if err != nil { + return nil, err + } + + embed = newEmbedClickHouse(conf.EmbedPort, dataDir, tempDir, logger) opts, err = embed.start() if err != nil { return nil, err diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 9ea47f8d6b5..e402fa5c060 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -146,7 +146,12 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien logger.Warn("failed to install embedded DuckDB extensions, let DuckDB download them", zap.Error(err)) } - cfg, err := newConfig(cfgMap, st.DataDir()) + dataDir, err := st.DataDir() + if err != nil { + return nil, err + } + + cfg, err := newConfig(cfgMap, dataDir) if err != nil { return nil, err } diff --git a/runtime/drivers/gcs/object_store.go b/runtime/drivers/gcs/object_store.go index 28437da94fa..1b7a2069829 100644 --- a/runtime/drivers/gcs/object_store.go +++ b/runtime/drivers/gcs/object_store.go @@ -69,6 +69,11 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d return nil, err } } + + tempDir, err := c.storage.TempDir() + if err != nil { + return nil, err + } // prepare fetch configs opts := rillblob.Options{ GlobMaxTotalSize: conf.GlobMaxTotalSize, @@ -79,7 +84,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (d ExtractPolicy: conf.extractPolicy, BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize 
== "-1", - TempDir: c.storage.TempDir(), + TempDir: tempDir, } iter, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/s3/object_store.go b/runtime/drivers/s3/object_store.go index 021f2804c45..bf6e195a779 100644 --- a/runtime/drivers/s3/object_store.go +++ b/runtime/drivers/s3/object_store.go @@ -125,6 +125,10 @@ func (c *Connection) DownloadFiles(ctx context.Context, src map[string]any) (dri return nil, err } } + tempDir, err := c.storage.TempDir() + if err != nil { + return nil, err + } // prepare fetch configs opts := rillblob.Options{ GlobMaxTotalSize: conf.GlobMaxTotalSize, @@ -136,7 +140,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, src map[string]any) (dri BatchSizeBytes: int64(batchSize.Bytes()), KeepFilesUntilClose: conf.BatchSize == "-1", RetainFiles: c.config.RetainFiles, - TempDir: c.storage.TempDir(), + TempDir: tempDir, } it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger) diff --git a/runtime/drivers/snowflake/sql_store.go b/runtime/drivers/snowflake/sql_store.go index 136eaddf32a..70d0cfeb05c 100644 --- a/runtime/drivers/snowflake/sql_store.go +++ b/runtime/drivers/snowflake/sql_store.go @@ -86,7 +86,7 @@ func (c *connection) QueryAsFiles(ctx context.Context, props map[string]any) (dr return nil, drivers.ErrNoRows } - tempDir, err := os.MkdirTemp(c.storage.TempDir(), "snowflake") + tempDir, err := c.storage.RandomTempDir("snowflake") if err != nil { return nil, err } diff --git a/runtime/metricsview/executor_export.go b/runtime/metricsview/executor_export.go index 37d6a52d45a..3ec0c98f329 100644 --- a/runtime/metricsview/executor_export.go +++ b/runtime/metricsview/executor_export.go @@ -23,7 +23,10 @@ func (e *Executor) executeExport(ctx context.Context, format drivers.FileFormat, return "", err } name = format.Filename(name) - path := e.rt.TempDir(e.instanceID, name) + tempPath, err := e.rt.TempDir(e.instanceID, name) + if err != nil { + return "", err + } ic, ir, err := 
e.rt.AcquireHandle(ctx, e.instanceID, inputConnector) if err != nil { @@ -39,7 +42,7 @@ func (e *Executor) executeExport(ctx context.Context, format drivers.FileFormat, defer or() outputProps := map[string]any{ - "path": path, + "path": tempPath, "format": format, "file_size_limit_bytes": e.instanceCfg.DownloadLimitBytes, } @@ -73,12 +76,12 @@ func (e *Executor) executeExport(ctx context.Context, format drivers.FileFormat, InputProperties: inputProps, OutputProperties: outputProps, Priority: e.priority, - TempDir: e.rt.TempDir(e.instanceID), + TempDir: tempPath, }) if err != nil { - _ = os.Remove(path) + _ = os.Remove(tempPath) return "", fmt.Errorf("failed to execute export: %w", err) } - return path, nil + return tempPath, nil } diff --git a/runtime/reconcilers/model.go b/runtime/reconcilers/model.go index ba70149ee3d..3ca1bfa6106 100644 --- a/runtime/reconcilers/model.go +++ b/runtime/reconcilers/model.go @@ -1089,6 +1089,11 @@ func (r *ModelReconciler) executeSingle(ctx context.Context, executor *wrappedMo return nil, err } + tempDir, err := r.C.Runtime.TempDir(r.C.InstanceID) + if err != nil { + return nil, err + } + // Execute the stage step if configured if executor.stage != nil { // Also resolve templating in the stage props @@ -1107,7 +1112,7 @@ func (r *ModelReconciler) executeSingle(ctx context.Context, executor *wrappedMo IncrementalRun: incrementalRun, PartitionRun: partition != nil, PreviousResult: prevResult, - TempDir: r.C.Runtime.TempDir(r.C.InstanceID), + TempDir: tempDir, }) if err != nil { return nil, err @@ -1137,7 +1142,7 @@ func (r *ModelReconciler) executeSingle(ctx context.Context, executor *wrappedMo IncrementalRun: incrementalRun, PartitionRun: partition != nil, PreviousResult: prevResult, - TempDir: r.C.Runtime.TempDir(r.C.InstanceID), + TempDir: tempDir, }) if err != nil { return nil, err diff --git a/runtime/registry.go b/runtime/registry.go index 61f60e655eb..02d62836694 100644 --- a/runtime/registry.go +++ b/runtime/registry.go @@ 
-16,7 +16,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/logbuffer" "github.com/rilldata/rill/runtime/pkg/logutil" "github.com/rilldata/rill/runtime/pkg/observability" - "github.com/rilldata/rill/runtime/storage" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" @@ -109,7 +108,7 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { // Wait for the controller to stop and the connection cache to be evicted <-completed - if err := storage.RemoveInstance(r.storage, inst.ID); err != nil { + if err := r.storage.RemovePrefix(ctx, inst.ID); err != nil { r.Logger.Error("could not drop instance data directory", zap.Error(err), zap.String("instance_id", instanceID), observability.ZapCtx(ctx)) } @@ -127,17 +126,17 @@ func (r *Runtime) DeleteInstance(ctx context.Context, instanceID string) error { return nil } -// DataDir returns the path to a persistent data directory for the given instance. +// DataDir returns the path to a persistent data directory for the given instance. The directory is created if it doesn't exist. // Storage usage in the returned directory will be reported in the instance's heartbeat events. -func (r *Runtime) DataDir(instanceID string, elem ...string) string { +func (r *Runtime) DataDir(instanceID string, elem ...string) (string, error) { return r.storage.WithPrefix(instanceID).DataDir(elem...) } -// TempDir returns the path to a temporary directory for the given instance. -// The TempDir is a fixed location. The caller is responsible for using a unique subdirectory name and cleaning up after use. +// TempDir returns the path to a temporary directory for the given instance. The directory is created if it doesn't exist. +// The TempDir is a fixed location. The caller is responsible for cleaning up after use. // The TempDir may be cleared after restarts. // Storage usage in the returned directory will be reported in the instance's heartbeat events. 
-func (r *Runtime) TempDir(instanceID string, elem ...string) string { +func (r *Runtime) TempDir(instanceID string, elem ...string) (string, error) { return r.storage.WithPrefix(instanceID).TempDir(elem...) } @@ -326,10 +325,6 @@ func (r *registryCache) add(inst *drivers.Instance) error { instance: inst, } r.instances[inst.ID] = iwc - err := storage.AddInstance(r.rt.storage, inst.ID) - if err != nil { - return err - } // Setup the logger to duplicate logs to a) the Zap logger, b) an in-memory buffer that exposes the logs over the API buffer := logbuffer.NewBuffer(r.rt.opts.ControllerLogBufferCapacity, r.rt.opts.ControllerLogBufferSizeBytes) @@ -549,7 +544,11 @@ func (r *registryCache) emitHeartbeats() { } func (r *registryCache) emitHeartbeatForInstance(inst *drivers.Instance) { - dataDir := r.rt.storage.WithPrefix(inst.ID).DataDir() + dataDir, err := r.rt.storage.WithPrefix(inst.ID).DataDir() + if err != nil { + r.logger.Error("failed to send instance heartbeat event, could not get data directory", zap.String("instance_id", inst.ID), zap.Error(err)) + return + } // Add instance annotations as attributes to pass organization id, project id, etc. 
attrs := instanceAnnotationsToAttribs(inst) diff --git a/runtime/registry_test.go b/runtime/registry_test.go index c376695ac7f..6d097b1f6bb 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -477,7 +477,9 @@ func TestRuntime_DeleteInstance_DropCorrupted(t *testing.T) { err := rt.CreateInstance(context.Background(), inst) require.NoError(t, err) - dbpath := filepath.Join(rt.storage.DataDir(), inst.ID, "duckdb", "main.db") + dataDir, err := rt.storage.DataDir(inst.ID, "duckdb") + require.NoError(t, err) + dbpath := filepath.Join(dataDir, "main.db") // Put some data into it to create a .db file on disk olap, release, err := rt.OLAP(ctx, inst.ID, "") diff --git a/runtime/resolvers/glob.go b/runtime/resolvers/glob.go index a42efc7d4f2..a925fd1f6fe 100644 --- a/runtime/resolvers/glob.go +++ b/runtime/resolvers/glob.go @@ -361,7 +361,11 @@ func (r *globResolver) transformResult(ctx context.Context, rows []map[string]an } func (r *globResolver) writeTempNDJSONFile(rows []map[string]any) (string, error) { - f, err := os.CreateTemp(r.runtime.TempDir(r.instanceID), "glob_result_*.ndjson") + tempDir, err := r.runtime.TempDir(r.instanceID) + if err != nil { + return "", err + } + f, err := os.CreateTemp(tempDir, "glob_result_*.ndjson") if err != nil { return "", err } diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index aadc5953cf9..0cc16b71245 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -4,7 +4,7 @@ import ( "context" "errors" "fmt" - "io/fs" + "io" "os" "path/filepath" @@ -55,23 +55,72 @@ func (c *Client) WithPrefix(prefix ...string) *Client { return newClient } -func (c *Client) DataDir(elem ...string) string { - paths := []string{c.dataDirPath} +func (c *Client) RemovePrefix(ctx context.Context, prefix ...string) error { if c.prefixes != nil { - paths = append(paths, c.prefixes...) + return fmt.Errorf("storage: RemovePrefix is not supported for prefixed client") } - paths = append(paths, elem...) 
- return filepath.Join(paths...) + + // clean data dir + removeErr := os.RemoveAll(c.path(c.dataDirPath, prefix...)) + + // clean temp dir + removeErr = errors.Join(removeErr, os.RemoveAll(c.path(os.TempDir(), prefix...))) + + // clean bucket + bkt, ok, err := c.OpenBucket(ctx, prefix...) + if err != nil { + return errors.Join(removeErr, err) + } + if !ok { + return removeErr + } + defer bkt.Close() + + iter := bkt.List(&blob.ListOptions{}) + for { + obj, err := iter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return errors.Join(removeErr, err) + } + err = bkt.Delete(ctx, obj.Key) + if err != nil { + return errors.Join(removeErr, err) + } + } + return removeErr } -func (c *Client) TempDir(elem ...string) string { - paths := []string{c.dataDirPath} - if c.prefixes != nil { - paths = append(paths, c.prefixes...) +func (c *Client) DataDir(elem ...string) (string, error) { + path := c.path(c.dataDirPath, elem...) + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + return "", err } - paths = append(paths, "tmp") - paths = append(paths, elem...) - return filepath.Join(paths...) + return path, nil +} + +func (c *Client) TempDir(elem ...string) (string, error) { + path := c.path(os.TempDir(), elem...) + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + return "", err + } + return path, nil +} + +func (c *Client) RandomTempDir(pattern string, elem ...string) (string, error) { + path, err := c.TempDir(elem...) 
+ if err != nil { + return "", err + } + path, err = os.MkdirTemp(path, pattern) + if err != nil { + return "", err + } + return path, nil } func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, bool, error) { @@ -101,40 +150,13 @@ func (c *Client) OpenBucket(ctx context.Context, elem ...string) (*blob.Bucket, return blob.PrefixedBucket(bucket, prefix), true, nil } -func AddInstance(c *Client, instanceID string) error { +func (c *Client) path(base string, elem ...string) string { + paths := []string{base} if c.prefixes != nil { - return fmt.Errorf("storage: should not call AddInstance with prefixed client") - } - - c = c.WithPrefix(instanceID) - err := os.Mkdir(c.DataDir(), os.ModePerm) - if err != nil && !errors.Is(err, fs.ErrExist) { - return fmt.Errorf("could not create instance directory: %w", err) - } - - // recreate instance's tmp directory - tmpDir := c.TempDir() - if err := os.RemoveAll(tmpDir); err != nil { - return fmt.Errorf("could not remove instance tmp directory: %w", err) - } - if err := os.Mkdir(tmpDir, os.ModePerm); err != nil && !errors.Is(err, fs.ErrExist) { - return err - } - - return nil -} - -func RemoveInstance(c *Client, instanceID string) error { - if c.prefixes != nil { - return fmt.Errorf("storage: should not call RemoveInstance with prefixed client") - } - - c = c.WithPrefix(instanceID) - err := os.RemoveAll(c.DataDir()) - if err != nil { - return fmt.Errorf("could not remove instance directory: %w", err) + paths = append(paths, c.prefixes...) } - return nil + paths = append(paths, elem...) + return filepath.Join(paths...) 
} func (c *Client) newGCPClient(ctx context.Context) (*gcp.HTTPClient, error) { From 84b4f59e08d89d5301ac9da6b71667ee084bde6c Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 3 Dec 2024 22:21:29 +0530 Subject: [PATCH 49/64] fix and add unit test --- runtime/metricsview/executor_export.go | 5 +- runtime/storage/storage_test.go | 111 +++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 runtime/storage/storage_test.go diff --git a/runtime/metricsview/executor_export.go b/runtime/metricsview/executor_export.go index 3ec0c98f329..8fe41f49edf 100644 --- a/runtime/metricsview/executor_export.go +++ b/runtime/metricsview/executor_export.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "path/filepath" "github.com/rilldata/rill/runtime/drivers" ) @@ -23,10 +24,12 @@ func (e *Executor) executeExport(ctx context.Context, format drivers.FileFormat, return "", err } name = format.Filename(name) - tempPath, err := e.rt.TempDir(e.instanceID, name) + + tempDir, err := e.rt.TempDir(e.instanceID) if err != nil { return "", err } + tempPath := filepath.Join(tempDir, name) ic, ir, err := e.rt.AcquireHandle(ctx, e.instanceID, inputConnector) if err != nil { diff --git a/runtime/storage/storage_test.go b/runtime/storage/storage_test.go new file mode 100644 index 00000000000..87063a5b6e8 --- /dev/null +++ b/runtime/storage/storage_test.go @@ -0,0 +1,111 @@ +package storage + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestClient_DataDir(t *testing.T) { + tempDir := os.TempDir() + client := &Client{ + dataDirPath: tempDir, + } + + client = client.WithPrefix("testprefix") + + tests := []struct { + name string + elem []string + }{ + { + name: "create single directory", + elem: []string{"testdir"}, + }, + { + name: "create nested directories", + elem: []string{"testdir", "nested"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + got, err := client.DataDir(tt.elem...) + require.NoError(t, err) + if _, err := os.Stat(got); os.IsNotExist(err) { + t.Errorf("Client.DataDir() path = %v, directory does not exist", got) + } + require.Equal(t, filepath.Join(append([]string{tempDir, "testprefix"}, tt.elem...)...), got) + }) + } +} + +func TestClient_TempDir(t *testing.T) { + tempDir := os.TempDir() + client := &Client{ + dataDirPath: tempDir, + } + client = client.WithPrefix("testprefix", "testtempdir") + + tests := []struct { + name string + elem []string + }{ + { + name: "create single temp directory", + elem: []string{"testtempdir"}, + }, + { + name: "create nested temp directories", + elem: []string{"testtempdir", "nested"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := client.TempDir(tt.elem...) + require.NoError(t, err) + if _, err := os.Stat(got); os.IsNotExist(err) { + t.Errorf("Client.TempDir() path = %v, directory does not exist", got) + } + require.Equal(t, filepath.Join(append([]string{tempDir, "testprefix", "testtempdir"}, tt.elem...)...), got) + }) + } +} + +func TestClient_RandomTempDir(t *testing.T) { + tempDir := os.TempDir() + client := &Client{ + dataDirPath: tempDir, + } + + tests := []struct { + name string + pattern string + elem []string + }{ + { + name: "create single random temp directory", + pattern: "testtempdir-*", + elem: []string{"random"}, + }, + { + name: "create nested random temp directories", + pattern: "testtempdir-*", + elem: []string{"random", "nested"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := client.RandomTempDir(tt.pattern, tt.elem...) 
+ require.NoError(t, err) + if _, err := os.Stat(got); os.IsNotExist(err) { + t.Errorf("Client.RandomTempDir() path = %v, directory does not exist", got) + } + require.Equal(t, filepath.Join(append([]string{tempDir}, tt.elem...)...), filepath.Dir(got)) + }) + } +} From 590dee89680769fa38cc77d0ba4637e27ac17f75 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 4 Dec 2024 12:18:44 +0530 Subject: [PATCH 50/64] pullFromRemote fix and other review comments --- runtime/pkg/rduckdb/catalog.go | 9 +-- runtime/pkg/rduckdb/db.go | 103 +++++++++++++++++++++------------ runtime/pkg/rduckdb/db_test.go | 1 + runtime/pkg/rduckdb/remote.go | 77 +++++++++++++----------- 4 files changed, 116 insertions(+), 74 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 7cfc63df339..7c9387eed73 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -62,7 +62,7 @@ func (c *catalog) tableMeta(name string) (*tableMeta, error) { } meta, ok := t.versionMeta[t.currentVersion] if !ok { - return nil, fmt.Errorf("internal error: meta for version %q not found", t.currentVersion) + panic(fmt.Errorf("internal error: meta for table %q and version %q not found", name, t.currentVersion)) } return meta, nil } @@ -109,6 +109,7 @@ func (c *catalog) removeTable(name string) { t, ok := c.tables[name] if !ok { c.logger.Debug("table not found in rduckdb catalog", slog.String("name", name)) + return } oldVersion := t.currentVersion @@ -168,7 +169,7 @@ func (c *catalog) acquireSnapshotUnsafe() *snapshot { meta, ok := t.versionMeta[t.currentVersion] if !ok { - c.logger.Error("internal error: meta for table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + panic(fmt.Errorf("internal error: meta for table %q version %q not found in catalog", t.name, t.currentVersion)) } s.tables = append(s.tables, meta) c.acquireVersion(t, t.currentVersion) @@ 
-193,7 +194,7 @@ func (c *catalog) releaseSnapshotUnsafe(s *snapshot) { for _, meta := range s.tables { t, ok := c.tables[meta.Name] if !ok { - c.logger.Error("internal error: table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + panic(fmt.Errorf("internal error: table %q not found in catalog", meta.Name)) } c.releaseVersion(t, meta.Version) } @@ -214,7 +215,7 @@ func (c *catalog) acquireVersion(t *table, version string) { func (c *catalog) releaseVersion(t *table, version string) { referenceCount, ok := t.versionReferenceCounts[version] if !ok { - c.logger.Error("internal error: version of table not found in catalog", slog.String("name", t.name), slog.String("version", t.currentVersion)) + panic(fmt.Errorf("internal error: version %q of table %q not found in catalog", t.currentVersion, t.name)) } referenceCount-- if referenceCount > 0 { diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index a433a203ec1..d26871e8a38 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -225,6 +225,18 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { ctx: bgctx, cancel: cancel, } + // create local path + err = os.MkdirAll(db.localPath, fs.ModePerm) + if err != nil { + return nil, fmt.Errorf("unable to create local path: %w", err) + } + + // sync local data + err = db.pullFromRemote(ctx, false) + if err != nil { + return nil, err + } + // catalog db.catalog = newCatalog( func(name, version string) { @@ -246,19 +258,12 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { opts.Logger, ) - // create local path - err = os.MkdirAll(db.localPath, fs.ModePerm) - if err != nil { - return nil, fmt.Errorf("unable to create local path: %w", err) - } - - // sync local data - err = db.pullFromRemote(ctx) - if err != nil { - return nil, err - } + // populate catalog + _ = db.iterateLocalTables(func(name string, meta *tableMeta) error { + db.catalog.addTableVersion(meta.Name, meta) + 
return nil + }) - // create db handle db.dbHandle, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { @@ -333,7 +338,7 @@ func (d *db) CreateTableAsSelect(ctx context.Context, name, query string, opts * defer d.writeSem.Release(1) // pull latest changes from remote - err = d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx, true) if err != nil { return err } @@ -436,7 +441,7 @@ func (d *db) MutateTable(ctx context.Context, name string, mutateFn func(ctx con defer d.writeSem.Release(1) // pull latest changes from remote - err = d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx, true) if err != nil { return err } @@ -509,7 +514,7 @@ func (d *db) DropTable(ctx context.Context, name string) error { defer d.writeSem.Release(1) // pull latest changes from remote - err = d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx, true) if err != nil { return fmt.Errorf("drop: unable to pull from remote: %w", err) } @@ -548,7 +553,7 @@ func (d *db) RenameTable(ctx context.Context, oldName, newName string) error { defer d.writeSem.Release(1) // pull latest changes from remote - err = d.pullFromRemote(ctx) + err = d.pullFromRemote(ctx, true) if err != nil { return fmt.Errorf("rename: unable to pull from remote: %w", err) } @@ -628,8 +633,10 @@ func (d *db) localDBMonitor() { return case <-ticker.C: err := d.writeSem.Acquire(d.ctx, 1) - if err != nil && !errors.Is(err, context.Canceled) { - d.logger.Error("localDBMonitor: error in acquiring write sem", slog.String("error", err.Error())) + if err != nil { + if !errors.Is(err, context.Canceled) { + d.logger.Error("localDBMonitor: error in acquiring write sem", slog.String("error", err.Error())) + } continue } if !d.localDirty { @@ -637,7 +644,7 @@ func (d *db) localDBMonitor() { // all good continue } - err = d.pullFromRemote(d.ctx) + err = d.pullFromRemote(d.ctx, true) if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling 
from remote", slog.String("error", err.Error())) } @@ -648,25 +655,14 @@ func (d *db) localDBMonitor() { func (d *db) Size() int64 { var paths []string - entries, err := os.ReadDir(d.localPath) - if err != nil { // ignore error - return 0 - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } + _ = d.iterateLocalTables(func(name string, meta *tableMeta) error { // this is to avoid counting temp tables during source ingestion // in certain cases we only want to compute the size of the serving db files - // TODO :: remove this when removing staged table concepts - if strings.HasPrefix(entry.Name(), "__rill_tmp_") { - continue - } - meta, _ := d.catalog.tableMeta(entry.Name()) - if meta != nil { + if !strings.HasPrefix(name, "__rill_tmp_") { paths = append(paths, d.localDBPath(meta.Name, meta.Version)) } - } + return nil + }) return fileSize(paths) } @@ -755,9 +751,13 @@ func (d *db) openDBAndAttach(ctx context.Context, uri, ignoreTable string, read tables := d.catalog.listTables() err = d.attachTables(ctx, conn, tables, ignoreTable) if err != nil { + conn.Close() db.Close() return nil, err } + if err := conn.Close(); err != nil { + return nil, err + } // 2023-12-11: Hail mary for solving this issue: https://github.com/duckdblabs/rilldata/issues/6. // Forces DuckDB to create catalog entries for the information schema up front (they are normally created lazily). 
@@ -831,6 +831,10 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM } // retry creating views + // views may depend on other views, without building a dependency graph we can not recreate them in correct order + // so we recreate all failed views and collect the ones that failed + // once a view is created successfully, it may be possible that other views that depend on it can be created in the next iteration + // if in a iteration no views are created successfully, it means either all views are invalid or there is a circular dependency for len(failedViews) > 0 { allViewsFailed := true size := len(failedViews) @@ -850,7 +854,6 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM allViewsFailed = false } if !allViewsFailed { - // at least one view should always be created unless there is a circular dependency which is not allowed continue } @@ -859,7 +862,14 @@ func (d *db) attachTables(ctx context.Context, conn *sqlx.Conn, tables []*tableM for i := 0; i < len(failedViews); i++ { table := failedViews[i] safeTable := safeSQLName(table.Name) - _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT error('View %s is incompatible with the underlying data')", safeTable, safeTable)) + // capture the error in creating the view + _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS %s", safeTable, table.SQL)) + if err == nil { + // not possible but just to be safe + continue + } + safeErr := strings.Trim(safeSQLString(err.Error()), "'") + _, err = conn.ExecContext(ctx, fmt.Sprintf("CREATE OR REPLACE VIEW %s AS SELECT error('View %s is incompatible with the underlying data: %s')", safeTable, safeTable, safeErr)) if err != nil { return err } @@ -954,6 +964,27 @@ func (d *db) deleteLocalTableFiles(name, version string) error { return os.RemoveAll(d.localTableDir(name, version)) } +func (d *db) iterateLocalTables(fn func(name string, meta *tableMeta) error) error { + 
entries, err := os.ReadDir(d.localPath) + if err != nil { + return err + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + meta, err := d.tableMeta(entry.Name()) + if err != nil { + continue + } + err = fn(entry.Name(), meta) + if err != nil { + return err + } + } + return nil +} + func (d *db) prepareSnapshot(ctx context.Context, conn *sqlx.Conn, s *snapshot) error { err := d.metaSem.Acquire(ctx, 1) if err != nil { diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 71722e39879..1071e935b26 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -224,6 +224,7 @@ func TestResetLocal(t *testing.T) { }) require.NoError(t, err) verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + require.NoError(t, db.Close()) } func TestConcurrentReads(t *testing.T) { diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 73e8ae6eb56..b719858dcb8 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -20,7 +20,7 @@ import ( // pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. 
-func (d *db) pullFromRemote(ctx context.Context) error { +func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { if !d.localDirty { // optimisation to skip sync if write was already synced return nil @@ -34,7 +34,7 @@ func (d *db) pullFromRemote(ctx context.Context) error { Delimiter: "/", // only list directories with a trailing slash and IsDir set to true }) - tblMetas := make(map[string]*tableMeta) + remoteTables := make(map[string]*tableMeta) for { // Stop the loop if the ctx was cancelled var stop bool @@ -80,33 +80,25 @@ func (d *db) pullFromRemote(ctx context.Context) error { } return err } - backedUpMeta := &tableMeta{} - err = json.Unmarshal(b, backedUpMeta) + remoteMeta := &tableMeta{} + err = json.Unmarshal(b, remoteMeta) if err != nil { d.logger.Debug("SyncWithObjectStorage: failed to unmarshal table metadata", slog.String("table", table), slog.Any("error", err)) continue } + remoteTables[table] = remoteMeta - // check if table in catalog is already upto date - meta, _ := d.catalog.tableMeta(table) - if meta != nil && meta.Version == backedUpMeta.Version { - d.logger.Debug("SyncWithObjectStorage: table is already up to date", slog.String("table", table)) - continue - } - tblMetas[table] = backedUpMeta - - // check if table is locally present but not added to catalog yet - meta, _ = d.tableMeta(table) - if meta != nil && meta.Version == backedUpMeta.Version { + // check if table is locally present + meta, _ := d.tableMeta(table) + if meta != nil && meta.Version == remoteMeta.Version { d.logger.Debug("SyncWithObjectStorage: local table is not present in catalog", slog.String("table", table)) - tblMetas[table] = backedUpMeta continue } - if err := d.initLocalTable(table, backedUpMeta.Version); err != nil { + if err := d.initLocalTable(table, remoteMeta.Version); err != nil { return err } - tblIter := d.remote.List(&blob.ListOptions{Prefix: path.Join(table, backedUpMeta.Version)}) + tblIter := d.remote.List(&blob.ListOptions{Prefix: 
path.Join(table, remoteMeta.Version)}) // download all objects in the table and current version for { obj, err := tblIter.Next(gctx) @@ -143,29 +135,49 @@ func (d *db) pullFromRemote(ctx context.Context) error { return err } - // Update table versions - for table, meta := range tblMetas { + // Update table versions(updates even if local is same as remote) + for table, meta := range remoteTables { err = d.writeTableMeta(table, meta) if err != nil { return err } - d.catalog.addTableVersion(table, meta) } - // mark tables that are not in remote for delete later - entries, err := os.ReadDir(d.localPath) - if err != nil { - return err + if !updateCatalog { + return nil } - for _, entry := range entries { - if !entry.IsDir() { - continue + + // iterate over all remote tables and update catalog + for table, remoteMeta := range remoteTables { + meta, err := d.catalog.tableMeta(table) + if err != nil { + if errors.Is(err, errNotFound) { + // table not found in catalog + d.catalog.addTableVersion(table, remoteMeta) + } + return err } - if _, ok := tblMetas[entry.Name()]; ok { - continue + // table is present in catalog but has version mismatch + if meta.Version != remoteMeta.Version { + d.catalog.addTableVersion(table, remoteMeta) } - d.catalog.removeTable(entry.Name()) } + + // iterate over local entries and remove if not present in remote + _ = d.iterateLocalTables(func(name string, meta *tableMeta) error { + if _, ok := remoteTables[name]; ok { + // table is present in remote + return nil + } + // check if table is present in catalog + _, err := d.catalog.tableMeta(name) + if err != nil { + return d.deleteLocalTableFiles(name, "") + } + // remove table from catalog + d.catalog.removeTable(name) + return nil + }) return nil } @@ -277,13 +289,10 @@ func retry(ctx context.Context, fn func() error) error { break // break and return error } - timer := time.NewTimer(_retryDelay) select { case <-ctx.Done(): - timer.Stop() return ctx.Err() // return on context cancellation case 
<-time.After(_retryDelay): - timer.Stop() } } return err From 385652f750c2f5dce76fa8c5b58bc2b3e73d8682 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:08:28 +0530 Subject: [PATCH 51/64] some more tests --- runtime/pkg/rduckdb/catalog.go | 14 +++- runtime/pkg/rduckdb/db.go | 14 ++-- runtime/pkg/rduckdb/db_test.go | 126 +++++++++++++++++++++++++++++++++ runtime/pkg/rduckdb/remote.go | 7 ++ 4 files changed, 153 insertions(+), 8 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 7c9387eed73..0a4114682cd 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -42,14 +42,24 @@ type catalog struct { // newCatalog creates a new catalog. // The removeSnapshotFunc func will be called exactly once for each snapshot ID when it is no longer the current snapshot and is no longer held by any readers. // The removeVersionFunc func will be called exactly once for each table version when it is no longer the current version and is no longer used by any active snapshots. 
-func newCatalog(removeVersionFunc func(string, string), removeSnapshotFunc func(int), logger *slog.Logger) *catalog { - return &catalog{ +func newCatalog(removeVersionFunc func(string, string), removeSnapshotFunc func(int), tables []*tableMeta, logger *slog.Logger) *catalog { + c := &catalog{ tables: make(map[string]*table), snapshots: make(map[int]*snapshot), removeVersionFunc: removeVersionFunc, removeSnapshotFunc: removeSnapshotFunc, logger: logger, } + for _, meta := range tables { + c.tables[meta.Name] = &table{ + name: meta.Name, + currentVersion: meta.Version, + versionReferenceCounts: map[string]int{}, + versionMeta: map[string]*tableMeta{meta.Version: meta}, + } + } + _ = c.acquireSnapshotUnsafe() + return c } func (c *catalog) tableMeta(name string) (*tableMeta, error) { diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index d26871e8a38..afb299be43d 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -237,6 +237,13 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { return nil, err } + // collect all tables + var tables []*tableMeta + _ = db.iterateLocalTables(func(name string, meta *tableMeta) error { + tables = append(tables, meta) + return nil + }) + // catalog db.catalog = newCatalog( func(name, version string) { @@ -255,15 +262,10 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } }() }, + tables, opts.Logger, ) - // populate catalog - _ = db.iterateLocalTables(func(name string, meta *tableMeta) error { - db.catalog.addTableVersion(meta.Name, meta) - return nil - }) - db.dbHandle, err = db.openDBAndAttach(ctx, "", "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 1071e935b26..45089e3ce5b 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -6,6 +6,7 @@ import ( "io" "log/slog" "os" + "path/filepath" "testing" "github.com/jmoiron/sqlx" @@ 
-227,6 +228,131 @@ func TestResetLocal(t *testing.T) { require.NoError(t, db.Close()) } +func TestResetSelectiveLocal(t *testing.T) { + db, localDir, remoteDir := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // create two views on this + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + err = db.CreateTableAsSelect(ctx, "test_view2", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + + // create another table + err = db.CreateTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'USA' AS country", &CreateTableOptions{}) + require.NoError(t, err) + + // create views on this + err = db.CreateTableAsSelect(ctx, "test2_view", "SELECT * FROM test2", &CreateTableOptions{View: true}) + require.NoError(t, err) + + // reset local for some tables + require.NoError(t, db.Close()) + require.NoError(t, os.RemoveAll(filepath.Join(localDir, "test2"))) + require.NoError(t, os.RemoveAll(filepath.Join(localDir, "test_view2"))) + + logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + bucket, err := fileblob.OpenBucket(remoteDir, nil) + require.NoError(t, err) + db, err = NewDB(ctx, &DBOptions{ + LocalPath: localDir, + Remote: bucket, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: logger, + }) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test2_view", []testData{{ID: 2, Country: "USA"}}) + verifyTable(t, db, "SELECT id, country FROM test_view2", 
[]testData{{ID: 1, Country: "India"}}) + require.NoError(t, db.Close()) +} + +func TestResetTablesRemote(t *testing.T) { + db, localDir, remoteDir := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + + require.NoError(t, db.Close()) + + // remove remote data + require.NoError(t, os.RemoveAll(remoteDir)) + + logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + bucket, err := fileblob.OpenBucket(remoteDir, &fileblob.Options{CreateDir: true}) + require.NoError(t, err) + db, err = NewDB(ctx, &DBOptions{ + LocalPath: localDir, + Remote: bucket, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: logger, + }) + require.NoError(t, err) + require.ErrorContains(t, db.DropTable(ctx, "test"), "not found") + require.NoError(t, db.Close()) +} + +func TestResetSelectiveTablesRemote(t *testing.T) { + db, localDir, remoteDir := prepareDB(t) + ctx := context.Background() + + // create table + err := db.CreateTableAsSelect(ctx, "test", "SELECT 1 AS id, 'India' AS country", &CreateTableOptions{}) + require.NoError(t, err) + + // create two views on this + err = db.CreateTableAsSelect(ctx, "test_view", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + err = db.CreateTableAsSelect(ctx, "test_view2", "SELECT * FROM test", &CreateTableOptions{View: true}) + require.NoError(t, err) + + // create another table + err = db.CreateTableAsSelect(ctx, "test2", "SELECT 2 AS id, 'USA' AS country", &CreateTableOptions{}) + require.NoError(t, err) + + // create views on this + err = db.CreateTableAsSelect(ctx, "test2_view", "SELECT * FROM test2", 
&CreateTableOptions{View: true}) + require.NoError(t, err) + + require.NoError(t, db.Close()) + + // remove remote data for some tables + require.NoError(t, os.RemoveAll(filepath.Join(remoteDir, "test2"))) + require.NoError(t, os.RemoveAll(filepath.Join(remoteDir, "test_view2"))) + + logger := slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + bucket, err := fileblob.OpenBucket(remoteDir, nil) + require.NoError(t, err) + db, err = NewDB(ctx, &DBOptions{ + LocalPath: localDir, + Remote: bucket, + ReadSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + WriteSettings: map[string]string{"memory_limit": "2GB", "threads": "1"}, + InitQueries: []string{"SET autoinstall_known_extensions=true", "SET autoload_known_extensions=true"}, + Logger: logger, + }) + require.NoError(t, err) + verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + verifyTable(t, db, "SELECT id, country FROM test_view", []testData{{ID: 1, Country: "India"}}) + require.NoError(t, db.Close()) +} + func TestConcurrentReads(t *testing.T) { testDB, _, _ := prepareDB(t) ctx := context.Background() diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index b719858dcb8..c4e7d86f2b0 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -144,6 +144,13 @@ func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { } if !updateCatalog { + // delete all local tables which are not present in remote + _ = d.iterateLocalTables(func(name string, meta *tableMeta) error { + if _, ok := remoteTables[name]; !ok { + return d.deleteLocalTableFiles(name, "") + } + return nil + }) return nil } From ed7fd8102109125774672f411c15a099becedbc4 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:29:41 +0530 Subject: [PATCH 52/64] remove invalid tables --- runtime/pkg/rduckdb/catalog.go | 1 + 
runtime/pkg/rduckdb/db.go | 13 ++++++++++--- runtime/pkg/rduckdb/db_test.go | 18 +++++++++++++++++- runtime/pkg/rduckdb/remote.go | 4 ++-- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/runtime/pkg/rduckdb/catalog.go b/runtime/pkg/rduckdb/catalog.go index 0a4114682cd..09ea358cf24 100644 --- a/runtime/pkg/rduckdb/catalog.go +++ b/runtime/pkg/rduckdb/catalog.go @@ -57,6 +57,7 @@ func newCatalog(removeVersionFunc func(string, string), removeSnapshotFunc func( versionReferenceCounts: map[string]int{}, versionMeta: map[string]*tableMeta{meta.Version: meta}, } + c.acquireVersion(c.tables[meta.Name], meta.Version) } _ = c.acquireSnapshotUnsafe() return c diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index afb299be43d..76d8e494e75 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -239,7 +239,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { // collect all tables var tables []*tableMeta - _ = db.iterateLocalTables(func(name string, meta *tableMeta) error { + _ = db.iterateLocalTables(false, func(name string, meta *tableMeta) error { tables = append(tables, meta) return nil }) @@ -657,7 +657,7 @@ func (d *db) localDBMonitor() { func (d *db) Size() int64 { var paths []string - _ = d.iterateLocalTables(func(name string, meta *tableMeta) error { + _ = d.iterateLocalTables(false, func(name string, meta *tableMeta) error { // this is to avoid counting temp tables during source ingestion // in certain cases we only want to compute the size of the serving db files if !strings.HasPrefix(name, "__rill_tmp_") { @@ -966,7 +966,7 @@ func (d *db) deleteLocalTableFiles(name, version string) error { return os.RemoveAll(d.localTableDir(name, version)) } -func (d *db) iterateLocalTables(fn func(name string, meta *tableMeta) error) error { +func (d *db) iterateLocalTables(removeInvalidTable bool, fn func(name string, meta *tableMeta) error) error { entries, err := os.ReadDir(d.localPath) if err != nil { return err 
@@ -977,6 +977,13 @@ func (d *db) iterateLocalTables(fn func(name string, meta *tableMeta) error) err } meta, err := d.tableMeta(entry.Name()) if err != nil { + if !removeInvalidTable { + continue + } + err = d.deleteLocalTableFiles(entry.Name(), "") + if err != nil { + return err + } continue } err = fn(entry.Name(), meta) diff --git a/runtime/pkg/rduckdb/db_test.go b/runtime/pkg/rduckdb/db_test.go index 45089e3ce5b..ca712974ce8 100644 --- a/runtime/pkg/rduckdb/db_test.go +++ b/runtime/pkg/rduckdb/db_test.go @@ -224,7 +224,23 @@ func TestResetLocal(t *testing.T) { Logger: logger, }) require.NoError(t, err) - verifyTable(t, db, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + + // acquire connection + conn, release, err := db.AcquireReadConnection(ctx) + require.NoError(t, err) + + // drop table + err = db.DropTable(ctx, "test") + require.NoError(t, err) + + // verify table is still accessible + verifyTableForConn(t, conn, "SELECT id, country FROM test", []testData{{ID: 1, Country: "India"}}) + require.NoError(t, release()) + + // verify table is now dropped + err = db.DropTable(ctx, "test") + require.ErrorContains(t, err, "not found") + require.NoError(t, db.Close()) } diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index c4e7d86f2b0..a4bf9eaf659 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -145,7 +145,7 @@ func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { if !updateCatalog { // delete all local tables which are not present in remote - _ = d.iterateLocalTables(func(name string, meta *tableMeta) error { + _ = d.iterateLocalTables(true, func(name string, meta *tableMeta) error { if _, ok := remoteTables[name]; !ok { return d.deleteLocalTableFiles(name, "") } @@ -171,7 +171,7 @@ func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { } // iterate over local entries and remove if not present in remote - _ = 
d.iterateLocalTables(func(name string, meta *tableMeta) error { + _ = d.iterateLocalTables(false, func(name string, meta *tableMeta) error { if _, ok := remoteTables[name]; ok { // table is present in remote return nil From 952651a313cc202a68d76adc9870457661f277e5 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:31:53 +0530 Subject: [PATCH 53/64] use unique directory in temp directory --- runtime/storage/storage.go | 10 ++++++++-- runtime/storage/storage_test.go | 8 ++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/runtime/storage/storage.go b/runtime/storage/storage.go index 0cc16b71245..6ea79e0b27f 100644 --- a/runtime/storage/storage.go +++ b/runtime/storage/storage.go @@ -17,13 +17,19 @@ import ( type Client struct { dataDirPath string + tempDirPath string bucketConfig *gcsBucketConfig prefixes []string } func New(dataDir string, bucketCfg map[string]any) (*Client, error) { + tempDirPath, err := os.MkdirTemp("", "rill") + if err != nil { + return nil, err + } c := &Client{ dataDirPath: dataDir, + tempDirPath: tempDirPath, } if len(bucketCfg) != 0 { @@ -64,7 +70,7 @@ func (c *Client) RemovePrefix(ctx context.Context, prefix ...string) error { removeErr := os.RemoveAll(c.path(c.dataDirPath, prefix...)) // clean temp dir - removeErr = errors.Join(removeErr, os.RemoveAll(c.path(os.TempDir(), prefix...))) + removeErr = errors.Join(removeErr, os.RemoveAll(c.path(c.tempDirPath, prefix...))) // clean bucket bkt, ok, err := c.OpenBucket(ctx, prefix...) @@ -103,7 +109,7 @@ func (c *Client) DataDir(elem ...string) (string, error) { } func (c *Client) TempDir(elem ...string) (string, error) { - path := c.path(os.TempDir(), elem...) + path := c.path(c.tempDirPath, elem...) 
err := os.MkdirAll(path, os.ModePerm) if err != nil { return "", err diff --git a/runtime/storage/storage_test.go b/runtime/storage/storage_test.go index 87063a5b6e8..2aebc854b71 100644 --- a/runtime/storage/storage_test.go +++ b/runtime/storage/storage_test.go @@ -9,7 +9,7 @@ import ( ) func TestClient_DataDir(t *testing.T) { - tempDir := os.TempDir() + tempDir := t.TempDir() client := &Client{ dataDirPath: tempDir, } @@ -70,13 +70,13 @@ func TestClient_TempDir(t *testing.T) { if _, err := os.Stat(got); os.IsNotExist(err) { t.Errorf("Client.TempDir() path = %v, directory does not exist", got) } - require.Equal(t, filepath.Join(append([]string{tempDir, "testprefix", "testtempdir"}, tt.elem...)...), got) + require.Equal(t, filepath.Join(append([]string{client.tempDirPath, "testprefix", "testtempdir"}, tt.elem...)...), got) }) } } func TestClient_RandomTempDir(t *testing.T) { - tempDir := os.TempDir() + tempDir := t.TempDir() client := &Client{ dataDirPath: tempDir, } @@ -105,7 +105,7 @@ func TestClient_RandomTempDir(t *testing.T) { if _, err := os.Stat(got); os.IsNotExist(err) { t.Errorf("Client.RandomTempDir() path = %v, directory does not exist", got) } - require.Equal(t, filepath.Join(append([]string{tempDir}, tt.elem...)...), filepath.Dir(got)) + require.Equal(t, filepath.Join(append([]string{client.tempDirPath}, tt.elem...)...), filepath.Dir(got)) }) } } From 788a4590612e377e641b94a023ec3f1e4a3b69d5 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:25:45 +0530 Subject: [PATCH 54/64] interim commit --- go.mod | 1 - runtime/connection_cache.go | 1 - runtime/controller_test.go | 3 +- runtime/drivers/duckdb/config_test.go | 14 +- runtime/drivers/duckdb/duckdb.go | 12 +- runtime/drivers/duckdb/duckdb_test.go | 41 +---- runtime/drivers/duckdb/olap.go | 2 +- runtime/drivers/duckdb/olap_crud_test.go | 165 +----------------- runtime/drivers/duckdb/olap_test.go | 22 +-- 
.../transporter_duckDB_to_duckDB_test.go | 25 +-- .../transporter_motherduck_to_duckDB.go | 25 ++- .../transporter_mysql_to_duckDB_test.go | 5 - .../transporter_postgres_to_duckDB_test.go | 5 - runtime/drivers/redshift/redshift.go | 1 - runtime/pkg/rduckdb/remote.go | 8 +- runtime/registry_test.go | 1 - runtime/testruntime/testruntime.go | 11 +- 17 files changed, 73 insertions(+), 269 deletions(-) diff --git a/go.mod b/go.mod index 85a6cefe861..4b73ef37581 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,6 @@ require ( github.com/go-logr/zapr v1.2.4 github.com/go-playground/validator/v10 v10.14.0 github.com/go-redis/redis_rate/v10 v10.0.1 - github.com/go-sql-driver/mysql v1.7.1 github.com/gocarina/gocsv v0.0.0-20231116093920-b87c2d0e983a github.com/golang-jwt/jwt/v4 v4.5.0 github.com/google/go-github/v50 v50.2.0 diff --git a/runtime/connection_cache.go b/runtime/connection_cache.go index 66125093be2..140d18bab8f 100644 --- a/runtime/connection_cache.go +++ b/runtime/connection_cache.go @@ -13,7 +13,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/observability" "go.opentelemetry.io/otel/metric" "go.uber.org/zap" - "gocloud.dev/blob" "golang.org/x/exp/maps" ) diff --git a/runtime/controller_test.go b/runtime/controller_test.go index 8d1c6a91ef9..d54f614a9f1 100644 --- a/runtime/controller_test.go +++ b/runtime/controller_test.go @@ -9,7 +9,6 @@ import ( runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1" "github.com/rilldata/rill/runtime" - "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/testruntime" "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/structpb" @@ -243,7 +242,7 @@ path: data/foo.csv // Delete the underlying table olap, release, err := rt.OLAP(context.Background(), id, "") require.NoError(t, err) - err = olap.Exec(context.Background(), &drivers.Statement{Query: "DROP TABLE foo;"}) + err = olap.DropTable(context.Background(), "foo", false) require.NoError(t, err) release() 
testruntime.RequireNoOLAPTable(t, rt, id, "foo") diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index e5a9fd81a5c..e4dd1eb98d8 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -48,15 +48,15 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}, "path/to") require.NoError(t, err) - require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) - require.Equal(t, "path/to/duck.db", cfg.DBFilePath) + // require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) + // require.Equal(t, "path/to/duck.db", cfg.DBFilePath) require.Equal(t, 10, cfg.PoolSize) cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}, "path/to") require.NoError(t, err) - require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) + // require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) require.Equal(t, 10, cfg.PoolSize) - require.Equal(t, "path/to/duck.db", cfg.DBFilePath) + // require.Equal(t, "path/to/duck.db", cfg.DBFilePath) _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}, "path/to") require.Error(t, err) @@ -77,8 +77,8 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}, "path/to") require.NoError(t, err) - require.Equal(t, "duck.db", cfg.DBFilePath) - require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) + // require.Equal(t, "duck.db", cfg.DBFilePath) + // require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) require.Equal(t, 4, cfg.PoolSize) } @@ -92,7 +92,7 @@ func Test_specialCharInPath(t *testing.T) { conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), 
activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) config := conn.(*connection).config - require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) + // require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) require.Equal(t, 2, config.PoolSize) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index cc802c5e5eb..699e97d234d 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -164,7 +164,6 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien config: cfg, logger: logger, activity: ac, - data: blob.PrefixedBucket(data, "duckdb"), // todo : ideally the drivers should get name prefixed buckets metaSem: semaphore.NewWeighted(1), olapSem: priorityqueue.NewSemaphore(olapSemSize), longRunningSem: semaphore.NewWeighted(1), // Currently hard-coded to 1 @@ -175,6 +174,13 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien ctx: ctx, cancel: cancel, } + remote, ok, err := st.OpenBucket(context.Background()) + if err != nil { + return nil, err + } + if ok { + c.remote = remote + } // register a callback to add a gauge on number of connections in use per db attrs := []attribute.KeyValue{attribute.String("instance_id", instanceID)} @@ -269,7 +275,7 @@ type connection struct { config *config logger *zap.Logger activity *activity.Client - data *blob.Bucket + remote *blob.Bucket // This driver may issue both OLAP and "meta" queries (like catalog info) against DuckDB. // Meta queries are usually fast, but OLAP queries may take a long time. To enable predictable parallel performance, // we gate queries with semaphores that limits the number of concurrent queries of each type. 
@@ -496,7 +502,7 @@ func (c *connection) reopenDB(ctx context.Context) error { var err error c.db, err = rduckdb.NewDB(ctx, &rduckdb.DBOptions{ LocalPath: c.config.DataDir, - Remote: c.data, + Remote: c.remote, ReadSettings: c.config.readSettings(), WriteSettings: c.config.writeSettings(), InitQueries: bootQueries, diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index 6b8510b89f4..e837b77af5d 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -139,7 +139,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { LEFT JOIN d ON b.b12 = d.d1 WHERE d.d2 IN (''); ` - err1 = olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err1 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { time.Sleep(500 * time.Millisecond) return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) @@ -152,7 +152,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { var err2 error go func() { qry := `SELECT * FROM a;` - err2 = olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err2 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { time.Sleep(1000 * time.Millisecond) return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) @@ -166,7 +166,7 @@ func TestNoFatalErrConcurrent(t *testing.T) { go func() { time.Sleep(250 * time.Millisecond) qry := `SELECT * FROM a;` - err3 = olap.WithConnection(context.Background(), 0, false, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { + err3 = olap.WithConnection(context.Background(), 0, false, func(ctx, ensuredCtx context.Context, _ *sql.Conn) error { return olap.Exec(ctx, &drivers.Statement{Query: qry}) }) wg.Done() @@ -184,38 +184,3 @@ func TestNoFatalErrConcurrent(t *testing.T) { err = handle.Close() 
require.NoError(t, err) } - -func TestHumanReadableSizeToBytes(t *testing.T) { - tests := []struct { - input string - expected float64 - shouldErr bool - }{ - {"1 byte", 1, false}, - {"2 bytes", 2, false}, - {"1KB", 1000, false}, - {"1.5KB", 1500, false}, - {"1MB", 1000 * 1000, false}, - {"2.5MB", 2.5 * 1000 * 1000, false}, - {"1GB", 1000 * 1000 * 1000, false}, - {"1.5GB", 1.5 * 1000 * 1000 * 1000, false}, - {"1TB", 1000 * 1000 * 1000 * 1000, false}, - {"1.5TB", 1.5 * 1000 * 1000 * 1000 * 1000, false}, - {"1PB", 1000 * 1000 * 1000 * 1000 * 1000, false}, - {"1.5PB", 1.5 * 1000 * 1000 * 1000 * 1000 * 1000, false}, - {"invalid", 0, true}, - {"123invalid", 0, true}, - {"123 ZZ", 0, true}, - } - - for _, tt := range tests { - result, err := humanReadableSizeToBytes(tt.input) - if (err != nil) != tt.shouldErr { - t.Errorf("expected error: %v, got error: %v for input: %s", tt.shouldErr, err, tt.input) - } - - if !tt.shouldErr && result != tt.expected { - t.Errorf("expected: %v, got: %v for input: %s", tt.expected, result, tt.input) - } - } -} diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index 0108364df27..441f4fd3551 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -191,7 +191,7 @@ func (c *connection) AddTableColumn(ctx context.Context, tableName, columnName, // AlterTableColumn implements drivers.OLAPStore. 
func (c *connection) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { err := c.db.MutateTable(ctx, tableName, func(ctx context.Context, conn *sqlx.Conn) error { - _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", safeSQLName(tableName), safeSQLName(columnName), newType)) + _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ALTER %s TYPE %s", safeSQLName(tableName), safeSQLName(columnName), newType)) return err }) return c.checkErr(err) diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index ea524bac837..df93df0a040 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -6,7 +6,6 @@ import ( "io/fs" "os" "path/filepath" - "strconv" "testing" "time" @@ -19,16 +18,8 @@ import ( func Test_connection_CreateTableAsSelect(t *testing.T) { temp := t.TempDir() - require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) - dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - normalConn := handle.(*connection) - normalConn.AsOLAP("default") - require.NoError(t, normalConn.Migrate(context.Background())) - dbPath = filepath.Join(temp, "default", "view.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) viewConnection := handle.(*connection) require.NoError(t, viewConnection.Migrate(context.Background())) @@ -41,17 +32,6 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { tableAsView bool c *connection }{ - { - 
testName: "select from view", - name: "my-view", - view: true, - c: normalConn, - }, - { - testName: "select from table", - name: "my-table", - c: normalConn, - }, { testName: "select from view with external_table_storage", name: "my-view", @@ -87,11 +67,6 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { require.NoError(t, res.Scan(&count)) require.Equal(t, 1, count) require.NoError(t, res.Close()) - contents, err := os.ReadFile(filepath.Join(temp, "default", tt.name, "version.txt")) - require.NoError(t, err) - version, err := strconv.ParseInt(string(contents), 10, 64) - require.NoError(t, err) - require.True(t, time.Since(time.UnixMilli(version)).Seconds() < 1) } }) } @@ -100,8 +75,7 @@ func Test_connection_CreateTableAsSelect(t *testing.T) { func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -113,25 +87,9 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select 'hello'", nil) require.NoError(t, err) - dirs, err := os.ReadDir(filepath.Join(temp, "test-select-multiple")) - require.NoError(t, err) - names := make([]string, 0) - for _, dir := range dirs { - names = append(names, dir.Name()) - } - err = c.CreateTableAsSelect(context.Background(), "test-select-multiple", false, "select fail query", nil) require.Error(t, err) - dirs, err = os.ReadDir(filepath.Join(temp, "test-select-multiple")) - require.NoError(t, err) - newNames := make([]string, 0) - for _, dir := range 
dirs { - newNames = append(newNames, dir.Name()) - } - - require.Equal(t, names, newNames) - res, err := c.Execute(context.Background(), &drivers.Statement{Query: fmt.Sprintf("SELECT * FROM %q", "test-select-multiple")}) require.NoError(t, err) require.True(t, res.Next()) @@ -145,8 +103,7 @@ func Test_connection_CreateTableAsSelectMultipleTimes(t *testing.T) { func Test_connection_DropTable(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -159,9 +116,6 @@ func Test_connection_DropTable(t *testing.T) { err = c.DropTable(context.Background(), "test-drop", true) require.NoError(t, err) - _, err = os.ReadDir(filepath.Join(temp, "test-drop")) - require.True(t, os.IsNotExist(err)) - res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) FROM information_schema.tables WHERE table_name='test-drop' AND table_type='VIEW'"}) require.NoError(t, err) require.True(t, res.Next()) @@ -174,8 +128,7 @@ func Test_connection_DropTable(t *testing.T) { func Test_connection_InsertTableAsSelect(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -202,8 +155,7 @@ func 
Test_connection_InsertTableAsSelect(t *testing.T) { func Test_connection_RenameTable(t *testing.T) { temp := t.TempDir() - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -226,9 +178,7 @@ func Test_connection_RenameTable(t *testing.T) { func Test_connection_RenameToExistingTable(t *testing.T) { temp := t.TempDir() - - dbPath := filepath.Join(temp, "default", "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -256,8 +206,7 @@ func Test_connection_AddTableColumn(t *testing.T) { temp := t.TempDir() os.Mkdir(temp, fs.ModePerm) - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -310,52 +259,6 @@ func Test_connection_RenameToExistingTableOld(t *testing.T) { require.NoError(t, res.Close()) } -func Test_connection_CastEnum(t *testing.T) { - temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - - dbPath := filepath.Join(temp, 
"view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c := handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'bglr' AS city, 'IND' AS country", nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 2, 'mUm', 'IND'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, 'Perth', 'Aus'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, null, 'Aus'", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.InsertTableAsSelect(context.Background(), "test", "SELECT 3, 'bglr', null", false, true, drivers.IncrementalStrategyAppend, nil) - require.NoError(t, err) - - err = c.convertToEnum(context.Background(), "test", []string{"city", "country"}) - require.NoError(t, err) - - res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE column_name='city' AND table_name='test' AND table_catalog = 'view'"}) - require.NoError(t, err) - - var typ string - require.True(t, res.Next()) - require.NoError(t, res.Scan(&typ)) - require.Equal(t, "ENUM('bglr', 'Perth', 'mUm')", typ) - require.NoError(t, res.Close()) - - res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE column_name='country' AND table_name='test' AND table_catalog = 'view'"}) - require.NoError(t, err) - require.True(t, res.Next()) - require.NoError(t, res.Scan(&typ)) - require.Equal(t, "ENUM('Aus', 'IND')", typ) - 
require.NoError(t, res.Close()) -} - func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) @@ -391,60 +294,6 @@ func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { require.NoError(t, err) } -func Test_connection_ChangingOrder(t *testing.T) { - temp := t.TempDir() - os.Mkdir(temp, fs.ModePerm) - - // on cloud - dbPath := filepath.Join(temp, "view.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c := handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - // create table - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'India' AS 'coun\"try'", nil) - require.NoError(t, err) - - // create view - err = c.CreateTableAsSelect(context.Background(), "test_view", true, "SELECT * FROM test", nil) - require.NoError(t, err) - verifyCount(t, c, "test_view", 1) - - // change sequence - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 'India' AS 'coun\"try', 1 AS id", nil) - require.NoError(t, err) - // view should still work - verifyCount(t, c, "test_view", 1) - - // on local - dbPath = filepath.Join(temp, "local.db") - handle, err = Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": true, "allow_host_access": true}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - c = handle.(*connection) - require.NoError(t, c.Migrate(context.Background())) - c.AsOLAP("default") - - // create table - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 1 AS id, 'India' AS 'coun\"try'", nil) - require.NoError(t, err) - - // create view - err = c.CreateTableAsSelect(context.Background(), 
"test_view", true, "SELECT * FROM test", nil) - require.NoError(t, err) - verifyCount(t, c, "test_view", 1) - - // change sequence - err = c.CreateTableAsSelect(context.Background(), "test", false, "SELECT 'India' AS 'coun\"try', 1 AS id", nil) - require.NoError(t, err) - - // view no longer works - _, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) from test_view"}) - require.Error(t, err) - require.Contains(t, err.Error(), "Binder Error: Contents of view were altered: types don't match!") -} - func verifyCount(t *testing.T, c *connection, table string, expected int) { res, err := c.Execute(context.Background(), &drivers.Statement{Query: fmt.Sprintf("SELECT count(*) from %s", table)}) require.NoError(t, err) diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 92263880c7a..8f9bd06465e 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -15,7 +15,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/memblob" "golang.org/x/sync/errgroup" ) @@ -243,28 +242,13 @@ func prepareConn(t *testing.T) drivers.Handle { } func Test_safeSQLString(t *testing.T) { - conn := prepareConn(t) tempDir := t.TempDir() path := filepath.Join(tempDir, "let's t@st \"weird\" dirs") err := os.Mkdir(path, fs.ModePerm) require.NoError(t, err) - dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) -} - - conn, err = Driver{}.Open("default", map[string]any{"external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - - olap, ok := conn.AsOLAP("") - require.True(t, ok) - - ctx := context.Background() - err = olap.CreateTableAsSelect(ctx, "foo", false, 
"SELECT 'a' AS bar, 1 AS baz UNION ALL SELECT 'a', 2 UNION ALL SELECT 'b', 3 UNION ALL SELECT 'c', 4", nil) - require.NoError(t, err) - - err = olap.CreateTableAsSelect(ctx, "bar", false, "SELECT 'a' AS bar, 1 AS baz UNION ALL SELECT 'a', 2 UNION ALL SELECT 'b', 3 UNION ALL SELECT 'c', 4", nil) + conn, err := Driver{}.Open("default", map[string]any{"data_dir": path}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) - return conn.(*connection) + require.NotNil(t, conn) + require.NoError(t, conn.Close()) } diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 2a4664edb51..86e405c39e3 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -1,22 +1,9 @@ package duckdb -// import ( -// "context" -// "fmt" -// "path/filepath" -// "testing" - - "github.com/rilldata/rill/runtime/drivers" - activity "github.com/rilldata/rill/runtime/pkg/activity" - "github.com/rilldata/rill/runtime/storage" - "github.com/stretchr/testify/require" - "go.uber.org/zap" -) - -func TestDuckDBToDuckDBTransfer(t *testing.T) { - tempDir := t.TempDir() - conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) +// func TestDuckDBToDuckDBTransfer(t *testing.T) { +// tempDir := t.TempDir() +// conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) // olap, ok := conn.AsOLAP("") // require.True(t, ok) @@ -32,8 +19,8 @@ func TestDuckDBToDuckDBTransfer(t *testing.T) { // require.NoError(t, err) // 
require.NoError(t, conn.Close()) - to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) +// to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) +// require.NoError(t, err) // tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index 42ecf653453..f3f30a23b69 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -4,7 +4,9 @@ import ( "context" "fmt" "os" + "strings" + "github.com/jmoiron/sqlx" "github.com/mitchellh/mapstructure" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/rduckdb" @@ -72,7 +74,26 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m return fmt.Errorf("no motherduck token found. 
Refer to this documentation for instructions: https://docs.rilldata.com/reference/connectors/motherduck") } - return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, srcConfig.SQL, &rduckdb.CreateTableOptions{ - // InitSQL: fmt.Sprintf("INSTALL 'motherduck'; LOAD 'motherduck'; SET motherduck_token='%s'; ATTACH '%s'", token, srcConfig.DSN), + beforeCreateFn := func(ctx context.Context, conn *sqlx.Conn) error { + _, err := conn.ExecContext(ctx, "INSTALL 'motherduck'; LOAD 'motherduck';") + if err != nil { + return fmt.Errorf("failed to load motherduck extension %w", err) + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("SET motherduck_token='%s'", token)) + if err != nil { + return fmt.Errorf("failed to set motherduck token %w", err) + } + + _, err = conn.ExecContext(ctx, fmt.Sprintf("ATTACH '%s'", srcConfig.DSN)) + if err != nil { + return fmt.Errorf("failed to attach motherduck DSN: %w", err) + } + return err + } + userQuery := strings.TrimSpace(srcConfig.SQL) + userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon + return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, userQuery, &rduckdb.CreateTableOptions{ + BeforeCreateFn: beforeCreateFn, }) } diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 82f1cd20a71..314942e1e73 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -105,11 +105,6 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { _, err := db.ExecContext(ctx, mysqlInitStmt) require.NoError(t, err) - handle, err := drivers.Open("mysql", "default", map[string]any{"dsn": dsn}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - require.NotNil(t, handle) - - sqlStore, _ := handle.AsSQLStore() to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, 
storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index be09f8700e7..87b9769db1e 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -68,11 +68,6 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - handle, err := drivers.Open("postgres", "default", map[string]any{"database_url": dbURL}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - require.NotNil(t, handle) - - sqlStore, _ := handle.AsSQLStore() to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/redshift/redshift.go b/runtime/drivers/redshift/redshift.go index 7ce87575dac..41c7e47336e 100644 --- a/runtime/drivers/redshift/redshift.go +++ b/runtime/drivers/redshift/redshift.go @@ -9,7 +9,6 @@ import ( "github.com/rilldata/rill/runtime/pkg/activity" "github.com/rilldata/rill/runtime/storage" "go.uber.org/zap" - "gocloud.dev/blob" ) func init() { diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index a4bf9eaf659..86db7daf7f7 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -21,7 +21,7 @@ import ( // pullFromRemote updates local data with the latest data from remote. // This is not safe for concurrent calls. 
func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { - if !d.localDirty { + if !d.localDirty || d.remote == nil { // optimisation to skip sync if write was already synced return nil } @@ -191,6 +191,9 @@ func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { // pushToRemote syncs the remote location with the local path for given table. // If oldVersion is specified, it is deleted after successful sync. func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tableMeta) error { + if d.remote == nil { + return nil + } if meta.Type == "TABLE" { localPath := d.localTableDir(table, meta.Version) entries, err := os.ReadDir(localPath) @@ -249,6 +252,9 @@ func (d *db) pushToRemote(ctx context.Context, table string, oldMeta, meta *tabl // If table is specified, only that table is deleted. // If table and version is specified, only that version of the table is deleted. func (d *db) deleteRemote(ctx context.Context, table, version string) error { + if d.remote == nil { + return nil + } if table == "" && version != "" { return fmt.Errorf("table must be specified if version is specified") } diff --git a/runtime/registry_test.go b/runtime/registry_test.go index 8a6bb2c003c..7d3779f7ceb 100644 --- a/runtime/registry_test.go +++ b/runtime/registry_test.go @@ -18,7 +18,6 @@ import ( "github.com/rilldata/rill/runtime/storage" "github.com/stretchr/testify/require" "go.uber.org/zap" - "gocloud.dev/blob/fileblob" ) func TestRuntime_EditInstance(t *testing.T) { diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index f0b601a9151..f4a7a8a7d43 100644 --- a/runtime/testruntime/testruntime.go +++ b/runtime/testruntime/testruntime.go @@ -67,11 +67,11 @@ func New(t TestingT) *runtime.Runtime { } logger := zap.NewNop() - var err error - if os.Getenv("DEBUG") == "1" { - logger, err = zap.NewDevelopment() - require.NoError(t, err) - } + // var err error + // if os.Getenv("DEBUG") == "1" { + // 
logger, err := zap.NewDevelopment() + // require.NoError(t, err) + // } rt, err := runtime.New(context.Background(), opts, logger, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), email.New(email.NewTestSender())) require.NoError(t, err) @@ -100,6 +100,7 @@ func NewInstanceWithOptions(t TestingT, opts InstanceOptions) (*runtime.Runtime, } olapDSN := os.Getenv("RILL_RUNTIME_TEST_OLAP_DSN") if olapDSN == "" { + // TODO : fix - ignored for duckdb olapDSN = ":memory:" } From c6019375bfb270ccf6515e52b0a7684715fca077 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:35:43 +0530 Subject: [PATCH 55/64] remove isview param --- cli/cmd/runtime/start.go | 11 --- cli/pkg/local/app.go | 31 +------ go.mod | 32 +++---- go.sum | 90 ++++++++----------- runtime/controller_test.go | 11 ++- .../model_executor_localfile_self.go | 8 +- .../drivers/clickhouse/model_executor_self.go | 6 +- runtime/drivers/clickhouse/model_manager.go | 11 +-- runtime/drivers/clickhouse/olap.go | 6 +- runtime/drivers/clickhouse/olap_test.go | 12 +-- runtime/drivers/druid/olap.go | 4 +- runtime/drivers/duckdb/config.go | 13 +-- runtime/drivers/duckdb/config_test.go | 30 +------ runtime/drivers/duckdb/duckdb_test.go | 9 +- .../duckdb/model_executor_localfile_self.go | 6 +- runtime/drivers/duckdb/model_executor_self.go | 6 +- .../duckdb/model_executor_warehouse_self.go | 6 +- runtime/drivers/duckdb/model_manager.go | 17 +--- runtime/drivers/duckdb/olap.go | 4 +- runtime/drivers/duckdb/olap_crud_test.go | 14 ++- runtime/drivers/duckdb/olap_test.go | 2 +- runtime/drivers/olap.go | 4 +- runtime/drivers/pinot/olap.go | 4 +- runtime/reconcilers/source.go | 16 ++-- runtime/reconcilers/util.go | 8 +- runtime/runtime.go | 2 - runtime/testruntime/testruntime.go | 11 ++- 27 files changed, 134 insertions(+), 240 deletions(-) diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go index bc7ded9ef7f..14f89086644 100644 --- 
a/cli/cmd/runtime/start.go +++ b/cli/cmd/runtime/start.go @@ -24,8 +24,6 @@ import ( "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" - "gocloud.dev/gcp" - "golang.org/x/oauth2/google" "golang.org/x/sync/errgroup" // Load connectors and reconcilers for runtime @@ -281,12 +279,3 @@ func StartCmd(ch *cmdutil.Helper) *cobra.Command { } return startCmd } - -func newClient(ctx context.Context, jsonData string) (*gcp.HTTPClient, error) { - creds, err := google.CredentialsFromJSON(ctx, []byte(jsonData), "https://www.googleapis.com/auth/cloud-platform") - if err != nil { - return nil, fmt.Errorf("failed to create credentials: %w", err) - } - // the token source returned from credentials works for all kind of credentials like serviceAccountKey, credentialsKey etc. - return gcp.NewHTTPClient(gcp.DefaultTransport(), gcp.CredentialsTokenSource(creds)) -} diff --git a/cli/pkg/local/app.go b/cli/pkg/local/app.go index 93df78ad80b..071ae89fbae 100644 --- a/cli/pkg/local/app.go +++ b/cli/pkg/local/app.go @@ -8,7 +8,6 @@ import ( "net/http" "os" "path/filepath" - "strconv" "time" "github.com/c2h5oh/datasize" @@ -197,26 +196,13 @@ func NewApp(ctx context.Context, opts *AppOptions) (*App, error) { } } - // If the OLAP is the default OLAP (DuckDB in stage.db), we make it relative to the project directory (not the working directory) - defaultOLAP := false olapCfg := make(map[string]string) - if opts.OlapDriver == DefaultOLAPDriver && opts.OlapDSN == DefaultOLAPDSN { - defaultOLAP = true - val, err := isExternalStorageEnabled(vars) - if err != nil { - return nil, err - } - olapCfg["external_table_storage"] = strconv.FormatBool(val) - } - if opts.OlapDriver == "duckdb" { + if opts.OlapDSN != DefaultOLAPDSN { + return nil, fmt.Errorf("setting DSN for DuckDB is not supported") + } // Set default DuckDB pool size to 4 olapCfg["pool_size"] = "4" - if !defaultOLAP { - // dsn is automatically computed by duckdb driver so we set only when non default dsn is passed - 
olapCfg["dsn"] = opts.OlapDSN - olapCfg["error_on_incompatible_version"] = "true" - } } // Add OLAP connector @@ -609,14 +595,3 @@ func (s skipFieldZapEncoder) AddString(key, val string) { s.Encoder.AddString(key, val) } } - -// isExternalStorageEnabled determines if external storage can be enabled. -func isExternalStorageEnabled(variables map[string]string) (bool, error) { - // check if flag explicitly passed - val, ok := variables["connector.duckdb.external_table_storage"] - if !ok { - // mark enabled by default - return true, nil - } - return strconv.ParseBool(val) -} diff --git a/go.mod b/go.mod index 4726d1f19a2..9f76750be53 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,8 @@ go 1.23 toolchain go1.23.3 require ( - cloud.google.com/go/bigquery v1.62.0 - cloud.google.com/go/storage v1.43.0 + cloud.google.com/go/bigquery v1.61.0 + cloud.google.com/go/storage v1.42.0 connectrpc.com/connect v1.16.1 github.com/AlecAivazis/survey/v2 v2.3.7 github.com/Andrew-M-C/go.jsonvalue v1.3.4 @@ -94,8 +94,8 @@ require ( github.com/testcontainers/testcontainers-go/modules/clickhouse v0.33.0 github.com/testcontainers/testcontainers-go/modules/compose v0.33.0 github.com/xuri/excelize/v2 v2.7.1 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0 go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.27.0 @@ -110,12 +110,12 @@ require ( go.uber.org/zap v1.27.0 go.uber.org/zap/exp v0.2.0 gocloud.dev v0.36.0 - golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.8.0 golang.org/x/sys v0.26.0 golang.org/x/text v0.19.0 - 
google.golang.org/api v0.191.0 + google.golang.org/api v0.184.0 google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.35.1 @@ -130,7 +130,7 @@ require ( require ( cloud.google.com/go v0.115.0 // indirect cloud.google.com/go/compute/metadata v0.5.0 // indirect - cloud.google.com/go/iam v1.1.13 // indirect + cloud.google.com/go/iam v1.1.8 // indirect dario.cat/mergo v1.0.1 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect @@ -149,7 +149,7 @@ require ( github.com/kylelemons/godebug v1.1.0 // indirect github.com/mtibben/percent v0.2.1 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect - google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 // indirect + google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 // indirect ) require ( @@ -160,8 +160,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.8.1 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect + cloud.google.com/go/auth v0.5.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect github.com/ClickHouse/ch-go v0.61.3 // indirect github.com/ForceCLI/config v0.0.0-20230217143549-9149d42a3c99 // indirect @@ -259,11 +259,11 @@ require ( github.com/google/go-github/v52 v52.0.0 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/s2a-go v0.1.8 // indirect + github.com/google/s2a-go v0.1.7 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/google/wire v0.6.0 // indirect + github.com/google/wire v0.5.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.13.0 // indirect + 
github.com/googleapis/gax-go/v2 v2.12.4 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect @@ -393,10 +393,10 @@ require ( golang.org/x/mod v0.21.0 // indirect golang.org/x/net v0.30.0 // indirect golang.org/x/term v0.25.0 // indirect - golang.org/x/time v0.6.0 // indirect + golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.26.0 // indirect - golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect gopkg.in/cenkalti/backoff.v1 v1.1.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect diff --git a/go.sum b/go.sum index 18cd89a7f53..8b5d1163329 100644 --- a/go.sum +++ b/go.sum @@ -97,10 +97,10 @@ cloud.google.com/go/assuredworkloads v1.7.0/go.mod h1:z/736/oNmtGAyU47reJgGN+KVo cloud.google.com/go/assuredworkloads v1.8.0/go.mod h1:AsX2cqyNCOvEQC8RMPnoc0yEarXQk6WEKkxYfL6kGIo= cloud.google.com/go/assuredworkloads v1.9.0/go.mod h1:kFuI1P78bplYtT77Tb1hi0FMxM0vVpRC7VVoJC3ZoT0= cloud.google.com/go/assuredworkloads v1.10.0/go.mod h1:kwdUQuXcedVdsIaKgKTp9t0UJkE5+PAVNhdQm4ZVq2E= -cloud.google.com/go/auth v0.8.1 h1:QZW9FjC5lZzN864p13YxvAtGUlQ+KgRL+8Sg45Z6vxo= -cloud.google.com/go/auth v0.8.1/go.mod h1:qGVp/Y3kDRSDZ5gFD/XPUfYQ9xW1iI7q8RIRoCyBbJc= -cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= -cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= +cloud.google.com/go/auth v0.5.1 h1:0QNO7VThG54LUzKiQxv8C6x1YX7lUrzlAa1nVLF8CIw= +cloud.google.com/go/auth v0.5.1/go.mod h1:vbZT8GjzDf3AVqCcQmqeeM32U9HBFc32vVVAbwDsa6s= +cloud.google.com/go/auth/oauth2adapt v0.2.2 
h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/automl v1.5.0/go.mod h1:34EjfoFGMZ5sgJ9EoLsRtdPSNZLcfflJR39VbVNS2M0= cloud.google.com/go/automl v1.6.0/go.mod h1:ugf8a6Fx+zP0D59WLhqgTDsQI9w07o64uf/Is3Nh5p8= cloud.google.com/go/automl v1.7.0/go.mod h1:RL9MYCCsJEOmt0Wf3z9uzG0a7adTT1fe+aObgSpkCt8= @@ -128,8 +128,8 @@ cloud.google.com/go/bigquery v1.44.0/go.mod h1:0Y33VqXTEsbamHJvJHdFmtqHvMIY28aK1 cloud.google.com/go/bigquery v1.47.0/go.mod h1:sA9XOgy0A8vQK9+MWhEQTY6Tix87M/ZurWFIxmF9I/E= cloud.google.com/go/bigquery v1.48.0/go.mod h1:QAwSz+ipNgfL5jxiaK7weyOhzdoAy1zFm0Nf1fysJac= cloud.google.com/go/bigquery v1.49.0/go.mod h1:Sv8hMmTFFYBlt/ftw2uN6dFdQPzBlREY9yBh7Oy7/4Q= -cloud.google.com/go/bigquery v1.62.0 h1:SYEA2f7fKqbSRRBHb7g0iHTtZvtPSPYdXfmqsjpsBwo= -cloud.google.com/go/bigquery v1.62.0/go.mod h1:5ee+ZkF1x/ntgCsFQJAQTM3QkAZOecfCmvxhkJsWRSA= +cloud.google.com/go/bigquery v1.61.0 h1:w2Goy9n6gh91LVi6B2Sc+HpBl8WbWhIyzdvVvrAuEIw= +cloud.google.com/go/bigquery v1.61.0/go.mod h1:PjZUje0IocbuTOdq4DBOJLNYB0WF3pAKBHzAYyxCwFo= cloud.google.com/go/billing v1.4.0/go.mod h1:g9IdKBEFlItS8bTtlrZdVLWSSdSyFUZKXNS02zKMOZY= cloud.google.com/go/billing v1.5.0/go.mod h1:mztb1tBc3QekhjSgmpf/CV4LzWXLzCArwpLmP2Gm88s= cloud.google.com/go/billing v1.6.0/go.mod h1:WoXzguj+BeHXPbKfNWkqVtDdzORazmCjraY+vrxcyvI= @@ -201,8 +201,8 @@ cloud.google.com/go/datacatalog v1.8.0/go.mod h1:KYuoVOv9BM8EYz/4eMFxrr4DUKhGIOX cloud.google.com/go/datacatalog v1.8.1/go.mod h1:RJ58z4rMp3gvETA465Vg+ag8BGgBdnRPEMMSTr5Uv+M= cloud.google.com/go/datacatalog v1.12.0/go.mod h1:CWae8rFkfp6LzLumKOnmVh4+Zle4A3NXLzVJ1d1mRm0= cloud.google.com/go/datacatalog v1.13.0/go.mod h1:E4Rj9a5ZtAxcQJlEBTLgMTphfP11/lNaAshpoBgemX8= -cloud.google.com/go/datacatalog v1.21.0 h1:vl0pQT9TZ5rKi9e69FgtXNCR7I8MVRj4+CnbeXhz6UQ= -cloud.google.com/go/datacatalog v1.21.0/go.mod h1:DB0QWF9nelpsbB0eR/tA0xbHZZMvpoFD1XFy3Qv/McI= 
+cloud.google.com/go/datacatalog v1.20.1 h1:czcba5mxwRM5V//jSadyig0y+8aOHmN7gUl9GbHu59E= +cloud.google.com/go/datacatalog v1.20.1/go.mod h1:Jzc2CoHudhuZhpv78UBAjMEg3w7I9jHA11SbRshWUjk= cloud.google.com/go/dataflow v0.6.0/go.mod h1:9QwV89cGoxjjSR9/r7eFDqqjtvbKxAK2BaYU6PVk9UM= cloud.google.com/go/dataflow v0.7.0/go.mod h1:PX526vb4ijFMesO1o202EaUmouZKBpjHsTlCtB4parQ= cloud.google.com/go/dataflow v0.8.0/go.mod h1:Rcf5YgTKPtQyYz8bLYhFoIV/vP39eL7fWNcSOyFfLJE= @@ -315,8 +315,8 @@ cloud.google.com/go/iam v0.8.0/go.mod h1:lga0/y3iH6CX7sYqypWJ33hf7kkfXJag67naqGE cloud.google.com/go/iam v0.11.0/go.mod h1:9PiLDanza5D+oWFZiH1uG+RnRCfEGKoyl6yo4cgWZGY= cloud.google.com/go/iam v0.12.0/go.mod h1:knyHGviacl11zrtZUoDuYpDgLjvr28sLQaG0YB2GYAY= cloud.google.com/go/iam v0.13.0/go.mod h1:ljOg+rcNfzZ5d6f1nAUJ8ZIxOaZUVoS14bKCtaLZ/D0= -cloud.google.com/go/iam v1.1.13 h1:7zWBXG9ERbMLrzQBRhFliAV+kjcRToDTgQT3CTwYyv4= -cloud.google.com/go/iam v1.1.13/go.mod h1:K8mY0uSXwEXS30KrnVb+j54LB/ntfZu1dr+4zFMNbus= +cloud.google.com/go/iam v1.1.8 h1:r7umDwhj+BQyz0ScZMp4QrGXjSTI3ZINnpgU2nlB/K0= +cloud.google.com/go/iam v1.1.8/go.mod h1:GvE6lyMmfxXauzNq8NbgJbeVQNspG+tcdL/W8QO1+zE= cloud.google.com/go/iap v1.4.0/go.mod h1:RGFwRJdihTINIe4wZ2iCP0zF/qu18ZwyKxrhMhygBEc= cloud.google.com/go/iap v1.5.0/go.mod h1:UH/CGgKd4KyohZL5Pt0jSKE4m3FR51qg6FKQ/z/Ix9A= cloud.google.com/go/iap v1.6.0/go.mod h1:NSuvI9C/j7UdjGjIde7t7HBz+QTwBcapPE07+sSRcLk= @@ -347,8 +347,8 @@ cloud.google.com/go/logging v1.7.0/go.mod h1:3xjP2CjkM3ZkO73aj4ASA5wRPGGCRrPIAeN cloud.google.com/go/longrunning v0.1.1/go.mod h1:UUFxuDWkv22EuY93jjmDMFT5GPQKeFVJBIF6QlTqdsE= cloud.google.com/go/longrunning v0.3.0/go.mod h1:qth9Y41RRSUE69rDcOn6DdK3HfQfsUI0YSmW3iIlLJc= cloud.google.com/go/longrunning v0.4.1/go.mod h1:4iWDqhBZ70CvZ6BfETbvam3T8FMvLK+eFj0E6AaRQTo= -cloud.google.com/go/longrunning v0.5.11 h1:Havn1kGjz3whCfoD8dxMLP73Ph5w+ODyZB9RUsDxtGk= -cloud.google.com/go/longrunning v0.5.11/go.mod h1:rDn7//lmlfWV1Dx6IB4RatCPenTwwmqXuiP0/RgoEO4= 
+cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= +cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= cloud.google.com/go/managedidentities v1.3.0/go.mod h1:UzlW3cBOiPrzucO5qWkNkh0w33KFtBJU281hacNvsdE= cloud.google.com/go/managedidentities v1.4.0/go.mod h1:NWSBYbEMgqmbZsLIyKvxrYbtqOsxY1ZrGM+9RgDqInM= cloud.google.com/go/managedidentities v1.5.0/go.mod h1:+dWcZ0JlUmpuxpIDfyP5pP5y0bLdRwOS4Lp7gMni/LA= @@ -533,8 +533,8 @@ cloud.google.com/go/storage v1.23.0/go.mod h1:vOEEDNFnciUMhBeT6hsJIn3ieU5cFRmzeL cloud.google.com/go/storage v1.27.0/go.mod h1:x9DOL8TK/ygDUMieqwfhdpQryTeEkhGKMi80i/iqR2s= cloud.google.com/go/storage v1.28.1/go.mod h1:Qnisd4CqDdo6BGs2AD5LLnEsmSQ80wQ5ogcBBKhU86Y= cloud.google.com/go/storage v1.29.0/go.mod h1:4puEjyTKnku6gfKoTfNOU/W+a9JyuVNxjpS5GBrB8h4= -cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs= -cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0= +cloud.google.com/go/storage v1.42.0 h1:4QtGpplCVt1wz6g5o1ifXd656P5z+yNgzdw1tVfp0cU= +cloud.google.com/go/storage v1.42.0/go.mod h1:HjMXRFq65pGKFn6hxj6x3HCyR41uSB72Z0SO/Vn6JFQ= cloud.google.com/go/storagetransfer v1.5.0/go.mod h1:dxNzUopWy7RQevYFHewchb29POFv3/AaBgnhqzqiK0w= cloud.google.com/go/storagetransfer v1.6.0/go.mod h1:y77xm4CQV/ZhFZH75PLEXY0ROiS7Gh6pSKrM8dJyg6I= cloud.google.com/go/storagetransfer v1.7.0/go.mod h1:8Giuj1QNb1kfLAiWM1bN6dHzfdlDAVC9rv9abHot2W4= @@ -1475,11 +1475,11 @@ github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk= github.com/google/pprof v0.0.0-20230323073829-e72429f035bd/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.8 
h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= -github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= -github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/google/subcommands v1.0.1/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -1487,8 +1487,8 @@ github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/wire v0.6.0 h1:HBkoIh4BdSxoyo9PveV8giw7ZsaBOvzWKfcg/6MrVwI= -github.com/google/wire v0.6.0/go.mod h1:F4QhpQ9EDIdJ1Mbop/NZBRB+5yrR6qg3BnctaoUk6NA= +github.com/google/wire v0.5.0 h1:I7ELFeVBr3yfPIcc8+MWvrjk+3VjbcSzoXm3JVa+jD8= +github.com/google/wire v0.5.0/go.mod h1:ngWDr9Qvq3yZA10YrxfyGELY/AFWGVpy9c1LTRi1EoU= github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod 
h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= @@ -1507,8 +1507,8 @@ github.com/googleapis/gax-go/v2 v2.5.1/go.mod h1:h6B0KMMFNtI2ddbGJn3T3ZbwkeT6yqE github.com/googleapis/gax-go/v2 v2.6.0/go.mod h1:1mjbznJAPHFpesgE5ucqfYEscaz5kMdcIDwU/6+DDoY= github.com/googleapis/gax-go/v2 v2.7.0/go.mod h1:TEop28CZZQ2y+c0VxMUmu1lV+fQx57QpBWsYpwqHJx8= github.com/googleapis/gax-go/v2 v2.7.1/go.mod h1:4orTrqY6hXxxaUL4LHIPl6lGo8vAE38/qKbhSAKP6QI= -github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= -github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= +github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= +github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= @@ -2399,13 +2399,13 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0/go.mod h1:oVGt1LRbBOBq1A5BQLlUg9UaU/54aiHw8cgjV3aWZ/E= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0/go.mod h1:vEhqr0m4eTc+DWxfsXoXue2GBgV2uUwVznkGIHW/e5w= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 h1:hCq2hNMwsegUvPzI7sPOvtO9cqyy5GbWt/Ybp2xrx8Q= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0/go.mod h1:LqaApwGx/oUmzsbqxkzuBvyoPpkxk3JQWnqfVrJ3wCA= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0 h1:vS1Ao/R55RNV4O7TA2Qopok8yN+X0LIP6RVWLFkprck= 
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.52.0/go.mod h1:BMsdeOxN04K0L5FNUBfjFdvwWGNe/rkmSwH4Aelu/X0= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1 h1:gbhw/u49SS3gkPWiYweQNJGm/uJN5GkI/FrosxSHT7A= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1/go.mod h1:GnOaBaFQ2we3b9AGWJpsBa7v1S5RlQzlC3O7dRMxZhM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94= go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0 h1:UaQVCH34fQsyDjlgS0L070Kjs9uCrLKoQfzn2Nl7XTY= go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0/go.mod h1:Ks4aHdMgu1vAfEY0cIBHcGx2l1S0+PwFm2BE/HRzqSk= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= @@ -2520,8 +2520,6 @@ golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4 golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= -golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto 
v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= @@ -2541,8 +2539,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= -golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -2586,8 +2584,6 @@ golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= 
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -2671,8 +2667,6 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= @@ -2723,8 +2717,6 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -2869,8 +2861,6 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
-golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= @@ -2886,8 +2876,6 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= @@ -2907,7 +2895,6 @@ golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= @@ -2922,8 +2909,8 @@ golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20220922220347-f3bd1da661af/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -2936,6 +2923,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190422233926-fe54fb35175b/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -3005,8 +2993,6 @@ golang.org/x/tools v0.1.11/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/tools 
v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -3019,8 +3005,8 @@ golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= @@ -3089,8 +3075,8 @@ google.golang.org/api v0.108.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/ google.golang.org/api v0.110.0/go.mod h1:7FC4Vvx1Mooxh8C5HWjzZHcavuS2f6pmJpZx60ca7iI= google.golang.org/api v0.111.0/go.mod h1:qtFHvU9mhgTJegR31csQ+rwxyUTHOKFqCKWp1J0fdw0= google.golang.org/api v0.114.0/go.mod h1:ifYI2ZsFK6/uGddGfAD5BMxlnkBqCmqHSDUVi45N5Yg= 
-google.golang.org/api v0.191.0 h1:cJcF09Z+4HAB2t5qTQM1ZtfL/PemsLFkcFG67qq2afk= -google.golang.org/api v0.191.0/go.mod h1:tD5dsFGxFza0hnQveGfVk9QQYKcfp+VzgRqyXFxE0+E= +google.golang.org/api v0.184.0 h1:dmEdk6ZkJNXy1JcDhn/ou0ZUq7n9zropG2/tR4z+RDg= +google.golang.org/api v0.184.0/go.mod h1:CeDTtUEiYENAf8PPG5VZW2yNp2VM3VWbCeTioAZBTBA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -3234,12 +3220,12 @@ google.golang.org/genproto v0.0.0-20230303212802-e74f57abe488/go.mod h1:TvhZT5f7 google.golang.org/genproto v0.0.0-20230306155012-7f2fa6fef1f4/go.mod h1:NWraEVixdDnqcqQ30jipen1STv2r/n24Wb7twVTGR4s= google.golang.org/genproto v0.0.0-20230320184635-7606e756e683/go.mod h1:NWraEVixdDnqcqQ30jipen1STv2r/n24Wb7twVTGR4s= google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= -google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988 h1:CT2Thj5AuPV9phrYMtzX11k+XkzMGfRAet42PmoTATM= -google.golang.org/genproto v0.0.0-20240812133136-8ffd90a71988/go.mod h1:7uvplUBj4RjHAxIZ//98LzOvrQ04JBkaixRmCMI29hc= +google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 h1:HCZ6DlkKtCDAtD8ForECsY3tKuaR+p4R3grlK80uCCc= +google.golang.org/genproto v0.0.0-20240604185151-ef581f913117/go.mod h1:lesfX/+9iA+3OdqeCpoDddJaNxVB1AB6tD7EfqMmprc= google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod 
h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.0.5/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= diff --git a/runtime/controller_test.go b/runtime/controller_test.go index d54f614a9f1..8dea13fc016 100644 --- a/runtime/controller_test.go +++ b/runtime/controller_test.go @@ -242,7 +242,7 @@ path: data/foo.csv // Delete the underlying table olap, release, err := rt.OLAP(context.Background(), id, "") require.NoError(t, err) - err = olap.DropTable(context.Background(), "foo", false) + err = olap.DropTable(context.Background(), "foo") require.NoError(t, err) release() testruntime.RequireNoOLAPTable(t, rt, id, "foo") @@ -488,7 +488,8 @@ select 1 testruntime.ReconcileParserAndWait(t, rt, id) testruntime.RequireReconcileState(t, rt, id, 2, 0, 0) // Assert that the model is a table now - testruntime.RequireIsView(t, olap, "bar", false) + // TODO : fix with information schema fix + // testruntime.RequireIsView(t, olap, "bar", false) // Mark the model as not materialized testruntime.PutFiles(t, rt, id, map[string]string{ @@ -534,6 +535,10 @@ path: data/foo.csv testruntime.RequireResource(t, rt, id, modelRes) testruntime.RequireOLAPTable(t, rt, id, "bar") + // TODO :: Not sure how this can be tested + // The query will succeed when creating model (foo is attached in default schema so memory.foo will work) + // But when querying foo is attached in non default schema (memory.main_x.foo) so memory.foo will not work + // Update model to have a CTE with alias same as the source 
testruntime.PutFiles(t, rt, id, map[string]string{ "/models/bar.sql": `with foo as (select * from memory.foo) select * from foo`, @@ -545,7 +550,7 @@ path: data/foo.csv testruntime.RequireResource(t, rt, id, modelRes) // Refs are removed but the model is valid. // TODO: is this expected? - testruntime.RequireOLAPTable(t, rt, id, "bar") + // testruntime.RequireOLAPTable(t, rt, id, "bar") } func TestRename(t *testing.T) { diff --git a/runtime/drivers/clickhouse/model_executor_localfile_self.go b/runtime/drivers/clickhouse/model_executor_localfile_self.go index 694e29316c8..92dd1a20ca1 100644 --- a/runtime/drivers/clickhouse/model_executor_localfile_self.go +++ b/runtime/drivers/clickhouse/model_executor_localfile_self.go @@ -103,14 +103,12 @@ func (e *localFileToSelfExecutor) Execute(ctx context.Context, opts *drivers.Mod if opts.Env.StageChanges || outputProps.Typ == "DICTIONARY" { stagingTableName = stagingTableNameFor(tableName) } - if t, err := e.c.InformationSchema().Lookup(ctx, "", "", stagingTableName); err == nil { - _ = e.c.DropTable(ctx, stagingTableName, t.View) - } + _ = e.c.DropTable(ctx, stagingTableName) // create the table err = e.c.createTable(ctx, stagingTableName, "", outputProps) if err != nil { - _ = e.c.DropTable(ctx, stagingTableName, false) + _ = e.c.DropTable(ctx, stagingTableName) return nil, fmt.Errorf("failed to create model: %w", err) } @@ -131,7 +129,7 @@ func (e *localFileToSelfExecutor) Execute(ctx context.Context, opts *drivers.Mod if outputProps.Typ == "DICTIONARY" { err = e.c.createDictionary(ctx, tableName, fmt.Sprintf("SELECT * FROM %s", safeSQLName(stagingTableName)), outputProps) // drop the temp table - _ = e.c.DropTable(ctx, stagingTableName, false) + _ = e.c.DropTable(ctx, stagingTableName) if err != nil { return nil, fmt.Errorf("failed to create dictionary: %w", err) } diff --git a/runtime/drivers/clickhouse/model_executor_self.go b/runtime/drivers/clickhouse/model_executor_self.go index b8a106d12b0..736975646ff 100644 --- 
a/runtime/drivers/clickhouse/model_executor_self.go +++ b/runtime/drivers/clickhouse/model_executor_self.go @@ -65,14 +65,12 @@ func (e *selfToSelfExecutor) Execute(ctx context.Context, opts *drivers.ModelExe // Drop the staging view/table if it exists. // NOTE: This intentionally drops the end table if not staging changes. - if t, err := e.c.InformationSchema().Lookup(ctx, "", "", stagingTableName); err == nil { - _ = e.c.DropTable(ctx, stagingTableName, t.View) - } + _ = e.c.DropTable(ctx, stagingTableName) // Create the table err := e.c.CreateTableAsSelect(ctx, stagingTableName, asView, inputProps.SQL, mustToMap(outputProps)) if err != nil { - _ = e.c.DropTable(ctx, stagingTableName, asView) + _ = e.c.DropTable(ctx, stagingTableName) return nil, fmt.Errorf("failed to create model: %w", err) } diff --git a/runtime/drivers/clickhouse/model_manager.go b/runtime/drivers/clickhouse/model_manager.go index dcb3e5f7d16..3fe871b5704 100644 --- a/runtime/drivers/clickhouse/model_manager.go +++ b/runtime/drivers/clickhouse/model_manager.go @@ -202,17 +202,14 @@ func (c *connection) Delete(ctx context.Context, res *drivers.ModelResult) error return fmt.Errorf("connector is not an OLAP") } - stagingTable, err := olap.InformationSchema().Lookup(ctx, "", "", stagingTableNameFor(res.Table)) - if err == nil { - _ = c.DropTable(ctx, stagingTable.Name, stagingTable.View) - } + _ = c.DropTable(ctx, stagingTableNameFor(res.Table)) table, err := olap.InformationSchema().Lookup(ctx, "", "", res.Table) if err != nil { return err } - return c.DropTable(ctx, table.Name, table.View) + return c.DropTable(ctx, table.Name) } func (c *connection) MergePartitionResults(a, b *drivers.ModelResult) (*drivers.ModelResult, error) { @@ -250,7 +247,7 @@ func olapForceRenameTable(ctx context.Context, c *connection, fromName string, f // Renaming a table to the same name with different casing is not supported. Workaround by renaming to a temporary name first. 
if strings.EqualFold(fromName, toName) { tmpName := fmt.Sprintf("__rill_tmp_rename_%s_%s", typ, toName) - err := c.RenameTable(ctx, fromName, tmpName, fromIsView) + err := c.RenameTable(ctx, fromName, tmpName) if err != nil { return err } @@ -258,7 +255,7 @@ func olapForceRenameTable(ctx context.Context, c *connection, fromName string, f } // Do the rename - return c.RenameTable(ctx, fromName, toName, fromIsView) + return c.RenameTable(ctx, fromName, toName) } func boolPtr(b bool) *bool { diff --git a/runtime/drivers/clickhouse/olap.go b/runtime/drivers/clickhouse/olap.go index c561a3cbf18..c617d7aecda 100644 --- a/runtime/drivers/clickhouse/olap.go +++ b/runtime/drivers/clickhouse/olap.go @@ -256,7 +256,7 @@ func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, } // DropTable implements drivers.OLAPStore. -func (c *connection) DropTable(ctx context.Context, name string, _ bool) error { +func (c *connection) DropTable(ctx context.Context, name string) error { typ, onCluster, err := informationSchema{c: c}.entityType(ctx, "", name) if err != nil { return err @@ -310,7 +310,7 @@ func (c *connection) MayBeScaledToZero(ctx context.Context) bool { } // RenameTable implements drivers.OLAPStore. 
-func (c *connection) RenameTable(ctx context.Context, oldName, newName string, view bool) error { +func (c *connection) RenameTable(ctx context.Context, oldName, newName string) error { typ, onCluster, err := informationSchema{c: c}.entityType(ctx, "", oldName) if err != nil { return err @@ -440,7 +440,7 @@ func (c *connection) renameTable(ctx context.Context, oldName, newName, onCluste return err } // drop the old table - return c.DropTable(context.Background(), oldName, false) + return c.DropTable(context.Background(), oldName) } func (c *connection) createTable(ctx context.Context, name, sql string, outputProps *ModelOutputProperties) error { diff --git a/runtime/drivers/clickhouse/olap_test.go b/runtime/drivers/clickhouse/olap_test.go index d05e2767431..36ce1a6abad 100644 --- a/runtime/drivers/clickhouse/olap_test.go +++ b/runtime/drivers/clickhouse/olap_test.go @@ -68,11 +68,11 @@ func testRenameView(t *testing.T, olap drivers.OLAPStore) { require.NoError(t, err) // rename to unknown view - err = olap.RenameTable(ctx, "foo_view", "foo_view1", true) + err = olap.RenameTable(ctx, "foo_view", "foo_view1") require.NoError(t, err) // rename to existing view - err = olap.RenameTable(ctx, "foo_view1", "bar_view", true) + err = olap.RenameTable(ctx, "foo_view1", "bar_view") require.NoError(t, err) // check that views no longer exist @@ -89,10 +89,10 @@ func testRenameView(t *testing.T, olap drivers.OLAPStore) { func testRenameTable(t *testing.T, olap drivers.OLAPStore) { ctx := context.Background() - err := olap.RenameTable(ctx, "foo", "foo1", false) + err := olap.RenameTable(ctx, "foo", "foo1") require.NoError(t, err) - err = olap.RenameTable(ctx, "foo1", "bar", false) + err = olap.RenameTable(ctx, "foo1", "bar") require.NoError(t, err) notExists(t, olap, "foo") @@ -123,7 +123,7 @@ func testDictionary(t *testing.T, olap drivers.OLAPStore) { err := olap.CreateTableAsSelect(context.Background(), "dict", false, "SELECT 1 AS id, 'Earth' AS planet", 
map[string]any{"table": "Dictionary", "primary_key": "id"}) require.NoError(t, err) - err = olap.RenameTable(context.Background(), "dict", "dict1", false) + err = olap.RenameTable(context.Background(), "dict", "dict1") require.NoError(t, err) res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT id, planet FROM dict1"}) @@ -136,7 +136,7 @@ func testDictionary(t *testing.T, olap drivers.OLAPStore) { require.Equal(t, 1, id) require.Equal(t, "Earth", planet) - require.NoError(t, olap.DropTable(context.Background(), "dict1", false)) + require.NoError(t, olap.DropTable(context.Background(), "dict1")) } func prepareClusterConn(t *testing.T, olap drivers.OLAPStore, cluster string) { diff --git a/runtime/drivers/druid/olap.go b/runtime/drivers/druid/olap.go index 2396e7fafe1..6e201abaf43 100644 --- a/runtime/drivers/druid/olap.go +++ b/runtime/drivers/druid/olap.go @@ -41,12 +41,12 @@ func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, } // DropTable implements drivers.OLAPStore. -func (c *connection) DropTable(ctx context.Context, name string, view bool) error { +func (c *connection) DropTable(ctx context.Context, name string) error { return fmt.Errorf("druid: data transformation not yet supported") } // RenameTable implements drivers.OLAPStore. 
-func (c *connection) RenameTable(ctx context.Context, name, newName string, view bool) error { +func (c *connection) RenameTable(ctx context.Context, name, newName string) error { return fmt.Errorf("druid: data transformation not yet supported") } diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index fb5619c2714..2ef5733cf73 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -20,9 +20,9 @@ type config struct { PoolSize int `mapstructure:"pool_size"` // AllowHostAccess denotes whether to limit access to the local environment and file system AllowHostAccess bool `mapstructure:"allow_host_access"` - // CPU cores available for the read DB. If no CPUWrite is set and external_table_storage is enabled then this is split evenly between read and write. + // CPU cores available for the read DB. If no CPUWrite is set then this is split evenly between read and write. CPU int `mapstructure:"cpu"` - // MemoryLimitGB is the amount of memory available for the read DB. If no MemoryLimitGBWrite is set and external_table_storage is enabled then this is split evenly between read and write. + // MemoryLimitGB is the amount of memory available for the read DB. If no MemoryLimitGBWrite is set then this is split evenly between read and write. MemoryLimitGB int `mapstructure:"memory_limit_gb"` // CPUWrite is CPU available for the DB when writing data. 
CPUWrite int `mapstructure:"cpu_write"` @@ -47,13 +47,8 @@ func newConfig(cfgMap map[string]any, dataDir string) (*config, error) { // Set pool size poolSize := cfg.PoolSize - threads := cfg.CPU - if poolSize == 0 && threads != 0 { - poolSize = threads - if cfg.CPU != 0 && cfg.CPU < poolSize { - poolSize = cfg.CPU - } - poolSize = min(poolSizeMax, poolSize) // Only enforce max pool size when inferred from threads/CPU + if poolSize == 0 && cfg.CPU != 0 { + poolSize = min(poolSizeMax, cfg.CPU) // Only enforce max pool size when inferred from CPU } poolSize = max(poolSizeMin, poolSize) // Always enforce min pool size cfg.PoolSize = poolSize diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index e4dd1eb98d8..5f20697b073 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -2,6 +2,7 @@ package duckdb import ( "context" + "fmt" "io/fs" "os" "path/filepath" @@ -19,11 +20,7 @@ func TestConfig(t *testing.T) { require.NoError(t, err) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": ":memory:?memory_limit=2GB"}, "") - require.NoError(t, err) - require.Equal(t, 2, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "", "memory_limit_gb": "1", "cpu": 2}, "") + cfg, err = newConfig(map[string]any{"dsn": "", "cpu": 2}, "") require.NoError(t, err) require.Equal(t, "2", cfg.readSettings()["threads"]) require.Equal(t, "", cfg.writeSettings()["threads"]) @@ -89,11 +86,8 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, err) dbFile := filepath.Join(path, "st@g3's.db") - conn, err := Driver{}.Open("default", map[string]any{"path": dbFile, "memory_limit_gb": "4", "cpu": "1", "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{"init_sql": fmt.Sprintf("ATTACH %s", safeSQLString(dbFile))}, storage.MustNew(t.TempDir(), nil), 
activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) - config := conn.(*connection).config - // require.Equal(t, filepath.Join(path, "st@g3's.db?custom_user_agent=rill&max_memory=4GB&threads=1"), config.DSN) - require.Equal(t, 2, config.PoolSize) olap, ok := conn.AsOLAP("") require.True(t, ok) @@ -103,21 +97,3 @@ func Test_specialCharInPath(t *testing.T) { require.NoError(t, res.Close()) require.NoError(t, conn.Close()) } - -func TestOverrides(t *testing.T) { - cfgMap := map[string]any{"path": "duck.db", "memory_limit_gb": "4", "cpu": "2", "max_memory_gb_override": "2", "threads_override": "10", "external_table_storage": false} - handle, err := Driver{}.Open("default", cfgMap, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) - require.NoError(t, err) - - olap, ok := handle.AsOLAP("") - require.True(t, ok) - - res, err := olap.Execute(context.Background(), &drivers.Statement{Query: "SELECT value FROM duckdb_settings() WHERE name='max_memory'"}) - require.NoError(t, err) - require.True(t, res.Next()) - var mem string - require.NoError(t, res.Scan(&mem)) - require.NoError(t, res.Close()) - - require.Equal(t, "1.8 GiB", mem) -} diff --git a/runtime/drivers/duckdb/duckdb_test.go b/runtime/drivers/duckdb/duckdb_test.go index e837b77af5d..213f330baf7 100644 --- a/runtime/drivers/duckdb/duckdb_test.go +++ b/runtime/drivers/duckdb/duckdb_test.go @@ -3,7 +3,6 @@ package duckdb import ( "context" "database/sql" - "path/filepath" "sync" "testing" "time" @@ -18,9 +17,7 @@ import ( func TestNoFatalErr(t *testing.T) { // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 - dsn := filepath.Join(t.TempDir(), "tmp.db") - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 2, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"pool_size": 2}, 
storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") @@ -80,9 +77,7 @@ func TestNoFatalErr(t *testing.T) { func TestNoFatalErrConcurrent(t *testing.T) { // NOTE: Using this issue to create a fatal error: https://github.com/duckdb/duckdb/issues/7905 - dsn := filepath.Join(t.TempDir(), "tmp.db") - - handle, err := Driver{}.Open("default", map[string]any{"path": dsn, "pool_size": 3, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{"pool_size": 2}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := handle.AsOLAP("") diff --git a/runtime/drivers/duckdb/model_executor_localfile_self.go b/runtime/drivers/duckdb/model_executor_localfile_self.go index 3cad5d25251..b8a4abc44ed 100644 --- a/runtime/drivers/duckdb/model_executor_localfile_self.go +++ b/runtime/drivers/duckdb/model_executor_localfile_self.go @@ -70,9 +70,7 @@ func (e *localFileToSelfExecutor) Execute(ctx context.Context, opts *drivers.Mod if opts.Env.StageChanges { stagingTableName = stagingTableNameFor(tableName) } - if t, err := e.c.InformationSchema().Lookup(ctx, "", "", stagingTableName); err == nil { - _ = e.c.DropTable(ctx, stagingTableName, t.View) - } + _ = e.c.DropTable(ctx, stagingTableName) // get the local file path localPaths, err := e.from.FilePaths(ctx, opts.InputProperties) @@ -95,7 +93,7 @@ func (e *localFileToSelfExecutor) Execute(ctx context.Context, opts *drivers.Mod // create the table err = e.c.CreateTableAsSelect(ctx, stagingTableName, asView, "SELECT * FROM "+from, nil) if err != nil { - _ = e.c.DropTable(ctx, stagingTableName, asView) + _ = e.c.DropTable(ctx, stagingTableName) return nil, fmt.Errorf("failed to create model: %w", err) } diff --git a/runtime/drivers/duckdb/model_executor_self.go 
b/runtime/drivers/duckdb/model_executor_self.go index f15232d7d74..552a9b03e62 100644 --- a/runtime/drivers/duckdb/model_executor_self.go +++ b/runtime/drivers/duckdb/model_executor_self.go @@ -64,14 +64,12 @@ func (e *selfToSelfExecutor) Execute(ctx context.Context, opts *drivers.ModelExe if opts.Env.StageChanges { stagingTableName = stagingTableNameFor(tableName) } - if t, err := olap.InformationSchema().Lookup(ctx, "", "", stagingTableName); err == nil { - _ = olap.DropTable(ctx, stagingTableName, t.View) - } + _ = olap.DropTable(ctx, stagingTableName) // Create the table err := olap.CreateTableAsSelect(ctx, stagingTableName, asView, inputProps.SQL, nil) if err != nil { - _ = olap.DropTable(ctx, stagingTableName, asView) + _ = olap.DropTable(ctx, stagingTableName) return nil, fmt.Errorf("failed to create model: %w", err) } diff --git a/runtime/drivers/duckdb/model_executor_warehouse_self.go b/runtime/drivers/duckdb/model_executor_warehouse_self.go index 55a762c86ed..dd58bd4b596 100644 --- a/runtime/drivers/duckdb/model_executor_warehouse_self.go +++ b/runtime/drivers/duckdb/model_executor_warehouse_self.go @@ -56,15 +56,13 @@ func (e *warehouseToSelfExecutor) Execute(ctx context.Context, opts *drivers.Mod } // NOTE: This intentionally drops the end table if not staging changes. 
- if t, err := olap.InformationSchema().Lookup(ctx, "", "", stagingTableName); err == nil { - _ = olap.DropTable(ctx, stagingTableName, t.View) - } + _ = olap.DropTable(ctx, stagingTableName) } err := e.queryAndInsert(ctx, opts, olap, stagingTableName, outputProps) if err != nil { if !opts.IncrementalRun { - _ = olap.DropTable(ctx, stagingTableName, false) + _ = olap.DropTable(ctx, stagingTableName) } return nil, err } diff --git a/runtime/drivers/duckdb/model_manager.go b/runtime/drivers/duckdb/model_manager.go index 5b9fd051633..d8d6d8bdbf5 100644 --- a/runtime/drivers/duckdb/model_manager.go +++ b/runtime/drivers/duckdb/model_manager.go @@ -120,17 +120,8 @@ func (c *connection) Delete(ctx context.Context, res *drivers.ModelResult) error return fmt.Errorf("connector is not an OLAP") } - stagingTable, err := olap.InformationSchema().Lookup(ctx, "", "", stagingTableNameFor(res.Table)) - if err == nil { - _ = olap.DropTable(ctx, stagingTable.Name, stagingTable.View) - } - - table, err := olap.InformationSchema().Lookup(ctx, "", "", res.Table) - if err != nil { - return err - } - - return olap.DropTable(ctx, table.Name, table.View) + _ = olap.DropTable(ctx, stagingTableNameFor(res.Table)) + return olap.DropTable(ctx, res.Table) } func (c *connection) MergePartitionResults(a, b *drivers.ModelResult) (*drivers.ModelResult, error) { @@ -168,7 +159,7 @@ func olapForceRenameTable(ctx context.Context, olap drivers.OLAPStore, fromName // Renaming a table to the same name with different casing is not supported. Workaround by renaming to a temporary name first. 
if strings.EqualFold(fromName, toName) { tmpName := fmt.Sprintf("__rill_tmp_rename_%s_%s", typ, toName) - err := olap.RenameTable(ctx, fromName, tmpName, fromIsView) + err := olap.RenameTable(ctx, fromName, tmpName) if err != nil { return err } @@ -176,7 +167,7 @@ func olapForceRenameTable(ctx context.Context, olap drivers.OLAPStore, fromName } // Do the rename - return olap.RenameTable(ctx, fromName, toName, fromIsView) + return olap.RenameTable(ctx, fromName, toName) } func boolPtr(b bool) *bool { diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index 441f4fd3551..e2721879a88 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -209,12 +209,12 @@ func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, } // DropTable implements drivers.OLAPStore. -func (c *connection) DropTable(ctx context.Context, name string, view bool) error { +func (c *connection) DropTable(ctx context.Context, name string) error { return c.db.DropTable(ctx, name) } // RenameTable implements drivers.OLAPStore. 
-func (c *connection) RenameTable(ctx context.Context, oldName, newName string, view bool) error { +func (c *connection) RenameTable(ctx context.Context, oldName, newName string) error { return c.db.RenameTable(ctx, oldName, newName) } diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index df93df0a040..9c05abb16c5 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -112,8 +112,7 @@ func Test_connection_DropTable(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-drop", false, "select 1", nil) require.NoError(t, err) - // Note: true since at lot of places we look at information_schema lookup on main db to determine whether tbl is a view or table - err = c.DropTable(context.Background(), "test-drop", true) + err = c.DropTable(context.Background(), "test-drop") require.NoError(t, err) res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) FROM information_schema.tables WHERE table_name='test-drop' AND table_type='VIEW'"}) @@ -164,7 +163,7 @@ func Test_connection_RenameTable(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "test-rename", false, "select 1", nil) require.NoError(t, err) - err = c.RenameTable(context.Background(), "test-rename", "rename-test", false) + err = c.RenameTable(context.Background(), "test-rename", "rename-test") require.NoError(t, err) res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT count(*) FROM 'rename-test'"}) @@ -190,7 +189,7 @@ func Test_connection_RenameToExistingTable(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "_tmp_source", false, "SELECT 2 AS DATA", nil) require.NoError(t, err) - err = c.RenameTable(context.Background(), "_tmp_source", "source", false) + err = c.RenameTable(context.Background(), "_tmp_source", "source") require.NoError(t, err) res, err := c.Execute(context.Background(), &drivers.Statement{Query: 
"SELECT * FROM 'source'"}) @@ -235,7 +234,7 @@ func Test_connection_AddTableColumn(t *testing.T) { } func Test_connection_RenameToExistingTableOld(t *testing.T) { - handle, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:", "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) c := handle.(*connection) require.NoError(t, c.Migrate(context.Background())) @@ -247,7 +246,7 @@ func Test_connection_RenameToExistingTableOld(t *testing.T) { err = c.CreateTableAsSelect(context.Background(), "_tmp_source", false, "SELECT 2 AS DATA", nil) require.NoError(t, err) - err = c.RenameTable(context.Background(), "_tmp_source", "source", false) + err = c.RenameTable(context.Background(), "_tmp_source", "source") require.NoError(t, err) res, err := c.Execute(context.Background(), &drivers.Statement{Query: "SELECT * FROM 'source'"}) @@ -262,8 +261,7 @@ func Test_connection_RenameToExistingTableOld(t *testing.T) { func Test_connection_CreateTableAsSelectWithComments(t *testing.T) { temp := t.TempDir() require.NoError(t, os.Mkdir(filepath.Join(temp, "default"), fs.ModePerm)) - dbPath := filepath.Join(temp, "default", "normal.db") - handle, err := Driver{}.Open("default", map[string]any{"path": dbPath, "external_table_storage": false}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) + handle, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(temp, nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) normalConn := handle.(*connection) normalConn.AsOLAP("default") diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 8f9bd06465e..5d2fcb1417b 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -212,7 +212,7 @@ func TestClose(t 
*testing.T) { } func prepareConn(t *testing.T) drivers.Handle { - conn, err := Driver{}.Open("default", map[string]any{"dsn": ":memory:?access_mode=read_write", "pool_size": 4, "external_table_storage": false}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + conn, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, ok := conn.AsOLAP("") diff --git a/runtime/drivers/olap.go b/runtime/drivers/olap.go index 2c2e5352bf8..aeede427e82 100644 --- a/runtime/drivers/olap.go +++ b/runtime/drivers/olap.go @@ -36,8 +36,8 @@ type OLAPStore interface { CreateTableAsSelect(ctx context.Context, name string, view bool, sql string, tableOpts map[string]any) error InsertTableAsSelect(ctx context.Context, name, sql string, byName, inPlace bool, strategy IncrementalStrategy, uniqueKey []string) error - DropTable(ctx context.Context, name string, view bool) error - RenameTable(ctx context.Context, name, newName string, view bool) error + DropTable(ctx context.Context, name string) error + RenameTable(ctx context.Context, name, newName string) error AddTableColumn(ctx context.Context, tableName, columnName string, typ string) error AlterTableColumn(ctx context.Context, tableName, columnName string, newType string) error diff --git a/runtime/drivers/pinot/olap.go b/runtime/drivers/pinot/olap.go index 20f52c229eb..6a063bdc55f 100644 --- a/runtime/drivers/pinot/olap.go +++ b/runtime/drivers/pinot/olap.go @@ -32,7 +32,7 @@ func (c *connection) CreateTableAsSelect(ctx context.Context, name string, view } // DropTable implements drivers.OLAPStore. 
-func (c *connection) DropTable(ctx context.Context, name string, view bool) error { +func (c *connection) DropTable(ctx context.Context, name string) error { return fmt.Errorf("pinot: data transformation not yet supported") } @@ -42,7 +42,7 @@ func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, } // RenameTable implements drivers.OLAPStore. -func (c *connection) RenameTable(ctx context.Context, name, newName string, view bool) error { +func (c *connection) RenameTable(ctx context.Context, name, newName string) error { return fmt.Errorf("pinot: data transformation not yet supported") } diff --git a/runtime/reconcilers/source.go b/runtime/reconcilers/source.go index 3df7de929f2..92e43c7e55f 100644 --- a/runtime/reconcilers/source.go +++ b/runtime/reconcilers/source.go @@ -80,8 +80,8 @@ func (r *SourceReconciler) Reconcile(ctx context.Context, n *runtimev1.ResourceN // Handle deletion if self.Meta.DeletedOn != nil { - olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table, false) - olapDropTableIfExists(ctx, r.C, src.State.Connector, r.stagingTableName(tableName), false) + olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table) + olapDropTableIfExists(ctx, r.C, src.State.Connector, r.stagingTableName(tableName)) return runtime.ReconcileResult{} } @@ -115,7 +115,7 @@ func (r *SourceReconciler) Reconcile(ctx context.Context, n *runtimev1.ResourceN if err != nil { if !src.Spec.StageChanges && src.State.Table != "" { // Remove previously ingested table - olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table, false) + olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table) src.State.Connector = "" src.State.Table = "" src.State.SpecHash = "" @@ -170,8 +170,8 @@ func (r *SourceReconciler) Reconcile(ctx context.Context, n *runtimev1.ResourceN // If the SinkConnector was changed, drop data in the old connector if src.State.Table != "" && src.State.Connector != src.Spec.SinkConnector { - 
olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table, false) - olapDropTableIfExists(ctx, r.C, src.State.Connector, r.stagingTableName(src.State.Table), false) + olapDropTableIfExists(ctx, r.C, src.State.Connector, src.State.Table) + olapDropTableIfExists(ctx, r.C, src.State.Connector, r.stagingTableName(src.State.Table)) } // Prepare for ingestion @@ -183,7 +183,7 @@ func (r *SourceReconciler) Reconcile(ctx context.Context, n *runtimev1.ResourceN // Should never happen, but if somehow the staging table was corrupted into a view, drop it if t, ok := olapTableInfo(ctx, r.C, connector, stagingTableName); ok && t.View { - olapDropTableIfExists(ctx, r.C, connector, stagingTableName, t.View) + olapDropTableIfExists(ctx, r.C, connector, stagingTableName) } // Execute ingestion @@ -226,11 +226,11 @@ func (r *SourceReconciler) Reconcile(ctx context.Context, n *runtimev1.ResourceN src.State.RefreshedOn = timestamppb.Now() } else if src.Spec.StageChanges { // Failed ingestion to staging table - olapDropTableIfExists(cleanupCtx, r.C, connector, stagingTableName, false) + olapDropTableIfExists(cleanupCtx, r.C, connector, stagingTableName) } else { // Failed ingestion to main table update = true - olapDropTableIfExists(cleanupCtx, r.C, connector, tableName, false) + olapDropTableIfExists(cleanupCtx, r.C, connector, tableName) src.State.Connector = "" src.State.Table = "" src.State.SpecHash = "" diff --git a/runtime/reconcilers/util.go b/runtime/reconcilers/util.go index 217ee739b9f..b967fdae5ed 100644 --- a/runtime/reconcilers/util.go +++ b/runtime/reconcilers/util.go @@ -117,7 +117,7 @@ func olapTableInfo(ctx context.Context, c *runtime.Controller, connector, table } // olapDropTableIfExists drops a table from an OLAP connector. 
-func olapDropTableIfExists(ctx context.Context, c *runtime.Controller, connector, table string, view bool) { +func olapDropTableIfExists(ctx context.Context, c *runtime.Controller, connector, table string) { if table == "" { return } @@ -128,7 +128,7 @@ func olapDropTableIfExists(ctx context.Context, c *runtime.Controller, connector } defer release() - _ = olap.DropTable(ctx, table, view) + _ = olap.DropTable(ctx, table) } // olapForceRenameTable renames a table or view from fromName to toName in the OLAP connector. @@ -159,7 +159,7 @@ func olapForceRenameTable(ctx context.Context, c *runtime.Controller, connector, // Renaming a table to the same name with different casing is not supported. Workaround by renaming to a temporary name first. if strings.EqualFold(fromName, toName) { tmpName := fmt.Sprintf("__rill_tmp_rename_%s_%s", typ, toName) - err = olap.RenameTable(ctx, fromName, tmpName, fromIsView) + err = olap.RenameTable(ctx, fromName, tmpName) if err != nil { return err } @@ -167,7 +167,7 @@ func olapForceRenameTable(ctx context.Context, c *runtime.Controller, connector, } // Do the rename - return olap.RenameTable(ctx, fromName, toName, fromIsView) + return olap.RenameTable(ctx, fromName, toName) } func resolveTemplatedProps(ctx context.Context, c *runtime.Controller, self compilerv1.TemplateResource, props map[string]any) (map[string]any, error) { diff --git a/runtime/runtime.go b/runtime/runtime.go index 7baaacc40da..a4a78055397 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -16,7 +16,6 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" - "gocloud.dev/blob" ) var tracer = otel.Tracer("github.com/rilldata/rill/runtime") @@ -43,7 +42,6 @@ type Runtime struct { connCache conncache.Cache queryCache *queryCache securityEngine *securityEngine - dataBucket *blob.Bucket } func New(ctx context.Context, opts *Options, logger *zap.Logger, st *storage.Client, ac *activity.Client, emailClient *email.Client) 
(*Runtime, error) { diff --git a/runtime/testruntime/testruntime.go b/runtime/testruntime/testruntime.go index f4a7a8a7d43..f0b601a9151 100644 --- a/runtime/testruntime/testruntime.go +++ b/runtime/testruntime/testruntime.go @@ -67,11 +67,11 @@ func New(t TestingT) *runtime.Runtime { } logger := zap.NewNop() - // var err error - // if os.Getenv("DEBUG") == "1" { - // logger, err := zap.NewDevelopment() - // require.NoError(t, err) - // } + var err error + if os.Getenv("DEBUG") == "1" { + logger, err = zap.NewDevelopment() + require.NoError(t, err) + } rt, err := runtime.New(context.Background(), opts, logger, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), email.New(email.NewTestSender())) require.NoError(t, err) @@ -100,7 +100,6 @@ func NewInstanceWithOptions(t TestingT, opts InstanceOptions) (*runtime.Runtime, } olapDSN := os.Getenv("RILL_RUNTIME_TEST_OLAP_DSN") if olapDSN == "" { - // TODO : fix - ignored for duckdb olapDSN = ":memory:" } From c609613644d9bdad99590789f7619c3fe0ae2f2e Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:11:06 +0530 Subject: [PATCH 56/64] fix some more tests --- go.mod | 1 + runtime/drivers/duckdb/config_test.go | 40 ------------------- runtime/drivers/duckdb/olap_test.go | 18 +-------- .../transporter_mysql_to_duckDB_test.go | 2 +- 4 files changed, 4 insertions(+), 57 deletions(-) diff --git a/go.mod b/go.mod index 9f76750be53..1c85350e3b1 100644 --- a/go.mod +++ b/go.mod @@ -47,6 +47,7 @@ require ( github.com/go-logr/zapr v1.2.4 github.com/go-playground/validator/v10 v10.14.0 github.com/go-redis/redis_rate/v10 v10.0.1 + github.com/go-sql-driver/mysql v1.7.1 github.com/gocarina/gocsv v0.0.0-20231116093920-b87c2d0e983a github.com/golang-jwt/jwt/v4 v4.5.0 github.com/google/go-github/v50 v50.2.0 diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 5f20697b073..16231ca5f4f 100644 --- 
a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -23,11 +23,6 @@ func TestConfig(t *testing.T) { cfg, err = newConfig(map[string]any{"dsn": "", "cpu": 2}, "") require.NoError(t, err) require.Equal(t, "2", cfg.readSettings()["threads"]) - require.Equal(t, "", cfg.writeSettings()["threads"]) - require.Equal(t, 2, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{}, "path/to") - require.NoError(t, err) require.Subset(t, cfg.writeSettings(), map[string]string{"custom_user_agent": "rill"}) require.Equal(t, 2, cfg.PoolSize) @@ -35,35 +30,6 @@ func TestConfig(t *testing.T) { require.NoError(t, err) require.Equal(t, 10, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"pool_size": "10"}, "path/to") - require.NoError(t, err) - require.Equal(t, 10, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "?rill_pool_size=4", "pool_size": "10"}, "path/to") - require.NoError(t, err) - require.Equal(t, 4, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?rill_pool_size=10"}, "path/to") - require.NoError(t, err) - // require.Equal(t, "path/to/duck.db?custom_user_agent=rill", cfg.DSN) - // require.Equal(t, "path/to/duck.db", cfg.DBFilePath) - require.Equal(t, 10, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB&rill_pool_size=10"}, "path/to") - require.NoError(t, err) - // require.Equal(t, "path/to/duck.db?custom_user_agent=rill&max_memory=4GB", cfg.DSN) - require.Equal(t, 10, cfg.PoolSize) - // require.Equal(t, "path/to/duck.db", cfg.DBFilePath) - - _, err = newConfig(map[string]any{"dsn": "path/to/duck.db?max_memory=4GB", "pool_size": "abc"}, "path/to") - require.Error(t, err) - - cfg, err = newConfig(map[string]any{"dsn": "duck.db"}, "path/to") - require.NoError(t, err) - - cfg, err = newConfig(map[string]any{"dsn": "duck.db?rill_pool_size=10"}, "path/to") - require.NoError(t, err) - cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": 
"8", "cpu": "2"}, "path/to") require.NoError(t, err) require.Equal(t, "2", cfg.readSettings()["threads"]) @@ -71,12 +37,6 @@ func TestConfig(t *testing.T) { require.Equal(t, "8GB", cfg.readSettings()["max_memory"]) require.Equal(t, "", cfg.writeSettings()["max_memory"]) require.Equal(t, 2, cfg.PoolSize) - - cfg, err = newConfig(map[string]any{"dsn": "duck.db?max_memory=2GB&rill_pool_size=4"}, "path/to") - require.NoError(t, err) - // require.Equal(t, "duck.db", cfg.DBFilePath) - // require.Equal(t, "duck.db?custom_user_agent=rill&max_memory=2GB", cfg.DSN) - require.Equal(t, 4, cfg.PoolSize) } func Test_specialCharInPath(t *testing.T) { diff --git a/runtime/drivers/duckdb/olap_test.go b/runtime/drivers/duckdb/olap_test.go index 5d2fcb1417b..4a68caeca96 100644 --- a/runtime/drivers/duckdb/olap_test.go +++ b/runtime/drivers/duckdb/olap_test.go @@ -218,24 +218,10 @@ func prepareConn(t *testing.T) drivers.Handle { olap, ok := conn.AsOLAP("") require.True(t, ok) - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE TABLE foo(bar VARCHAR, baz INTEGER)", - }) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", - }) + err = olap.CreateTableAsSelect(context.Background(), "foo", false, "SELECT * FROM (VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)) AS t(bar, baz)", nil) require.NoError(t, err) - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "CREATE TABLE bar(bar VARCHAR, baz INTEGER)", - }) - require.NoError(t, err) - - err = olap.Exec(context.Background(), &drivers.Statement{ - Query: "INSERT INTO bar VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", - }) + err = olap.CreateTableAsSelect(context.Background(), "bar", false, "SELECT * FROM (VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)) AS t(bar, baz)", nil) require.NoError(t, err) return conn diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go 
b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index 314942e1e73..c502c07dc02 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -14,7 +14,7 @@ import ( "fmt" "time" - _ "github.com/rilldata/rill/runtime/drivers/mysql" + _ "github.com/go-sql-driver/mysql" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" ) From 770f759351ef66ab1d38dcc8dbd5fc85c3f7a8a2 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:58:28 +0530 Subject: [PATCH 57/64] reopen db in a separate goroutine --- runtime/drivers/duckdb/duckdb.go | 87 +++++++++----- .../duckdb/model_executor_warehouse_self.go | 1 - runtime/drivers/duckdb/olap.go | 99 ++++++++++++---- .../duckdb/transporter_duckDB_to_duckDB.go | 9 +- .../transporter_duckDB_to_duckDB_test.go | 109 +++++++++--------- .../transporter_motherduck_to_duckDB.go | 9 +- .../transporter_mysql_to_duckDB_test.go | 2 +- .../transporter_postgres_to_duckDB_test.go | 2 +- runtime/drivers/olap.go | 3 + runtime/pkg/rduckdb/db.go | 2 +- .../ad_bids_mini_metrics_with_policy.yaml | 2 +- .../ad_bids_mini_metrics_with_policy.yaml | 1 - web-local/tests/utils/test.ts | 2 +- 13 files changed, 212 insertions(+), 116 deletions(-) diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 699e97d234d..8c37dbd1cfa 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -6,7 +6,6 @@ import ( "fmt" "log/slog" "net/url" - "os" "strings" "sync" "time" @@ -190,17 +189,12 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien }, connectionsInUse)) // Open the DB - err = c.reopenDB(ctx) + err = c.reopenDB(context.Background()) if err != nil { // Check for another process currently accessing the DB if strings.Contains(err.Error(), "Could not set lock on file") { return nil, 
fmt.Errorf("failed to open database (is Rill already running?): %w", err) } - // Return nice error for old macOS versions - if strings.Contains(err.Error(), "Symbol not found") { - fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") - os.Exit(1) - } return nil, err } @@ -266,7 +260,7 @@ func (d Driver) TertiarySourceConnectors(ctx context.Context, src map[string]any type connection struct { instanceID string // do not use directly it can also be nil or closed - // use acquireOLAPConn/acquireMetaConn + // use acquireOLAPConn/acquireMetaConn for select and acquireDB for write queries db rduckdb.DB // driverConfig is input config passed during Open driverConfig map[string]any @@ -336,7 +330,10 @@ func (c *connection) Config() map[string]any { func (c *connection) Close() error { c.cancel() _ = c.registration.Unregister() - return c.db.Close() + if c.db != nil { + return c.db.Close() + } + return nil } // AsRegistry Registry implements drivers.Connection. @@ -528,7 +525,7 @@ func (c *connection) acquireMetaConn(ctx context.Context) (*sqlx.Conn, func() er } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx) + conn, releaseConn, err := c.acquireReadConnection(ctx) if err != nil { c.metaSem.Release(1) return nil, nil, err @@ -571,7 +568,7 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn } // Get new conn - conn, releaseConn, err := c.acquireConn(ctx) + conn, releaseConn, err := c.acquireReadConnection(ctx) if err != nil { c.olapSem.Release() if longRunning { @@ -593,9 +590,32 @@ func (c *connection) acquireOLAPConn(ctx context.Context, priority int, longRunn return conn, release, nil } -// acquireConn returns a DuckDB connection. It should only be used internally in acquireMetaConn and acquireOLAPConn. 
-// acquireConn implements the connection tracking and DB reopening logic described in the struct definition for connection. -func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, error) { +// acquireReadConnection is a helper function to acquire a read connection from rduckdb. +// Do not use this function directly for OLAP queries. Use acquireOLAPConn, acquireMetaConn instead. +func (c *connection) acquireReadConnection(ctx context.Context) (*sqlx.Conn, func() error, error) { + db, releaseDB, err := c.acquireDB() + if err != nil { + return nil, nil, err + } + + conn, releaseConn, err := db.AcquireReadConnection(ctx) + if err != nil { + _ = releaseDB() + return nil, nil, err + } + + release := func() error { + err := releaseConn() + return errors.Join(err, releaseDB()) + } + return conn, release, nil +} + +// acquireDB returns rduckDB handle. +// acquireDB implements the connection tracking and DB reopening logic described in the struct definition for connection. +// It should not be used directly for select queries. For select queries use acquireOLAPConn and acquireMetaConn. +// It should only be used for write queries. 
+func (c *connection) acquireDB() (rduckdb.DB, func() error, error) { c.dbCond.L.Lock() for { if c.dbErr != nil { @@ -611,11 +631,6 @@ func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, c.dbConnCount++ c.dbCond.L.Unlock() - conn, releaseConn, err := c.db.AcquireReadConnection(ctx) - if err != nil { - return nil, nil, err - } - c.connTimesMu.Lock() connID := c.nextConnID c.nextConnID++ @@ -623,28 +638,38 @@ func (c *connection) acquireConn(ctx context.Context) (*sqlx.Conn, func() error, c.connTimesMu.Unlock() release := func() error { - err := releaseConn() c.connTimesMu.Lock() delete(c.connTimes, connID) c.connTimesMu.Unlock() c.dbCond.L.Lock() c.dbConnCount-- if c.dbConnCount == 0 && c.dbReopen { - c.dbReopen = false - err = c.reopenDB(ctx) - if err == nil { - c.logger.Debug("reopened DuckDB successfully") - } else { - c.logger.Debug("reopen of DuckDB failed - the handle is now permanently locked", zap.Error(err)) - } - c.dbErr = err - c.dbCond.Broadcast() + c.triggerReopen() } c.dbCond.L.Unlock() - return err + return nil } + return c.db, release, nil +} - return conn, release, nil +func (c *connection) triggerReopen() { + go func() { + c.dbCond.L.Lock() + defer c.dbCond.L.Unlock() + if !c.dbReopen || c.dbConnCount == 0 { + c.logger.Error("triggerReopen called but should not reopen", zap.Bool("dbReopen", c.dbReopen), zap.Int("dbConnCount", c.dbConnCount)) + return + } + c.dbReopen = false + err := c.reopenDB(c.ctx) + if err != nil { + if !errors.Is(err, context.Canceled) { + c.logger.Error("reopen of DuckDB failed - the handle is now permanently locked", zap.Error(err)) + } + } + c.dbErr = err + c.dbCond.Broadcast() + }() } // checkErr marks the DB for reopening if the error is an internal DuckDB error. 
diff --git a/runtime/drivers/duckdb/model_executor_warehouse_self.go b/runtime/drivers/duckdb/model_executor_warehouse_self.go index dd58bd4b596..0c85ff4a234 100644 --- a/runtime/drivers/duckdb/model_executor_warehouse_self.go +++ b/runtime/drivers/duckdb/model_executor_warehouse_self.go @@ -111,7 +111,6 @@ func (e *warehouseToSelfExecutor) queryAndInsert(ctx context.Context, opts *driv for { files, err := iter.Next() if err != nil { - // TODO: Why is this not just one error? if errors.Is(err, io.EOF) || errors.Is(err, drivers.ErrNoRows) { break } diff --git a/runtime/drivers/duckdb/olap.go b/runtime/drivers/duckdb/olap.go index e2721879a88..a3642e75095 100644 --- a/runtime/drivers/duckdb/olap.go +++ b/runtime/drivers/duckdb/olap.go @@ -176,12 +176,26 @@ func (c *connection) Execute(ctx context.Context, stmt *drivers.Statement) (res } func (c *connection) estimateSize() int64 { - return c.db.Size() + db, release, err := c.acquireDB() + if err != nil { + return 0 + } + size := db.Size() + _ = release() + return size } // AddTableColumn implements drivers.OLAPStore. func (c *connection) AddTableColumn(ctx context.Context, tableName, columnName, typ string) error { - err := c.db.MutateTable(ctx, tableName, func(ctx context.Context, conn *sqlx.Conn) error { + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + + err = db.MutateTable(ctx, tableName, func(ctx context.Context, conn *sqlx.Conn) error { _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ADD COLUMN %s %s", safeSQLName(tableName), safeSQLName(columnName), typ)) return err }) @@ -190,7 +204,15 @@ func (c *connection) AddTableColumn(ctx context.Context, tableName, columnName, // AlterTableColumn implements drivers.OLAPStore. 
func (c *connection) AlterTableColumn(ctx context.Context, tableName, columnName, newType string) error { - err := c.db.MutateTable(ctx, tableName, func(ctx context.Context, conn *sqlx.Conn) error { + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + + err = db.MutateTable(ctx, tableName, func(ctx context.Context, conn *sqlx.Conn) error { _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s ALTER %s TYPE %s", safeSQLName(tableName), safeSQLName(columnName), newType)) return err }) @@ -200,43 +222,41 @@ func (c *connection) AlterTableColumn(ctx context.Context, tableName, columnName // CreateTableAsSelect implements drivers.OLAPStore. // We add a \n at the end of the any user query to ensure any comment at the end of model doesn't make the query incomplete. func (c *connection) CreateTableAsSelect(ctx context.Context, name string, view bool, sql string, tableOpts map[string]any) error { - return c.db.CreateTableAsSelect(ctx, name, sql, &rduckdb.CreateTableOptions{View: view}) + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + err = db.CreateTableAsSelect(ctx, name, sql, &rduckdb.CreateTableOptions{View: view}) + return c.checkErr(err) } // InsertTableAsSelect implements drivers.OLAPStore. func (c *connection) InsertTableAsSelect(ctx context.Context, name, sql string, byName, inPlace bool, strategy drivers.IncrementalStrategy, uniqueKey []string) error { - return c.execIncrementalInsert(ctx, name, sql, byName, strategy, uniqueKey) -} - -// DropTable implements drivers.OLAPStore. -func (c *connection) DropTable(ctx context.Context, name string) error { - return c.db.DropTable(ctx, name) -} - -// RenameTable implements drivers.OLAPStore. 
-func (c *connection) RenameTable(ctx context.Context, oldName, newName string) error { - return c.db.RenameTable(ctx, oldName, newName) -} - -func (c *connection) MayBeScaledToZero(ctx context.Context) bool { - return false -} - -func (c *connection) execIncrementalInsert(ctx context.Context, name, sql string, byName bool, strategy drivers.IncrementalStrategy, uniqueKey []string) error { + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() var byNameClause string if byName { byNameClause = "BY NAME" } if strategy == drivers.IncrementalStrategyAppend { - return c.db.MutateTable(ctx, name, func(ctx context.Context, conn *sqlx.Conn) error { + err = db.MutateTable(ctx, name, func(ctx context.Context, conn *sqlx.Conn) error { _, err := conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s (%s\n)", safeSQLName(name), byNameClause, sql)) return err }) + return c.checkErr(err) } if strategy == drivers.IncrementalStrategyMerge { - return c.db.MutateTable(ctx, name, func(ctx context.Context, conn *sqlx.Conn) error { + err = db.MutateTable(ctx, name, func(ctx context.Context, conn *sqlx.Conn) error { // Create a temporary table with the new data tmp := uuid.New().String() _, err := conn.ExecContext(ctx, fmt.Sprintf("CREATE TEMPORARY TABLE %s AS (%s\n)", safeSQLName(tmp), sql)) @@ -274,11 +294,42 @@ func (c *connection) execIncrementalInsert(ctx context.Context, name, sql string _, err = conn.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s %s SELECT * FROM %s", safeSQLName(name), byNameClause, safeSQLName(tmp))) return err }) + return c.checkErr(err) } return fmt.Errorf("incremental insert strategy %q not supported", strategy) } +// DropTable implements drivers.OLAPStore. 
+func (c *connection) DropTable(ctx context.Context, name string) error { + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + err = db.DropTable(ctx, name) + return c.checkErr(err) +} + +// RenameTable implements drivers.OLAPStore. +func (c *connection) RenameTable(ctx context.Context, oldName, newName string) error { + db, release, err := c.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + err = db.RenameTable(ctx, oldName, newName) + return c.checkErr(err) +} + +func (c *connection) MayBeScaledToZero(ctx context.Context) bool { + return false +} + func RowsToSchema(r *sqlx.Rows) (*runtimev1.StructType, error) { if r == nil { return nil, nil diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 99964c785d2..45b89d56cc2 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -164,7 +164,14 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d _, err := conn.ExecContext(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s", safeTempTable)) return err } - return t.to.db.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), &rduckdb.CreateTableOptions{ + db, release, err := t.to.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + return db.CreateTableAsSelect(ctx, sinkProps.Table, fmt.Sprintf("SELECT * FROM %s", safeTempTable), &rduckdb.CreateTableOptions{ BeforeCreateFn: beforeCreateFn, AfterCreateFn: afterCreateFn, }) diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go index 86e405c39e3..1da46f1cdd5 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB_test.go @@ -1,54 
+1,59 @@ package duckdb -// func TestDuckDBToDuckDBTransfer(t *testing.T) { -// tempDir := t.TempDir() -// conn, err := Driver{}.Open("default", map[string]any{"path": fmt.Sprintf("%s.db", filepath.Join(tempDir, "tranfser")), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) -// require.NoError(t, err) - -// olap, ok := conn.AsOLAP("") -// require.True(t, ok) - -// err = olap.Exec(context.Background(), &drivers.Statement{ -// Query: "CREATE TABLE foo(bar VARCHAR, baz INTEGER)", -// }) -// require.NoError(t, err) - -// err = olap.Exec(context.Background(), &drivers.Statement{ -// Query: "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)", -// }) -// require.NoError(t, err) -// require.NoError(t, conn.Close()) - -// to, err := Driver{}.Open("default", map[string]any{"path": filepath.Join(tempDir, "main.db"), "external_table_storage": false}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) -// require.NoError(t, err) - -// tr := newDuckDBToDuckDB(to.(*connection), zap.NewNop()) - -// // transfer once -// err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) -// require.NoError(t, err) - -// olap, ok = to.AsOLAP("") -// require.True(t, ok) - -// rows, err := to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) -// require.NoError(t, err) - -// var count int -// rows.Next() -// require.NoError(t, rows.Scan(&count)) -// require.Equal(t, 4, count) -// require.NoError(t, rows.Close()) - -// // transfer again -// err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": filepath.Join(tempDir, "tranfser.db")}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) -// require.NoError(t, err) - -// rows, err = olap.Execute(context.Background(), 
&drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) -// require.NoError(t, err) - -// rows.Next() -// require.NoError(t, rows.Scan(&count)) -// require.Equal(t, 4, count) -// require.NoError(t, rows.Close()) -// } +import ( + "context" + "database/sql" + "path/filepath" + "testing" + + _ "github.com/marcboeker/go-duckdb" + "github.com/rilldata/rill/runtime/drivers" + activity "github.com/rilldata/rill/runtime/pkg/activity" + "github.com/rilldata/rill/runtime/storage" + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestDuckDBToDuckDBTransfer(t *testing.T) { + tempDir := t.TempDir() + dbFile := filepath.Join(tempDir, "transfer.db") + db, err := sql.Open("duckdb", dbFile) + require.NoError(t, err) + + _, err = db.ExecContext(context.Background(), "CREATE TABLE foo(bar VARCHAR, baz INTEGER)") + require.NoError(t, err) + + _, err = db.ExecContext(context.Background(), "INSERT INTO foo VALUES ('a', 1), ('a', 2), ('b', 3), ('c', 4)") + require.NoError(t, err) + require.NoError(t, db.Close()) + + to, err := Driver{}.Open("default", map[string]any{}, storage.MustNew(tempDir, nil), activity.NewNoopClient(), zap.NewNop()) + require.NoError(t, err) + + tr := newDuckDBToDuckDB(to.(*connection), "duckdb", zap.NewNop()) + + // transfer once + err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": dbFile}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) + require.NoError(t, err) + + rows, err := to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) + require.NoError(t, err) + + var count int + rows.Next() + require.NoError(t, rows.Scan(&count)) + require.Equal(t, 4, count) + require.NoError(t, rows.Close()) + + // transfer again + err = tr.Transfer(context.Background(), map[string]any{"sql": "SELECT * FROM foo", "db": dbFile}, map[string]any{"table": "test"}, &drivers.TransferOptions{}) + require.NoError(t, err) + + rows, err = 
to.(*connection).Execute(context.Background(), &drivers.Statement{Query: "SELECT COUNT(*) FROM test"}) + require.NoError(t, err) + + rows.Next() + require.NoError(t, rows.Scan(&count)) + require.Equal(t, 4, count) + require.NoError(t, rows.Close()) +} diff --git a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go index f3f30a23b69..cd8560af805 100644 --- a/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_motherduck_to_duckDB.go @@ -93,7 +93,14 @@ func (t *motherduckToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps m } userQuery := strings.TrimSpace(srcConfig.SQL) userQuery, _ = strings.CutSuffix(userQuery, ";") // trim trailing semi colon - return t.to.db.CreateTableAsSelect(ctx, sinkCfg.Table, userQuery, &rduckdb.CreateTableOptions{ + db, release, err := t.to.acquireDB() + if err != nil { + return err + } + defer func() { + _ = release() + }() + return db.CreateTableAsSelect(ctx, sinkCfg.Table, userQuery, &rduckdb.CreateTableOptions{ BeforeCreateFn: beforeCreateFn, }) } diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index c502c07dc02..d3708342c85 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -105,7 +105,7 @@ func allMySQLDataTypesTest(t *testing.T, db *sql.DB, dsn string) { _, err := db.ExecContext(ctx, mysqlInitStmt) require.NoError(t, err) - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go 
b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go index 87b9769db1e..07615f425e9 100644 --- a/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_postgres_to_duckDB_test.go @@ -68,7 +68,7 @@ func allDataTypesTest(t *testing.T, db *sql.DB, dbURL string) { _, err := db.ExecContext(ctx, sqlStmt) require.NoError(t, err) - to, err := drivers.Open("duckdb", "default", map[string]any{"dsn": ":memory:"}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) + to, err := drivers.Open("duckdb", "default", map[string]any{}, storage.MustNew(t.TempDir(), nil), activity.NewNoopClient(), zap.NewNop()) require.NoError(t, err) olap, _ := to.AsOLAP("") diff --git a/runtime/drivers/olap.go b/runtime/drivers/olap.go index aeede427e82..b59d32a6e2d 100644 --- a/runtime/drivers/olap.go +++ b/runtime/drivers/olap.go @@ -249,6 +249,9 @@ func (d Dialect) RequiresCastForLike() bool { // EscapeTable returns an esacped fully qualified table name func (d Dialect) EscapeTable(db, schema, table string) string { + if d == DialectDuckDB { + return d.EscapeIdentifier(table) + } var sb strings.Builder if db != "" { sb.WriteString(d.EscapeIdentifier(db)) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 34f4d5c7753..01f493d95bc 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -1031,7 +1031,7 @@ func (d *db) removeSnapshot(ctx context.Context, id int) error { } defer d.metaSem.Release(1) - _, err = d.dbHandle.Exec(fmt.Sprintf("DROP SCHEMA %s CASCADE", schemaName(id))) + _, err = d.dbHandle.Exec(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE", schemaName(id))) return err } diff --git a/runtime/testruntime/testdata/ad_bids/dashboards/ad_bids_mini_metrics_with_policy.yaml b/runtime/testruntime/testdata/ad_bids/dashboards/ad_bids_mini_metrics_with_policy.yaml index 6ea0a916e9b..724f2b1fa2b 100644 --- 
a/runtime/testruntime/testdata/ad_bids/dashboards/ad_bids_mini_metrics_with_policy.yaml +++ b/runtime/testruntime/testdata/ad_bids/dashboards/ad_bids_mini_metrics_with_policy.yaml @@ -6,7 +6,7 @@ timeseries: timestamp smallest_time_grain: "" dimensions: - - name: publisher + - name: publisher_dim display_name: Publisher expression: upper(publisher) description: "" diff --git a/runtime/testruntime/testdata/ad_bids_clickhouse/dashboards/ad_bids_mini_metrics_with_policy.yaml b/runtime/testruntime/testdata/ad_bids_clickhouse/dashboards/ad_bids_mini_metrics_with_policy.yaml index c730cbad7b7..05d9741f6f3 100644 --- a/runtime/testruntime/testdata/ad_bids_clickhouse/dashboards/ad_bids_mini_metrics_with_policy.yaml +++ b/runtime/testruntime/testdata/ad_bids_clickhouse/dashboards/ad_bids_mini_metrics_with_policy.yaml @@ -1,6 +1,5 @@ model: ad_bids_mini display_name: Ad bids -display_name: "" timeseries: timestamp diff --git a/web-local/tests/utils/test.ts b/web-local/tests/utils/test.ts index abda353bc76..994d3aef27b 100644 --- a/web-local/tests/utils/test.ts +++ b/web-local/tests/utils/test.ts @@ -29,7 +29,7 @@ export const test = base.extend({ 'compiler: rill-beta\ntitle: "Test Project"', ); - const cmd = `start --no-open --port ${TEST_PORT} --port-grpc ${TEST_PORT_GRPC} --db ${TEST_PROJECT_DIRECTORY}/stage.db?rill_pool_size=4 ${TEST_PROJECT_DIRECTORY} --env connector.duckdb.external_table_storage=false`; + const cmd = `start --no-open --port ${TEST_PORT} --port-grpc ${TEST_PORT_GRPC} ${TEST_PROJECT_DIRECTORY}`; const childProcess = spawn("../rill", cmd.split(" "), { stdio: "inherit", From da86f4f9fa32505d6863c5afec111fe9d379cb40 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Sat, 7 Dec 2024 14:51:57 +0530 Subject: [PATCH 58/64] fix more tests --- runtime/drivers/duckdb/olap_crud_test.go | 2 +- .../duckdb/transporter_duckDB_to_duckDB.go | 18 ++++++++++-------- runtime/pkg/rduckdb/db.go | 2 +- 
.../ad_bids/apis/mv_sql_policy_api.yaml | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/runtime/drivers/duckdb/olap_crud_test.go b/runtime/drivers/duckdb/olap_crud_test.go index 9c05abb16c5..637ee012af6 100644 --- a/runtime/drivers/duckdb/olap_crud_test.go +++ b/runtime/drivers/duckdb/olap_crud_test.go @@ -225,7 +225,7 @@ func Test_connection_AddTableColumn(t *testing.T) { err = c.AlterTableColumn(context.Background(), "test alter column", "data", "VARCHAR") require.NoError(t, err) - res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column'"}) + res, err = c.Execute(context.Background(), &drivers.Statement{Query: "SELECT data_type FROM information_schema.columns WHERE table_name='test alter column' AND table_schema=current_schema()"}) require.NoError(t, err) require.True(t, res.Next()) require.NoError(t, res.Scan(&typ)) diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 45b89d56cc2..61eed9a9d9c 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -118,27 +118,29 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s } func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *dbSourceProperties, sinkProps *sinkProperties) error { - var attachSQL string + var initSQL []string safeDBName := safeName(sinkProps.Table + "_external_db_") safeTempTable := safeName(sinkProps.Table + "__temp__") switch t.database { case "mysql": - attachSQL = fmt.Sprintf("ATTACH %s AS %s (TYPE mysql)", safeSQLString(srcProps.Database), safeDBName) + initSQL = append(initSQL, "INSTALL 'MYSQL'; LOAD 'MYSQL';", fmt.Sprintf("ATTACH %s AS %s (TYPE mysql, READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) case "postgres": - attachSQL = fmt.Sprintf("ATTACH %s AS %s 
(TYPE postgres)", safeSQLString(srcProps.Database), safeDBName) + initSQL = append(initSQL, "INSTALL 'POSTGRES'; LOAD 'POSTGRES';", fmt.Sprintf("ATTACH %s AS %s (TYPE postgres, READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) case "duckdb": - attachSQL = fmt.Sprintf("ATTACH %s AS %s", safeSQLString(srcProps.Database), safeDBName) + initSQL = append(initSQL, fmt.Sprintf("ATTACH %s AS %s (READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) default: return fmt.Errorf("internal error: unsupported external database: %s", t.database) } beforeCreateFn := func(ctx context.Context, conn *sqlx.Conn) error { - _, err := conn.ExecContext(ctx, attachSQL) - if err != nil { - return err + for _, sql := range initSQL { + _, err := conn.ExecContext(ctx, sql) + if err != nil { + return err + } } var localDB, localSchema string - err = conn.QueryRowxContext(ctx, "SELECT current_database(),current_schema();").Scan(&localDB, &localSchema) + err := conn.QueryRowxContext(ctx, "SELECT current_database(),current_schema();").Scan(&localDB, &localSchema) if err != nil { return err } diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 01f493d95bc..ede4987005c 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -954,7 +954,7 @@ func (d *db) removeTableVersion(ctx context.Context, name, version string) error } defer d.metaSem.Release(1) - _, err = d.dbHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+dbName(name, version)) + _, err = d.dbHandle.ExecContext(ctx, "DETACH DATABASE IF EXISTS "+safeSQLName(dbName(name, version))) if err != nil { return err } diff --git a/runtime/testruntime/testdata/ad_bids/apis/mv_sql_policy_api.yaml b/runtime/testruntime/testdata/ad_bids/apis/mv_sql_policy_api.yaml index ad74d8a022b..86352b648d5 100644 --- a/runtime/testruntime/testdata/ad_bids/apis/mv_sql_policy_api.yaml +++ b/runtime/testruntime/testdata/ad_bids/apis/mv_sql_policy_api.yaml @@ -2,7 +2,7 @@ kind : api metrics_sql: | select - 
publisher, + publisher_dim, domain, "total impressions", "total volume" From 8842b70b5b825e03173bbf01ad4f773f23bc88ac Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:04:36 +0530 Subject: [PATCH 59/64] more cleanups --- .../duckdb/transporter_duckDB_to_duckDB.go | 18 ++++++------- runtime/drivers/duckdb/utils.go | 4 +++ runtime/pkg/rduckdb/db.go | 25 +++++++++++++++++-- runtime/pkg/rduckdb/remote.go | 6 +++++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 61eed9a9d9c..5f9718463f0 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -16,16 +16,16 @@ import ( ) type duckDBToDuckDB struct { - to *connection - logger *zap.Logger - database string // mysql, postgres, duckdb + to *connection + logger *zap.Logger + externalDBType string // mysql, postgres, duckdb } func newDuckDBToDuckDB(c *connection, db string, logger *zap.Logger) drivers.Transporter { return &duckDBToDuckDB{ - to: c, - logger: logger, - database: db, + to: c, + logger: logger, + externalDBType: db, } } @@ -45,7 +45,7 @@ func (t *duckDBToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map[s t.logger = t.logger.With(zap.String("source", sinkCfg.Table)) if srcCfg.Database != "" { // query to be run against an external DB - if t.database == "duckdb" { + if t.externalDBType == "duckdb" { srcCfg.Database, err = fileutil.ResolveLocalPath(srcCfg.Database, opts.RepoRoot, opts.AllowHostAccess) if err != nil { return err @@ -121,7 +121,7 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d var initSQL []string safeDBName := safeName(sinkProps.Table + "_external_db_") safeTempTable := safeName(sinkProps.Table + "__temp__") - switch t.database { + switch t.externalDBType { case "mysql": initSQL = 
append(initSQL, "INSTALL 'MYSQL'; LOAD 'MYSQL';", fmt.Sprintf("ATTACH %s AS %s (TYPE mysql, READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) case "postgres": @@ -129,7 +129,7 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d case "duckdb": initSQL = append(initSQL, fmt.Sprintf("ATTACH %s AS %s (READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) default: - return fmt.Errorf("internal error: unsupported external database: %s", t.database) + return fmt.Errorf("internal error: unsupported external database: %s", t.externalDBType) } beforeCreateFn := func(ctx context.Context, conn *sqlx.Conn) error { for _, sql := range initSQL { diff --git a/runtime/drivers/duckdb/utils.go b/runtime/drivers/duckdb/utils.go index 907b1faa1c9..480109ea28f 100644 --- a/runtime/drivers/duckdb/utils.go +++ b/runtime/drivers/duckdb/utils.go @@ -24,6 +24,7 @@ func parseSinkProperties(props map[string]any) (*sinkProperties, error) { type dbSourceProperties struct { Database string `mapstructure:"db"` + DSN string `mapstructure:"dsn"` SQL string `mapstructure:"sql"` } @@ -32,6 +33,9 @@ func parseDBSourceProperties(props map[string]any) (*dbSourceProperties, error) if err := mapstructure.Decode(props, cfg); err != nil { return nil, fmt.Errorf("failed to parse source properties: %w", err) } + if cfg.DSN != "" { // For mysql, postgres the property is called as dsn and not db + cfg.Database = cfg.DSN + } if cfg.SQL == "" { return nil, fmt.Errorf("property 'sql' is mandatory") } diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index ede4987005c..c4e404c7ab7 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -966,7 +966,7 @@ func (d *db) deleteLocalTableFiles(name, version string) error { return os.RemoveAll(d.localTableDir(name, version)) } -func (d *db) iterateLocalTables(removeInvalidTable bool, fn func(name string, meta *tableMeta) error) error { +func (d *db) iterateLocalTables(cleanup bool, fn 
func(name string, meta *tableMeta) error) error { entries, err := os.ReadDir(d.localPath) if err != nil { return err @@ -977,15 +977,36 @@ func (d *db) iterateLocalTables(removeInvalidTable bool, fn func(name string, me } meta, err := d.tableMeta(entry.Name()) if err != nil { - if !removeInvalidTable { + if !cleanup { continue } + d.logger.Debug("cleanup: remove table", slog.String("table", entry.Name())) err = d.deleteLocalTableFiles(entry.Name(), "") if err != nil { return err } continue } + // also remove older versions + if cleanup { + versions, err := os.ReadDir(d.localTableDir(entry.Name(), "")) + if err != nil { + return err + } + for _, version := range versions { + if !version.IsDir() { + continue + } + if version.Name() == meta.Version { + continue + } + d.logger.Debug("cleanup: remove old version", slog.String("table", entry.Name()), slog.String("version", version.Name())) + err = d.deleteLocalTableFiles(entry.Name(), version.Name()) + if err != nil { + return err + } + } + } err = fn(entry.Name(), meta) if err != nil { return err diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index 86db7daf7f7..c5344040fca 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -23,6 +23,12 @@ import ( func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { if !d.localDirty || d.remote == nil { // optimisation to skip sync if write was already synced + if !updateCatalog { + // cleanup of older versions of table + _ = d.iterateLocalTables(true, func(name string, meta *tableMeta) error { + return nil + }) + } return nil } d.logger.Debug("syncing from remote") From 1f1ddfab6c7e488e11ecfec2458eb5472e6e8fde Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:20:51 +0530 Subject: [PATCH 60/64] small background task fix --- runtime/pkg/rduckdb/db.go | 1 + runtime/pkg/rduckdb/remote.go | 3 ++- 2 files changed, 3 insertions(+), 1 
deletion(-) diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index c4e404c7ab7..0eb1dca7c9e 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -650,6 +650,7 @@ func (d *db) localDBMonitor() { if err != nil && !errors.Is(err, context.Canceled) { d.logger.Error("localDBMonitor: error in pulling from remote", slog.String("error", err.Error())) } + d.localDirty = false d.writeSem.Release(1) } } diff --git a/runtime/pkg/rduckdb/remote.go b/runtime/pkg/rduckdb/remote.go index c5344040fca..150c255e588 100644 --- a/runtime/pkg/rduckdb/remote.go +++ b/runtime/pkg/rduckdb/remote.go @@ -97,7 +97,7 @@ func (d *db) pullFromRemote(ctx context.Context, updateCatalog bool) error { // check if table is locally present meta, _ := d.tableMeta(table) if meta != nil && meta.Version == remoteMeta.Version { - d.logger.Debug("SyncWithObjectStorage: local table is not present in catalog", slog.String("table", table)) + d.logger.Debug("SyncWithObjectStorage: local table is in sync with remote", slog.String("table", table)) continue } if err := d.initLocalTable(table, remoteMeta.Version); err != nil { @@ -264,6 +264,7 @@ func (d *db) deleteRemote(ctx context.Context, table, version string) error { if table == "" && version != "" { return fmt.Errorf("table must be specified if version is specified") } + d.logger.Debug("deleting remote", slog.String("table", table), slog.String("version", version)) var prefix string if table != "" { if version != "" { From 584bfba56b2a08600ee3b21c21a4808594118573 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:32:04 +0530 Subject: [PATCH 61/64] add backward compatibility --- runtime/drivers/duckdb/duckdb.go | 2 +- .../duckdb/transporter_duckDB_to_duckDB.go | 46 ++++++++++++++++++- .../transporter_mysql_to_duckDB_test.go | 2 +- runtime/pkg/rduckdb/db.go | 9 ++++ 4 files changed, 56 insertions(+), 3 deletions(-) diff --git 
a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 8c37dbd1cfa..ad683edb414 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -193,7 +193,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien if err != nil { // Check for another process currently accessing the DB if strings.Contains(err.Error(), "Could not set lock on file") { - return nil, fmt.Errorf("failed to open database (is Rill already running?): %w", err) + panic(fmt.Errorf("failed to open database (is Rill already running?): %w", err)) } return nil, err } diff --git a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go index 5f9718463f0..bda440fe593 100644 --- a/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go +++ b/runtime/drivers/duckdb/transporter_duckDB_to_duckDB.go @@ -4,9 +4,11 @@ import ( "context" "errors" "fmt" + "net" "net/url" "strings" + "github.com/go-sql-driver/mysql" "github.com/jmoiron/sqlx" "github.com/rilldata/rill/runtime/drivers" "github.com/rilldata/rill/runtime/pkg/duckdbsql" @@ -123,7 +125,8 @@ func (t *duckDBToDuckDB) transferFromExternalDB(ctx context.Context, srcProps *d safeTempTable := safeName(sinkProps.Table + "__temp__") switch t.externalDBType { case "mysql": - initSQL = append(initSQL, "INSTALL 'MYSQL'; LOAD 'MYSQL';", fmt.Sprintf("ATTACH %s AS %s (TYPE mysql, READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) + dsn := rewriteMySQLDSN(srcProps.Database) + initSQL = append(initSQL, "INSTALL 'MYSQL'; LOAD 'MYSQL';", fmt.Sprintf("ATTACH %s AS %s (TYPE mysql, READ_ONLY)", safeSQLString(dsn), safeDBName)) case "postgres": initSQL = append(initSQL, "INSTALL 'POSTGRES'; LOAD 'POSTGRES';", fmt.Sprintf("ATTACH %s AS %s (TYPE postgres, READ_ONLY)", safeSQLString(srcProps.Database), safeDBName)) case "duckdb": @@ -209,3 +212,44 @@ func rewriteLocalPaths(ast *duckdbsql.AST, basePath string, allowHostAccess bool return 
ast.Format() } + +// rewriteMySQLDSN rewrites a MySQL DSN to a format that DuckDB expects. +// DuckDB does not support the URI based DSN format yet. It expects the DSN to be in the form of key=value pairs. +// This function parses the MySQL URI based DSN and converts it to the key=value format. It only converts the common parameters. +// For more advanced parameters like SSL configs, the user should manually convert the DSN to the key=value format. +// If there is an error parsing the DSN, it returns the DSN as is. +func rewriteMySQLDSN(dsn string) string { + cfg, err := mysql.ParseDSN(dsn) + if err != nil { + // If we can't parse the DSN, just return it as is. May be it is already in the form duckdb expects. + return dsn + } + + var sb strings.Builder + + if cfg.User != "" { + sb.WriteString(fmt.Sprintf("user=%s ", cfg.User)) + } + if cfg.Passwd != "" { + sb.WriteString(fmt.Sprintf("password=%s ", cfg.Passwd)) + } + if cfg.DBName != "" { + sb.WriteString(fmt.Sprintf("database=%s ", cfg.DBName)) + } + switch cfg.Net { + case "unix": + sb.WriteString(fmt.Sprintf("socket=%s ", cfg.Addr)) + case "tcp", "tcp6": + host, port, err := net.SplitHostPort(cfg.Addr) + if err != nil { + return dsn + } + sb.WriteString(fmt.Sprintf("host=%s ", host)) + if port != "" { + sb.WriteString(fmt.Sprintf("port=%s ", port)) + } + default: + return dsn + } + return sb.String() +} diff --git a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go index d3708342c85..db8f3575e9a 100644 --- a/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go +++ b/runtime/drivers/duckdb/transporter_mysql_to_duckDB_test.go @@ -96,7 +96,7 @@ func TestMySQLToDuckDBTransfer(t *testing.T) { defer db.Close() t.Run("AllDataTypes", func(t *testing.T) { - allMySQLDataTypesTest(t, db, fmt.Sprintf("host=%s user=myuser password=mypassword port=%v database=mydb", host, port.Int())) + allMySQLDataTypesTest(t, db, dsn) }) } diff --git 
a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 0eb1dca7c9e..13040c192da 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -274,6 +274,15 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { } return nil, err } + + // We want to prevent multiple rill process accessing same db files. + // All the files are accessed in read-only mode so it is possible for multiple rill process to access same db files. + // To prevent this we attach a dummy db file to the main in-memory db in write mode. + // This is required for local rill only but since there is no way to determine it in this package so we do it for all. + _, err = db.dbHandle.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS __ymmud__", safeSQLString(filepath.Join(db.localPath, "main.db")))) + if err != nil { + return nil, err + } go db.localDBMonitor() return db, nil } From 058c21a17da7979e3cdde952135ecf25e687cbb3 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:34:10 +0530 Subject: [PATCH 62/64] review comments --- runtime/controller_test.go | 49 --------------------------- runtime/drivers/duckdb/config.go | 8 ++--- runtime/drivers/duckdb/config_test.go | 8 ++--- runtime/drivers/duckdb/duckdb.go | 24 ++++++++----- runtime/pkg/rduckdb/db.go | 10 +----- 5 files changed, 22 insertions(+), 77 deletions(-) diff --git a/runtime/controller_test.go b/runtime/controller_test.go index 6def61e3082..1fe8c72968e 100644 --- a/runtime/controller_test.go +++ b/runtime/controller_test.go @@ -507,55 +507,6 @@ select 1 testruntime.RequireIsView(t, olap, "bar", true) } -func TestModelCTE(t *testing.T) { - // Create a model that references a source - rt, id := testruntime.NewInstance(t) - testruntime.PutFiles(t, rt, id, map[string]string{ - "/data/foo.csv": `a,b,c,d,e -1,2,3,4,5 -1,2,3,4,5 -1,2,3,4,5 -`, - "/sources/foo.yaml": ` -connector: local_file -path: data/foo.csv -`, - "/models/bar.sql": `SELECT * FROM foo`, - }) 
- testruntime.ReconcileParserAndWait(t, rt, id) - testruntime.RequireReconcileState(t, rt, id, 3, 0, 0) - model, modelRes := newModel("SELECT * FROM foo", "bar", "foo") - testruntime.RequireResource(t, rt, id, modelRes) - testruntime.RequireOLAPTable(t, rt, id, "bar") - - // Update model to have a CTE with alias different from the source - testruntime.PutFiles(t, rt, id, map[string]string{ - "/models/bar.sql": `with CTEAlias as (select * from foo) select * from CTEAlias`, - }) - testruntime.ReconcileParserAndWait(t, rt, id) - testruntime.RequireReconcileState(t, rt, id, 3, 0, 0) - model.Spec.InputProperties = must(structpb.NewStruct(map[string]any{"sql": `with CTEAlias as (select * from foo) select * from CTEAlias`})) - testruntime.RequireResource(t, rt, id, modelRes) - testruntime.RequireOLAPTable(t, rt, id, "bar") - - // TODO :: Not sure how this can be tested - // The query will succeed when creating model (foo is attached in default schema so memory.foo will work) - // But when querying foo is attached in non default schema (memory.main_x.foo) so memory.foo will not work - - // Update model to have a CTE with alias same as the source - testruntime.PutFiles(t, rt, id, map[string]string{ - "/models/bar.sql": `with foo as (select * from memory.foo) select * from foo`, - }) - testruntime.ReconcileParserAndWait(t, rt, id) - testruntime.RequireReconcileState(t, rt, id, 3, 0, 0) - model.Spec.InputProperties = must(structpb.NewStruct(map[string]any{"sql": `with foo as (select * from memory.foo) select * from foo`})) - modelRes.Meta.Refs = []*runtimev1.ResourceName{} - testruntime.RequireResource(t, rt, id, modelRes) - // Refs are removed but the model is valid. - // TODO: is this expected? 
- // testruntime.RequireOLAPTable(t, rt, id, "bar") -} - func TestRename(t *testing.T) { // Rename model A to B and model B to A, verify success // Rename model A to B and source B to A, verify success diff --git a/runtime/drivers/duckdb/config.go b/runtime/drivers/duckdb/config.go index 2ef5733cf73..a952a410538 100644 --- a/runtime/drivers/duckdb/config.go +++ b/runtime/drivers/duckdb/config.go @@ -14,8 +14,6 @@ const ( // config represents the DuckDB driver config type config struct { - // DataDir is the path to directory where duckdb files will be created. - DataDir string `mapstructure:"data_dir"` // PoolSize is the number of concurrent connections and queries allowed PoolSize int `mapstructure:"pool_size"` // AllowHostAccess denotes whether to limit access to the local environment and file system @@ -36,10 +34,8 @@ type config struct { LogQueries bool `mapstructure:"log_queries"` } -func newConfig(cfgMap map[string]any, dataDir string) (*config, error) { - cfg := &config{ - DataDir: dataDir, - } +func newConfig(cfgMap map[string]any) (*config, error) { + cfg := &config{} err := mapstructure.WeakDecode(cfgMap, cfg) if err != nil { return nil, fmt.Errorf("could not decode config: %w", err) diff --git a/runtime/drivers/duckdb/config_test.go b/runtime/drivers/duckdb/config_test.go index 16231ca5f4f..f10fabe65f2 100644 --- a/runtime/drivers/duckdb/config_test.go +++ b/runtime/drivers/duckdb/config_test.go @@ -16,21 +16,21 @@ import ( ) func TestConfig(t *testing.T) { - cfg, err := newConfig(map[string]any{}, "") + cfg, err := newConfig(map[string]any{}) require.NoError(t, err) require.Equal(t, 2, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": "", "cpu": 2}, "") + cfg, err = newConfig(map[string]any{"dsn": "", "cpu": 2}) require.NoError(t, err) require.Equal(t, "2", cfg.readSettings()["threads"]) require.Subset(t, cfg.writeSettings(), map[string]string{"custom_user_agent": "rill"}) require.Equal(t, 2, cfg.PoolSize) - cfg, err = 
newConfig(map[string]any{"pool_size": 10}, "path/to") + cfg, err = newConfig(map[string]any{"pool_size": 10}) require.NoError(t, err) require.Equal(t, 10, cfg.PoolSize) - cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}, "path/to") + cfg, err = newConfig(map[string]any{"dsn": "duck.db", "memory_limit_gb": "8", "cpu": "2"}) require.NoError(t, err) require.Equal(t, "2", cfg.readSettings()["threads"]) require.Equal(t, "", cfg.writeSettings()["threads"]) diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index ad683edb414..c96883b5601 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -141,12 +141,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien logger.Warn("failed to install embedded DuckDB extensions, let DuckDB download them", zap.Error(err)) } - dataDir, err := st.DataDir() - if err != nil { - return nil, err - } - - cfg, err := newConfig(cfgMap, dataDir) + cfg, err := newConfig(cfgMap) if err != nil { return nil, err } @@ -163,6 +158,7 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien config: cfg, logger: logger, activity: ac, + storage: st, metaSem: semaphore.NewWeighted(1), olapSem: priorityqueue.NewSemaphore(olapSemSize), longRunningSem: semaphore.NewWeighted(1), // Currently hard-coded to 1 @@ -191,9 +187,12 @@ func (d Driver) Open(instanceID string, cfgMap map[string]any, st *storage.Clien // Open the DB err = c.reopenDB(context.Background()) if err != nil { + if remote != nil { + _ = remote.Close() + } // Check for another process currently accessing the DB if strings.Contains(err.Error(), "Could not set lock on file") { - panic(fmt.Errorf("failed to open database (is Rill already running?): %w", err)) + return nil, fmt.Errorf("failed to open database (is Rill already running?): %w", err) } return nil, err } @@ -269,6 +268,7 @@ type connection struct { config *config logger 
*zap.Logger activity *activity.Client + storage *storage.Client remote *blob.Bucket // This driver may issue both OLAP and "meta" queries (like catalog info) against DuckDB. // Meta queries are usually fast, but OLAP queries may take a long time. To enable predictable parallel performance, @@ -330,6 +330,9 @@ func (c *connection) Config() map[string]any { func (c *connection) Close() error { c.cancel() _ = c.registration.Unregister() + if c.remote != nil { + _ = c.remote.Close() + } if c.db != nil { return c.db.Close() } @@ -493,12 +496,15 @@ func (c *connection) reopenDB(ctx context.Context) error { } // Create new DB + dataDir, err := c.storage.DataDir() + if err != nil { + return err + } logger := slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ AddSource: true, })) - var err error c.db, err = rduckdb.NewDB(ctx, &rduckdb.DBOptions{ - LocalPath: c.config.DataDir, + LocalPath: dataDir, Remote: c.remote, ReadSettings: c.config.readSettings(), WriteSettings: c.config.writeSettings(), diff --git a/runtime/pkg/rduckdb/db.go b/runtime/pkg/rduckdb/db.go index 13040c192da..ceaf975d488 100644 --- a/runtime/pkg/rduckdb/db.go +++ b/runtime/pkg/rduckdb/db.go @@ -266,7 +266,7 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { opts.Logger, ) - db.dbHandle, err = db.openDBAndAttach(ctx, "", "", true) + db.dbHandle, err = db.openDBAndAttach(ctx, filepath.Join(db.localPath, "main.db"), "", true) if err != nil { if strings.Contains(err.Error(), "Symbol not found") { fmt.Printf("Your version of macOS is not supported. Please upgrade to the latest major release of macOS. See this link for details: https://support.apple.com/en-in/macos/upgrade") @@ -275,14 +275,6 @@ func NewDB(ctx context.Context, opts *DBOptions) (DB, error) { return nil, err } - // We want to prevent multiple rill process accessing same db files. - // All the files are accessed in read-only mode so it is possible for multiple rill process to access same db files. 
- // To prevent this we attach a dummy db file to the main in-memory db in write mode. - // This is required for local rill only but since there is no way to determine it in this package so we do it for all. - _, err = db.dbHandle.ExecContext(ctx, fmt.Sprintf("ATTACH %s AS __ymmud__", safeSQLString(filepath.Join(db.localPath, "main.db")))) - if err != nil { - return nil, err - } go db.localDBMonitor() return db, nil } From 56f4e0bac17730d568b41e72cffc7e8a79723c3f Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:10:05 +0530 Subject: [PATCH 63/64] custom temp and secret directory --- runtime/drivers/duckdb/duckdb.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index c96883b5601..bcbcfa133aa 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -6,6 +6,7 @@ import ( "fmt" "log/slog" "net/url" + "path/filepath" "strings" "sync" "time" @@ -484,10 +485,23 @@ func (c *connection) reopenDB(ctx context.Context) error { "SET old_implicit_casting = true", // Implicit Cast to VARCHAR ) + dataDir, err := c.storage.DataDir() + if err != nil { + return err + } + // We want to set preserve_insertion_order=false in hosted environments only (where source data is never viewed directly). Setting it reduces batch data ingestion time by ~40%. // Hack: Using AllowHostAccess as a proxy indicator for a hosted environment. 
if !c.config.AllowHostAccess { - bootQueries = append(bootQueries, "SET preserve_insertion_order TO false") + tempDir, err := c.storage.TempDir() + if err != nil { + return err + } + bootQueries = append(bootQueries, + "SET preserve_insertion_order TO false", + fmt.Sprintf("SET temp_directory = %s", safeSQLString(tempDir)), + fmt.Sprintf("SET secret_directory = %s", safeSQLString(filepath.Join(dataDir, ".duckdb", "secrets"))), + ) } // Add init SQL if provided @@ -496,10 +510,6 @@ func (c *connection) reopenDB(ctx context.Context) error { } // Create new DB - dataDir, err := c.storage.DataDir() - if err != nil { - return err - } logger := slog.New(zapslog.NewHandler(c.logger.Core(), &zapslog.HandlerOptions{ AddSource: true, })) From f45c3796d2183cb1ea48b49ddcbfcd47a7e93b5d Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 13 Dec 2024 19:22:18 +0530 Subject: [PATCH 64/64] remove custom temp directory --- runtime/drivers/duckdb/duckdb.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index bcbcfa133aa..edb15421052 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -493,13 +493,8 @@ func (c *connection) reopenDB(ctx context.Context) error { // We want to set preserve_insertion_order=false in hosted environments only (where source data is never viewed directly). Setting it reduces batch data ingestion time by ~40%. // Hack: Using AllowHostAccess as a proxy indicator for a hosted environment. if !c.config.AllowHostAccess { - tempDir, err := c.storage.TempDir() - if err != nil { - return err - } bootQueries = append(bootQueries, "SET preserve_insertion_order TO false", - fmt.Sprintf("SET temp_directory = %s", safeSQLString(tempDir)), fmt.Sprintf("SET secret_directory = %s", safeSQLString(filepath.Join(dataDir, ".duckdb", "secrets"))), ) }