Skip to content

Commit

Permalink
Checkpoint.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 committed Dec 16, 2024
1 parent a725cd4 commit 4e0497b
Showing 1 changed file with 24 additions and 1 deletion.
25 changes: 24 additions & 1 deletion clients/databricks/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log/slog"
"os"
"path/filepath"
"strings"

_ "github.com/databricks/databricks-sql-go"
"github.com/databricks/databricks-sql-go/driverctx"
Expand Down Expand Up @@ -120,9 +121,31 @@ func (s Store) PrepareTemporaryTable(ctx context.Context, tableData *optimizatio
}
}()

var ordinalColumns []string
for idx, column := range tableData.ReadOnlyInMemoryCols().ValidColumns() {
ordinalColumn := fmt.Sprintf("_c%d", idx)
switch column.KindDetails.Kind {
case typing.Array.Kind:
ordinalColumn = fmt.Sprintf(`parse_json(%s)`, ordinalColumn)
}

ordinalColumns = append(ordinalColumns, ordinalColumn)
}

// Copy file from DBFS -> table via COPY INTO, ref: https://docs.databricks.com/en/sql/language-manual/delta-copy-into.html
// We'll need \\\\N here because we need to string escape.
copyCommand := fmt.Sprintf(`COPY INTO %s BY POSITION FROM '%s' FILEFORMAT = CSV FORMAT_OPTIONS ('escape' = '"', 'delimiter' = '\t', 'header' = 'false', 'nullValue' = '\\\\N')`, tempTableID.FullyQualifiedName(), file.DBFSFilePath())
copyCommand := fmt.Sprintf(`
COPY INTO %s BY POSITION FROM (SELECT %s FROM '%s') FILEFORMAT = CSV FORMAT_OPTIONS ('escape' = '"', 'delimiter' = '\t', 'header' = 'false', 'nullValue' = '\\\\N')`,
// COPY INTO
tempTableID.FullyQualifiedName(),
// SELECT columns
strings.Join(ordinalColumns, ", "),
// FROM
file.DBFSFilePath(),
)

fmt.Println("copyCommand", copyCommand)

if _, err = s.ExecContext(ctx, copyCommand); err != nil {
return fmt.Errorf("failed to run COPY INTO for temporary table: %w", err)
}
Expand Down

0 comments on commit 4e0497b

Please sign in to comment.