diff --git a/examples/image_annotate/definitions/all_imagesets.sqlx b/examples/image_annotate/definitions/all_imagesets.sqlx
new file mode 100644
index 000000000..398a3996f
--- /dev/null
+++ b/examples/image_annotate/definitions/all_imagesets.sqlx
@@ -0,0 +1,14 @@
+config {
+  type: "operations",
+  hasOutput: true
+}
+
+-- Object table exposing every image matched by constants.IMAGE_BUCKET.
+-- IF NOT EXISTS keeps repeated runs from failing once the table is in place.
+CREATE EXTERNAL TABLE IF NOT EXISTS
+  ${self()}
+WITH CONNECTION `us.gcs`
+OPTIONS (
+  object_metadata = 'SIMPLE',
+  uris = ['${constants.IMAGE_BUCKET}']
+);
diff --git a/examples/image_annotate/definitions/annotated_imagesets.sqlx b/examples/image_annotate/definitions/annotated_imagesets.sqlx
new file mode 100644
index 000000000..a808d0551
--- /dev/null
+++ b/examples/image_annotate/definitions/annotated_imagesets.sqlx
@@ -0,0 +1,32 @@
+config {
+  type: "incremental"
+}
+
+-- Appends annotation results from object_table, skipping rows whose status
+-- reports a retryable error so that they are picked up again by a later run.
+SELECT
+  *
+FROM
+  ${ref("object_table")}
+WHERE
+  -- RETRYABLE_ERROR is only the message prefix; the trailing % makes LIKE a
+  -- prefix match instead of an exact comparison.
+  ml_annotate_image_status NOT LIKE "${constants.RETRYABLE_ERROR}%"
+  -- On incremental runs the extra filter is wrapped in parentheses because
+  -- AND binds tighter than OR; without them the OR branch would bypass the
+  -- status check above.
+  ${
+    when(incremental(), `AND (
+      ${constants.MERGE_COLUMN} NOT IN (
+        SELECT
+          ${constants.MERGE_COLUMN}
+        FROM
+          ${self()})
+      OR updated > (
+        SELECT
+          MAX(updated)
+        FROM
+          ${self()}))`)
+  }
+LIMIT
+  ${constants.BATCH_SIZE}
diff --git a/examples/image_annotate/definitions/annotated_imagesets_errors.sqlx b/examples/image_annotate/definitions/annotated_imagesets_errors.sqlx
new file mode 100644
index 000000000..fbda6909d
--- /dev/null
+++ b/examples/image_annotate/definitions/annotated_imagesets_errors.sqlx
@@ -0,0 +1,12 @@
+config {
+  type: "assertion"
+}
+
+-- Assertion query: returns the rows of object_table whose status starts with
+-- the RETRYABLE_ERROR prefix (the trailing % turns LIKE into a prefix match).
+SELECT
+  *
+FROM
+  ${ref("object_table")}
+WHERE
+  ml_annotate_image_status LIKE "${constants.RETRYABLE_ERROR}%"
diff --git a/examples/image_annotate/definitions/object_table.sqlx b/examples/image_annotate/definitions/object_table.sqlx
new file mode 100644
index 000000000..b9b2c9b67
--- /dev/null
+++ b/examples/image_annotate/definitions/object_table.sqlx
@@ -0,0 +1,14 @@
+config {
+  type: "table",
+}
+
+-- Calls ML.ANNOTATE_IMAGE with the LABEL_DETECTION feature, using vision_model
+-- as the model and the unannotated_imagesets view as the input table.
+SELECT
+  *
+FROM
+  ML.ANNOTATE_IMAGE(
+    MODEL ${ref("vision_model")},
+    TABLE ${ref("unannotated_imagesets")},
+    STRUCT(['LABEL_DETECTION'] AS vision_features)
+  )
diff --git a/examples/image_annotate/definitions/unannotated_imagesets.sqlx b/examples/image_annotate/definitions/unannotated_imagesets.sqlx
new file mode 100644
index 000000000..fd9d35c65
--- /dev/null
+++ b/examples/image_annotate/definitions/unannotated_imagesets.sqlx
@@ -0,0 +1,23 @@
+config {
+  type: "view"
+}
+
+js {
+    // ref() is deliberately not used: annotated_imagesets is built from
+    // object_table, which reads this view, so a ref would close a cycle.
+    const ANNOTATED_IMAGESETS = `${ctx.schema()}` + ".annotated_imagesets"
+}
+
+-- Anti-join: keep the images that have no row in annotated_imagesets yet.
+-- Only the columns of all_imagesets are selected; a bare * over the join
+-- would emit uri from both sides.
+SELECT
+  all_images.*
+FROM
+  ${ref("all_imagesets")} AS all_images
+LEFT JOIN
+  `${ANNOTATED_IMAGESETS}` AS annotated
+ON
+  all_images.uri = annotated.uri
+WHERE
+  annotated.uri IS NULL
diff --git a/examples/image_annotate/definitions/vision_model.sqlx b/examples/image_annotate/definitions/vision_model.sqlx
new file mode 100644
index 000000000..b3c28f3bc
--- /dev/null
+++ b/examples/image_annotate/definitions/vision_model.sqlx
@@ -0,0 +1,10 @@
+config {
+  type: "operations",
+  hasOutput: true
+}
+
+-- Remote model for connecting to the Vision API.
+CREATE MODEL IF NOT EXISTS
+  ${self()}
+REMOTE WITH CONNECTION `us.gcs`
+OPTIONS (remote_service_type = 'cloud_ai_vision_v1');
diff --git a/examples/image_annotate/image-annotation-dag.png b/examples/image_annotate/image-annotation-dag.png
new file mode 100644
index 000000000..0df6719dd
Binary files /dev/null and b/examples/image_annotate/image-annotation-dag.png differ
diff --git a/examples/image_annotate/includes/constants.js b/examples/image_annotate/includes/constants.js
new file mode 100644
index 000000000..dc63d9f70
--- /dev/null
+++ b/examples/image_annotate/includes/constants.js
@@ -0,0 +1,10 @@
+module.exports = {
+  // Maximum number of rows annotated_imagesets selects per run.
+  BATCH_SIZE: 100,
+  // Column used to detect rows already present in annotated_imagesets.
+  MERGE_COLUMN: "uri",
+  // Prefix of ml_annotate_image_status values treated as retryable failures.
+  RETRYABLE_ERROR: "A retryable error occurred:",
+  // GCS URI pattern of the images exposed by the all_imagesets object table.
+  IMAGE_BUCKET: "gs://example-bucket/imagesets/*"
+};
diff --git a/examples/image_annotate/readme.md b/examples/image_annotate/readme.md
new file mode 100644
index 000000000..119dc2b75
--- /dev/null
+++ b/examples/image_annotate/readme.md
@@ -0,0 +1,9 @@
+# Image Annotate
+
+Use BigQuery ML to annotate images from a GCS bucket.
+
+Images whose annotation failed are filtered out and retried in subsequent runs.
+
+![Image Annotation DAG](./image-annotation-dag.png?raw=true "Image Annotation DAG")
+
+
diff --git a/examples/image_annotate/workflow_settings.yaml b/examples/image_annotate/workflow_settings.yaml
new file mode 100644
index 000000000..f80dc4117
--- /dev/null
+++ b/examples/image_annotate/workflow_settings.yaml
@@ -0,0 +1,4 @@
+defaultProject: dataform-demos
+defaultLocation: US
+defaultDataset: sample
+defaultAssertionDataset: sample_assertions