diff --git a/examples/Full_list_of_images.txt b/examples/Full_list_of_images.txt new file mode 100644 index 0000000..8f8b049 --- /dev/null +++ b/examples/Full_list_of_images.txt @@ -0,0 +1,9 @@ +gs://vision-api-pbo2/images/demo-img.jpg +gs://vision-api-pbo2/images/croissant-CW88ysx7Amk.jpg +gs://vision-api-pbo2/images/curology-A9CUBfntOFQ.jpg +gs://vision-api-pbo2/images/fallon-michael-8LKQfBumjMo.jpg +gs://vision-api-pbo2/images/keyur-nandaniya-3isjMlIlj4A.jpg +gs://vision-api-pbo2/images/keyur-nandaniya-oEgiJNbYw8w.jpg +gs://vision-api-pbo2/images/thought-catalog-EMX1eJ1BcgU.jpg +gs://vision-api-pbo2/images/wesley-tingey-0are122T4ho.jpg +gs://vision-api-pbo2/images/20200317_191230.jpg diff --git a/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Gif_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Gif_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..fa9a3ec --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Gif_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Text_Extractor_Gif_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + 
"format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_GIF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "mimeType": "image/gif", + "batchSize": "20", + "sourcePath": "${GS_GIF_SOURCE_PATH}", + "destinationPath": "${GS_GIF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git 
a/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Pdf_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Pdf_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..199de09 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Pdf_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Text_Extractor_Pdf_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PDF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + 
"version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "mimeType": "application/pdf", + "batchSize": "20", + "sourcePath": "${GS_PDF_SOURCE_PATH}", + "destinationPath": "${GS_PDF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Tiff_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Tiff_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..1fe4358 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineDocumentExtractor/Offline_Text_Extractor_Tiff_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Text_Extractor_Tiff_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + 
"connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_TIFF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + 
"serviceFilePath": "auto-detect", + "mimeType": "image/tiff", + "batchSize": "20", + "sourcePath": "${GS_TIFF_SOURCE_PATH}", + "destinationPath": "${GS_TIFF_RESULT_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..4240c20 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Crop_Hints_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + 
"referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_CROP_HINTS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Crop Hints", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_IMAGE_CROP_HINTS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Content_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Content_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..63ac35f --- /dev/null 
+++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Content_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Explicit_Content_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_EXPLICIT_CONTENT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Explicit Content", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_EXPLICIT_CONTENT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Face_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Face_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..c4a430c --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Face_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Face_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + 
"postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_FACE_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Face", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": 
"${GS_PATH_TO_IMAGE_FACE_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..b648b5d --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Image_Properties_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_IMAGE_PROPERTIES_RESULTS}" + } + }, + 
"outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Image Properties", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_IMAGE_IMAGE_PROPERTIES_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Labels_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Labels_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..6ace837 --- /dev/null +++ 
b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Labels_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Labels_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_LABELS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Labels", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_IMAGE_LABELS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..7b9e722 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Landmarks_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": 
[], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_LANDMARKS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Landmarks", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + 
"destinationPath": "${GS_PATH_TO_IMAGE_LANDMARKS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Logos_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Logos_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..f4ab5b4 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Logos_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Logos_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_LOGOS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": 
"etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Logos", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_IMAGE_LOGOS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Object_Localization_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Object_Localization_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..bd554c4 --- /dev/null +++ 
b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Object_Localization_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Object_Localization_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_OBJECT_LOCALIZATION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Object Localization", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_IMAGE_OBJECT_LOCALIZATION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Text_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Text_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..3a65302 --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Text_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,124 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Text_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + 
"comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_IMAGE_TEXT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Text", + "batchSize": "20", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + 
"destinationPath": "${GS_PATH_TO_IMAGE_TEXT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detection_GCS_Trash-cdap-data-pipeline.json b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detection_GCS_Trash-cdap-data-pipeline.json new file mode 100644 index 0000000..8ba6dac --- /dev/null +++ b/examples/cloud-data-fusion/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detection_GCS_Trash-cdap-data-pipeline.json @@ -0,0 +1,125 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM", + "label": "Data Pipeline - Batch" + }, + "description": "", + "name": "Offline_Image_Extractor_Web_Detection_GCS_Trash", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Trash" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "GetResultsFromGS", + "path": "${GS_PATH_TO_WEB_DETECTION_RESULTS}" + } + }, + "outputSchema": 
[ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Trash", + "plugin": { + "name": "Trash", + "type": "batchsink", + "label": "Trash", + "artifact": { + "name": "trash-plugin", + "version": "1.2.0", + "scope": "USER" + }, + "properties": { + "referenceName": "Trash" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "features": "Web Detection", + "batchSize": "20", + "includeGeoResults": "on", + "sourcePath": "${GS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GS_PATH_TO_WEB_DETECTION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..d076a5d --- /dev/null +++ 
b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Crop_Hints_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Crop Hints", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": 
"output", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": 
"SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",
\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_CROP_HINTS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"n
ame\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}}}]}" + } + ] + } + ], + 
"schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..1e695ff --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Explicit_Content_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Explicit Content", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + 
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbol
s\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_OBJECT_EXPLICIT_CONTENT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Face_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Face_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..86b5c39 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Face_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Face_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, 
+ "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Face", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\
",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-ver
tex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_FACE_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"t
ype\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\
",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Handwriting_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Handwriting_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..cccd809 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Handwriting_GCS-cdap-data-pipeline.json @@ -0,0 +1,878 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Handwriting_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + 
"scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Handwriting", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"feature\",\"type\":{\"name\":\"fullTextAnnotation\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array
\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"al
iases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\
":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": { + "type": "record", + "name": "etlSchemaBody", + "fields": [ + { + "name": "offset", + "type": "long" + }, + { + "name": "body", + "type": "string" + }, + { + "name": "output", + "type": { + "type": "array", + "items": { + "name": "page-record", + "type": "record", + "fields": [ + { + "name": "page", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "feature", + "type": { + "name": "fullTextAnnotation", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "pages", + "type": { + "type": "array", + "items": { + "name": "document-text-page-record", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "width", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "height", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "blocks", + "type": { + "type": "array", + "items": { + "name": "document-text-page-block-record", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "blockType", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "paragraphs", + "type": { + "type": "array", + "items": { + "name": 
"document-text-page-paragraph-record", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "words", + "type": { + "type": "array", + "items": { + "name": "document-text-page-word-record", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "symbols", + "type": { + "type": "array", + "items": { + "name": "document-text-page-symbol-record", + "type": "record", + "fields": [ + { + "name": "text", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedLanguages", + "type": [ + { + "type": "array", + "items": { + "name": "textSymbol-detectedLanguages", + "type": "record", + "fields": [ + { + "name": "languageCode", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedBreak", + "type": [ + "string", + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "boundingBox", + "type": { + "type": "array", + "items": { + "name": "textSymbol-boundingBox", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedLanguages", + "type": [ + { + "type": "array", + "items": { + "name": 
"textWord-detectedLanguage", + "type": "record", + "fields": [ + { + "name": "languageCode", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedBreak", + "type": [ + "string", + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "boundingBox", + "type": { + "type": "array", + "items": { + "name": "textWord-boundingBox", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedLanguages", + "type": [ + { + "type": "array", + "items": { + "name": "textParagraph-detectedLanguages", + "type": "record", + "fields": [ + { + "name": "languageCode", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedBreak", + "type": [ + "string", + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "boundingBox", + "type": { + "type": "array", + "items": { + "name": "textParagraph-boundingBox", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedLanguages", + "type": [ + { + "type": "array", + "items": { + 
"name": "textBlock-detectedLanguages", + "type": "record", + "fields": [ + { + "name": "languageCode", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedBreak", + "type": [ + "string", + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "boundingBox", + "type": { + "type": "array", + "items": { + "name": "textBlock-boundingBox", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "property", + "type": [ + { + "type": "array", + "items": { + "name": "textPage-property", + "type": "record", + "fields": [ + { + "name": "languageCode", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "confidence", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "null" + ], + "order": "ascending", + "aliases": [] + }, + { + "name": "detectedBreak", + "type": [ + "string", + "null" + ], + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + } + } + ] + } + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": 
"0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-annotation-component-record\",\"type\":\"record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"panAngle\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"tiltAngle\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectionConfidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"landmarkingConfidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"angerLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"joyLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"surpriseLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blurredLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"underExposedLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"sorrowLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"headwearLikelihood\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"bounding-vertex\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"fd-bounding-vertex\",\"type
\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_HANDWRITING_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": { + "type": "record", + "name": "etlSchemaBody", + "fields": [ + { + "name": "offset", + "type": "long" + }, + { + "name": "body", + "type": "string" + }, + { + "name": "output", + "type": { + "type": "array", + "items": { + "name": "face-annotation-component-record", 
+ "type": "record", + "fields": [ + { + "name": "rollAngle", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "panAngle", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "tiltAngle", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "detectionConfidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "landmarkingConfidence", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "angerLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "joyLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "surpriseLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "blurredLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "underExposedLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "sorrowLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "headwearLikelihood", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "boundingPoly", + "type": { + "type": "array", + "items": { + "name": "bounding-vertex", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "fdBoundingPoly", + "type": { + "type": "array", + "items": { + "name": "fd-bounding-vertex", + "type": "record", + "fields": [ + { + "name": "x", + "type": "int", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "int", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "landmarks", + "type": { 
+ "type": "array", + "items": { + "name": "face-landmark-record", + "type": "record", + "fields": [ + { + "name": "type", + "type": { + "type": "enum", + "symbols": [ + "UNKNOWN_LANDMARK", + "LEFT_EYE", + "RIGHT_EYE", + "LEFT_OF_LEFT_EYEBROW", + "RIGHT_OF_LEFT_EYEBROW", + "LEFT_OF_RIGHT_EYEBROW", + "RIGHT_OF_RIGHT_EYEBROW", + "MIDPOINT_BETWEEN_EYES", + "NOSE_TIP", + "UPPER_LIP", + "LOWER_LIP", + "MOUTH_LEFT", + "MOUTH_RIGHT", + "MOUTH_CENTER", + "NOSE_BOTTOM_RIGHT", + "NOSE_BOTTOM_LEFT", + "NOSE_BOTTOM_CENTER", + "LEFT_EYE_TOP_BOUNDARY", + "LEFT_EYE_RIGHT_CORNER", + "LEFT_EYE_BOTTOM_BOUNDARY", + "LEFT_EYE_LEFT_CORNER", + "RIGHT_EYE_TOP_BOUNDARY", + "RIGHT_EYE_RIGHT_CORNER", + "RIGHT_EYE_BOTTOM_BOUNDARY", + "RIGHT_EYE_LEFT_CORNER", + "LEFT_EYEBROW_UPPER_MIDPOINT", + "RIGHT_EYEBROW_UPPER_MIDPOINT", + "LEFT_EAR_TRAGION", + "RIGHT_EAR_TRAGION", + "LEFT_EYE_PUPIL", + "RIGHT_EYE_PUPIL", + "FOREHEAD_GLABELLA", + "CHIN_GNATHION", + "CHIN_LEFT_GONION", + "CHIN_RIGHT_GONION", + "UNRECOGNIZED" + ], + "aliases": [] + }, + "order": "ascending", + "aliases": [] + }, + { + "name": "x", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "y", + "type": "float", + "order": "ascending", + "aliases": [] + }, + { + "name": "z", + "type": "float", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + }, + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + } + } + ] + } + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"feature\",\"type\":{\"name\":\"fullTextAnnotation\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array
\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"al
iases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\
":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Image_Properties_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Image_Properties_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..8dceb15 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Image_Properties_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Image_Properties_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": 
"false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Image Properties", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_PROPERTIES_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":
\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git 
a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Labels_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Labels_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..abd8e33 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Labels_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Labels_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + 
"name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Labels", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"label-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"label-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LABELS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\
"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"label-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Landmarks_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Landmarks_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..a2794a0 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Landmarks_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Landmarks_GCS", + "config": 
{ + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Landmarks", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LANDMARKS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\
",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Logos_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Logos_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..a341bbb --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Logos_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": 
"cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Logos_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Logos", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LOGOS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"
type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Object_Localization_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Object_Localization_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..443b818 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Object_Localization_GCS-cdap-data-pipeline.json @@ 
-0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Object_Localization_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Object Localization", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + 
"plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_T
IP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_OBJECT_LOCALIZATION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdB
oundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Text_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Text_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..c63ae50 --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Text_GCS-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Text_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + 
"processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Text", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\
",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-ver
tex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_TEXT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"t
ype\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\"},{\"name\":\"feature\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\
",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Web_Detection_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Web_Detection_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..44ea19e --- /dev/null +++ b/examples/cloud-data-fusion/transform/DocumentExtractor/GCS_Document_Extractor_Web_Detection_GCS-cdap-data-pipeline.json @@ -0,0 +1,141 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Document_Extractor_Web_Detection_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", 
+ "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_DOCUMENT_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Web Detection", + "mimeType": "application/pdf", + "pages": "1,2,3,4,5", + "includeGeoResults": "true", + "pathField": "body", + "outputField": "output", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"feature\",\"type\":{\"name\":\"web-detection-record\",\"type\":\"record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"web-entity-record\",\"type\":\"record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"description\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-with-matching-images-record\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pageTitle\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":
{\"type\":\"array\",\"items\":{\"name\":\"webPage-fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"webPage-pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vsiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"best-guess-label-record\",\"type\":\"record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"feature\",\"type\":{\"name\":\"web-detection-record\",\"type\":\"record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"web-entity-record\",\"type\":\"record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"description\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-with-matching-images-record\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pageTitle\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":
{\"type\":\"array\",\"items\":{\"name\":\"webPage-fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"webPage-pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vsiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"best-guess-label-record\",\"type\":\"record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + 
"format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LE
FT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_OBJECT_WEB_DETECTION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array
\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Document Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"page\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"feature\",\"type\":{\"name\":\"web-detection-record\",\"type\":\"record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"web-entity-record\",\"type\":\"record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"description\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"page-with-matching-images-record\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pageTitle\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"fullMatchingImages\",\"type\":
{\"type\":\"array\",\"items\":{\"name\":\"webPage-fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"webPage-pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vsiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"best-guess-label-record\",\"type\":\"record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..cd67d37 
--- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Crop_Hints_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Crop_Hints_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Crop Hints", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_CROP_HINTS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":
\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"crop-hint-annotation-component-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"cropHintAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"confidence\",\"type\":\"float\"},{\"name\":\"importanceFraction\",\"type\":\"float\"}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json 
b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..1f69f62 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Explicit_Content_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Explicit_Content_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": 
"auto-detect", + "project": "auto-detect", + "features": "Explicit Content", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_EXPLICIT_CONTENT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":
\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"safe-search-annotation-record\",\"fields\":[{\"name\":\"adult\",\"type\":\"string\"},{\"name\":\"spoof\",\"type\":\"string\"},{\"name\":\"medical\",\"type\":\"string\"},{\"name\":\"violence\",\"type\":\"string\"},{\"name\":\"racy\",\"type\":\"string\"}]}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Faces_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Faces_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..3f6a23b --- /dev/null +++ 
b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Faces_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Faces_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Face", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\"
,\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_FACE_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type
\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Handwriting_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Handwriting_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..81a2735 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Handwriting_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Handwriting_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Handwriting", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"name\":\"fullTextAnnotation\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"arra
y\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"
type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\
"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"fullTextAnnotation\",\"fields\":[{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascendi
ng\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\
"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"alias
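es\":[]}],\"aliases\":[]}}}]}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"fullTextAnnotation\",\"fields\":[{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_HANDWRITING_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"fullTextAnnotation\",\"fields\":[{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"fullTextAnnotation\",\"fields\":[{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"pages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"width\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"height\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blocks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-block-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"blockType\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"paragraphs\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-paragraph-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"words\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-word-record\"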
,\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"symbols\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"document-text-page-symbol-record\",\"type\":\"record\",\"fields\":[{\"name\":\"text\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textSymbol-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textWord-detectedLanguage\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textWord-boundingBox\",\"type\":\"record\",\"fields\":[{\"nam
e\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textParagraph-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedLanguages\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textBlock-detectedLanguages\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"boundingBox\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"textBlock-boundingBox\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"order\":\"ascend
ing\",\"aliases\":[]},{\"name\":\"property\",\"type\":[{\"type\":\"array\",\"items\":{\"name\":\"textPage-property\",\"type\":\"record\",\"fields\":[{\"name\":\"languageCode\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"confidence\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}},\"null\"],\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"detectedBreak\",\"type\":[\"string\",\"null\"],\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Image_Properties_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Image_Properties_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..dc9c7c0 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Image_Properties_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Image_Properties_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": 
"auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Image Properties", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_PROPERTIES_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":
\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Labels_GCS-cdap-data-pipeline.json 
b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Labels_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..781b727 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Labels_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Labels_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": 
"auto-detect", + "features": "Labels", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LABELS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\
"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"dominant-colors-annotation-component-record\",\"fields\":[{\"name\":\"pixelFraction\",\"type\":\"float\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"red\",\"type\":\"float\"},{\"name\":\"green\",\"type\":\"float\"},{\"name\":\"blue\",\"type\":\"float\"},{\"name\":\"alpha\",\"type\":\"float\"}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Landmarks_GCS-cdap-data-pipeline.json 
b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Landmarks_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..00325dd --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Landmarks_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Landmarks_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + 
"project": "auto-detect", + "features": "Landmarks", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LANDMARKS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\
",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Logos_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Logos_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..8e7a103 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Logos_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Logos_GCS", + "config": { + 
"resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Logos", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_LOGOS_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"
type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"landmark-entity-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"locale\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"topicality\",\"type\":\"float\"},{\"name\":\"locations\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"location-info-record\",\"fields\":[{\"name\":\"latitude\",\"type\":\"double\"},{\"name\":\"longitude\",\"type\":\"double\"}]}}},{\"name\":\"position\",\"type\":[{\"type\":\"record\",\"name\":\"entityAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]},\"null\"]},{\"name\":\"properties\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"property-record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"},{\"name\":\"uint64Value\",\"type\":\"long\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Object_Localization_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Object_Localization_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..299fc38 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Object_Localization_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": 
"GCS_Image_Extractor_Object_Localization_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Object Localization", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + 
"serviceFilePath": "auto-detect", + "location": "us", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_
EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_OBJECT_LOCALIZATION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\
":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localized-object-annotation-component-record\",\"fields\":[{\"name\":\"mid\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"},{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"localizedObjectAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff 
--git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Product_Search_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Product_Search_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..ee55274 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Product_Search_GCS-cdap-data-pipeline.json @@ -0,0 +1,336 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Product_Search_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + 
"name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Product Search", + "productCategories": "Homegoods", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"productSearch-resultRecord\",\"fields\":[{\"name\":\"indexTime\",\"type\":[\"string\",\"null\"]},{\"name\":\"results\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"productSearch-result\",\"fields\":[{\"name\":\"image\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"product\",\"type\":{\"type\":\"record\",\"name\":\"productSearch-resultresult-product\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"displayName\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"productCategory\",\"type\":\"string\"},{\"name\":\"productLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"productSearch-resultresult-product-keyValue\",\"fields\":[{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"string\"}]}}}]}}]}}},{\"name\":\"productGroupedResults\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"groupedResult-record\",\"fields\":[{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"groupedResult-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"results\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"groupedResult-results\",\"fields\":[{\"name\":\"image\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"product\",\"type\":{\"type\":\"record\",\"name\":\"groupedResult-resultsresult-product\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"
},{\"name\":\"displayName\",\"type\":\"string\"},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"productCategory\",\"type\":\"string\"},{\"name\":\"productLabels\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"groupedResult-resultsresult-product-keyValue\",\"type\":\"record\",\"fields\":[{\"name\":\"key\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"value\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}]}}}]}}}]}}]}", + "pathField": "body", + "outputField": "output", + "productSet": "${PRODUCT_SET_RESOURCE_NAME}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": { + "type": "record", + "name": "etlSchemaBody", + "fields": [ + { + "name": "offset", + "type": "long" + }, + { + "name": "body", + "type": "string" + }, + { + "name": "output", + "type": { + "type": "record", + "name": "productSearch-resultRecord", + "fields": [ + { + "name": "indexTime", + "type": [ + "string", + "null" + ] + }, + { + "name": "results", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "productSearch-result", + "fields": [ + { + "name": "image", + "type": "string" + }, + { + "name": "score", + "type": "float" + }, + { + "name": "product", + "type": { + "type": "record", + "name": "productSearch-resultresult-product", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "displayName", + "type": "string" + }, + { + "name": "description", + "type": "string" + }, + { + "name": "productCategory", + "type": "string" + }, + { + "name": "productLabels", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "productSearch-resultresult-product-keyValue", + "fields": [ + { + "name": "key", + "type": "string" + }, + { + "name": "value", + "type": "string" + } + ] + } + } + } + ] + } + } + ] + } + } + }, + { + "name": "productGroupedResults", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "groupedResult-record", 
+ "fields": [ + { + "name": "position", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "groupedResult-position", + "fields": [ + { + "name": "x", + "type": "int" + }, + { + "name": "y", + "type": "int" + } + ] + } + } + }, + { + "name": "results", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "groupedResult-results", + "fields": [ + { + "name": "image", + "type": "string" + }, + { + "name": "score", + "type": "float" + }, + { + "name": "product", + "type": { + "type": "record", + "name": "groupedResult-resultsresult-product", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "displayName", + "type": "string" + }, + { + "name": "description", + "type": "string" + }, + { + "name": "productCategory", + "type": "string" + }, + { + "name": "productLabels", + "type": { + "type": "array", + "items": { + "name": "groupedResult-resultsresult-product-keyValue", + "type": "record", + "fields": [ + { + "name": "key", + "type": "string", + "order": "ascending", + "aliases": [] + }, + { + "name": "value", + "type": "string", + "order": "ascending", + "aliases": [] + } + ], + "aliases": [] + } + } + } + ] + } + } + ] + } + } + } + ] + } + } + } + ] + } + } + ] + } + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_B
OUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_PRODUCT_SEARCH_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fd-bounding-vertex\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-landmark-record\",\"fields\":[{\"name\":\"
type\",\"type\":{\"type\":\"enum\",\"symbols\":[\"UNKNOWN_LANDMARK\",\"LEFT_EYE\",\"RIGHT_EYE\",\"LEFT_OF_LEFT_EYEBROW\",\"RIGHT_OF_LEFT_EYEBROW\",\"LEFT_OF_RIGHT_EYEBROW\",\"RIGHT_OF_RIGHT_EYEBROW\",\"MIDPOINT_BETWEEN_EYES\",\"NOSE_TIP\",\"UPPER_LIP\",\"LOWER_LIP\",\"MOUTH_LEFT\",\"MOUTH_RIGHT\",\"MOUTH_CENTER\",\"NOSE_BOTTOM_RIGHT\",\"NOSE_BOTTOM_LEFT\",\"NOSE_BOTTOM_CENTER\",\"LEFT_EYE_TOP_BOUNDARY\",\"LEFT_EYE_RIGHT_CORNER\",\"LEFT_EYE_BOTTOM_BOUNDARY\",\"LEFT_EYE_LEFT_CORNER\",\"RIGHT_EYE_TOP_BOUNDARY\",\"RIGHT_EYE_RIGHT_CORNER\",\"RIGHT_EYE_BOTTOM_BOUNDARY\",\"RIGHT_EYE_LEFT_CORNER\",\"LEFT_EYEBROW_UPPER_MIDPOINT\",\"RIGHT_EYEBROW_UPPER_MIDPOINT\",\"LEFT_EAR_TRAGION\",\"RIGHT_EAR_TRAGION\",\"LEFT_EYE_PUPIL\",\"RIGHT_EYE_PUPIL\",\"FOREHEAD_GLABELLA\",\"CHIN_GNATHION\",\"CHIN_LEFT_GONION\",\"CHIN_RIGHT_GONION\",\"UNRECOGNIZED\"]}},{\"name\":\"x\",\"type\":\"float\"},{\"name\":\"y\",\"type\":\"float\"},{\"name\":\"z\",\"type\":\"float\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"webPage-fmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"webPage-pmiWebImage\",\"type\":\"record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"score\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"t
ype\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Text_GCS-cdap-data-pipeline.json b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Text_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..082f414 --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Text_GCS-cdap-data-pipeline.json @@ -0,0 +1,138 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Text_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "auto-detect", + "project": "auto-detect", + "features": "Text", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"text-annotation-component-record\",\"fields\":[{\"name\":\"locale\",\"type\":[\"string\",\"null\"]},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"textAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"text-annotation-component-record\",\"fields\":[{\"name\":\"locale\",\"type\":[\"string\",\"null\"]},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"textAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"text-annotation-component-record\",\"fields\":[{\"name\":\"locale\",\"type\":[\"string\",\"null\"]},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"textAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_IMAGE_TEXT_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"text-annotation-component-record\",\"fields\":[{\"name\":\"locale\",\"type\":[\"string\",\"null\"]},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"textAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"text-annotation-component-record\",\"fields\":[{\"name\":\"locale\",\"type\":[\"string\",\"null\"]},{\"name\":\"description\",\"type\":\"string\"},{\"name\":\"position\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"textAnnotation-position\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Web_Detection_GCS-cdap-data-pipeline.json 
b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Web_Detection_GCS-cdap-data-pipeline.json new file mode 100644 index 0000000..263842f --- /dev/null +++ b/examples/cloud-data-fusion/transform/ImageExtractor/GCS_Image_Extractor_Web_Detection_GCS-cdap-data-pipeline.json @@ -0,0 +1,139 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "GCS_Image_Extractor_Web_Detection_GCS", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "GCS2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "format": "text", + "serviceFilePath": "auto-detect", + "filenameOnly": "false", + "recursive": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${GS_PATH_TO_IMAGE_LIST_FILE}", + "referenceName": "Paths_to_images" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": 
"auto-detect", + "project": "auto-detect", + "features": "Web Detection", + "includeGeoResults": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"nam
e\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}", + "pathField": "body", + "outputField": "output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"flo
at\"}]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}" + } + ], + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "GCS2", + "plugin": { + "name": "GCS", + "type": "batchsink", + "label": "GCS2", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "auto-detect", + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "serviceFilePath": "auto-detect", + "location": "us", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}", + "referenceName": "SaveResults", + "path": "${GS_PATH_TO_WEB_DETECTION_RESULTS}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"output\",\"type\":{\"type\":\"record\",\"name\":\"web-detection-record\",\"fields\":[{\"name\":\"webEntities\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"web-entity-record\",\"fields\":[{\"name\":\"entityId\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"description\",\"type\":\"string\"}]}}},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"pagesWithMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"page-with-matching-images-record\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"pageTitle\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"fullMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-fmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"partialMatchingImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"webPage-pmiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}}]}}},{\"name\":\"visuallySimilarImages\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"vsiWebImage\",\"fields\":[{\"name\":\"url\",\"type\":\"string\"},{\"name\":\"score\",\"type\":\"float\"}]}}},{\"name\":\"bestGuessLabels\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"best-guess-label-record
\",\"fields\":[{\"name\":\"label\",\"type\":\"string\"},{\"name\":\"languageCode\",\"type\":\"string\"}]}}}]}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/documents/gif/20200317_191230.gif b/examples/documents/gif/20200317_191230.gif new file mode 100644 index 0000000..216d11c Binary files /dev/null and b/examples/documents/gif/20200317_191230.gif differ diff --git a/examples/documents/gif/Source.txt b/examples/documents/gif/Source.txt new file mode 100644 index 0000000..44a093d --- /dev/null +++ b/examples/documents/gif/Source.txt @@ -0,0 +1,25 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +Royalty Free images URLs: + +https://unsplash.com/photos/A9CUBfntOFQ +https://unsplash.com/photos/oEgiJNbYw8w +https://unsplash.com/photos/3isjMlIlj4A +https://unsplash.com/photos/CW88ysx7Amk +https://unsplash.com/photos/8LKQfBumjMo +https://unsplash.com/photos/EMX1eJ1BcgU +https://unsplash.com/photos/0are122T4ho diff --git a/examples/documents/gif/croissant-CW88ysx7Amk.gif b/examples/documents/gif/croissant-CW88ysx7Amk.gif new file mode 100644 index 0000000..2f6e976 Binary files /dev/null and b/examples/documents/gif/croissant-CW88ysx7Amk.gif differ diff --git a/examples/documents/gif/curology-A9CUBfntOFQ.gif b/examples/documents/gif/curology-A9CUBfntOFQ.gif new file mode 100644 index 0000000..33f49db Binary files /dev/null and b/examples/documents/gif/curology-A9CUBfntOFQ.gif differ diff --git a/examples/documents/gif/fallon-michael-8LKQfBumjMo.gif b/examples/documents/gif/fallon-michael-8LKQfBumjMo.gif new file mode 100644 index 0000000..ee71ce1 Binary files /dev/null and b/examples/documents/gif/fallon-michael-8LKQfBumjMo.gif differ diff --git a/examples/documents/gif/images_demo-img.gif b/examples/documents/gif/images_demo-img.gif new file mode 100644 index 0000000..46d4558 Binary files /dev/null and b/examples/documents/gif/images_demo-img.gif differ diff --git a/examples/documents/gif/keyur-nandaniya-3isjMlIlj4A.gif b/examples/documents/gif/keyur-nandaniya-3isjMlIlj4A.gif new file mode 100644 index 0000000..b790e35 Binary files /dev/null and b/examples/documents/gif/keyur-nandaniya-3isjMlIlj4A.gif differ diff --git a/examples/documents/gif/keyur-nandaniya-oEgiJNbYw8w.gif b/examples/documents/gif/keyur-nandaniya-oEgiJNbYw8w.gif new file mode 100644 index 0000000..fabfd66 Binary files /dev/null and b/examples/documents/gif/keyur-nandaniya-oEgiJNbYw8w.gif differ diff --git a/examples/documents/gif/thought-catalog-EMX1eJ1BcgU.gif b/examples/documents/gif/thought-catalog-EMX1eJ1BcgU.gif new file mode 100644 index 0000000..c27cd4d Binary files /dev/null 
and b/examples/documents/gif/thought-catalog-EMX1eJ1BcgU.gif differ diff --git a/examples/documents/gif/wesley-tingey-0are122T4ho.gif b/examples/documents/gif/wesley-tingey-0are122T4ho.gif new file mode 100644 index 0000000..fe77e99 Binary files /dev/null and b/examples/documents/gif/wesley-tingey-0are122T4ho.gif differ diff --git a/examples/documents/pdf/20200317_191230.pdf b/examples/documents/pdf/20200317_191230.pdf new file mode 100644 index 0000000..7fed44a Binary files /dev/null and b/examples/documents/pdf/20200317_191230.pdf differ diff --git a/examples/documents/pdf/pdfs_Google-Cloud-Platform.pdf b/examples/documents/pdf/pdfs_Google-Cloud-Platform.pdf new file mode 100644 index 0000000..b740af3 Binary files /dev/null and b/examples/documents/pdf/pdfs_Google-Cloud-Platform.pdf differ diff --git a/examples/documents/pdf/pdfs_vision_document_understanding_custom_0773375000.pdf b/examples/documents/pdf/pdfs_vision_document_understanding_custom_0773375000.pdf new file mode 100644 index 0000000..9e27dff Binary files /dev/null and b/examples/documents/pdf/pdfs_vision_document_understanding_custom_0773375000.pdf differ diff --git a/examples/documents/tiff/20200317_191230.tiff b/examples/documents/tiff/20200317_191230.tiff new file mode 100644 index 0000000..b90dd36 Binary files /dev/null and b/examples/documents/tiff/20200317_191230.tiff differ diff --git a/examples/documents/tiff/Source.txt b/examples/documents/tiff/Source.txt new file mode 100644 index 0000000..44a093d --- /dev/null +++ b/examples/documents/tiff/Source.txt @@ -0,0 +1,25 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +Royalty Free images URLs: + +https://unsplash.com/photos/A9CUBfntOFQ +https://unsplash.com/photos/oEgiJNbYw8w +https://unsplash.com/photos/3isjMlIlj4A +https://unsplash.com/photos/CW88ysx7Amk +https://unsplash.com/photos/8LKQfBumjMo +https://unsplash.com/photos/EMX1eJ1BcgU +https://unsplash.com/photos/0are122T4ho diff --git a/examples/documents/tiff/croissant-CW88ysx7Amk.tiff b/examples/documents/tiff/croissant-CW88ysx7Amk.tiff new file mode 100644 index 0000000..079b12e Binary files /dev/null and b/examples/documents/tiff/croissant-CW88ysx7Amk.tiff differ diff --git a/examples/documents/tiff/curology-A9CUBfntOFQ.tiff b/examples/documents/tiff/curology-A9CUBfntOFQ.tiff new file mode 100644 index 0000000..96fb22b Binary files /dev/null and b/examples/documents/tiff/curology-A9CUBfntOFQ.tiff differ diff --git a/examples/documents/tiff/fallon-michael-8LKQfBumjMo.tiff b/examples/documents/tiff/fallon-michael-8LKQfBumjMo.tiff new file mode 100644 index 0000000..6b12e9b Binary files /dev/null and b/examples/documents/tiff/fallon-michael-8LKQfBumjMo.tiff differ diff --git a/examples/documents/tiff/images_demo-img.tiff b/examples/documents/tiff/images_demo-img.tiff new file mode 100644 index 0000000..f587a64 Binary files /dev/null and b/examples/documents/tiff/images_demo-img.tiff differ diff --git a/examples/documents/tiff/keyur-nandaniya-3isjMlIlj4A.tiff b/examples/documents/tiff/keyur-nandaniya-3isjMlIlj4A.tiff new file mode 100644 index 0000000..f8117e9 Binary files /dev/null and b/examples/documents/tiff/keyur-nandaniya-3isjMlIlj4A.tiff differ 
diff --git a/examples/documents/tiff/keyur-nandaniya-oEgiJNbYw8w.tiff b/examples/documents/tiff/keyur-nandaniya-oEgiJNbYw8w.tiff new file mode 100644 index 0000000..38d0a18 Binary files /dev/null and b/examples/documents/tiff/keyur-nandaniya-oEgiJNbYw8w.tiff differ diff --git a/examples/documents/tiff/thought-catalog-EMX1eJ1BcgU.tiff b/examples/documents/tiff/thought-catalog-EMX1eJ1BcgU.tiff new file mode 100644 index 0000000..ea4e9ef Binary files /dev/null and b/examples/documents/tiff/thought-catalog-EMX1eJ1BcgU.tiff differ diff --git a/examples/documents/tiff/wesley-tingey-0are122T4ho.tiff b/examples/documents/tiff/wesley-tingey-0are122T4ho.tiff new file mode 100644 index 0000000..2725fca Binary files /dev/null and b/examples/documents/tiff/wesley-tingey-0are122T4ho.tiff differ diff --git a/examples/empty_file.txt b/examples/empty_file.txt new file mode 100644 index 0000000..e69de29 diff --git a/examples/images/jpg/20200317_191230.jpg b/examples/images/jpg/20200317_191230.jpg new file mode 100644 index 0000000..ccc3eac Binary files /dev/null and b/examples/images/jpg/20200317_191230.jpg differ diff --git a/examples/images/jpg/Source.txt b/examples/images/jpg/Source.txt new file mode 100644 index 0000000..44a093d --- /dev/null +++ b/examples/images/jpg/Source.txt @@ -0,0 +1,25 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +Royalty Free images URLs: + +https://unsplash.com/photos/A9CUBfntOFQ +https://unsplash.com/photos/oEgiJNbYw8w +https://unsplash.com/photos/3isjMlIlj4A +https://unsplash.com/photos/CW88ysx7Amk +https://unsplash.com/photos/8LKQfBumjMo +https://unsplash.com/photos/EMX1eJ1BcgU +https://unsplash.com/photos/0are122T4ho diff --git a/examples/images/jpg/bali.jpg b/examples/images/jpg/bali.jpg new file mode 100644 index 0000000..e0242fb Binary files /dev/null and b/examples/images/jpg/bali.jpg differ diff --git a/examples/images/jpg/bubble.jpg b/examples/images/jpg/bubble.jpg new file mode 100644 index 0000000..0f1f8eb Binary files /dev/null and b/examples/images/jpg/bubble.jpg differ diff --git a/examples/images/jpg/carnaval.jpg b/examples/images/jpg/carnaval.jpg new file mode 100644 index 0000000..0ca6998 Binary files /dev/null and b/examples/images/jpg/carnaval.jpg differ diff --git a/examples/images/jpg/croissant-CW88ysx7Amk.jpg b/examples/images/jpg/croissant-CW88ysx7Amk.jpg new file mode 100644 index 0000000..ecc2c56 Binary files /dev/null and b/examples/images/jpg/croissant-CW88ysx7Amk.jpg differ diff --git a/examples/images/jpg/curology-A9CUBfntOFQ.jpg b/examples/images/jpg/curology-A9CUBfntOFQ.jpg new file mode 100644 index 0000000..7271d67 Binary files /dev/null and b/examples/images/jpg/curology-A9CUBfntOFQ.jpg differ diff --git a/examples/images/jpg/faces.jpg b/examples/images/jpg/faces.jpg new file mode 100644 index 0000000..d1bdcb7 Binary files /dev/null and b/examples/images/jpg/faces.jpg differ diff --git a/examples/images/jpg/fallon-michael-8LKQfBumjMo.jpg b/examples/images/jpg/fallon-michael-8LKQfBumjMo.jpg new file mode 100644 index 0000000..d47c581 Binary files /dev/null and b/examples/images/jpg/fallon-michael-8LKQfBumjMo.jpg differ diff --git a/examples/images/jpg/google_logo.jpg b/examples/images/jpg/google_logo.jpg new file mode 100644 index 0000000..7187518 Binary files /dev/null and b/examples/images/jpg/google_logo.jpg differ diff --git 
a/examples/images/jpg/images_demo-img.jpg b/examples/images/jpg/images_demo-img.jpg new file mode 100644 index 0000000..19ed4d6 Binary files /dev/null and b/examples/images/jpg/images_demo-img.jpg differ diff --git a/examples/images/jpg/keyur-nandaniya-3isjMlIlj4A.jpg b/examples/images/jpg/keyur-nandaniya-3isjMlIlj4A.jpg new file mode 100644 index 0000000..d2c04a7 Binary files /dev/null and b/examples/images/jpg/keyur-nandaniya-3isjMlIlj4A.jpg differ diff --git a/examples/images/jpg/keyur-nandaniya-oEgiJNbYw8w.jpg b/examples/images/jpg/keyur-nandaniya-oEgiJNbYw8w.jpg new file mode 100644 index 0000000..a0985b1 Binary files /dev/null and b/examples/images/jpg/keyur-nandaniya-oEgiJNbYw8w.jpg differ diff --git a/examples/images/jpg/setagaya.jpg b/examples/images/jpg/setagaya.jpg new file mode 100644 index 0000000..e788577 Binary files /dev/null and b/examples/images/jpg/setagaya.jpg differ diff --git a/examples/images/jpg/sign.jpg b/examples/images/jpg/sign.jpg new file mode 100644 index 0000000..0871606 Binary files /dev/null and b/examples/images/jpg/sign.jpg differ diff --git a/examples/images/jpg/st_basils.jpg b/examples/images/jpg/st_basils.jpg new file mode 100644 index 0000000..e7505e4 Binary files /dev/null and b/examples/images/jpg/st_basils.jpg differ diff --git a/examples/images/jpg/thought-catalog-EMX1eJ1BcgU.jpg b/examples/images/jpg/thought-catalog-EMX1eJ1BcgU.jpg new file mode 100644 index 0000000..c64a246 Binary files /dev/null and b/examples/images/jpg/thought-catalog-EMX1eJ1BcgU.jpg differ diff --git a/examples/images/jpg/wesley-tingey-0are122T4ho.jpg b/examples/images/jpg/wesley-tingey-0are122T4ho.jpg new file mode 100644 index 0000000..978cf6a Binary files /dev/null and b/examples/images/jpg/wesley-tingey-0are122T4ho.jpg differ diff --git a/examples/paths_to_images.txt b/examples/paths_to_images.txt new file mode 100644 index 0000000..c2228b1 --- /dev/null +++ b/examples/paths_to_images.txt @@ -0,0 +1 @@ +gs://vision-api-pbo2/images/demo-img.jpg 
diff --git a/examples/paths_to_pdfs.txt b/examples/paths_to_pdfs.txt new file mode 100644 index 0000000..c86fcd8 --- /dev/null +++ b/examples/paths_to_pdfs.txt @@ -0,0 +1,2 @@ +gs://vision-api-pbo2/pdfs/Google-Cloud-Platform.pdf +gs://vision-api-pbo2/pdfs/vision_document_understanding_custom_0773375000.pdf \ No newline at end of file diff --git a/examples/paths_to_tiffs.txt b/examples/paths_to_tiffs.txt new file mode 100644 index 0000000..a6d7112 --- /dev/null +++ b/examples/paths_to_tiffs.txt @@ -0,0 +1 @@ +gs://vision-api-pbo2/tiffs/20200317_191230.tiff diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints-cdap-data-pipeline.json new file mode 100644 index 0000000..df877b4 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Crop_Hints-cdap-data-pipeline.json @@ -0,0 +1,123 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Image_Extractor_Crop_Hint", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_CROP_HINT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Crop Hints", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_CROP_HINT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + 
"description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Contents-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Contents-cdap-data-pipeline.json new file mode 100644 index 0000000..91d9146 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Explicit_Contents-cdap-data-pipeline.json @@ -0,0 +1,132 @@ +{ + "name": "Offline_Image_Extractor_Explicit_Contents", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_EXPLICIT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + 
"label": "GCS", + "icon": "fa-plug" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Explicit Content", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_EXPLICIT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Image Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Faces-cdap-data-pipeline.json 
b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Faces-cdap-data-pipeline.json new file mode 100644 index 0000000..cbbdbcd --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Faces-cdap-data-pipeline.json @@ -0,0 +1,118 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Image_Extractor_Face", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_IMAGE_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Face", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_FACE_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties-cdap-data-pipeline.json new file mode 100644 index 0000000..fb90393 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Image_Properties-cdap-data-pipeline.json @@ -0,0 +1,123 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Image_Extractor_Image_Properties", + "config": { + 
"resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_IMAGE_PROPERTIES_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Image Properties", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_IMAGE_PROPERTIES_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Labels-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Labels-cdap-data-pipeline.json new file mode 100644 index 0000000..7ae2d92 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Labels-cdap-data-pipeline.json @@ -0,0 +1,123 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Image_Extractor_Labels", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + 
"label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_LABEL_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Labels", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + 
"sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_LABEL_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks-cdap-data-pipeline.json new file mode 100644 index 0000000..35afcd9 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Landmarks-cdap-data-pipeline.json @@ -0,0 +1,132 @@ +{ + "name": "Offline_Image_Extractor_Landmarks", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_LANDMARK_DESTINATION_PATH}" + } + }, + 
"outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "GCS", + "icon": "fa-plug" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Landmarks", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_LANDMARK_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Image Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + 
"maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Localizations-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Localizations-cdap-data-pipeline.json new file mode 100644 index 0000000..221d277 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Localizations-cdap-data-pipeline.json @@ -0,0 +1,132 @@ +{ + "name": "Offline_Image_Extractor_Localizations", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_LOCALIZATION_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "GCS", + "icon": "fa-plug" + }, + { + "name": 
"File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Object Localization", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_LOCALIZATION_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Image Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Logos-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Logos-cdap-data-pipeline.json new file mode 
100644 index 0000000..33dccc2 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Logos-cdap-data-pipeline.json @@ -0,0 +1,123 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Image_Extractor_Logos", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_LOGO_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Logos", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_LOGO_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Texts-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Texts-cdap-data-pipeline.json new file mode 100644 index 0000000..ef497ff --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Texts-cdap-data-pipeline.json @@ -0,0 +1,132 @@ +{ + "name": "Offline_Image_Extractor_Texts", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 
1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_TEXT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "GCS", + "icon": "fa-plug" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": "OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Text", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_TEXT_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Image Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detections-cdap-data-pipeline.json b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detections-cdap-data-pipeline.json new file mode 100644 index 0000000..e0f85d2 --- /dev/null +++ b/examples/sandbox/action/OfflineImageExtractor/Offline_Image_Extractor_Web_Detections-cdap-data-pipeline.json @@ -0,0 +1,133 @@ +{ + "name": "Offline_Image_Extractor_Web_Detections", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Image Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + 
"processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-get-json-image-face", + "path": "${GCS_WEB_DETECTION_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "GCS", + "icon": "fa-plug" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-json-image-face", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Image Extractor", + "plugin": { + "name": 
"OfflineImageExtractor", + "type": "action", + "label": "Offline Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "features": "Web Detection", + "batchSize": "${GCS_IMAGE_BATCH_SIZE}", + "includeGeoResults": "on", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_WEB_DETECTION_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Image Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Gif-cdap-data-pipeline.json b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Gif-cdap-data-pipeline.json new file mode 100644 index 0000000..dd4580f --- /dev/null +++ b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Gif-cdap-data-pipeline.json @@ -0,0 +1,118 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Text_Extractor_Pdf", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": 
"${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-extracted-text", + "path": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${FS_DESTINATION_PATH}", + "referenceName": "file-extracted" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "mimeType": "image/gif", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}", + "batchSize": "2" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * 
*", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Pdf-cdap-data-pipeline.json b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Pdf-cdap-data-pipeline.json new file mode 100644 index 0000000..c1b2cf8 --- /dev/null +++ b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Pdf-cdap-data-pipeline.json @@ -0,0 +1,132 @@ +{ + "name": "Offline_Text_Extractor_Pdf", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-extracted-text", + "path": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": 
"GCS", + "icon": "fa-plug" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${FS_DESTINATION_PATH}", + "referenceName": "file-extracted" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsink", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "mimeType": "application/pdf", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}", + "batchSize": "2" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ], + "type": "action", + "label": "Offline Text Extractor", + "icon": "fa-plug" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Tiff-cdap-data-pipeline.json b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Tiff-cdap-data-pipeline.json new file 
mode 100644 index 0000000..08544f7 --- /dev/null +++ b/examples/sandbox/action/OfflineTextExtractor/Offline_Text_Extractor_Tiff-cdap-data-pipeline.json @@ -0,0 +1,118 @@ +{ + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "description": "Data Pipeline Application", + "name": "Offline_Text_Extractor_Tiff", + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCS", + "to": "File" + }, + { + "from": "Offline Text Extractor", + "to": "GCS" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "GCS", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCS", + "artifact": { + "name": "google-cloud", + "version": "0.13.2", + "scope": "SYSTEM" + }, + "properties": { + "project": "${GCS_PROJECT_ID}", + "format": "text", + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "filenameOnly": "false", + "recursive": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "gcs-extracted-text", + "path": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + }, + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "path": "${FS_DESTINATION_PATH}", +
"referenceName": "file-extracted" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "GCS", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Offline Text Extractor", + "plugin": { + "name": "OfflineTextExtractor", + "type": "action", + "label": "Offline Text Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "mimeType": "image/tiff", + "sourcePath": "${GCS_IMAGE_SOURCE_PATH}", + "destinationPath": "${GCS_TEXT_EXTRACTED_DESTINATION_PATH}", + "batchSize": "2" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Crop_Hints-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Crop_Hints-cdap-data-pipeline.json new file mode 100644 index 0000000..ad9f7db --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Crop_Hints-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Crop_Hints", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": 
"File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Crop Hints", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Explicit_Content-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Explicit_Content-cdap-data-pipeline.json new file mode 100644 index 0000000..49a3f24 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Explicit_Content-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Explicit_Content", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + 
"from": "Document Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Explicit Content", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Face-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Face-cdap-data-pipeline.json new file mode 100644 index 0000000..daa6c58 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Face-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Face", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "File2" + } + 
], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Face", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Hand_Writing-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Hand_Writing-cdap-data-pipeline.json new file mode 100644 index 0000000..46bd8c4 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Hand_Writing-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Hand_Writing", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document 
Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Handwriting", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Image_Properties-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Image_Properties-cdap-data-pipeline.json new file mode 100644 index 0000000..8437b74 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Image_Properties-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Image_Properties", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + 
"from": "Document Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Image Properties", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Labels-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Labels-cdap-data-pipeline.json new file mode 100644 index 0000000..3c229ba --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Labels-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Labels", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "File2" 
+ } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Labels", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Landmarks-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Landmarks-cdap-data-pipeline.json new file mode 100644 index 0000000..0777c09 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Landmarks-cdap-data-pipeline.json @@ -0,0 +1,140 @@ +{ + "name": "Document_Extractor_Landmarks", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + 
"to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "batchsource", + "label": "File", + "icon": "icon-file" + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Landmarks", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "type": "transform", + "label": "Document Extractor", + "icon": "fa-plug" + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"typ
e\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "type": "batchsink", + "label": "File2", + "icon": "icon-file" + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Logos-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Logos-cdap-data-pipeline.json new file mode 100644 index 0000000..9dd9f53 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Logos-cdap-data-pipeline.json @@ -0,0 +1,131 @@ +{ + "name": "Document_Extractor_Logos", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "File2" + } 
+ ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Logos", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + 
"name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{
\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Object_Localization-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Object_Localization-cdap-data-pipeline.json new file mode 100644 index 0000000..aa2f656 --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Object_Localization-cdap-data-pipeline.json @@ -0,0 +1,131 @@ +{ + "name": "Document_Extractor_Object_Localization", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", 
+ "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Object Localization", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"
name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Text-cdap-data-pipeline.json b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Text-cdap-data-pipeline.json new file mode 100644 index 0000000..e396e9c --- /dev/null +++ b/examples/sandbox/transform/DocumentExtractor/Document_Extractor_Text-cdap-data-pipeline.json @@ -0,0 +1,131 @@ +{ + "name": "Document_Extractor_Text", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Document Extractor" + }, + { + "from": "Document Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_SOURCE_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Document Extractor", + "plugin": { + "name": "DocumentExtractor", + "type": "transform", + "label": "Document Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${PROJECT_ID}", + "features": "Text", + "mimeType": "${MIME_TYPE}", + "pages": "1,2,3,4,5", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + } + }, + "outputSchema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type
\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Crop-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Crop-cdap-data-pipeline.json new file mode 100644 index 0000000..b1713c6 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Crop-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Crop", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Crop Hints", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}
}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\"
,\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Explicit_Content-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Explicit_Content-cdap-data-pipeline.json new file mode 100644 index 0000000..ec05f1a --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Explicit_Content-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Explicit_Content", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + 
"type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Explicit Content", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",
\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"
type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Face-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Face-cdap-data-pipeline.json new file mode 100644 index 0000000..cff0497 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Face-cdap-data-pipeline.json @@ -0,0 +1,135 @@ +{ + "name": "Image_Extractor_Face", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image 
Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Face", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliase
s\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases
\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ], + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Hand_Writing-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Hand_Writing-cdap-data-pipeline.json new file mode 100644 index 0000000..57a5a17 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Hand_Writing-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Hand_Writing", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": 
"transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Handwriting", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"fl
oat\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"stri
ng\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Image_Properties-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Image_Properties-cdap-data-pipeline.json new file mode 100644 index 0000000..b49069b --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Image_Properties-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Image_Properties", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + 
"type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Image Properties", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",
\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"
type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Labels-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Labels-cdap-data-pipeline.json new file mode 100644 index 0000000..eaf0cf7 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Labels-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Labels", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image 
Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Labels", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"alia
ses\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"alias
es\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Landmarks-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Landmarks-cdap-data-pipeline.json new file mode 100644 index 0000000..b23886b --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Landmarks-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Landmarks", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + 
"label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Landmarks", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\
"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"
ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Logos-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Logos-cdap-data-pipeline.json new file mode 100644 index 0000000..bfdd2f6 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Logos-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Logos", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image 
Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Logos", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"alias
es\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliase
s\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Object_Localization-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Object_Localization-cdap-data-pipeline.json new file mode 100644 index 0000000..0f8f5d8 --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Object_Localization-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Object_Localization", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": 
"ImageExtractor", + "type": "transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Object Localization", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases
\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\"
:[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Product_Search-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Product_Search-cdap-data-pipeline.json new file mode 100644 index 0000000..fe0829f --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Product_Search-cdap-data-pipeline.json @@ -0,0 +1,131 @@ +{ + "name": "Image_Extractor_Product_Search", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": 
"transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Product Search", + "productCategories": "Apparel", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliase
s\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\
":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Text-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Text-cdap-data-pipeline.json new file mode 100644 index 0000000..c4e6f7e --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Text-cdap-data-pipeline.json @@ -0,0 +1,130 @@ +{ + "name": "Image_Extractor_Text", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": "transform", + "label": "Image 
Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Text", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliase
s\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases
\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file diff --git a/examples/sandbox/transform/ImageExtractor/Image_Extractor_Web_Detection-cdap-data-pipeline.json b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Web_Detection-cdap-data-pipeline.json new file mode 100644 index 0000000..020ddfe --- /dev/null +++ b/examples/sandbox/transform/ImageExtractor/Image_Extractor_Web_Detection-cdap-data-pipeline.json @@ -0,0 +1,131 @@ +{ + "name": "Image_Extractor_Web_Detection", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.1.1", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "File", + "to": "Image Extractor" + }, + { + "from": "Image Extractor", + "to": "File2" + } + ], + "comments": [], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": false, + "stages": [ + { + "name": "File", + "plugin": { + "name": "File", + "type": "batchsource", + "label": "File", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "format": "text", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "ImagePaths", + "path": "${FS_LIST_OF_IMAGES_PATH}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "Image Extractor", + "plugin": { + "name": "ImageExtractor", + "type": 
"transform", + "label": "Image Extractor", + "artifact": { + "name": "cloud-vision", + "version": "1.0.0-SNAPSHOT", + "scope": "USER" + }, + "properties": { + "serviceFilePath": "${GCP_JSON_KEY_PATH}", + "project": "${GCS_PROJECT_ID}", + "features": "Web Detection", + "includeGeoResults": "true", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":
[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}", + "pathField": "body", + "outputField": "vision_output" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{
\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ], + "inputSchema": [ + { + "name": "File", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}" + } + ] + }, + { + "name": "File2", + "plugin": { + "name": "File", + "type": "batchsink", + "label": "File2", + "artifact": { + "name": "core-plugins", + "version": "2.3.4", + "scope": "SYSTEM" + }, + "properties": { + "suffix": "yyyy-MM-dd-HH-mm", + "format": "json", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "referenceName": "visiondata", + "path": "${FS_DESTINATION_PATH}" + + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}", + "inputSchema": [ + { + "name": "Image Extractor", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"FaceInformation\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"face-annotation-component-record\",\"fields\":[{\"name\":\"rollAngle\",\"type\":\"float\"},{\"name\":\"panAngle\",\"type\":\"float\"},{\"name\":\"tiltAngle\",\"type\":\"float\"},{\"name\":\"detectionConfidence\",\"type\":\"float\"},{\"name\":\"landmarkingConfidence\",\"type\":\"float\"},{\"name\":\"angerLikelihood\",\"type\":\"string\"},{\"name\":\"joyLikelihood\",\"type\":\"string\"},{\"name\":\"surpriseLikelihood\",\"type\":\"string\"},{\"name\":\"blurredLikelihood\",\"type\":\"string\"},{\"name\":\"underExposedLikelihood\",\"type\":\"string\"},{\"name\":\"sorrowLikelihood\",\"type\":\"string\"},{\"name\":\"headwearLikelihood\",\"type\":\"string\"},{\"name\":\"boundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"fdBoundingPoly\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"vertex-record\",\"type\":\"record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"int\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}},{\"name\":\"landmarks\",\"type\":{\"type\":\"array\",\"items\":{\"name\":\"face-landmark-record\",\"type\":\"record\",\"fields\":[{\"name\":\"type\",\"type\":\"string\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"x\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"y\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]},{\"name\":\"z\",\"type\":\"float\",\"order\":\"ascending\",\"aliases\":[]}],\"aliases\":[]}}}]}}}]}" + } + ] + } + ], + "schedule": "0 * * * *", + 
"engine": "spark", + "numOfRecordsPreview": 100, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + } +} \ No newline at end of file