From 427f1a49ac207d166fd004821d2ab9cd6026fd24 Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 14 Nov 2024 16:14:38 -0800 Subject: [PATCH 1/6] feat: add data_list metadata in parser --- packages/data-models/flowTypes.ts | 16 ++++ .../processors/flowParser/flowParser.ts | 2 +- .../flowParser/parsers/data_list.parser.ts | 81 ++++++++++++++++++- packages/scripts/test/helpers/utils.ts | 2 +- 4 files changed, 98 insertions(+), 3 deletions(-) diff --git a/packages/data-models/flowTypes.ts b/packages/data-models/flowTypes.ts index 51f8f6fabf..aac256aa6f 100644 --- a/packages/data-models/flowTypes.ts +++ b/packages/data-models/flowTypes.ts @@ -74,9 +74,25 @@ export namespace FlowTypes { flow_type: "asset_pack"; rows: Data_listRow[]; } + + /** + * Data types supported within data_lists + * These are a subset of types provided by Json Schema standards + * https://json-schema.org/draft/2020-12/json-schema-core#name-instance-data-model + * */ + export type Data_listColumnType = "boolean" | "number" | "object" | "string"; + + export interface Data_listColumnMetadata { + type: Data_listColumnType; + } + export interface Data_list extends FlowTypeWithData { flow_type: "data_list"; rows: Data_listRow[]; + /** Hashmap of metadata for each data column */ + metadata: { + [column_name: string]: Data_listColumnMetadata; + }; } export interface DataPipeFlow extends FlowTypeWithData { flow_type: "data_pipe"; diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts index 4504d04458..6f0288384d 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts @@ -4,7 +4,7 @@ import { IConverterPaths, IFlowHashmapByType, IParsedWorkbookData } from "../../ import { arrayToHashmap, groupJsonByKey, IContentsEntry } from "../../utils"; import BaseProcessor from "../base"; -const cacheVersion = 20241024.0; +const cacheVersion = 20241114.0; export class FlowParserProcessor extends BaseProcessor { public parsers: { [flowType in FlowTypes.FlowType]: Parsers.DefaultParser } = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts index 1a4b509b8e..f2324767a3 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts @@ -6,6 +6,7 @@ import { setNestedProperty, } from "../../../utils"; import { DefaultParser } from "./default.parser"; +import { isObjectLiteral, Logger } from "shared"; export class DataListParser extends DefaultParser { postProcessRow(row: FlowTypes.Data_listRow) { @@ -33,8 +34,86 @@ export class DataListParser extends DefaultParser { return row; } - public postProcessFlows(flows: FlowTypes.FlowTypeWithData[]) { + public override postProcessFlow(flow: FlowTypes.Data_list): FlowTypes.Data_list { + flow.metadata = this.getFlowMetadata(flow); + return flow; + } + + public postProcessFlows(flows: FlowTypes.Data_list[]) { const flowsWithOverrides = assignFlowOverrides(flows); return flowsWithOverrides; } + + /** Assign column metadata from @metadata row if provided, or infer from data if not*/ + private getFlowMetadata(flow: FlowTypes.Data_list) { + const [firstRow] = flow.rows; + const metadata = + firstRow?.id === "@metadata" + ? this.assignMetadataFromRow(firstRow) + : this.inferMetadataFromData(flow.rows); + + const errors = this.qualityCheckMetadata(metadata); + if (errors.length > 0) { + for (const error of errors) { + console.error(error); + } + Logger.error({ + msg1: `[${flow.flow_name}] Data List validation has (${errors.length}) errors`, + msg2: flow._xlsxPath, + }); + } + return metadata; + } + + /** Assign data_list metadata using a first `@metadata` row */ + private assignMetadataFromRow(metadataRow: FlowTypes.Data_listRow) { + // do not extract metadata for id column + const { id, ...values } = metadataRow; + const metadata: FlowTypes.Data_list["metadata"] = {}; + for (const [field, metadataString] of Object.entries(values)) { + metadata[field] = parseAppDataCollectionString(metadataString as string) as any; + } + return metadata; + } + + /** Infer data_list metadata from data */ + private inferMetadataFromData(rows: FlowTypes.Data_listRow[]) { + const metadata: FlowTypes.Data_list["metadata"] = {}; + for (const row of rows) { + // do not extract metadata for id column + const { id, ...values } = row; + for (const [field, value] of Object.entries(values)) { + if (!metadata[field]) { + metadata[field] = { type: undefined }; + } + // only assign type if previously unassigned + if (!metadata[field].type) { + metadata[field].type = this.inferColumnDataType(value); + } + } + } + return metadata; + } + + private inferColumnDataType(value: any): FlowTypes.Data_listColumnType { + if (value === undefined) return undefined; + if (typeof value === "string") return "string"; + if (typeof value === "number") return "number"; + if (typeof value === "boolean") return "boolean"; + if (isObjectLiteral(value)) return "object"; + return undefined; + } + + private qualityCheckMetadata(metadata: FlowTypes.Data_list["metadata"]) { + const errors: string[] = []; + for (const [columnName, columnMetadata] of Object.entries(metadata)) { + // ensure all columns have data type + if (columnMetadata.type === undefined) { + errors.push( + `"${columnName}" column cannot infer type\nEither include an @metadata column or add example value to a row` + ); + } + } + return errors; + } } diff --git a/packages/scripts/test/helpers/utils.ts b/packages/scripts/test/helpers/utils.ts index 0d131d7763..a1d0281735 100644 --- a/packages/scripts/test/helpers/utils.ts +++ b/packages/scripts/test/helpers/utils.ts @@ -11,7 +11,7 @@ import { Logger } from "shared/src/utils/logging/console-logger"; * @param callOriginal specify whether to still call original Logger * methods after spy intercept */ -export function useMockLogger(callOriginal = true) { +export function useMockLogger(callOriginal = false) { const error = jest.spyOn(Logger, "error"); const warning = jest.spyOn(Logger, "warning"); if (!callOriginal) { From e9b35bb586a1bda03704b0a8277c73af94da91ea Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 14 Nov 2024 16:14:54 -0800 Subject: [PATCH 2/6] test: data_list metadata --- .../parsers/data_list.parser.spec.ts | 103 ++++++++++++++++-- 1 file changed, 93 insertions(+), 10 deletions(-) diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts index ec33ef481e..d5eac110ad 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts @@ -1,8 +1,88 @@ +import { FlowTypes } from "data-models"; import { DataListParser } from "."; -import { TEST_DATA_PATHS } from "../../../../../../../test/helpers/utils"; +import { TEST_DATA_PATHS, useMockLogger } from "../../../../../../../test/helpers/utils"; import { FlowParserProcessor } from "../flowParser"; -const testFlow = { +const MOCK_DATA_LIST = (): FlowTypes.Data_list => ({ + flow_type: "data_list", + flow_name: "mock_data_list", + rows: [ + { + id: "id_1", + number: 1, + text: "hello", + boolean: true, + nested: { string: "hello" }, + }, + { + id: "id_1", + number: 1, + text: "hello", + boolean: true, + nested: { string: "hello" }, + additional: "included", + }, + ], + metadata: {}, + _xlsxPath: "/mock/data/path", +}); + +/*********************************************************************** + * Basic + **********************************************************************/ + +/** yarn workspace scripts test -t data_list.parser.spec.ts **/ +describe("data_list Parser Metadata", () => { + let parser: DataListParser; + + beforeAll(() => { + parser = new DataListParser({ processedFlowHashmap: {} } as any); + }); + it("Infers column metadata from data", () => { + const metadata = parser["getFlowMetadata"](MOCK_DATA_LIST()); + expect(metadata).toEqual({ + number: { type: "number" }, + text: { type: "string" }, + boolean: { type: "boolean" }, + nested: { type: "object" }, + // data missing or undefined in first row identified from subsequent + additional: { type: "string" }, + }); + }); + it("Assigns column metadata from metadata row", () => { + const flowWithMetadataRow = { + ...MOCK_DATA_LIST(), + rows: [ + { id: "@metadata", number: "type: number", text: "type: string" }, + { id: "id_1", number: undefined, text: undefined }, + ], + }; + const metadata = parser["getFlowMetadata"](flowWithMetadataRow); + expect(metadata).toEqual({ + number: { type: "number" }, + text: { type: "string" }, + }); + }); + it("QC metadata", () => { + const loggerSpy = useMockLogger(false); + const flowWithMetadataRow = { + ...MOCK_DATA_LIST(), + rows: [{ id: "id_1", number: 1, text: undefined }], + }; + parser["getFlowMetadata"](flowWithMetadataRow); + expect(loggerSpy.error).toHaveBeenCalledTimes(1); + expect(loggerSpy.error).toHaveBeenCalledWith({ + msg1: "[mock_data_list] Data List validation has (1) errors", + msg2: "/mock/data/path", + }); + }); +}); + +/*********************************************************************** + * Advanced use cases + **********************************************************************/ + +const testFlow: FlowTypes.Data_list = { flow_type: "data_list", flow_name: "test_data_list", rows: [ @@ -14,18 +94,19 @@ const testFlow = { "nested.test_2": "two", }, ], + metadata: {}, }; -/** yarn workspace scripts test -t data_list.parser.spec.ts **/ describe("data_list Parser (single)", () => { - let outputRows: any[]; + let parser: DataListParser; + beforeAll(() => { - const parser = new DataListParser({ processedFlowHashmap: {} } as any); - const output = parser.run(testFlow as any); - outputRows = output.rows; + parser = new DataListParser({ processedFlowHashmap: {} } as any); }); + it("Extracts condition_list", async () => { - const { test_condition_list } = outputRows[0]; + const { rows } = parser.run(testFlow as any); + const { test_condition_list } = rows[0]; expect(test_condition_list).toEqual([ { condition_type: "calc", @@ -35,12 +116,14 @@ describe("data_list Parser (single)", () => { ]); }); it("Extracts nested properties", async () => { - const { nested } = outputRows[0]; + const { rows } = parser.run(testFlow as any); + const { nested } = rows[0]; expect(nested).toEqual({ test_1: 1, test_2: "two" }); }); // TODO - likely deprecated (can't see in codebase) it("Extracts notification_schedule", async () => { - const { test_notification_schedule } = outputRows[0]; + const { rows } = parser.run(testFlow as any); + const { test_notification_schedule } = rows[0]; expect(test_notification_schedule).toEqual({ key_1: "value_1", key_2: "value_2" }); }); }); From d06b39bce6641b18132a25707e5cc23da179f2de Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 14 Nov 2024 17:27:41 -0800 Subject: [PATCH 3/6] chore: code tidying --- packages/data-models/flowTypes.ts | 7 +- .../processors/flowParser/flowParser.ts | 2 +- .../parsers/data_list.parser.spec.ts | 60 +++++++++------- .../flowParser/parsers/data_list.parser.ts | 72 +++++++++++-------- 4 files changed, 82 insertions(+), 59 deletions(-) diff --git a/packages/data-models/flowTypes.ts b/packages/data-models/flowTypes.ts index aac256aa6f..9363aa0b21 100644 --- a/packages/data-models/flowTypes.ts +++ b/packages/data-models/flowTypes.ts @@ -83,14 +83,15 @@ export namespace FlowTypes { export type Data_listColumnType = "boolean" | "number" | "object" | "string"; export interface Data_listColumnMetadata { - type: Data_listColumnType; + /** Column data type. Default "string" when not defined */ + type?: Data_listColumnType; } export interface Data_list extends FlowTypeWithData { flow_type: "data_list"; rows: Data_listRow[]; - /** Hashmap of metadata for each data column */ - metadata: { + /** Hashmap of any additional column metadata (omitted if all columns default string type) */ + _metadata?: { [column_name: string]: Data_listColumnMetadata; }; } diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts index 6f0288384d..c3bf1bf2cd 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts @@ -4,7 +4,7 @@ import { IConverterPaths, IFlowHashmapByType, IParsedWorkbookData } from "../../ import { arrayToHashmap, groupJsonByKey, IContentsEntry } from "../../utils"; import BaseProcessor from "../base"; -const cacheVersion = 20241114.0; +const cacheVersion = 20241114.2; export class FlowParserProcessor extends BaseProcessor { public parsers: { [flowType in FlowTypes.FlowType]: Parsers.DefaultParser } = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts index d5eac110ad..d810303a40 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts @@ -20,10 +20,9 @@ const MOCK_DATA_LIST = (): FlowTypes.Data_list => ({ text: "hello", boolean: true, nested: { string: "hello" }, - additional: "included", + additional: false, }, ], - metadata: {}, _xlsxPath: "/mock/data/path", }); @@ -38,15 +37,14 @@ describe("data_list Parser Metadata", () => { beforeAll(() => { parser = new DataListParser({ processedFlowHashmap: {} } as any); }); - it("Infers column metadata from data", () => { - const metadata = parser["getFlowMetadata"](MOCK_DATA_LIST()); - expect(metadata).toEqual({ + it("Infers column metadata from data (for non-string columns)", () => { + const { _metadata } = parser.postProcessFlow(MOCK_DATA_LIST()); + expect(_metadata).toEqual({ number: { type: "number" }, - text: { type: "string" }, boolean: { type: "boolean" }, nested: { type: "object" }, // data missing or undefined in first row identified from subsequent - additional: { type: "string" }, + additional: { type: "boolean" }, }); }); it("Assigns column metadata from metadata row", () => { @@ -57,34 +55,43 @@ describe("data_list Parser Metadata", () => { { id: "id_1", number: undefined, text: undefined }, ], }; - const metadata = parser["getFlowMetadata"](flowWithMetadataRow); - expect(metadata).toEqual({ + const { _metadata } = parser.postProcessFlow(flowWithMetadataRow); + expect(_metadata).toEqual({ number: { type: "number" }, - text: { type: "string" }, }); }); - it("QC metadata", () => { - const loggerSpy = useMockLogger(false); - const flowWithMetadataRow = { + it("Omits metadata when all columns are strings", () => { + const flowWithStringColumns = { ...MOCK_DATA_LIST(), - rows: [{ id: "id_1", number: 1, text: undefined }], + rows: [{ id: "id_1", text: "hello" }], }; - parser["getFlowMetadata"](flowWithMetadataRow); - expect(loggerSpy.error).toHaveBeenCalledTimes(1); - expect(loggerSpy.error).toHaveBeenCalledWith({ - msg1: "[mock_data_list] Data List validation has (1) errors", - msg2: "/mock/data/path", - }); + const { _metadata } = parser.postProcessFlow(flowWithStringColumns); + expect(_metadata).toBeUndefined(); }); + + // eslint-disable-next-line jest/no-commented-out-tests + // it("QC metadata", () => { + // const loggerSpy = useMockLogger(false); + // const flowWithMetadataRow = { + // ...MOCK_DATA_LIST(), + // rows: [{ id: "id_1", number: 1, text: undefined }], + // }; + // parser.postProcessFlow(flowWithMetadataRow); + // expect(loggerSpy.warning).toHaveBeenCalledTimes(1); + // expect(loggerSpy.warning).toHaveBeenCalledWith({ + // msg1: "[mock_data_list] Data List validation has (1) warnings", + // msg2: "/mock/data/path", + // }); + // }); }); /*********************************************************************** * Advanced use cases **********************************************************************/ -const testFlow: FlowTypes.Data_list = { +const MOCK_DATA_LIST_FLOW_ADVANCED = (): FlowTypes.Data_list => ({ flow_type: "data_list", - flow_name: "test_data_list", + flow_name: "mock_data_list_advanced", rows: [ { id: "test_1", @@ -94,8 +101,7 @@ const testFlow: FlowTypes.Data_list = { "nested.test_2": "two", }, ], - metadata: {}, -}; +}); describe("data_list Parser (single)", () => { let parser: DataListParser; @@ -105,7 +111,7 @@ describe("data_list Parser (single)", () => { }); it("Extracts condition_list", async () => { - const { rows } = parser.run(testFlow as any); + const { rows } = parser.run(MOCK_DATA_LIST_FLOW_ADVANCED() as any); const { test_condition_list } = rows[0]; expect(test_condition_list).toEqual([ { @@ -116,13 +122,13 @@ describe("data_list Parser (single)", () => { ]); }); it("Extracts nested properties", async () => { - const { rows } = parser.run(testFlow as any); + const { rows } = parser.run(MOCK_DATA_LIST_FLOW_ADVANCED() as any); const { nested } = rows[0]; expect(nested).toEqual({ test_1: 1, test_2: "two" }); }); // TODO - likely deprecated (can't see in codebase) it("Extracts notification_schedule", async () => { - const { rows } = parser.run(testFlow as any); + const { rows } = parser.run(MOCK_DATA_LIST_FLOW_ADVANCED() as any); const { test_notification_schedule } = rows[0]; expect(test_notification_schedule).toEqual({ key_1: "value_1", key_2: "value_2" }); }); diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts index f2324767a3..8bdc2f4bd4 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts @@ -6,7 +6,7 @@ import { setNestedProperty, } from "../../../utils"; import { DefaultParser } from "./default.parser"; -import { isObjectLiteral, Logger } from "shared"; +import { isEmptyObjectDeep, isObjectLiteral, Logger } from "shared"; export class DataListParser extends DefaultParser { postProcessRow(row: FlowTypes.Data_listRow) { @@ -34,8 +34,11 @@ export class DataListParser extends DefaultParser { return row; } - public override postProcessFlow(flow: FlowTypes.Data_list): FlowTypes.Data_list { - flow.metadata = this.getFlowMetadata(flow); + public override postProcessFlow(flow: FlowTypes.Data_list) { + const metadata = this.getFlowMetadata(flow); + if (!isEmptyObjectDeep(metadata)) { + flow._metadata = metadata; + } return flow; } @@ -45,20 +48,20 @@ export class DataListParser extends DefaultParser { } /** Assign column metadata from @metadata row if provided, or infer from data if not*/ - private getFlowMetadata(flow: FlowTypes.Data_list) { + private getFlowMetadata(flow: FlowTypes.FlowTypeWithData) { const [firstRow] = flow.rows; - const metadata = + const initialMetadata = firstRow?.id === "@metadata" ? this.assignMetadataFromRow(firstRow) : this.inferMetadataFromData(flow.rows); - const errors = this.qualityCheckMetadata(metadata); - if (errors.length > 0) { - for (const error of errors) { - console.error(error); + const { metadata, warnings } = this.qualityCheckMetadata(initialMetadata); + if (warnings.length > 0) { + for (const warning of warnings) { + console.warn(warning); } - Logger.error({ - msg1: `[${flow.flow_name}] Data List validation has (${errors.length}) errors`, + Logger.warning({ + msg1: `[${flow.flow_name}] Data List validation has (${warnings.length}) warnings`, msg2: flow._xlsxPath, }); } @@ -69,26 +72,30 @@ export class DataListParser extends DefaultParser { private assignMetadataFromRow(metadataRow: FlowTypes.Data_listRow) { // do not extract metadata for id column const { id, ...values } = metadataRow; - const metadata: FlowTypes.Data_list["metadata"] = {}; + const metadata: FlowTypes.Data_list["_metadata"] = {}; for (const [field, metadataString] of Object.entries(values)) { - metadata[field] = parseAppDataCollectionString(metadataString as string) as any; + if (metadataString) { + metadata[field] = parseAppDataCollectionString(metadataString as string) as any; + } } return metadata; } /** Infer data_list metadata from data */ private inferMetadataFromData(rows: FlowTypes.Data_listRow[]) { - const metadata: FlowTypes.Data_list["metadata"] = {}; + const metadata: FlowTypes.Data_list["_metadata"] = {}; for (const row of rows) { // do not extract metadata for id column const { id, ...values } = row; for (const [field, value] of Object.entries(values)) { - if (!metadata[field]) { - metadata[field] = { type: undefined }; - } - // only assign type if previously unassigned - if (!metadata[field].type) { - metadata[field].type = this.inferColumnDataType(value); + // do not assign type to other metadata columns + if (!field.startsWith("_")) { + // only assign type if previously unassigned + if (!metadata[field]?.type) { + const inferredType = this.inferColumnDataType(value); + metadata[field] ??= {}; + metadata[field].type = inferredType; + } } } } @@ -104,16 +111,25 @@ export class DataListParser extends DefaultParser { return undefined; } - private qualityCheckMetadata(metadata: FlowTypes.Data_list["metadata"]) { - const errors: string[] = []; + private qualityCheckMetadata(metadata: FlowTypes.Data_list["_metadata"]) { + const warnings: string[] = []; for (const [columnName, columnMetadata] of Object.entries(metadata)) { - // ensure all columns have data type - if (columnMetadata.type === undefined) { - errors.push( - `"${columnName}" column cannot infer type\nEither include an @metadata column or add example value to a row` - ); + // omit metadata for string or undefined types, will used default "string" at runtime + if (columnMetadata.type === "string" || columnMetadata.type === undefined) { + delete metadata[columnName]; } + + /** + * TODO - consider whether check below actually useful to have + * ensure all columns have data type + */ + // if (columnMetadata.type === undefined) { + // metadata[columnName].type = "string"; + // warnings.push( + // `"${columnName}" column cannot infer type (assume string)\nEither include an @metadata column or add example value to a row` + // ); + // } } - return errors; + return { warnings, metadata }; } } From eb7dff075454a9b75aff3c28f623138473d79bda Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 21 Nov 2024 16:09:29 -0800 Subject: [PATCH 4/6] fix: metadata row removal --- .../processors/flowParser/flowParser.ts | 2 +- .../parsers/data_list.parser.spec.ts | 7 +++-- .../flowParser/parsers/data_list.parser.ts | 31 +++++++++++-------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts index c3bf1bf2cd..6e33b50a67 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts @@ -4,7 +4,7 @@ import { IConverterPaths, IFlowHashmapByType, IParsedWorkbookData } from "../../ import { arrayToHashmap, groupJsonByKey, IContentsEntry } from "../../utils"; import BaseProcessor from "../base"; -const cacheVersion = 20241114.2; +const cacheVersion = 20241121.0; export class FlowParserProcessor extends BaseProcessor { public parsers: { [flowType in FlowTypes.FlowType]: Parsers.DefaultParser } = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts index d810303a40..23b4a1b3cf 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.spec.ts @@ -1,6 +1,6 @@ import { FlowTypes } from "data-models"; import { DataListParser } from "."; -import { TEST_DATA_PATHS, useMockLogger } from "../../../../../../../test/helpers/utils"; +import { TEST_DATA_PATHS } from "../../../../../../../test/helpers/utils"; import { FlowParserProcessor } from "../flowParser"; const MOCK_DATA_LIST = (): FlowTypes.Data_list => ({ @@ -47,7 +47,7 @@ describe("data_list Parser Metadata", () => { additional: { type: "boolean" }, }); }); - it("Assigns column metadata from metadata row", () => { + it("Assigns column metadata from metadata row (and removes it)", () => { const flowWithMetadataRow = { ...MOCK_DATA_LIST(), rows: [ @@ -55,10 +55,11 @@ describe("data_list Parser Metadata", () => { { id: "id_1", number: undefined, text: undefined }, ], }; - const { _metadata } = parser.postProcessFlow(flowWithMetadataRow); + const { _metadata, rows } = parser.postProcessFlow(flowWithMetadataRow); expect(_metadata).toEqual({ number: { type: "number" }, }); + expect(rows.length).toEqual(1); }); it("Omits metadata when all columns are strings", () => { const flowWithStringColumns = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts index 8bdc2f4bd4..dd38d3f0ee 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts @@ -35,11 +35,8 @@ export class DataListParser extends DefaultParser { } public override postProcessFlow(flow: FlowTypes.Data_list) { - const metadata = this.getFlowMetadata(flow); - if (!isEmptyObjectDeep(metadata)) { - flow._metadata = metadata; - } - return flow; + const flowWithMetadata = this.getFlowMetadata(flow); + return flowWithMetadata; } public postProcessFlows(flows: FlowTypes.Data_list[]) { @@ -47,15 +44,23 @@ export class DataListParser extends DefaultParser { return flowsWithOverrides; } - /** Assign column metadata from @metadata row if provided, or infer from data if not*/ - private getFlowMetadata(flow: FlowTypes.FlowTypeWithData) { + /** Assign column metadata from @metadata row if provided, or infer from data if not */ + private getFlowMetadata(flow: FlowTypes.Data_list) { const [firstRow] = flow.rows; - const initialMetadata = - firstRow?.id === "@metadata" - ? this.assignMetadataFromRow(firstRow) - : this.inferMetadataFromData(flow.rows); + let metadataInitial: FlowTypes.Data_list["_metadata"]; + // assign from metadata row and remove + if (firstRow?.id === "@metadata") { + metadataInitial = this.assignMetadataFromRow(firstRow); + flow.rows = flow.rows.slice(0, 1); + } + // infer from data when metadata row not available + else { + metadataInitial = this.inferMetadataFromData(flow.rows); + } + // quality check metadata and assign cleaned meta to flow + const { warnings, metadata } = this.qualityCheckMetadata(metadataInitial); + flow._metadata = metadata; - const { metadata, warnings } = this.qualityCheckMetadata(initialMetadata); if (warnings.length > 0) { for (const warning of warnings) { console.warn(warning); @@ -65,7 +70,7 @@ export class DataListParser extends DefaultParser { msg2: flow._xlsxPath, }); } - return metadata; + return flow; } /** Assign data_list metadata using a first `@metadata` row */ From 5e4970903391d7cf5b48d7551925885e1290ba69 Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 21 Nov 2024 16:57:26 -0800 Subject: [PATCH 5/6] fix: metadata row slice --- .../app-data/convert/processors/flowParser/flowParser.ts | 2 +- .../convert/processors/flowParser/parsers/data_list.parser.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts index 6e33b50a67..773307005f 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts @@ -4,7 +4,7 @@ import { IConverterPaths, IFlowHashmapByType, IParsedWorkbookData } from "../../ import { arrayToHashmap, groupJsonByKey, IContentsEntry } from "../../utils"; import BaseProcessor from "../base"; -const cacheVersion = 20241121.0; +const cacheVersion = 20241121.1; export class FlowParserProcessor extends BaseProcessor { public parsers: { [flowType in FlowTypes.FlowType]: Parsers.DefaultParser } = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts index dd38d3f0ee..f2e7c1ccdb 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts @@ -51,7 +51,7 @@ export class DataListParser extends DefaultParser { // assign from metadata row and remove if (firstRow?.id === "@metadata") { metadataInitial = this.assignMetadataFromRow(firstRow); - flow.rows = flow.rows.slice(0, 1); + flow.rows = flow.rows.slice(1); } // infer from data when metadata row not available else { From 0dda00ef85ce6cbffd830081e6982578a55e3708 Mon Sep 17 00:00:00 2001 From: chrismclarke Date: Thu, 21 Nov 2024 17:02:49 -0800 Subject: [PATCH 6/6] chore: code tidying --- .../app-data/convert/processors/flowParser/flowParser.ts | 2 +- .../processors/flowParser/parsers/data_list.parser.ts | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts index 773307005f..0e6c126117 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/flowParser.ts @@ -4,7 +4,7 @@ import { IConverterPaths, IFlowHashmapByType, IParsedWorkbookData } from "../../ import { arrayToHashmap, groupJsonByKey, IContentsEntry } from "../../utils"; import BaseProcessor from "../base"; -const cacheVersion = 20241121.1; +const cacheVersion = 20241121.2; export class FlowParserProcessor extends BaseProcessor { public parsers: { [flowType in FlowTypes.FlowType]: Parsers.DefaultParser } = { diff --git a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts index f2e7c1ccdb..072b11245a 100644 --- a/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts +++ b/packages/scripts/src/commands/app-data/convert/processors/flowParser/parsers/data_list.parser.ts @@ -57,9 +57,11 @@ export class DataListParser extends DefaultParser { else { metadataInitial = this.inferMetadataFromData(flow.rows); } - // quality check metadata and assign cleaned meta to flow + // quality check metadata and assign cleaned meta to flow if non-empty const { warnings, metadata } = this.qualityCheckMetadata(metadataInitial); - flow._metadata = metadata; + if (!isEmptyObjectDeep(metadata)) { + flow._metadata = metadata; + } if (warnings.length > 0) { for (const warning of warnings) {