diff --git a/package.json b/package.json index bc1e625..2a37e0e 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "csv-parse": "^5.4.1", "csv-parser": "^3.0.0", "csvtojson": "^2.0.10", + "decompress": "^4.2.1", "express": "^4.18.2", "fast-csv": "^4.3.6", "form-data": "^4.0.0", diff --git a/src/ingestion/controller/ingestion.controller.ts b/src/ingestion/controller/ingestion.controller.ts index aa7d6b7..225cc99 100644 --- a/src/ingestion/controller/ingestion.controller.ts +++ b/src/ingestion/controller/ingestion.controller.ts @@ -10,15 +10,17 @@ import { Result, EmissionBody, RawDataPullBody } from '../interfaces/Ingestion-data'; import { + BadRequestException, Body, Controller, FileTypeValidator, Get, - MaxFileSizeValidator, + Logger, ParseFilePipe, Post, Query, Res, UploadedFile, + UploadedFiles, UseInterceptors, Put, UseGuards, Req, Param @@ -29,7 +31,6 @@ import {RawDataImportService} from '../services/rawDataImport/rawDataImport.serv import {EventService} from '../services/event/event.service'; import {Response, Request} from 'express'; import {CsvImportService} from "../services/csvImport/csvImport.service"; -import {FileInterceptor} from "@nestjs/platform-express"; import {diskStorage} from "multer"; import {FileIsDefinedValidator} from "../validators/file-is-defined-validator"; import {FileStatusService} from '../services/file-status/file-status.service'; @@ -43,6 +44,13 @@ import { NvskApiService } from '../services/nvsk-api/nvsk-api.service'; import { UploadDimensionFileService } from '../services/upload-dimension-file/upload-dimension-file.service'; import { GrammarService } from '../services/grammar/grammar.service'; import { GenericFunction } from '../services/generic-function'; +import { + FileFieldsInterceptor, + FileInterceptor, +} from '@nestjs/platform-express'; +import { FileType, FileValidateRequest } from '../dto/request'; +import * as fs from 'fs'; +import { ValidatorService } from '../services/validator/validator.service'; let 
validateBodySchema = { "type": "object", @@ -65,9 +73,18 @@ let validateBodySchema = { ] }; +const defaultStorageConfig = diskStorage({ + destination: './upload', + filename: (req, file, cb) => { + cb(null, Date.now() + '-' + file.originalname); + }, +}); + @ApiTags('ingestion') @Controller('') export class IngestionController { + private logger: Logger; + constructor( private datasetService: DatasetService, private dimensionService: DimensionService , private eventService: EventService, private csvImportService: CsvImportService, private fileStatus: FileStatusService, private updateFileStatus: UpdateFileStatusService, @@ -76,7 +93,9 @@ export class IngestionController { private nvskService:NvskApiService, private grammarService: GrammarService, private service: GenericFunction, - private uploadDimension:UploadDimensionFileService) { + private uploadDimension:UploadDimensionFileService, + private validatorService: ValidatorService) { + this.logger = new Logger(IngestionController.name); } @Get('generatejwt') @@ -327,7 +346,7 @@ export class IngestionController { }) })) - @Post('/validate') + @Post('/validate-old') @ApiConsumes('multipart/form-data') async validateEventOrDimension(@Body() body: any, @Res()response: Response, @UploadedFile( new ParseFilePipe({ @@ -357,4 +376,100 @@ export class IngestionController { // throw new Error(e); } } + + @Post('validate') + @UseInterceptors( + FileFieldsInterceptor( + [ + { name: 'grammar', maxCount: 1 }, + { name: 'data', maxCount: 1 }, + ], + { + storage: defaultStorageConfig, + fileFilter(req, file, callback) { + if (file.mimetype !== 'text/csv') { + return callback( + new BadRequestException('Only CSV files are allowed'), + false, + ); + } + callback(null, true); + }, + }, + ) + ) + uploadFileN( + @UploadedFiles() + files: { + grammar?: Express.Multer.File[]; + data?: Express.Multer.File[]; + }, + @Body() body: FileValidateRequest, + ) { + this.logger.debug(files.grammar); + const grammarFilePath = files.grammar[0].path; 
+ + if (!grammarFilePath || !fs.existsSync(grammarFilePath)) + throw new BadRequestException('Grammar file is required'); + + const grammarContent = fs.readFileSync(grammarFilePath, 'utf8'); + const dataFilePath = files?.data ? files?.data[0]?.path : undefined; + + let resp; + switch (body.type.trim()) { + case FileType.DimensionGrammar: + resp = + this.validatorService.checkDimensionGrammarForValidationErrors( + grammarContent, + ); + break; + case FileType.DimensionData: + if (!dataFilePath || !fs.existsSync(dataFilePath)) + throw new BadRequestException('Data file is required'); + + resp = this.validatorService.checkDimensionDataForValidationErrors( + grammarContent, + fs.readFileSync(dataFilePath, 'utf8'), + ); + break; + case FileType.EventGrammar: + resp = + this.validatorService.checkEventGrammarForValidationErrors( + grammarContent, + ); + break; + case FileType.EventData: + if (!dataFilePath || !fs.existsSync(dataFilePath)) + throw new BadRequestException('Data file is required'); + + resp = this.validatorService.checkEventDataForValidationErrors( + grammarContent, + fs.readFileSync(dataFilePath, 'utf8'), + ); + break; + default: + throw new BadRequestException('Invalid file type'); + } + + // delete the files + if (grammarFilePath) fs.unlinkSync(grammarFilePath); + if (dataFilePath) fs.unlinkSync(dataFilePath); + + return resp; + } + + @Post('bulk') + @UseInterceptors( + FileInterceptor('folder', { + storage: defaultStorageConfig, + }), + ) + async uploadBulkZip(@UploadedFile() file: Express.Multer.File) { + const zipFilePath = file.path; + + const resp = await this.validatorService.handleZipFile(zipFilePath); + // delete the file + if (zipFilePath) fs.unlinkSync(zipFilePath); + return resp; + } } diff --git a/src/ingestion/cqube-spec-checker/dimension.data.validator.ts b/src/ingestion/cqube-spec-checker/dimension.data.validator.ts new file mode 100644 index 0000000..9170f77 --- /dev/null +++ b/src/ingestion/cqube-spec-checker/dimension.data.validator.ts 
@@ -0,0 +1,54 @@ +export class DimensionDataValidator { + grammarContent: any; + lines: any; + pkIndexLine: any; + dataTypesLine: any; + headerLine: any; + dataContent: any; + dataContentLines: any; + errors: any[]; + constructor(grammarContent, dataContent) { + this.grammarContent = grammarContent; + this.lines = this.grammarContent.trim().split('\n'); + this.pkIndexLine = this.lines[0].trim().split(','); + this.dataTypesLine = this.lines[1].trim().split(','); + this.headerLine = this.lines[2].trim().split(','); + this.dataContent = dataContent; + this.dataContentLines = this.dataContent + .trim() + .split('\n')[0] + .trim() + .split(','); + this.errors = []; + } + + verify() { + this.verifyColumnsToGrammar(); + return this.errors; + } + + verifyColumnsToGrammar() { + this.headerLine.forEach((header, index) => { + this.dataContentLines.indexOf(header) === -1 + ? this.errors.push({ + row: 0, + col: index, + errorCode: 1001, + error: `Missing header from grammar file: ${header}`, + }) + : null; + }); + + this.dataContentLines.forEach((header, index) => { + this.headerLine.indexOf(header) === -1 + ? 
this.errors.push({ + row: 0, + col: index, + errorCode: 1001, + error: `Extra header not in grammar file: ${header}`, + data: header, + }) + : null; + }); + } +} diff --git a/src/ingestion/cqube-spec-checker/dimension.grammar.validator.ts b/src/ingestion/cqube-spec-checker/dimension.grammar.validator.ts new file mode 100644 index 0000000..fd3d46f --- /dev/null +++ b/src/ingestion/cqube-spec-checker/dimension.grammar.validator.ts @@ -0,0 +1,80 @@ +export class DimensionValidator { + content: any; + lines: any; + pkIndexLine: any; + dataTypesLine: any; + headerLine: any; + constructor(content) { + this.content = content; + this.lines = this.content.trim().split('\n'); + this.pkIndexLine = this.lines[0].trim().split(','); + this.dataTypesLine = this.lines[1].trim().split(','); + this.headerLine = this.lines[2].trim().split(','); + } + + verify() { + const errors = []; + errors.push(...this.verifyColumns()); + errors.push(...this.verifyPkIndexLine()); + errors.push(...this.verifyDataTypes()); + return errors; + } + + verifyColumns() { + const errors = []; + const columnCount = this.pkIndexLine.length; + this.lines.forEach((line, lineNumber) => { + if (line !== '') { + // Ignore last line + const lineColumns = line.split(',').length; + if (lineColumns !== columnCount) { + errors.push({ + row: lineNumber, + col: 0, + errorCode: 2003, + error: `Line ${lineNumber + 1 + }: Invalid number of columns ${lineColumns} (expected ${columnCount}), ${line.split( + ',', + )}`, + data: line, + }); + } + } + }); + return errors; + } + + verifyPkIndexLine() { + const errors = []; + if ( + this.pkIndexLine.indexOf('PK') === -1 || + this.pkIndexLine.indexOf('Index') === -1 + ) { + errors.push({ + row: 0, + col: 0, + errorCode: 1003, + error: `Invalid PK/Index: First row must include 'PK' and 'Index' but found "${this.pkIndexLine}"`, + data: this.pkIndexLine, + }); + } + return errors; + } + + verifyDataTypes() { + const errors = []; + this.dataTypesLine.forEach((dataType, columnIndex) => 
{ + if (dataType !== 'string' && dataType !== 'integer') { + errors.push({ + row: 1, + col: columnIndex, + errorCode: 1002, + error: `Invalid data type at column ${columnIndex + 1 + }: Only 'string' and 'integer' are allowed but found '${dataType}'`, + data: this.dataTypesLine, + }); + } + }); + return errors; + } +} diff --git a/src/ingestion/dto/errors.ts b/src/ingestion/dto/errors.ts new file mode 100644 index 0000000..c3d988f --- /dev/null +++ b/src/ingestion/dto/errors.ts @@ -0,0 +1,7 @@ +export type ValidationErrors = { + row: string | number; + col: string | number; + errorCode: number; + error: string; + data?: any; +}; diff --git a/src/ingestion/dto/request.ts b/src/ingestion/dto/request.ts new file mode 100644 index 0000000..0dfa822 --- /dev/null +++ b/src/ingestion/dto/request.ts @@ -0,0 +1,10 @@ +export enum FileType { + DimensionGrammar = 'dimension-grammar', + DimensionData = 'dimension-data', + EventGrammar = 'event-grammar', + EventData = 'event-data', +} + +export class FileValidateRequest { + type: FileType; +} diff --git a/src/ingestion/dto/response.ts b/src/ingestion/dto/response.ts new file mode 100644 index 0000000..970131c --- /dev/null +++ b/src/ingestion/dto/response.ts @@ -0,0 +1,5 @@ +import { ValidationErrors } from './errors'; + +export class SingleFileValidationResponse { + errors: ValidationErrors[]; +} diff --git a/src/ingestion/ingestion.module.ts b/src/ingestion/ingestion.module.ts index f4afd2d..0b47fc5 100644 --- a/src/ingestion/ingestion.module.ts +++ b/src/ingestion/ingestion.module.ts @@ -17,11 +17,12 @@ import { NvskApiService } from './services/nvsk-api/nvsk-api.service'; import { DateService } from './services/dateService'; import { UploadDimensionFileService } from './services/upload-dimension-file/upload-dimension-file.service'; import { GrammarService } from './services/grammar/grammar.service'; +import { ValidatorService } from './services/validator/validator.service'; @Module({ controllers: [IngestionController], 
providers: [DatasetService, DimensionService, EventService, GenericFunction, HttpCustomService, CsvImportService, FileStatusService, - UpdateFileStatusService, DataEmissionService, UploadDimensionFileService, UploadService,RawDataImportService,NvskApiService,DateService,GrammarService], + UpdateFileStatusService, DataEmissionService, UploadDimensionFileService, UploadService,RawDataImportService,NvskApiService,DateService,GrammarService, ValidatorService], imports: [DatabaseModule, HttpModule] }) export class IngestionModule { diff --git a/src/ingestion/services/grammar/grammar.service.ts b/src/ingestion/services/grammar/grammar.service.ts index 6605dc6..24072aa 100644 --- a/src/ingestion/services/grammar/grammar.service.ts +++ b/src/ingestion/services/grammar/grammar.service.ts @@ -7,11 +7,11 @@ export class GrammarService { } async getEventSchemas() { - return await this._databaseService.executeQuery(`select id, name, schema from spec."EventGrammar" WHERE eventType='EXTERNAL'`); + return await this._databaseService.executeQuery(`select id, name, schema from spec."EventGrammar" WHERE "eventType"='EXTERNAL'`); } async getDimensionSchemas() { - return await this._databaseService.executeQuery(`select id, name, schema from spec."DimensionGrammar" WHERE dimensionType='EXTERNAL'`); + return await this._databaseService.executeQuery(`select id, name, schema from spec."DimensionGrammar" WHERE "dimensionType"='EXTERNAL'`); } async getEventSchemaByID(id) { diff --git a/src/ingestion/services/validator/validator.service.spec.ts b/src/ingestion/services/validator/validator.service.spec.ts new file mode 100644 index 0000000..8c52daf --- /dev/null +++ b/src/ingestion/services/validator/validator.service.spec.ts @@ -0,0 +1,150 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ValidatorService } from './validator.service'; +import * as fs from 'fs'; + +describe('ValidatorService', () => { + let service: ValidatorService; + + beforeEach(async () => { + const 
module: TestingModule = await Test.createTestingModule({ + providers: [ValidatorService], + }).compile(); + + service = module.get(ValidatorService); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); + + it('should check a dimension data file with no errors', async () => { + const grammarContent = fs.readFileSync( + './src/admin/fixtures/academicyear-dimension.grammar.csv', + 'utf8', + ); + const dataContent = fs.readFileSync( + './src/admin/fixtures/academicyear-dimension.data.csv', + 'utf8', + ); + + const resp = service.checkDimensionDataForValidationErrors( + grammarContent, + dataContent, + ); + + expect(resp.errors).toBeInstanceOf(Array); + expect(resp.errors.length).toBe(0); + }); + + it('should check a dimension grammar file with no errors', async () => { + const grammarContent = fs.readFileSync( + './src/admin/fixtures/academicyear-dimension.grammar.csv', + 'utf8', + ); + + const resp = + service.checkDimensionGrammarForValidationErrors(grammarContent); + + expect(resp.errors).toBeInstanceOf(Array); + expect(resp.errors.length).toBe(0); + }); + + it('should throw with a wrong data file', () => { + const grammarContent = fs.readFileSync( + './src/admin/fixtures/avgplaytime-event.grammar.csv', + 'utf8', + ); + const dataContent = fs.readFileSync( + './src/admin/fixtures/avgplaytime-event.data.csv', + 'utf8', + ); + const resp = service.checkDimensionDataForValidationErrors( + grammarContent, + dataContent, + ); + expect(resp).toEqual({ + errors: [ + { + row: 1, + col: 0, + errorCode: 1001, + error: 'Missing header from grammar file: string', + }, + { + row: 1, + col: 1, + errorCode: 1001, + error: 'Missing header from grammar file: string', + }, + { + row: 1, + col: 2, + errorCode: 1001, + error: 'Missing header from grammar file: string', + }, + { + row: 1, + col: 3, + errorCode: 1001, + error: 'Missing header from grammar file: integer', + }, + { + row: 1, + col: 0, + errorCode: 1001, + error: 'Extra header not in grammar 
file: state_id', + }, + { + row: 1, + col: 1, + errorCode: 1001, + error: 'Extra header not in grammar file: grade_diksha', + }, + { + row: 1, + col: 2, + errorCode: 1001, + error: 'Extra header not in grammar file: subject_diksha', + }, + { + row: 1, + col: 3, + errorCode: 1001, + error: + 'Extra header not in grammar file: avg_play_time_in_mins_on_app_and_portal', + }, + ], + }); + }); + + it('should check a valid event grammar file', () => { + const grammarContent = fs.readFileSync( + './src/admin/fixtures/avgplaytime-event.grammar.csv', + 'utf8', + ); + + const resp = service.checkEventGrammarForValidationErrors(grammarContent); + + expect(resp.errors).toBeInstanceOf(Array); + expect(resp.errors.length).toBe(0); + }); + + it('should check a valid event data file', () => { + const grammarContent = fs.readFileSync( + './src/admin/fixtures/avgplaytime-event.grammar.csv', + 'utf8', + ); + const dataContent = fs.readFileSync( + './src/admin/fixtures/avgplaytime-event.data.csv', + 'utf8', + ); + + const resp = service.checkEventDataForValidationErrors( + grammarContent, + dataContent, + ); + + expect(resp.errors).toBeInstanceOf(Array); + expect(resp.errors.length).toBe(0); + }); +}); diff --git a/src/ingestion/services/validator/validator.service.ts b/src/ingestion/services/validator/validator.service.ts new file mode 100644 index 0000000..56c0cba --- /dev/null +++ b/src/ingestion/services/validator/validator.service.ts @@ -0,0 +1,254 @@ +import { BadRequestException, Injectable, Logger } from '@nestjs/common'; +import * as fs from 'fs'; +import * as path from 'path'; +import { EventGrammarValidator } from '../../validators/event-grammar.validator'; +import { SingleFileValidationResponse } from '../../dto/response'; +import { EventDataValidator } from '../../validators/event-data.validator'; +import { DimensionDataValidator } from '../../cqube-spec-checker/dimension.data.validator'; +import { DimensionValidator } from 
'../../cqube-spec-checker/dimension.grammar.validator'; + +// eslint-disable-next-line @typescript-eslint/no-var-requires +const decompress = require('decompress'); +@Injectable() +export class ValidatorService { + private logger: Logger; + + constructor() { + this.logger = new Logger(ValidatorService.name); + } + checkDimensionDataForValidationErrors( + grammarContent: string, + dataContent: string, + ): SingleFileValidationResponse { + const dimensionDataValidator = new DimensionDataValidator( + grammarContent, + dataContent, + ); + const errors = dimensionDataValidator.verify(); + return { + errors, + }; + } + + checkDimensionGrammarForValidationErrors( + grammarContent: string, + ): SingleFileValidationResponse { + const dimensionGrammarValidator = new DimensionValidator(grammarContent); + + const errors = dimensionGrammarValidator.verify(); + return { + errors, + }; + } + + checkEventGrammarForValidationErrors( + grammarContent: string, + ): SingleFileValidationResponse { + const eventGrammarValidator = new EventGrammarValidator(grammarContent); + + const errors = eventGrammarValidator.verify(); + return { + errors, + }; + } + + checkEventDataForValidationErrors( + grammarContent: string, + dataContent: string, + ): SingleFileValidationResponse { + const eventDataValidator = new EventDataValidator( + grammarContent, + dataContent, + ); + + const errors = eventDataValidator.verify(); + return { + errors, + }; + } + + async handleZipFile(zipFilePath: string) { + // unzip the file first + const errors = { + dimensions: {}, + programs: {}, + }; + + // TODO: validate zips folder structure + if (fs.existsSync('mount')) fs.rmdirSync('mount', { recursive: true }); + fs.mkdirSync('mount'); + await decompress(zipFilePath, 'mount'); + + function getFileType(filePath) { + const stat = fs.statSync(filePath); + return stat.isFile() + ? 'file' + : stat.isDirectory() + ? 
'directory' + : 'unknown'; + } + function getFilesWithTypes(directoryPath) { + const files = fs.readdirSync(directoryPath); + + const filesWithTypes = {}; + files.forEach((file) => { + const filePath = path.join(directoryPath, file); + const fileType = getFileType(filePath); + filesWithTypes[file] = fileType; + }); + + return filesWithTypes; + } + + const filesWithTypes = getFilesWithTypes('./mount'); + if (filesWithTypes['config.json'] !== 'file') + throw new BadRequestException('config.json is required in zip'); + if (filesWithTypes['dimensions'] !== 'directory') + throw new BadRequestException('dimensions directory is required in zip'); + if (filesWithTypes['programs'] !== 'directory') + throw new BadRequestException('programs directory is required in zip'); + + const config = JSON.parse(fs.readFileSync('./mount/config.json', 'utf-8')); + + errors.dimensions = + this.handleDimensionFolderValidation('./mount/dimensions'); + + const programValidationResponse = + this.handleProgramsFolderValidation(config); + errors.programs = programValidationResponse.errors; + + fs.rmdirSync('mount', { recursive: true }); + return { errors, warnings: programValidationResponse.warnings }; + } + + handleDimensionFolderValidation(folderPath: string) { + const regexDimensionGrammar = /\-dimension\.grammar.csv$/i; + const inputFilesForDimensions = fs.readdirSync(folderPath); + + const errors = { + grammar: {}, + data: {}, + }; + + for (let i = 0; i < inputFilesForDimensions.length; i++) { + const grammarErrors = []; + const dataErrors = []; + + if (regexDimensionGrammar.test(inputFilesForDimensions[i])) { + const currentDimensionGrammarFileName = + folderPath + `/${inputFilesForDimensions[i]}`; + const dimensionDataFileName = currentDimensionGrammarFileName.replace( + 'grammar', + 'data', + ); + + this.logger.debug( + 'currentDimensionGrammarFileName: ', + currentDimensionGrammarFileName, + ); + + this.logger.debug('dimensionDataFileName: ', dimensionDataFileName); + + const
grammarContent = fs.readFileSync( + currentDimensionGrammarFileName, + 'utf-8', + ); + if (!fs.existsSync(dimensionDataFileName)) { + dataErrors.push( + `Warning: Data file missing for dimension grammar ${currentDimensionGrammarFileName}`, + ); + } else { + const dataContent = fs.readFileSync(dimensionDataFileName, 'utf-8'); + dataErrors.push( + ...this.checkDimensionDataForValidationErrors( + grammarContent, + dataContent, + ).errors, + ); + } + grammarErrors.push( + ...this.checkDimensionGrammarForValidationErrors(grammarContent) + .errors, + ); + + errors.grammar[inputFilesForDimensions[i]] = grammarErrors; + errors.data[inputFilesForDimensions[i].replace('grammar', 'data')] = + dataErrors; + } + } + + return errors; + } + + handleProgramsFolderValidation(config) { + const regexEventGrammar = /\-event\.grammar.csv$/i; + const errors = {}; + const warnings = []; + + this.logger.debug('config: ', config); + + for (let i = 0; i < config?.programs.length; i++) { + const programName = config?.programs[i]?.name; + const programErrors = { + grammar: {}, + data: {}, + }; + + const inputFiles = fs.readdirSync(config?.programs[i].input?.files); + // iterating over all the files in the program folder + for (let j = 0; j < inputFiles.length; j++) { + const grammarErrors = []; + const dataErrors = []; + if (regexEventGrammar.test(inputFiles[j])) { + const currentEventGrammarFilePath = + config?.programs[i].input?.files + `/${inputFiles[j]}`; + const eventGrammarContent = fs.readFileSync( + currentEventGrammarFilePath, + 'utf-8', + ); + + grammarErrors.push( + ...this.checkEventGrammarForValidationErrors(eventGrammarContent) + .errors, + ); + + // programErrors.grammar[inputFiles[j]] = { + // eventGrammarContent, + // grammarErrors, + // }; + + programErrors.grammar[inputFiles[j]] = grammarErrors; + + const dataFilePath = currentEventGrammarFilePath.replace( + 'grammar', + 'data', + ); + + if (!fs.existsSync(dataFilePath)) { + warnings.push( + `Warning: Data file missing for 
event grammar ${currentEventGrammarFilePath}`, + ); + } else { + const eventContent = fs.readFileSync(dataFilePath, 'utf-8'); + dataErrors.push( + ...this.checkEventDataForValidationErrors( + eventGrammarContent, + eventContent, + ).errors, + ); + // programErrors.data[inputFiles[j].replace('grammar', 'data')] = { + // eventDataContent: eventContent, + // dataErrors, + // }; + programErrors.data[inputFiles[j].replace('grammar', 'data')] = + dataErrors; + } + } + } + + errors[programName] = programErrors; + } + + return { errors, warnings }; + } +} diff --git a/src/ingestion/validators/event-data.validator.ts b/src/ingestion/validators/event-data.validator.ts new file mode 100644 index 0000000..12abcb9 --- /dev/null +++ b/src/ingestion/validators/event-data.validator.ts @@ -0,0 +1,142 @@ +import { ValidationErrors } from '../dto/errors'; + +export class EventDataValidator { + private grammarRows: string[]; + private dataRows: string[]; + + constructor(grammarContent: string, dataContent: string) { + this.grammarRows = grammarContent + .split('\n') + .filter((line) => line.trim() !== ''); + + this.dataRows = dataContent + .split('\n') + .filter((line) => line.trim() !== ''); + } + + verify() { + const errors: ValidationErrors[] = []; + + // check that the header matches the grammar + errors.push( + ...this.matchHeaders( + this.dataRows[0].split(','), + this.grammarRows[3].split(','), + ), + ); + // check that datatypes match the grammar + errors.push( + ...this.matchDataTypes(this.dataRows.slice(1), this.grammarRows[2]), + ); + + //TODO: check for structural errors with quote characters and delimiters + + return errors; + } + + private matchHeaders( + contentHeaders: string[], + grammarHeaders: string[], + ): ValidationErrors[] { + const errors: ValidationErrors[] = []; + + if (contentHeaders.length !== grammarHeaders.length) { + return [ + { + row: 0, + col: 0, + errorCode: 2003, + error: + 'Mismatch number of columns: Data file header length does not match the 
one specified in the grammar', + data: contentHeaders, + }, + ]; + } + + for (let i = 0; i < contentHeaders.length; i++) { + if (contentHeaders[i] !== grammarHeaders[i]) { + errors.push({ + row: 0, + col: i, + errorCode: 1005, + error: `Mismatched header: Expected ${grammarHeaders[i]} but found ${contentHeaders[i]}`, + data: contentHeaders, + }); + } + } + + return errors; + } + + private matchDataTypes(contentData: string[], grammarDataTypes: string) { + const errors: ValidationErrors[] = []; + const dataTypes: string[] = grammarDataTypes.split(','); + const numCols = dataTypes.length; + + for (let i = 0; i < contentData.length; i++) { + const currentRow = contentData[i].split(','); + const len = currentRow.length; + if (currentRow.length !== dataTypes.length) { + errors.push({ + row: i, + col: 0, + errorCode: 2003, + error: `Expected ${numCols} columns at row ${i + 1} got ${len}`, + data: currentRow, + }); + + continue; + } + + dataTypes.forEach((dataType, idx) => { + switch (dataType) { + // TODO: Figure out a better way to manage the supported data types + case 'string': + if (typeof currentRow[idx] !== 'string') { + errors.push({ + row: i, + col: idx, + errorCode: 1002, + error: `Mismatched data type: Expected ${dataType} but found ${typeof currentRow[ + idx + ]}`, + data: currentRow[idx], + }); + } + break; + case 'integer': + if (isNaN(parseFloat(currentRow[idx]))) { + errors.push({ + row: i + 1 + '', + col: idx + '', + errorCode: 1002, + error: `Mismatched data type: Expected ${dataType} but found ${typeof currentRow[ + idx + ]}`, + data: currentRow[idx], + }); + } + break; + case 'date-time': + if ( + typeof currentRow[idx] !== 'string' || + isNaN(Date.parse(currentRow[idx])) + ) { + errors.push({ + row: i + 1 + '', + col: idx + '', + errorCode: 1002, + error: `Mismatched data type: Expected ${dataType} but found ${typeof currentRow[ + idx + ]}`, + data: currentRow[idx], + }); + } + break; + } + }); + } + + return errors; + } +} diff --git 
a/src/ingestion/validators/event-grammar.validator.ts b/src/ingestion/validators/event-grammar.validator.ts new file mode 100644 index 0000000..15546c4 --- /dev/null +++ b/src/ingestion/validators/event-grammar.validator.ts @@ -0,0 +1,114 @@ +export class EventGrammarValidator { + private content: string[]; + + constructor(grammarContent: string) { + this.content = grammarContent + .split('\n') + .filter((line) => line.trim() !== '') + .map((line) => line.trim()); + } + + verify() { + const errors: any[] = []; + // check for length + const len = this.content.length; + if (len !== 5) { + errors.push({ + row: 0, + col: 0, + errorCode: 2005, + error: `Structural Error: Event Grammar file should contain exactly 5 rows but found ${len} rows`, + }); + } + + // check for equal number of columns in each row + const colCount = []; + this.content.forEach((row) => { + colCount.push(row.split(',').length); + }); + + if (colCount.every((val) => val === colCount[0]) === false) { + errors.push({ + row: 0, + col: 0, + errorCode: 2003, + error: 'Invalid CSV file: all rows should have equal number of columns', + }); + } + // TODO: add check for dimension names + + // TODO: add check for dimension key names + + // Check for supported data types + const dataTypeRow = this.content[2]; + // const dataTypeErrors = []; + + dataTypeRow.split(',').forEach((dataType: string, index: number) => { + if (!['string', 'integer', 'date'].includes(dataType.trim())) { + errors.push({ + row: 1, + col: index, + errorCode: 1002, + error: `Invalid data type: ${dataType}. 
Supported data types are: string, integer, date`, + data: dataType, + }); + } + }); + + // errors.push({ + // row: 3, + // col: 0, + // errorCode: 1002, + // error: dataTypeErrors, + // }); + // TODO: Add check to ensure the mentioned data type matches the dimension's datatype + + // check that last row only contains dimensions, timeDimensions and metric + const dimensionIdxs = []; + const lastRow = this.content[4]; + lastRow.split(',').forEach((item: string, idx: number) => { + if (!['timeDimension', 'dimension', 'metric'].includes(item.trim())) { + errors.push({ + row: 4, + col: idx, + errorCode: 1004, + error: `Dimension Grammar Specification Error: Wrong values in fieldType row, allowed values are 1. dimension 2.timeDimension 3. metric, but received ${item}`, + data: lastRow, + }); + } else if (item.trim() === 'dimension') { + dimensionIdxs.push(idx); + } + }); + + // make sure second (fk fields row) and fourth row (header row) have same column names + const fkKeysRow = this.content[1] + .split(',') + .map((item: string) => item.trim()); + const headerRow = this.content[3] + .split(',') + .map((item: string) => item.trim()); + + dimensionIdxs.forEach((idx: number) => { + if (fkKeysRow[idx] !== headerRow[idx]) { + errors.push( + { + row: 1, + col: idx, + errorCode: 1005, + error: `Event Grammar Specification Error: Mismatch header and dimension fk field names. Given header field name is: ${headerRow[idx]} and expected fk field name is: ${fkKeysRow[idx]}`, + data: headerRow, + }, + { + row: 3, + col: idx, + errorCode: 1005, + error: `Event Grammar Specification Error: Mismatch header and dimension fk field names. Given header field name is: ${headerRow[idx]} and expected fk field name is: ${fkKeysRow[idx]}`, + data: headerRow, + }, + ); + } + }); + + return errors; + } +}