From bd278b78961ee06ebc5541b13d147419b23d679e Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 8 Feb 2015 08:29:15 -0700 Subject: [PATCH] functionality added for nested objects in arrays --- README.md | 36 ++++- index.js | 338 ++++++++++++++++++++-------------------- test/csvjson_test.js | 34 ++-- test/schema_sample2.csv | 6 +- 4 files changed, 216 insertions(+), 198 deletions(-) diff --git a/README.md b/README.md index 770e764..19ee609 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,9 @@ convert csv data to schema json object for creating schema of json object following key can be used in header of csv file: . for defining nested json object - [] for defining data as array (suffix) -- in addition - can add delimiter in the array (suffix) + [] for defining data as array (suffix) + -- can add delimiter in the array (e.g. [;] for delimiter of ;) + -- can nest objects in the array, index must be listed (e.g. [1] for index 1) + for defining data as integer (suffix) - for omitting data from result output (prefix) @@ -102,10 +103,9 @@ convert csv data to schema json object /* schema_sample.csv - created,contact.name,contact.age+,contact.number+,address[],address[],contact.hobbies[;],-id - 2014-11-12,Pradeep,25,4352436,MG Road,Mumbai,pc games; guitar,5 - 2014-10-06,Arnav,16,7364537,KB Road,Mumbai,pc games; traveling,7 - + created,contact.name,contact.age+,contact.number+,address[],address[],contact.hobbies[;],-id,friends[0].name,friends[0].phone,friends[1].name,friends[1].phone + 2014-11-12,Pradeep,25,4352436,MG Road,Mumbai,pc games; guitar,5,Jeff,8761234567,Mike,1234567890 + 2014-10-06,Arnav,16,7364537,KB Road,Mumbai,pc games; traveling,7,Steve,5555555555,Pradeep,4352436 */ @@ -124,7 +124,17 @@ csvjson.toSchemaObject('./schema_sample.csv').output "hobbies":["pc games","guitar"] }, - "address":["MG Road","Mumbai"] + "address":["MG Road","Mumbai"], + "friends":[ + { + "name": "Jeff", + "phone": "8761234567" + }, + { + "name": "Mike", + "phone": "1234567890" + } + ] },
{ "created":"2014-10-06", @@ -135,7 +145,17 @@ csvjson.toSchemaObject('./schema_sample.csv').output "hobbies":["pc games","traveling"] }, - "address":["KB Road","Mumbai"] + "address":["KB Road","Mumbai"], + "friends":[ + { + "name": "Steve", + "phone": "5555555555" + }, + { + "name": "Pradeep", + "phone": "4352436" + } + ] } ] diff --git a/index.js b/index.js index 078f248..f749e9c 100644 --- a/index.js +++ b/index.js @@ -1,197 +1,193 @@ var fs = require("fs"), - util = require("util"); + util = require("util"); module.exports = { - toObject : function(data){ - var content = getContentIfFile(data); - if(!content || typeof content !== "string"){ - throw new Error("invalid data"); - } - content = content.split(/[\n\r]+/ig); - var headers = content.shift().split(','), - hashData = []; - content.forEach(function(item){ - if(item){ - item = item.split(','); - var hashItem = {}; - headers.forEach(function(headerItem, index){ - hashItem[headerItem] = trimQuote(item[index]); - }); - hashData.push(hashItem); - } - }); - return outputSave(hashData); - }, + toObject : function(data){ + var content = getContentIfFile(data); + if(!content || typeof content !== "string"){ + throw new Error("invalid data"); + } + content = content.split(/[\n\r]+/ig); + var headers = content.shift().split(','), + hashData = []; + content.forEach(function(item){ + if(item){ + item = item.split(','); + var hashItem = {}; + headers.forEach(function(headerItem, index){ + hashItem[headerItem] = trimQuote(item[index]); + }); + hashData.push(hashItem); + } + }); + return outputSave(hashData); + }, - toArray : function(data){ - var content = getContentIfFile(data); - if(!content || typeof content !== "string"){ - throw new Error("invalid data"); - } - content = content.split(/[\n\r]+/ig); - var arrayData = []; - content.forEach(function(item){ - if(item){ - item = item.split(',').map(function(cItem){ - return trimQuote(cItem); - }); - arrayData.push(item); - } - }); - return outputSave(arrayData); - }, + 
toArray : function(data){ + var content = getContentIfFile(data); + if(!content || typeof content !== "string"){ + throw new Error("invalid data"); + } + content = content.split(/[\n\r]+/ig); + var arrayData = []; + content.forEach(function(item){ + if(item){ + item = item.split(',').map(function(cItem){ + return trimQuote(cItem); + }); + arrayData.push(item); + } + }); + return outputSave(arrayData); + }, - toCSV : function(data){ - var content = getContentIfFile(data); - if(!content){ - throw new Error("invalid data"); - } - if(typeof content === "string"){ - content = JSON.parse(content); - } - if(!content.length){ - throw new Error("invalid data"); - } - var textContent = [], - headers = false; - content.forEach(function(item){ - if(util.isArray(item)){ - textContent.push(item.join(',')); - }else{ - headers = Object.keys(item).join(','); - var data = []; - for(var i in item){ - data.push(item[i]); - } - textContent.push(data.join(',')); - } - }); - if(headers){ - textContent.unshift(headers); - } - return outputSave(textContent.join("\n")) ; - }, + toCSV : function(data){ + var content = getContentIfFile(data); + if(!content){ + throw new Error("invalid data"); + } + if(typeof content === "string"){ + content = JSON.parse(content); + } + if(!content.length){ + throw new Error("invalid data"); + } + var textContent = [], + headers = false; + content.forEach(function(item){ + if(util.isArray(item)){ + textContent.push(item.join(',')); + }else{ + headers = Object.keys(item).join(','); + var data = []; + for(var i in item){ + data.push(item[i]); + } + textContent.push(data.join(',')); + } + }); + if(headers){ + textContent.unshift(headers); + } + return outputSave(textContent.join("\n")) ; + }, - toColumnArray : function(data){ - var content = getContentIfFile(data); - if(!content || typeof content !== "string"){ - throw new Error("invalid data"); - } - content = content.split(/[\n\r]+/ig); - var headers = content.shift().split(','), - hashData = {}; - 
headers.forEach(function(item){ - hashData[item] = []; - }); - content.forEach(function(item){ - if(item){ - item = item.split(','); - item.forEach(function(val, index){ - hashData[headers[index]].push(trimQuote(val)); - }); - } - }); - return outputSave(hashData); - }, + toColumnArray : function(data){ + var content = getContentIfFile(data); + if(!content || typeof content !== "string"){ + throw new Error("invalid data"); + } + content = content.split(/[\n\r]+/ig); + var headers = content.shift().split(','), + hashData = {}; + headers.forEach(function(item){ + hashData[item] = []; + }); + content.forEach(function(item){ + if(item){ + item = item.split(','); + item.forEach(function(val, index){ + hashData[headers[index]].push(trimQuote(val)); + }); + } + }); + return outputSave(hashData); + }, - toSchemaObject : function(data){ - var content = getContentIfFile(data); - if(!content || typeof content !== "string"){ - throw new Error("invalid data"); - } - content = content.split(/[\n\r]+/ig); - var headers = content.shift().split(','), - hashData = []; + toSchemaObject : function(data){ + var content = getContentIfFile(data); + if(!content || typeof content !== "string"){ + throw new Error("invalid data"); + } + content = content.split(/[\n\r]+/ig); + var headers = content.shift().split(','), + hashData = []; - content.forEach(function(item){ - if(item){ - item = item.split(','); - var schemaObject = {}; - item.forEach(function(val, index){ - putDataInSchema(headers[index], val, schemaObject); - }); - hashData.push(schemaObject); - } - }); - return outputSave(hashData); - } + content.forEach(function(item){ + if(item){ + item = item.split(','); + var schemaObject = {}; + item.forEach(function(val, index){ + putDataInSchema(headers[index], val, schemaObject); + }); + hashData.push(schemaObject); + } + }); + return outputSave(hashData); + } }; function putDataInSchema(header, item, schema){ - var match = header.match(/\[*[\W]\]\.(\w+)|\.|\[\]|\[(.)\]|-|\+/ig); - var 
headerName, delimiter, currentPoint; - if(match){ - var testMatch = match[0]; - if(match.indexOf('-') !== -1){ - return true; - }else if(match.indexOf('.') !== -1){ - var headParts = header.split('.'); - currentPoint = headParts.shift(); - schema[currentPoint] = schema[currentPoint] || {}; - putDataInSchema(headParts.join('.'), item, schema[currentPoint]); - }else if(match.indexOf('[]') !== -1){ - headerName = header.replace(/\[\]/ig,''); - if(!schema[headerName]){ - schema[headerName] = []; - } - schema[headerName].push(item); - }else if(/\[*[\W]\]\.(\w+)/.test(testMatch)){ - headerName = header.split('[').shift(); - currentPoint = header.split('.').pop(); - schema[headerName] = schema[headerName] || {}; - schema[headerName][currentPoint] = schema[headerName][currentPoint] || {}; - if(testMatch.match(/\[(.)\]/)) { - delimiter = testMatch.match(/\[(.)\]/).pop(); - schema[headerName][currentPoint] = convertArray(item, delimiter); - }else{ - schema[headerName][currentPoint].push(item); - } - }else if(/\[(.)\]/.test(testMatch)){ - delimiter = testMatch.match(/\[(.)\]/)[1]; - headerName = header.replace(/\[(.)\]/ig,''); - schema[headerName] = convertArray(item, delimiter); - }else if(match.indexOf('+') !== -1){ - headerName = header.replace(/\+/ig,""); - schema[headerName] = Number(item); - } - }else{ - schema[header] = trimQuote(item); - } - return schema ; + var match = header.match(/\[*[\d]\]\.(\w+)|\.|\[\]|\[(.)\]|-|\+/ig); + var headerName, currentPoint; + if(match){ + var testMatch = match[0]; + if(match.indexOf('-') !== -1){ + return true; + }else if(match.indexOf('.') !== -1){ + var headParts = header.split('.'); + currentPoint = headParts.shift(); + schema[currentPoint] = schema[currentPoint] || {}; + putDataInSchema(headParts.join('.'), item, schema[currentPoint]); + }else if(match.indexOf('[]') !== -1){ + headerName = header.replace(/\[\]/ig,''); + if(!schema[headerName]){ + schema[headerName] = []; + } + schema[headerName].push(item); + }else 
if(/\[*[\d]\]\.(\w+)/.test(testMatch)){ + headerName = header.split('[').shift(); + var index = parseInt(testMatch.match(/\[(.)\]/).pop(),10); + currentPoint = header.split('.').pop(); + schema[headerName] = schema[headerName] || []; + schema[headerName][index] = schema[headerName][index] || {}; + schema[headerName][index][currentPoint] = item; + }else if(/\[(.)\]/.test(testMatch)){ + var delimiter = testMatch.match(/\[(.)\]/).pop(); + headerName = header.replace(/\[(.)\]/ig,''); + schema[headerName] = convertArray(item, delimiter); + }else if(match.indexOf('+') !== -1){ + headerName = header.replace(/\+/ig,""); + schema[headerName] = Number(item); + } + }else{ + schema[header] = trimQuote(item); + } + return schema ; } function getContentIfFile(filepath){ - if (fs.existsSync(filepath)) { - return fs.readFileSync(filepath, 'utf8'); - } - return null; + if (fs.existsSync(filepath)) { + return fs.readFileSync(filepath, 'utf8'); + } + return null; } function outputSave(data){ - return { - output : data, - save : function(filepath){ - if(typeof data === "object"){ - data = JSON.stringify(data); - } - fs.writeFileSync(filepath, data, {encoding:'utf8'}); - return this; - } - }; + return { + output : data, + save : function(filepath){ + if(typeof data === "object"){ + data = JSON.stringify(data); + } + fs.writeFileSync(filepath, data, {encoding:'utf8'}); + return this; + } + }; } function trimQuote(str){ - return str.trim().replace(/^["|'](.*)["|']$/, '$1'); + return str.trim().replace(/^["|'](.*)["|']$/, '$1'); } function convertArray(str, delimiter) { - var output = []; - var arr = str.split(delimiter); - arr.forEach(function(val) { - var trimmed = val.trim(); - output.push(trimmed); - }); - return output; + var output = []; + var arr = str.split(delimiter); + arr.forEach(function(val) { + var trimmed = val.trim(); + output.push(trimmed); + }); + return output; } diff --git a/test/csvjson_test.js b/test/csvjson_test.js index d36a2d3..153124c 100644 --- 
a/test/csvjson_test.js +++ b/test/csvjson_test.js @@ -7,22 +7,24 @@ describe('csvjson', function() { expect(csvjson).to.be.an('object'); done(); }); + it('should have all exposed functions', function(done) { + expect(csvjson).to.have.keys(['toObject', 'toArray', 'toCSV', 'toColumnArray', 'toSchemaObject']); + done(); + }); }); -describe('csvjson of sample.csv', function() { +describe('toObject', function() { + var result = csvjson.toObject('./test/sample.csv').output; it('should return an array of objects', function(done) { - var result = csvjson.toObject('./test/sample.csv').output; - expect(result).to.be.an('array'); expect(result[0]).to.be.an('object'); done(); }); }); -describe('csvjson of sample_schema.csv', function() { - it('should return an array of objects with next objects', function(done) { - var result = csvjson.toSchemaObject('./test/schema_sample.csv').output; - +describe('toSchemaObject', function() { + var result = csvjson.toSchemaObject('./test/schema_sample.csv').output; + it('should return an array of objects with nested objects', function(done) { expect(result).to.be.an('array'); expect(result[0]).to.be.an('object'); expect(result[0]['contact']).to.be.an('object'); @@ -30,9 +32,10 @@ describe('csvjson of sample_schema.csv', function() { }); }); -describe('csvjson of sample_schema1.csv', function() { +describe('toSchemaObject with nested and delimted arrays', function() { var result = csvjson.toSchemaObject('./test/schema_sample1.csv').output; - it('should return an array of objects with next objects', function(done) { + + it('should return a nested array of objects with next objects', function(done) { expect(result).to.be.an('array'); expect(result[0]).to.be.an('object'); expect(result[0]['instruments']).to.be.an('object'); @@ -49,14 +52,13 @@ describe('csvjson of sample_schema1.csv', function() { }); }); -describe('csvjson of sample_schema2.csv', function() { +describe('toSchemaObject with nested arrays of objects', function() { var result = 
csvjson.toSchemaObject('./test/schema_sample2.csv').output; - it('should return nested array', function(done) { - expect(result[0]['groups']).to.be.an('object'); - expect(result[0]['groups']['like']).to.be.an('array'); - expect(result[0]['groups']['dislike']).to.have.length(2); - expect(result[1]['groups']['dislike']).to.have.length(1); - expect(result[1]['groups']['ambivalent'][1]).to.equal('Wilco'); + + it('should return an array of objects', function(done) { + expect(result[0]['contacts']).to.be.an('array'); + expect(result[0]['contacts'][1]).to.be.an('object'); + expect(result[1]['contacts'][1]).to.have.keys(['name', 'phone']); done(); }); diff --git a/test/schema_sample2.csv b/test/schema_sample2.csv index d5831bb..79b225f 100644 --- a/test/schema_sample2.csv +++ b/test/schema_sample2.csv @@ -1,3 +1,3 @@ -name,age,groups[;].like,groups[|].dislike,groups[\].ambivalent,musician,instruments.past,instruments.current[],instruments.current[] -Mark,33,Beatles;Rolling Stones;The Band,Right Said Fred|The Monkeys,Parliament\Nirvana\Pearl Jam,Yes,Guitar,Drums,Bass Guitar -Jeff,27,Rush;The Eagles;The Byrds,Partridge Family,The Shins\Wilco,Yes,Guitar,Drums,Bass Guitar \ No newline at end of file +name,age,contacts[0].name,contacts[0].phone,contacts[1].name,contacts[1].phone,musician,instruments.past,instruments.current[],instruments.current[] +Mark,33,Jim Palmer,8888888888,Marcus Aurelius,7309899877,Yes,Guitar,Drums,Bass Guitar +Jeff,27,John Doe,8009008000,Michael Corleone,2121001000,Yes,Drums,Flute,Trumpet \ No newline at end of file