Skip to content

Commit

Permalink
splitting file into lga level csv ref #2
Browse files Browse the repository at this point in the history
  • Loading branch information
myf committed Apr 21, 2014
1 parent e5e5984 commit 6ea1eb9
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
node_modules
raw_data
output
67 changes: 67 additions & 0 deletions csv_split.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Dependencies and module-level state shared by the functions in this script.
var fs = require("fs"),
through = require("through"),
csv = require("csv-streamify"),
path = require("path"),
// directory the per-group CSV files are written into (see drain)
out_dir = "output",
// write streams that drain has opened so far, keyed by group name
write_streams = {},
// header row of the input CSV; placeholder until parse reads the first line
header = '',
// index of the grouping column within the header row; assigned by parse
group_sequence;


/**
 * Turn a path written relative to this script's directory into an
 * absolute path.
 *
 * @param {string} relative_location - path relative to the script directory
 * @returns {string} the resolved absolute path
 */
function abs_resolve(relative_location) {
    var script_dir = __dirname;
    return path.resolve(script_dir, relative_location);
}

/**
 * Serialize one row as a CSV line with every field wrapped in double quotes.
 *
 * Double quotes inside a field are doubled ("" -> RFC 4180 escaping) so a
 * value that contains a quote cannot terminate its cell early and corrupt
 * the columns that follow it.
 *
 * @param {Array} arr - field values of one row
 * @returns {string} the quoted, comma-separated fields followed by '\n'
 */
var write_csv_line = function(arr) {
    var quoted_arr = arr.map(function(item) {
        // String() keeps the original coercion of non-string values
        return '"' + String(item).replace(/"/g, '""') + '"';
    });
    return quoted_arr.join(',') + '\n';
};


// Through-stream that wraps each incoming row in an object carrying the row
// itself ("data") and the value found at the grouping column ("name").
var split = through(function(row) {
    this.queue({
        "name": row[group_sequence],
        "data": row
    });
});

/**
 * Append one grouped row to the CSV file of its group.
 *
 * The first row seen for a group opens `<out_dir>/<name>.csv` and writes the
 * header line before the row; later rows reuse the stream cached in
 * write_streams.
 *
 * @param {{name: string, data: Array}} data - object queued by split
 */
var drain = function(data) {
    var name = data.name;
    // Own-property check instead of truthiness: write_streams is a plain
    // object, so a group called "constructor" or "toString" would otherwise
    // resolve to an inherited member and crash on `.write`.
    var is_open = Object.prototype.hasOwnProperty.call(write_streams, name);
    if (!is_open) {
        // NOTE(review): name comes straight from the CSV and is used in the
        // file path unsanitized - confirm the input is trusted.
        var outfile = path.resolve(out_dir, name + ".csv");
        write_streams[name] = fs.createWriteStream(outfile);
        write_streams[name].write(write_csv_line(header));
    }
    write_streams[name].write(write_csv_line(data.data));
};

/**
 * Build the CSV parser stream for the input file.
 *
 * While the parser reports line number 0, a 'readable' event reads one row,
 * stores it in the module-level `header`, and records the position of
 * `group_name` within it in `group_sequence`.
 *
 * @param {string} group_name - header label of the column to group rows by
 * @returns {Stream} the csv-streamify parser, ready to be piped into
 */
var parse = function(group_name) {
    var csv_parser = csv({objectMode: true});

    function capture_header() {
        if (csv_parser.lineNo !== 0) {
            return;
        }
        header = csv_parser.read();
        group_sequence = header.indexOf(group_name);
    }

    csv_parser.on('readable', capture_header);
    return csv_parser;
};



// Entry point: stream the raw CSV through the parser and the grouping stage,
// and hand every grouped row to drain, which writes it to its group's file.
var read_file = "raw_data/education_mopup_2014_04_11_12_56_16.csv";
var read_stream = fs.createReadStream(abs_resolve(read_file));
read_stream
    .pipe(parse('lga'))
    .pipe(split)
    .on('data', function(data){
        drain(data);
    })
    .on('end', function() {
        // All rows have been handed to drain: end every per-group stream so
        // buffered data is flushed and the file descriptors are released.
        Object.keys(write_streams).forEach(function(name) {
            write_streams[name].end();
        });
    });
11 changes: 11 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"name" : "mopup_stream",
"main" : "data.js",
"dependencies" : {
"event-stream": "*",
"readable-stream": "*",
"csv-streamify": "*",
"JSONStream": "*",
"through" : "*"
}
}

0 comments on commit 6ea1eb9

Please sign in to comment.