diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5208196 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/test/BAG_Amstelveen_2011feb01.zip +/test/extract/data-bag +/coverage +/test/buildings.ndjson +/test/unzip +/error.log diff --git a/README.md b/README.md index 0974a1c..b04d614 100644 --- a/README.md +++ b/README.md @@ -1 +1,5 @@ # data-bag +Extracts Histograph data from the Base Administration for Buildings and Addresses (BAG) + +# Todo: +- Extract placenames from set \ No newline at end of file diff --git a/bag.dataset.json b/bag.dataset.json new file mode 100644 index 0000000..2f540c3 --- /dev/null +++ b/bag.dataset.json @@ -0,0 +1,11 @@ +{ + "id": "bag", + "title": "Basisregistratie Adressen en Gebouwen", + "license": "http://creativecommons.org/publicdomain/mark/1.0/deed.nl", + "description": "Service Adressen, gevuld met relevante objecten uit de Basisregistratie Adressen en Gebouwen (BAG), beheerd door het Kadaster.", + "author": "Kadaster", + "website": "http://bag.kadaster.nl", + "edits": "", + "editor": "Rein van t Veer", + "creationDate": "" +} diff --git a/bag.js b/bag.js new file mode 100644 index 0000000..68453f2 --- /dev/null +++ b/bag.js @@ -0,0 +1,279 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const Promise = require('bluebird'); +const request = require('request'); +const progress = require('request-progress'); +const yauzl = require('yauzl'); +const mkdirp = require('mkdirp'); +const highland = require('highland'); +const sax = require('sax'); +const saxpath = require('saxpath'); +const xml2js = require('xml2js'); + +const workerFarm = require('worker-farm'); +const NUM_CPUS = require('os').cpus().length; +const FARM_OPTIONS = { + maxConcurrentWorkers: require('os').cpus().length, + maxCallsPerWorker: Infinity, + maxConcurrentCallsPerWorker: 1 +}; + +const buildingsworkers = workerFarm( + FARM_OPTIONS, + require.resolve('./helpers/buildingsextractor.js'), + ['extractFromFile'] +); + 
+const addressworkers = workerFarm( + FARM_OPTIONS, + require.resolve('./helpers/addressesextractor.js'), + ['extractFromFile'] +); + +const publicSpacesWorkers = workerFarm( + FARM_OPTIONS, + require.resolve('./helpers/publicspacesextractor.js'), + ['extractFromFile'] +); + +const placesWorkers = workerFarm( + FARM_OPTIONS, + require.resolve('./helpers/placesextractor.js'), + ['extractFromFile'] +); + +function extractDownloadSize(atomURL) { + return new Promise((resolve, reject) => { + request(atomURL, + (err, response, body) => { + if (err) return reject(err); + if (!response) return reject(new Error(`No response returned from request to ${atomURL}`)); + if (response.statusCode !== 200) { + return reject(new Error(`Unexpected request to ${atomURL} response status ${response.statusCode}`)); + } + if (!body) return reject(new Error(`The request to ${atomURL} did not return a response body`)); + + const parser = new xml2js.Parser(); + return parser.parseString(body, (error, result) => { + if (error) return reject(new Error(`Error parsing body ${body} \n ${error.stack}`)); + console.log(`Length: ${JSON.stringify(result.feed.entry[0].link[0].$.length, null, 2)}`); + return resolve(parseInt(result.feed.entry[0].link[0].$.length, 10)); + }); + } + ); + }); +} + + +function download(config, dir, writer, callback) { + console.log(`Downloading ${config.baseDownloadUrl}...`); + return extractDownloadSize(config.feedURL) + .then(size => downloadDataFile(config.baseDownloadUrl, config.datafilename, dir, size)) + .then((fullPath) => { + console.log(`${new Date()} download of ${fullPath} complete!`); + return callback(); // invoke the step callback; returning the function itself never signalled success + }) + .catch(error => { + console.error(`${new Date()} Download failed due to ${error}`); + return callback(error); + }); +} +function downloadDataFile(baseURL, filename, dir, size) { + return new Promise((resolve, reject) => { + const fullZipFilePath = path.join(dir, filename); + console.log(`Getting ${baseURL + filename}:`); + console.log(`Total size: 
${size}`); + + progress(request + .get(baseURL + filename), { + throttle: 2000, + delay: 1000 + }) + .on('progress', state => { + console.log(`Download progress: ${((state.size.transferred / size) * 100).toFixed(0)}%`); + }) + .on('error', err => reject(err)) + .on('end', () => { + console.log('Download progress: 100%'); + resolve(fullZipFilePath); // NOTE(review): the response is never piped to fullZipFilePath here - confirm the file is written elsewhere + }); + }); +} + +function extractZipfile(zipfilename, extractdir) { + return new Promise((resolve, reject) => { + console.log('extractdir: ', extractdir, '\n'); + mkdirp.sync(extractdir); // target dir must exist before any entry is written + + console.log('zipfilename: ', zipfilename, '\n'); + yauzl.open(zipfilename, { lazyEntries: true }, (err, zipfile) => { + if (err) return reject(err); // zipfile is undefined on failure, do not fall through + + zipfile.readEntry(); + + zipfile.on('entry', entry => { + if (/\/$/.test(entry.fileName)) { + // directory file names end with '/': create it inside extractdir and skip the file branch + return mkdirp(path.join(extractdir, entry.fileName), + error => { + if (error) return reject(error); + return zipfile.readEntry(); + }); + } + + // file entry + zipfile.openReadStream(entry, (err, readStream) => { + if (err) { + console.log(`Error reading ${entry.fileName}`); + return reject(err); + } + + // ensure parent directory exists (inside extractdir, where the file is written) + mkdirp(path.dirname(path.join(extractdir, entry.fileName)), err => { + if (err) return reject(err); + const target = readStream.pipe(fs.createWriteStream(path.join(extractdir, entry.fileName))); + target.on('finish', () => { // continue only once the data is flushed, a nested zip is reopened right away + if (entry.fileName.slice(-4) === '.zip') { + extractZipfile(path.join(extractdir, entry.fileName), extractdir) + .then(() => { + console.log(`Extracted subzip ${entry.fileName}`); + zipfile.readEntry(); + }, reject); // propagate failures of the nested extraction + } else { + zipfile.readEntry(); + } + }); + + readStream.on('error', err => reject(err)); target.on('error', err => reject(err)); + }); + }); + }); + + zipfile.on('end', () => resolve()); + }); + }); +} + +function unzip(config, dir, writer, callback) { + console.log('WARNING, make sure you have at least 45 Gb of free disk space for extraction, or press Ctrl-c to abort.'); + console.log('The unzip phase itself can take up to an hour and will extract about 4.000 XML files.'); + console.log('Since the zipfile consists of 
sub-zipfiles of unknown size, there cannot be given an estimation of remaining time.'); + console.log('The process will appear to be frozen for quite some time, especially on the ***PND***.zip file.'); + console.log('However, this will at least spare you the logging of about 4000 file names.'); + return extractZipfile(path.join(dir, config.datafilename), dir) + .then(() => { + console.log(`${new Date()} extraction complete!`); + return callback(); // invoke the step callback; returning the function itself never signalled success + }) + .catch(error => { + console.error(`${new Date()} Extraction failed due to ${error}`); + return callback(error); + }); +} + +function mkdir(path) { + return new Promise((resolve, reject) => { + mkdirp(path, err => { + if (err) { + console.log(`Error during directory creation: ${err}`); + return reject(err); // do not fall through to resolve() + } + resolve(); + }); + }); +} + + +function mapFilesToJobs(dir, extractDir) { + const fileTypes = { + PND: { + converter: buildingsworkers, + outputPITsFile: 'pand.pits.ndjson', + outputRelationsFile: 'pand.relations.ndjson' + }, + NUM: { + converter: addressworkers, + outputPITsFile: 'adres.pits.ndjson', + outputRelationsFile: 'adres.relations.ndjson' + }, + OPR: { + converter: publicSpacesWorkers, + outputPITsFile: 'openbareruimte.pits.ndjson', + outputRelationsFile: 'openbareruimte.relations.ndjson' + }, + WPL: { + converter: placesWorkers, + outputPITsFile: 'woonplaats.pits.ndjson', + outputRelationsFile: 'woonplaats.relations.ndjson' + } + }; + + return fs.readdirSync(dir) + .filter(file => file.slice(-4) === '.xml') + .map(file => { + const type = file.slice(4, 7); // three-letter object type embedded in the file name, e.g. 1050PND... + const job = {}; + if (!fileTypes[type]) return null; + job.converter = fileTypes[type].converter; + job.inputFile = path.resolve(path.join(dir, file)); + job.outputPITsFile = path.resolve(path.join(extractDir, fileTypes[type].outputPITsFile)); + job.outputRelationsFile = path.resolve(path.join(extractDir, fileTypes[type].outputRelationsFile)); + return job; + }) + .filter(job => (job)); +} + +function convert(config, dir, writer, callback) { + const 
extractDir = path.join(config.data.generatedDataDir, 'data-bag'); + console.log('WARNING, make sure you have at least 45 Gb of free disk space for conversion, or press Ctrl-c to abort.'); + const jobs = mapFilesToJobs(dir, extractDir); + + mkdir(extractDir) + .then(() => { + const jobStream = highland(jobs); + + jobStream + .map(job => { + console.log(`Processing ${job.inputFile} to output to ${job.outputPITsFile} and ${job.outputRelationsFile}`); + return highland(wrapJob(job.converter.extractFromFile, job.inputFile, job.outputPITsFile, job.outputRelationsFile)); + }) + .parallel(Math.max(1, NUM_CPUS - 1)) // never 0: a single-core host would otherwise process nothing + .errors(err => { + fs.appendFileSync(path.join(__dirname, 'error.log'), `${(err && err.stack) || JSON.stringify(err)}\n`); // JSON.stringify(Error) yields '{}' + return console.log('Stream threw error. Wrote error to error.log.'); + }) + .toArray(result => { + console.log('Done processing all files!'); + return callback(null, result); + }); + + }) + .catch(err => callback(err, null)); +} + +function wrapJob(jobFunction, sourceFile, pitsFile, relationsFile) { + return new Promise((resolve, reject) => { + jobFunction(sourceFile, pitsFile, relationsFile, (err, result) => { + if (err) return reject(err); + return resolve(result); + }); + }); +} + +module.exports = { + download, + extractDownloadSize, + downloadDataFile, + unzip, + extractZipfile, + convert, + mapFilesToJobs, + mkdir, + steps: [ + download, + unzip, + convert + ] +}; diff --git a/helpers/addressesextractor.js b/helpers/addressesextractor.js new file mode 100644 index 0000000..342d8a7 --- /dev/null +++ b/helpers/addressesextractor.js @@ -0,0 +1,73 @@ +'use strict'; +const xml2js = require('xml2js'); +const fs = require('fs'); +const sax = require('sax'); +const saxpath = require('saxpath'); +const highland = require('highland'); +const writer = require('./bagwriter.js'); + +module.exports = { + title: 'BAG', + url: 'http://bag.kadaster.nl', + extractFromFile: extractFromFile +}; + +function extractFromFile(inputFileName, outputPITsFile, outputRelationsFile, callback) { + 
console.log(`Processing ${inputFileName}`); + const nodes = []; + const edges = []; + const parser = new xml2js.Parser(); + const strict = true; + + const saxStream = sax.createStream(strict); + fs.createReadStream(inputFileName, { encoding: 'utf8' }) + .pipe(saxStream); + + const streamer = new saxpath.SaXPath(saxStream, '//bag_LVC:Nummeraanduiding'); + + streamer.on('match', xml => { + parser.parseString(xml, (err, result) => { + if (err) { + console.error(`Error parsing xml element ${xml} \n ${err.stack}`); + return callback(err); + } + + nodes.push({ + uri: module.exports.url + '/nummeraanduiding/' + result['bag_LVC:Nummeraanduiding']['bag_LVC:identificatie'][0], + id: result['bag_LVC:Nummeraanduiding']['bag_LVC:identificatie'][0], + huisnummer: result['bag_LVC:Nummeraanduiding']['bag_LVC:huisnummer'] ? + result['bag_LVC:Nummeraanduiding']['bag_LVC:huisnummer'][0] : null, + huisletter: result['bag_LVC:Nummeraanduiding']['bag_LVC:huisletter'] ? + result['bag_LVC:Nummeraanduiding']['bag_LVC:huisletter'][0] : null, // xml2js wraps values in arrays; unwrap like the sibling fields + postcode: result['bag_LVC:Nummeraanduiding']['bag_LVC:postcode'] ? + result['bag_LVC:Nummeraanduiding']['bag_LVC:postcode'][0] : null, + startDate: result['bag_LVC:Nummeraanduiding']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'] ? + result['bag_LVC:Nummeraanduiding']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'][0] : null, + endDate: result['bag_LVC:Nummeraanduiding']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'] ? 
+ result['bag_LVC:Nummeraanduiding']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'][0] : null + }); + + if (result['bag_LVC:Nummeraanduiding']['bag_LVC:gerelateerdeOpenbareRuimte']) { + edges.push({ + from: module.exports.url + '/nummeraanduiding/' + result['bag_LVC:Nummeraanduiding']['bag_LVC:identificatie'][0], + to: module.exports.url + '/openbareruimte/' + result['bag_LVC:Nummeraanduiding']['bag_LVC:gerelateerdeOpenbareRuimte'][0]['bag_LVC:identificatie'][0], // explicit [0] instead of implicit array-to-string coercion + type: 'hg:related' + }); + } + }); + }); + + saxStream.on('error', function onSaxError(err) { // regular function: `this` must be the sax stream, an arrow function has no such binding + console.error(`saxStream threw error ${err.stack}`); + + // clear the error + this._parser.error = null; + this._parser.resume(); + }); + + saxStream.on('end', () => writer.write(nodes, edges, outputPITsFile, outputRelationsFile) + .then(result => callback(null, result)) + .catch(err => callback(err)) + ); + +} diff --git a/helpers/bagwriter.js b/helpers/bagwriter.js new file mode 100644 index 0000000..57d24d0 --- /dev/null +++ b/helpers/bagwriter.js @@ -0,0 +1,26 @@ +'use strict'; +var fs = require('fs'); +var Promise = require('bluebird'); +var highland = require('highland'); + +module.exports.write = write; + +function write(nodes, edges, outputPITsFile, outputRelationsFile){ + return new Promise((resolve, reject) => { + if (!nodes) return reject(new Error('Empty nodes object')); + if (!outputPITsFile) return reject(new Error('Requires an outputPITsFile to write to')); + if (edges && !outputRelationsFile) return reject(new Error('Requires an outputRelationsFile to write to if edges are supplied')); + + console.log(`Writing ${nodes.length} PITs`); + if (edges) console.log(`and ${edges.length} relations`); + var nodeStream = highland(nodes); + nodeStream.each(node => fs.appendFileSync(outputPITsFile, JSON.stringify(node) + '\n')); + nodeStream.done(() => { + if (!edges) return resolve(true); + + var edgeStream = highland(edges); + edgeStream.each(edge => fs.appendFileSync(outputRelationsFile, 
JSON.stringify(edge) + '\n')); + edgeStream.done(() => resolve(true)); + }); + }); +} diff --git a/helpers/buildingsextractor.js b/helpers/buildingsextractor.js new file mode 100644 index 0000000..3913bbf --- /dev/null +++ b/helpers/buildingsextractor.js @@ -0,0 +1,72 @@ +'use strict'; +var xml2js = require('xml2js'); +var fs = require('fs'); +var sax = require('sax'); +var saxpath = require('saxpath'); +var geometryTools = require('./geometrytools.js'); +var highland = require('highland'); +var writer = require('./bagwriter.js'); + +module.exports = { + title: 'BAG', + url: 'http://bag.kadaster.nl', + extractFromFile: extractFromFile +}; + +function extractFromFile(inputFileName, outputPITsFile, outputRelationsFile, callback) { + console.log(`Processing ${inputFileName}`); + var nodes = []; + var edges = []; + var parser = new xml2js.Parser(); + var strict = true; + + var saxStream = sax.createStream(strict); + fs.createReadStream(inputFileName, { encoding: 'utf8' }) + .pipe(saxStream); + + var streamer = new saxpath.SaXPath(saxStream, '//bag_LVC:Pand'); + + streamer.on('match', xml => { + parser.parseString(xml, (err, result) => { + if (err) { + console.error(`Error parsing xml element ${xml} \n ${err.stack}`); + return callback(err); + } + + geometryTools.joinGMLposlist(result['bag_LVC:Pand']['bag_LVC:pandGeometrie'][0]['gml:Polygon'][0]['gml:exterior'][0]['gml:LinearRing'][0]['gml:posList'][0]._) + .then(list => { + var polygon = []; + polygon[0] = list; + + nodes.push({ + uri: module.exports.url + '/pand/' + result['bag_LVC:Pand']['bag_LVC:identificatie'][0], + id: result['bag_LVC:Pand']['bag_LVC:identificatie'][0], + bouwjaar: result['bag_LVC:Pand']['bag_LVC:bouwjaar'][0], + startDate: result['bag_LVC:Pand']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'] ? 
+ result['bag_LVC:Pand']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'][0] : null, + endDate: result['bag_LVC:Pand']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'] ? + result['bag_LVC:Pand']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'][0] : null, + geometry: { + type: 'Polygon', + coordinates: polygon + } + }); + }); + + }); + }); + + saxStream.on('error', function onSaxError(err) { // regular function: `this` must be the sax stream, an arrow function has no such binding + console.error(`saxStream threw error ${err.stack}`); + + // clear the error + this._parser.error = null; + this._parser.resume(); + }); + + saxStream.on('end', () => writer.write(nodes, edges, outputPITsFile, outputRelationsFile) + .then(result => callback(null, result)) + .catch(err => callback(err)) + ); + +} diff --git a/helpers/geometrytools.js b/helpers/geometrytools.js new file mode 100644 index 0000000..df4f3fb --- /dev/null +++ b/helpers/geometrytools.js @@ -0,0 +1,95 @@ +'use strict'; + +var GJV = require('geojson-validation'); +var proj4 = require('proj4'); +var jsts = require('jsts'); +var reader = new jsts.io.GeoJSONReader(); + +module.exports = { + validateCoords: validateCoords, + joinGMLposlist: joinGMLposlist, + isValidGeoJSON: isValidGeoJSON, + toWGS84: toWGS84 +}; + +var proj4Defs = { + 'EPSG:2400': '+lon_0=15.808277777799999 +lat_0=0.0 +k=1.0 +x_0=1500000.0 +y_0=0.0 +proj=tmerc +ellps=bessel +units=m +towgs84=414.1,41.3,603.1,-0.855,2.141,-7.023,0 +no_defs', + 'EPSG:3006': '+proj=utm +zone=33 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', + 'EPSG:4326': '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', + 'EPSG:3857': '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs', + 'EPSG:28992': '+proj=sterea +lat_0=52.15616055555555 +lon_0=5.38763888888889 +k=0.9999079 +x_0=155000 +y_0=463000 +ellps=bessel +towgs84=565.237,50.0087,465.658,-0.406857,0.350733,-1.87035,4.0812 +units=m +no_defs' +}; + +function joinGMLposlist(posList, type, 
dimensions) { + return new Promise(resolve => { + posList = posList.trim().split(/\s+/); // tolerate surrounding or repeated whitespace, which used to yield NaN coordinates + + var geojsonPosList = []; + var counter = 0; + + while (posList.length !== 0) { + var point = []; + point.push(parseFloat(posList.shift())); + point.push(parseFloat(posList.shift())); + if (dimensions !== 2) posList.shift(); // skip 3d height + point = toWGS84(point) + .map(coordinate => parseFloat( + coordinate.toFixed(7)) + ); + geojsonPosList[counter] = point; + + counter += 1; + } + + if (type === 'Polygon') { + //add extra level of array + var extraLevel = [[[]]]; + extraLevel[0] = geojsonPosList; + return resolve(extraLevel); // stop here, the plain list below is for non-polygon callers + } + + resolve(geojsonPosList); + + }); +} + +function validateCoords(geojsoncoords, type) { + return new Promise((resolve, reject) => { + var geojson = { + type: 'Feature', + geometry: { + type: type, + coordinates: geojsoncoords + }, + properties: {} + }; + + GJV.isFeature(geojson, (valid, errs) => { + if (!valid) { + console.error('Validator rejecting geometry due to:', errs); + reject(errs); + } else { + console.log('JSTS evaluated feature as valid: ' + isValidGeoJSON(geojson) + '\n'); + resolve(isValidGeoJSON(geojson)); + } + }); + }); +} + +function isValidGeoJSON(geoJSONPolygon) { + var jstsGeometry = reader.read(geoJSONPolygon.geometry); + + if (jstsGeometry) { + var validator = new jsts.operation.valid.IsValidOp(jstsGeometry); + return validator.isValid(); + } +} + +function toWGS84(point) { + var EPSG28992 = '+proj=sterea +lat_0=52.15616055555555 +lon_0=5.38763888888889 +k=0.9999079 +x_0=155000 +y_0=463000 +ellps=bessel +towgs84=565.417,50.3319,465.552,-0.398957,0.343988,-1.8774,4.0725 +units=m +no_defs'; + + return proj4( + EPSG28992, + proj4('WGS84') + ).forward(point); +} \ No newline at end of file diff --git a/helpers/placesextractor.js b/helpers/placesextractor.js new file mode 100644 index 0000000..599e85d --- /dev/null +++ b/helpers/placesextractor.js @@ -0,0 +1,99 @@ +'use strict'; +const xml2js = require('xml2js'); +const fs = require('fs'); 
+const sax = require('sax'); +const saxpath = require('saxpath'); +const geometryTools = require('./geometrytools.js'); +const writer = require('./bagwriter.js'); + +function extractFromFile(inputFileName, outputPITsFile, outputRelationsFile, callback) { + console.log(`Processing ${inputFileName}`); + const nodes = []; + const edges = []; + const parser = new xml2js.Parser(); + const strict = true; + + const saxStream = sax.createStream(strict); + fs.createReadStream(inputFileName, { encoding: 'utf8' }) + .pipe(saxStream); + + const streamer = new saxpath.SaXPath(saxStream, '//bag_LVC:Woonplaats'); + + streamer.on('match', xml => { + parser.parseString(xml, (err, result) => { + if (err) { + console.error(`Error parsing xml element ${xml} \n ${err.stack}`); + return callback(err); + } + + const place = { + uri: module.exports.url + result['bag_LVC:Woonplaats']['bag_LVC:identificatie'][0], + id: result['bag_LVC:Woonplaats']['bag_LVC:identificatie'][0], + label: result['bag_LVC:Woonplaats']['bag_LVC:woonplaatsNaam'][0], + startDate: result['bag_LVC:Woonplaats']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'] ? + result['bag_LVC:Woonplaats']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'][0] : null, + endDate: result['bag_LVC:Woonplaats']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'] ? 
+ result['bag_LVC:Woonplaats']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'][0] : null + }; + + if (result['bag_LVC:Woonplaats']['bag_LVC:woonplaatsGeometrie'][0]['gml:Polygon']) { + geometryTools.joinGMLposlist( + result['bag_LVC:Woonplaats']['bag_LVC:woonplaatsGeometrie'][0]['gml:Polygon'][0]['gml:exterior'][0]['gml:LinearRing'][0]['gml:posList'][0]._, + 'polygon', + 2 // dimensions + ).then(list => { + const polygon = []; + polygon[0] = list; + + place.geometry = { + type: 'Polygon', + coordinates: polygon + }; + + nodes.push(place); + }); + } else if (result['bag_LVC:Woonplaats']['bag_LVC:woonplaatsGeometrie'][0]['gml:MultiSurface']) { + const multiPolygon = []; + multiPolygon[0] = []; + + result['bag_LVC:Woonplaats']['bag_LVC:woonplaatsGeometrie'][0]['gml:MultiSurface'][0]['gml:surfaceMember'] + .forEach(surfaceMember => { + geometryTools.joinGMLposlist( + surfaceMember['gml:Polygon'][0]['gml:exterior'][0]['gml:LinearRing'][0]['gml:posList'][0]._, + 'polygon', + 2 // dimensions + ) + .then(list => multiPolygon[0].push(list)) // NOTE(review): every surface member becomes a ring of ONE polygon; GeoJSON MultiPolygon presumably wants one [ring] array per member - confirm + .catch(err => callback(err)); + }); + + place.geometry = { + type: 'MultiPolygon', + coordinates: multiPolygon + }; + + nodes.push(place); + } + }); + }); + + saxStream.on('error', function onSaxError(err) { // regular function: `this` must be the sax stream, an arrow function has no such binding + console.error(`saxStream threw error ${err.stack}`); + + // clear the error + this._parser.error = null; + this._parser.resume(); + }); + + saxStream.on('end', () => writer.write(nodes, edges, outputPITsFile, outputRelationsFile) + .then(result => callback(null, result)) + .catch(err => callback(err)) + ); + +} + +module.exports = { + title: 'BAG', + url: 'http://bag.kadaster.nl/woonplaats/', + extractFromFile +}; diff --git a/helpers/publicspacesextractor.js b/helpers/publicspacesextractor.js new file mode 100644 index 0000000..c98fe8f --- /dev/null +++ b/helpers/publicspacesextractor.js @@ -0,0 +1,70 @@ +'use strict'; +var xml2js = require('xml2js'); +var fs = require('fs'); +var sax = require('sax'); +var saxpath = 
require('saxpath'); +var highland = require('highland'); +var writer = require('./bagwriter.js'); + +module.exports = { + title: 'BAG', + url: 'http://bag.kadaster.nl', + extractFromFile: extractFromFile +}; + +function extractFromFile(inputFileName, outputPITsFile, outputRelationsFile, callback) { + console.log(`Processing ${inputFileName}`); + var nodes = []; + var edges = []; + var parser = new xml2js.Parser(); + var strict = true; + + var saxStream = sax.createStream(strict); + fs.createReadStream(inputFileName, { encoding: 'utf8' }) + .pipe(saxStream); + + var streamer = new saxpath.SaXPath(saxStream, '//bag_LVC:OpenbareRuimte'); + + streamer.on('match', xml => { + parser.parseString(xml, (err, result) => { + if (err) { + console.error(`Error parsing xml element ${xml} \n ${err.stack}`); + return callback(err); + } + + if (result['bag_LVC:OpenbareRuimte']['bag_LVC:openbareRuimteType'][0] === 'Weg') { + nodes.push({ + uri: module.exports.url + '/openbareruimte/' + result['bag_LVC:OpenbareRuimte']['bag_LVC:identificatie'][0], + id: result['bag_LVC:OpenbareRuimte']['bag_LVC:identificatie'][0], + name: result['bag_LVC:OpenbareRuimte']['bag_LVC:openbareRuimteNaam'] ? + result['bag_LVC:OpenbareRuimte']['bag_LVC:openbareRuimteNaam'][0] : null, + startDate: result['bag_LVC:OpenbareRuimte']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'] ? + result['bag_LVC:OpenbareRuimte']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:begindatumTijdvakGeldigheid'][0] : null, + endDate: result['bag_LVC:OpenbareRuimte']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'] ? 
+ result['bag_LVC:OpenbareRuimte']['bag_LVC:tijdvakgeldigheid'][0]['bagtype:einddatumTijdvakGeldigheid'][0] : null + }); + + edges.push({ + from: module.exports.url + '/openbareruimte/' + result['bag_LVC:OpenbareRuimte']['bag_LVC:identificatie'][0], + to: module.exports.url + '/woonplaats/' + result['bag_LVC:OpenbareRuimte']['bag_LVC:gerelateerdeWoonplaats'][0]['bag_LVC:identificatie'][0], // explicit [0] instead of implicit array-to-string coercion + type: 'hg:liesIn' + }); + } + + }); + }); + + saxStream.on('error', function onSaxError(err) { // regular function: `this` must be the sax stream, an arrow function has no such binding + console.error(`saxStream threw error ${err.stack}`); + + // clear the error + this._parser.error = null; + this._parser.resume(); + }); + + saxStream.on('end', () => writer.write(nodes, edges, outputPITsFile, outputRelationsFile) + .then(result => callback(null, result)) + .catch(err => callback(err)) + ); + +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..a2fe70a --- /dev/null +++ b/package.json @@ -0,0 +1,59 @@ +{ + "name": "histograph-data-bag", + "version": "0.1.0", + "description": "Download and convert Basisregistratie Adressen en Gebouwen data to Histograph NDJSON", + "main": "bag.js", + "devDependencies": { + "babel-cli": "^6.4.5", + "babel-preset-es2015": "^6.3.13", + "babel-register": "^6.4.3", + "chai": "^3.5.0", + "chai-as-promised": "^5.2.0", + "eslint-config-airbnb": "^6.2.0", + "istanbul": "^0.4.2", + "jscs": "^1.10.0", + "jshint": "^2.6.0", + "mocha": "^2.3.4", + "nock": "^7.2.2", + "rimraf": "^2.5.2", + "tape": "^3.4.0" + }, + "scripts": { + "test": "mocha test/test.js" + }, + "author": "Rein van t Veer", + "license": "MIT", + "dependencies": { + "bluebird": "^3.1.1", + "debug": "^2.2.0", + "geojson-validation": "^0.1.6", + "highland": "^2.5.1", + "jsts": "^0.17.0", + "mkdirp": "^0.5.1", + "proj4": "^2.3.12", + "ramda": "^0.18.0", + "reproject": "^1.0.0", + "request": "^2.67.0", + "request-progress": "^2.0.1", + "sax": "^1.1.4", + "saxpath": "^0.6.3", + "xml2js": "^0.4.16" + }, + "jshintConfig": { + "node": true, + "globalstrict": false, + "undef": true, + 
"unused": true, + "noarg": true + }, + "jscsConfig": { + "preset": "airbnb", + "requireCamelCaseOrUpperCaseIdentifiers": null, + "disallowMultipleVarDecl": true, + "requireMultipleVarDecl": null, + "maxErrors": 1000, + "maximumLineLength": 150, + "requirePaddingNewLinesAfterBlocks": null, + "requireTrailingComma": null + } +} diff --git a/test/bag-pand-xml2js-snippet.json b/test/bag-pand-xml2js-snippet.json new file mode 100644 index 0000000..19d9610 --- /dev/null +++ b/test/bag-pand-xml2js-snippet.json @@ -0,0 +1,70 @@ +{ + "bag_LVC:Pand": { + "bag_LVC:identificatie": [ + "0362100001064920" + ], + "bag_LVC:aanduidingRecordInactief": [ + "N" + ], + "bag_LVC:aanduidingRecordCorrectie": [ + "0" + ], + "bag_LVC:officieel": [ + "N" + ], + "bag_LVC:pandGeometrie": [ + { + "gml:Polygon": [ + { + "$": { + "srsName": "urn:ogc:def:crs:EPSG::28992" + }, + "gml:exterior": [ + { + "gml:LinearRing": [ + { + "gml:posList": [ + { + "_": "116938.595 477068.148 0.0 116930.644 477071.854 0.0 116928.365 477066.959 0.0 116936.316 477063.253 0.0 116936.327 477063.277 0.0 116938.595 477068.148 0.0", + "$": { + "srsDimension": "3", + "count": "6" + } + } + ] + } + ] + } + ] + } + ] + } + ], + "bag_LVC:bouwjaar": [ + "1991" + ], + "bag_LVC:pandstatus": [ + "Pand in gebruik" + ], + "bag_LVC:tijdvakgeldigheid": [ + { + "bagtype:begindatumTijdvakGeldigheid": [ + "1989103100000000" + ] + } + ], + "bag_LVC:inOnderzoek": [ + "N" + ], + "bag_LVC:bron": [ + { + "bagtype:documentdatum": [ + "19891031" + ], + "bagtype:documentnummer": [ + "10919" + ] + } + ] + } +} \ No newline at end of file diff --git a/test/bagTest.js b/test/bagTest.js new file mode 100644 index 0000000..b81ed81 --- /dev/null +++ b/test/bagTest.js @@ -0,0 +1,112 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); +const rimraf = require('rimraf'); +const nock = require('nock'); + +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = 
chai.expect; + +const bag = require('../bag.js'); +const config = require('./mockups/config.json'); +const mockedAtomXML = path.join(__dirname, 'mockups', 'atom_inspireadressen.xml'); +const extractDir = path.join(__dirname, 'extract'); + +describe('histograph-data-bag', function bagTest() { + describe('download phase', function download() { + it('extracts the dataset size from the source description', () => { + nock('http://geodata.nationaalgeoregister.nl') + .defaultReplyHeaders({ 'Content-Type': 'text/xml' }) + .get('/inspireadressen/atom/inspireadressen.xml') + .replyWithFile(200, mockedAtomXML); + + return bag.extractDownloadSize(config.feedURL) + .then(size => expect(size).to.equal(1550788857)); + }); + + it('downloads the file', () => { + nock('http://data.nlextract.nl') + .get('/bag/bron/BAG_Amstelveen_2011feb01.zip') + .replyWithFile(200, mockedAtomXML); + + return bag.downloadDataFile(config.baseUrlTest, config.dataFileNameTest, __dirname, 5746696) + .then(filename => expect(fs.lstatSync(filename)).to.not.throw); + }); + }); + + describe('unzip phase', function unzip() { + this.timeout(30000); + it('extract the test dataset', () => { + const unzipDir = path.resolve('./test/unzip'); + const filename = path.resolve('./test/BAG_Amstelveen_2011feb01.zip'); + + return bag.extractZipfile(filename, unzipDir) + .then(() => { + return expect(fs.readdirSync(unzipDir)).to.deep.equal([ + '1050LIG08032011-01022011.xml', + '1050LIG08032011-01022011.zip', + '1050NUM08032011-01022011-0001.xml', + '1050NUM08032011-01022011-0002.xml', + '1050NUM08032011-01022011-0003.xml', + '1050NUM08032011-01022011.zip', + '1050OPR08032011-01022011.xml', + '1050OPR08032011-01022011.zip', + '1050PND08032011-01022011-0001.xml', + '1050PND08032011-01022011-0002.xml', + '1050PND08032011-01022011-0003.xml', + '1050PND08032011-01022011.zip', + '1050STA08032011-01022011.xml', + '1050STA08032011-01022011.zip', + '1050VBO08032011-01022011-0001.xml', + '1050VBO08032011-01022011-0002.xml', + 
'1050VBO08032011-01022011-0003.xml', + '1050VBO08032011-01022011-0004.xml', + '1050VBO08032011-01022011.zip', + '1050WPL08032011-01022011.xml', + '1050WPL08032011-01022011.zip', + '1050XXX08032011-01022011.zip', + 'Leveringsdocument-BAG-Extract.xml' + ]); + }); + }); + }); + + describe('conversion phase', function conversion() { + let jobs; + + before('create jobs object', () => { + jobs = bag.mapFilesToJobs('./test/unzip', './test'); + }); + + after('cleanup', () => { + console.log('Cleaning up'); + rimraf.sync(extractDir); + }); + + it('should map the files to a list of jobs', done => { + expect(jobs.length).to.equal(8); + expect(jobs[0].inputFile.split('.').slice(-1)[0]).to.deep.equal('xml'); + expect(jobs[0].outputPITsFile.split('.').slice(-2)[0]).to.deep.equal('pits'); + expect(jobs[0].outputRelationsFile.split('.').slice(-2)[0]).to.deep.equal('relations'); + done(); + }); + + it('should create the extraction dir if it does not exist', () => bag.mkdir(extractDir) + .then(() => expect(fs.existsSync(extractDir)).to.equal(true))); + + this.timeout(200000); + + it('should extract the entries from a list of files', done => { + const sourceDir = path.join(__dirname, 'unzip'); + bag.convert(config, sourceDir, null, (err, result) => { + if (err) return done(err); + expect(err).to.equal(null); + console.log(result); + expect(result).to.deep.equal(new Array(8).fill(true)); + done(); + }); + }); + }); +}); diff --git a/test/helpers/addressesextractorTest.js b/test/helpers/addressesextractorTest.js new file mode 100644 index 0000000..da4552c --- /dev/null +++ b/test/helpers/addressesextractorTest.js @@ -0,0 +1,58 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); +const nock = require('nock'); + +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = chai.expect; + +const addressesExtractor = require('../../helpers/addressesextractor.js'); +const sourceFile = 
path.join(__dirname, '..', 'mockups', 'bag-NUM-snippet.xml'); +const outputPITsFile = path.join(__dirname, '..', 'adres.pits.ndjson'); +const outputRelationsFile = path.join(__dirname, '..', 'adres.relations.ndjson'); + +describe('addresses extraction', () => { + + after('Cleanup', () => { + fs.unlinkSync(outputPITsFile); + fs.unlinkSync(outputRelationsFile); + }); + + it('should extract an address from a mocked snippet', done => { + addressesExtractor.extractFromFile(sourceFile, outputPITsFile, outputRelationsFile, (err, result) => { + if (err) throw err; + + var nodes = fs.readFileSync(outputPITsFile, 'utf-8') + .split('\n') + .filter(node => (node)) + .map(node => JSON.parse(node)); + + var edges = fs.readFileSync(outputRelationsFile, 'utf-8') + .split('\n') + .filter(edge => (edge)) + .map(edge => JSON.parse(edge)); + + console.log(`Result: ${nodes.length} addresses, ${edges.length} related streets \n`); + + expect(nodes[1]).to.deep.equal({ + endDate: null, + huisletter: null, + huisnummer: '12', + id: '0957200000300090', + postcode: '6041LZ', + startDate: '2010112200000000', + uri: 'http://bag.kadaster.nl/nummeraanduiding/0957200000300090' + }); + + expect(edges[0]).to.deep.equal({ + from: 'http://bag.kadaster.nl/nummeraanduiding/0957200000300090', + to: 'http://bag.kadaster.nl/openbareruimte/0957300000174823', + type: 'hg:related' + }); + + done(); + }); + }); +}); diff --git a/test/helpers/bagwriterTest.js b/test/helpers/bagwriterTest.js new file mode 100644 index 0000000..ace0026 --- /dev/null +++ b/test/helpers/bagwriterTest.js @@ -0,0 +1,25 @@ +'use strict'; +const fs = require('fs'); +const writer = require('../../helpers/bagwriter.js'); +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = chai.expect; + +describe('writer', function () { + const testNodesFile = './test/test.nodes'; + + after('Cleanup', () => { + fs.unlinkSync(testNodesFile); + }); + + it('should reject empty nodes 
object', () => { + return expect(writer.write()).to.be.rejected; + }); + + it('should reject empty nodes object', () => { + return writer.write([1,2,3], null, testNodesFile) + .then(result => expect(result).to.equal(true)); + }) + +}); diff --git a/test/helpers/buildingsextractorTest.js b/test/helpers/buildingsextractorTest.js new file mode 100644 index 0000000..1f469e2 --- /dev/null +++ b/test/helpers/buildingsextractorTest.js @@ -0,0 +1,70 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); + +const nock = require('nock'); + +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = chai.expect; + +const buildingsExtractor = require('../../helpers/buildingsextractor.js'); +const sourceFile = path.join(__dirname, '..', 'mockups', 'bag-PND-snippet.xml'); +const outputPITsFile = path.join(__dirname, '..', 'pand.pits.ndjson'); +const outputRelationsFile = path.join(__dirname, '..', 'pand.relations.ndjson'); + +describe('buildings extraction', function() { + after('Cleanup', () => { + fs.unlinkSync(outputPITsFile); + }); + + it('should extract the building entries from a file', (done) => { + buildingsExtractor.extractFromFile(sourceFile, outputPITsFile, outputRelationsFile, (err, result) => { + if (err) throw err; + + const nodes = fs.readFileSync(outputPITsFile, 'utf-8') + .split('\n') + .filter(node => (node)) + .map(node => JSON.parse(node)); + + console.log('result length:', nodes.length, '\n'); + console.log('extractedBuildingsFile number 19:', JSON.stringify(nodes[18], null, 2), '\n'); + + expect(nodes[18]).to.deep.equal({ + uri: 'http://bag.kadaster.nl/pand/0362100100084298', + id: '0362100100084298', + bouwjaar: '2011', + startDate: '2011010500000000', + endDate: null, + geometry: { + coordinates: [[ + [ + 4.8346467, + 52.2701938 + ], + [ + 4.8346579, + 52.2702235 + ], + [ + 4.8346023, + 52.2702314 + ], + [ + 4.8345937, + 52.2702087 + ], + [ + 4.8346467, + 52.2701938 + 
] + ]], + type: 'Polygon' + } + }); + done(); + }); + }); + +}); \ No newline at end of file diff --git a/test/helpers/geometrytoolsTest.js b/test/helpers/geometrytoolsTest.js new file mode 100644 index 0000000..b41c400 --- /dev/null +++ b/test/helpers/geometrytoolsTest.js @@ -0,0 +1,98 @@ +var fs = require('fs'); +var path = require('path'); + +var chai = require('chai'); +var chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +var should = chai.should(); +var expect = chai.expect; + +var geometryTools = require('../../helpers/geometrytools.js'); + +describe('geometry checking functions', function () { + it('should invalidate an invalid feature', () => { + var invalidFeature = { + type: 'Feature', + properties: { name: 'My non-simple hourglass-shaped geometry' }, + geometry: { + type: 'Polygon', + coordinates: [ + [ + [5.6, 52.4], + [6.3, 52.9], + [6.8, 52.1], + [7.2, 52.6], + [5.6, 52.4] + ] + ] + } + }; + + return geometryTools.validateCoords(invalidFeature.geometry.coordinates, invalidFeature.geometry.type) + .then(valid => expect(valid).to.be.false) + .catch(errs => { + console.error('Validation errors:', errs); + return expect(errs).to.be.not.null; + }); + }); + + it('should reproject the coordinates to WGS84', () => { + var geojson = { + uri: 'http://bag.kadaster.nl/pand/0362100100084298', + id: '0362100100084298', + bouwjaar: '2011', + geometry: { + type: 'Polygon', + coordinates: [ + [ + [ + 117283.951, + 475941.101 + ], + [ + 117284.742, + 475944.408 + ], + [ + 117280.949, + 475945.315 + ], + [ + 117280.344, + 475942.787 + ], + [ + 117283.951, + 475941.101 + ] + ] + ] + } + }; + + expect(geometryTools.toWGS84(geojson.geometry.coordinates[0][0])).to.deep.equal([4.834646702778442, 52.27019375226181]); + + }); + + it('should join a gml-extracted position list to a WGS84 geojson-compatible one', () => { + var testPosList = '116938.595 477068.148 0.0 ' + + '116930.644 477071.854 0.0 ' + + '116928.365 477066.959 0.0 ' + + '116936.316 
477063.253 0.0 ' + + '116936.327 477063.277 0.0 ' + + '116938.595 477068.148 0.0'; + + return geometryTools.joinGMLposlist(testPosList, 'Polygon') + .then(geojsoncoords => { + console.log(JSON.stringify(geojsoncoords, null, 2)); + return geometryTools.validateCoords(geojsoncoords, 'Polygon') + .then(valid => expect(valid).to.be.true) + .catch(err => { + console.log('geometry validation error:', err.stack); + return expect(err).to.be.null; + }); + }); + + }); + +}); diff --git a/test/helpers/placesextractorTest.js b/test/helpers/placesextractorTest.js new file mode 100644 index 0000000..64b460e --- /dev/null +++ b/test/helpers/placesextractorTest.js @@ -0,0 +1,88 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); + +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = chai.expect; + +const buildingsExtractor = require('../../helpers/placesextractor.js'); +const sourceFile = path.join(__dirname, '..', 'mockups', 'bag-WPL-snippet.xml'); +const outputPITsFile = path.join(__dirname, '..', 'woonplaats.pits.ndjson'); +const outputRelationsFile = path.join(__dirname, '..', 'woonplaats.relations.ndjson'); + +describe('places extraction', function () { + after('Cleanup', () => { + fs.unlinkSync(outputPITsFile); + }); + + it('should extract the places entries from a file', done => { + buildingsExtractor.extractFromFile(sourceFile, outputPITsFile, outputRelationsFile, (err, result) => { + if (err) throw err; + + const nodes = fs.readFileSync(outputPITsFile, 'utf-8') + .split('\n') + .filter(node => (node)) + .map(node => JSON.parse(node)); + + console.log('result length:', nodes.length, '\n'); + console.log('extractedBuildingsFile number 1:', JSON.stringify(nodes[0], null, 2), '\n'); + + const leeuwarden = nodes + .filter(node => node.label === 'Leeuwarden'); + + expect(leeuwarden[0]).to.deep.equal({ + uri: 'http://bag.kadaster.nl/woonplaats/1197', + id: '1197', + label: 
'Leeuwarden', + startDate: '2007110700000200', + endDate: '2012010100000400', + geometry: { + coordinates: [ + [ + [ + [ + 5.8093667, + 53.217454 + ], + [ + 5.8090766, + 53.2174305 + ], + [ + 5.8089482, + 53.2174187 + ], + [ + 5.8093667, + 53.217454 + ] + ], + [ + [ + 5.8095226, + 53.1620268 + ], + [ + 5.811024, + 53.1623452 + ], + [ + 5.8137097, + 53.1629642 + ], + [ + 5.8095226, + 53.1620268 + ] + ] + ] + ], + type: 'MultiPolygon' + } + }); + done(); + }); + }); +}); diff --git a/test/helpers/publicspacesextractorTest.js b/test/helpers/publicspacesextractorTest.js new file mode 100644 index 0000000..2b514b1 --- /dev/null +++ b/test/helpers/publicspacesextractorTest.js @@ -0,0 +1,58 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); +const nock = require('nock'); + +const chai = require('chai'); +const chaiAsPromised = require('chai-as-promised'); +chai.use(chaiAsPromised); +const expect = chai.expect; + +const publicSpacesExtractor = require('../../helpers/publicspacesextractor.js'); +const sourceFile = path.join(__dirname, '..', 'mockups', 'bag-OPR-snippet.xml'); +const outputPITsFile = path.join(__dirname, '..', 'openbareruimte.pits.ndjson'); +const outputRelationsFile = path.join(__dirname, '..', 'openbareruimte.relations.ndjson'); + +describe('public spaces extraction', function() { /* runs the public-spaces extractor on the OPR snippet, then parses the two ndjson output files line by line */ + after('Cleanup', () => { /* deletes both output files written during the test */ + fs.unlinkSync(outputPITsFile); + fs.unlinkSync(outputRelationsFile); + }); + + it('should extract the public spaces from the snippet', done => { + return publicSpacesExtractor.extractFromFile(sourceFile, outputPITsFile, outputRelationsFile, (err, result) => { + if (err) return done(err); + + const nodes = fs.readFileSync(outputPITsFile, 'utf-8') + .split('\n') + .filter(node => (node)) /* drops empty lines before JSON.parse */ + .map(node => JSON.parse(node)); + + const edges = fs.readFileSync(outputRelationsFile, 'utf-8') + .split('\n') + .filter(edge => (edge)) + .map(edge => JSON.parse(edge)); + + console.log(`Result: ${nodes.length} addresses, ${edges.length} 
related streets \n`); + expect(nodes.length).to.equal(3); + expect(nodes.length).to.equal(3); + + expect(nodes[0]).to.deep.equal({ + uri: 'http://bag.kadaster.nl/openbareruimte/0003300000116985', + id: '0003300000116985', + name: 'Abel Eppensstraat', + startDate: '1956032800000000', + endDate: null + }); + + expect(edges[0]).to.deep.equal({ + from: 'http://bag.kadaster.nl/openbareruimte/0003300000116985', + to: 'http://bag.kadaster.nl/woonplaats/3386', + type: 'hg:liesIn' + }); + + done(); + }); + }); + +}); diff --git a/test/mockups/BAG_Amstelveen_2011feb01.zip b/test/mockups/BAG_Amstelveen_2011feb01.zip new file mode 100644 index 0000000..37546c8 Binary files /dev/null and b/test/mockups/BAG_Amstelveen_2011feb01.zip differ diff --git a/test/mockups/atom_inspireadressen.xml b/test/mockups/atom_inspireadressen.xml new file mode 100644 index 0000000..42d9e6f --- /dev/null +++ b/test/mockups/atom_inspireadressen.xml @@ -0,0 +1,26 @@ + + + http://geodata.nationaalgeoregister.nl/inspireadressen/atom/inspireadressen.xml + INSPIRE Download Service van BAG - Inspire Adressen + Voorgedefinieerde dataset INSPIRE download service + + + + + PDOK beheer + beheerPDOK@kadaster.nl + + Copyright (c) 2012, Kadaster en Openbare Registers + 2016-02-08T23:00:00.000Z + + inspireadressen.zip + inspireadressen.zip + Downloadgrootte: 1.4 GB + 2016-02-08T23:00:00.000Z + + + Copyright (c) 2012, Kadaster en Openbare Registers + + 50.6 3.1 50.6 7.3 53.7 7.3 53.7 3.1 50.6 3.1 + + \ No newline at end of file diff --git a/test/mockups/bag-NUM-snippet.xml b/test/mockups/bag-NUM-snippet.xml new file mode 100644 index 0000000..ea8c73f --- /dev/null +++ b/test/mockups/bag-NUM-snippet.xml @@ -0,0 +1,78 @@ + + + + + + + 9999 + Nederland + 1 + + + 20160208 + + + + + 0957200000300090 + N + 0 + 12 + N + + 2010042900000000 + 2010112200000000 + + N + Verblijfsobject + 20100428Z0237A99B10 + Naamgeving uitgegeven + 0957300000174823 + + + + 0957200000300090 + N + 0 + 12 + N + 6041LZ + + 2010112200000000 + + N 
+ Verblijfsobject + + 20101122 + z02e277885c + + Naamgeving uitgegeven + + 0957300000174823 + + + + 0957200000300091 + N + 0 + 13 + N + + 2010042900000000 + 2010112200000000 + + N + Verblijfsobject + + 20100428 + Z0237A99B10 + + Naamgeving uitgegeven + + 0957300000174823 + + + + + + diff --git a/test/mockups/bag-OPR-snippet.xml b/test/mockups/bag-OPR-snippet.xml new file mode 100644 index 0000000..69fb48e --- /dev/null +++ b/test/mockups/bag-OPR-snippet.xml @@ -0,0 +1,79 @@ + + + + + + + 9999 + Nederland + 1 + + + 20160208 + + + + + 0003300000116985 + N + 0 + Abel Eppensstraat + N + + 1956032800000000 + + N + Weg + + 19560328 + OR RB 28-03-1956 + + Naamgeving uitgegeven + + 3386 + + + + 0003300000116986 + N + 0 + Adamistraat + N + + 1956032800000000 + + N + Weg + + 19560328 + OR RB 28-03-1956 + + Naamgeving uitgegeven + + 3386 + + + + 0003300000116987 + N + 0 + Alberdaweg + N + + 1964042200000000 + + N + Weg + + 19640422 + OR RB 22-04-1964 + + Naamgeving uitgegeven + + 3386 + + + + + + diff --git a/test/mockups/bag-PND-snippet.xml b/test/mockups/bag-PND-snippet.xml new file mode 100644 index 0000000..1a28f49 --- /dev/null +++ b/test/mockups/bag-PND-snippet.xml @@ -0,0 +1,502 @@ + + + + + + + 1050 + Amstelveen + 1 + + + 1050 + Amstelveen + + + + + 20110308 + 20110201 + + + + + 0362100001112963 + N + 0 + N + + + + + 121897.553 481533.229 0.0 121894.749 481534.371 0.0 121894.149 481534.097 0.0 121891.638 481535.12 0.0 121891.093 481535.342 0.0 121890.332 481533.48 0.0 121890.285 481533.501 0.0 121890.239 481533.389 0.0 121890.629 481533.23 0.0 121890.223 481532.235 0.0 121889.274 481532.623 0.0 121887.938 481529.355 0.0 121888.227 481529.238 0.0 121885.547 481522.675 0.0 121892.139 481519.984 0.0 121892.934 481521.922 0.0 121897.553 481533.229 0.0 + + + + + 1927 + Pand in gebruik + + 1926033100000000 + + N + + 19260331 + 1537 + + + + 0362100001112964 + N + 0 + N + + + + + 121897.553 481533.229 0.0 121892.934 481521.922 0.0 121892.139 481519.984 0.0 121898.734 
481517.292 0.0 121899.528 481519.233 0.0 121901.42 481523.86 0.0 121901.709 481523.741 0.0 121903.044 481527.009 0.0 121902.095 481527.398 0.0 121902.501 481528.392 0.0 121902.892 481528.232 0.0 121902.937 481528.344 0.0 121902.889 481528.362 0.0 121903.65 481530.224 0.0 121900.595 481531.47 0.0 121900.357 481532.086 0.0 121897.553 481533.229 0.0 + + + + + 1927 + Pand in gebruik + + 1926033100000000 + + N + + 19260331 + 1537 + + + + 0362100001112966 + N + 0 + N + + + + + 121847.662 481532.725 0.0 121847.163 481528.857 0.0 121859.566 481527.258 0.0 121860.845 481537.57 0.0 121860.87 481537.768 0.0 121860.633 481537.797 0.0 121857.0 481538.249 0.0 121857.591 481543.164 0.0 121857.769 481544.643 0.0 121856.931 481544.748 0.0 121856.748 481543.27 0.0 121850.632 481544.038 0.0 121849.728 481537.02 0.0 121848.24 481537.211 0.0 121847.662 481532.725 0.0 + + + + + 1956 + Pand in gebruik + + 1955051800000000 + + N + + 19550518 + 3733 + + + + 0362100001112968 + N + 0 + N + + + + + 122322.274 481239.14 0.0 122316.732 481241.506 0.0 122315.554 481238.747 0.0 122321.096 481236.381 0.0 122322.274 481239.14 0.0 + + + + + 2010 + Pand in gebruik + + 2010120700000000 + + N + + 20101207 + D-2010/131020 + + + + 0362100001121130 + N + 0 + N + + + + + 119180.511 479653.979 0.0 119178.88 479654.655 0.0 119180.255 479658.007 0.0 119168.193 479662.955 0.0 119168.128 479662.798 0.0 119163.701 479664.614 0.0 119149.392 479629.786 0.0 119153.8 479627.973 0.0 119151.746 479622.979 0.0 119163.815 479618.015 0.0 119165.19 479621.359 0.0 119193.523 479609.717 0.0 119194.555 479612.349 0.0 119190.842 479623.509 0.0 119191.708 479623.802 0.0 119188.207 479634.332 0.0 119187.271 479633.994 0.0 119180.511 479653.979 0.0 + + + + + 2004 + Pand in gebruik + + 2001100900000000 + + N + + 20011009 + 17483 + + + + 0362100001121133 + N + 0 + N + + + + + 118076.174 478764.24 0.0 118077.049 478764.852 0.0 118080.807 478764.073 0.0 118080.482 478762.716 0.0 118081.639 478761.773 0.0 118080.65 478760.582 0.0 
118080.864 478760.403 0.0 118081.076 478760.656 0.0 118083.191 478758.9 0.0 118082.981 478758.647 0.0 118083.193 478758.479 0.0 118084.178 478759.666 0.0 118085.327 478758.737 0.0 118086.696 478759.218 0.0 118087.21 478757.756 0.0 118090.322 478758.848 0.0 118089.81 478760.311 0.0 118091.042 478760.739 0.0 118091.296 478762.166 0.0 118092.822 478761.897 0.0 118093.397 478765.145 0.0 118091.871 478765.416 0.0 118092.12 478766.854 0.0 118091.011 478767.787 0.0 118092.009 478768.973 0.0 118089.484 478771.098 0.0 118088.487 478769.912 0.0 118087.384 478770.837 0.0 118086.022 478770.34 0.0 118085.492 478771.795 0.0 118082.392 478770.666 0.0 118082.922 478769.208 0.0 118081.629 478768.71 0.0 118081.384 478767.332 0.0 118079.819 478767.609 0.0 118079.794 478767.46 0.0 118079.377 478767.538 0.0 118077.656 478767.861 0.0 118077.558 478767.879 0.0 118071.538 478776.503 0.0 118070.351 478775.682 0.0 118068.805 478775.957 0.0 118069.935 478782.287 0.0 118070.345 478784.583 0.0 118073.281 478801.105 0.0 118075.419 478813.14 0.0 118076.098 478816.96 0.0 118076.059 478816.967 0.0 118076.356 478818.641 0.0 118063.692 478820.265 0.0 118062.892 478815.752 0.0 118062.985 478815.736 0.0 118060.813 478803.379 0.0 118060.799 478803.297 0.0 118059.683 478796.948 0.0 118055.163 478771.621 0.0 118055.509 478771.56 0.0 118022.263 478748.375 0.0 118021.994 478748.188 0.0 118019.639 478746.545 0.0 118019.347 478746.342 0.0 118014.139 478742.71 0.0 118014.163 478742.677 0.0 118014.015 478742.574 0.0 118014.648 478741.726 0.0 118020.307 478733.654 0.0 118021.329 478732.196 0.0 118021.47 478732.294 0.0 118021.498 478732.253 0.0 118029.379 478737.756 0.0 118046.913 478750.0 0.0 118055.482 478755.984 0.0 118058.237 478757.908 0.0 118059.549 478756.046 0.0 118059.606 478755.965 0.0 118061.536 478757.31 0.0 118062.193 478756.356 0.0 118068.174 478755.313 0.0 118068.489 478754.862 0.0 118068.718 478755.02 0.0 118068.547 478755.267 0.0 118071.057 478757.009 0.0 118071.228 478756.764 0.0 118071.457 
478756.924 0.0 118071.144 478757.375 0.0 118071.824 478761.196 0.0 118076.174 478764.24 0.0 + + + + + 1962 + Pand in gebruik + + 1960120100000000 + + N + + 19601201 + 4342 + + + + 0362100001121137 + N + 0 + N + + + + + 118950.727 479661.151 0.0 118927.789 479675.491 0.0 118915.236 479645.357 0.0 118919.011 479643.299 0.0 118946.457 479628.345 0.0 118953.345 479644.165 0.0 118940.393 479649.805 0.0 118946.136 479663.145 0.0 118950.727 479661.151 0.0 + + + + + 2002 + Pand in gebruik + + 1999080300000000 + + N + + 19990803 + 17492 + + + + 0362100001121138 + N + 0 + N + + + + + 119081.028 479651.491 0.0 119080.549 479651.457 0.0 119080.449 479652.913 0.0 119080.019 479652.883 0.0 119079.654 479658.18 0.0 119080.083 479658.21 0.0 119079.968 479659.879 0.0 119084.857 479660.201 0.0 119094.119 479681.382 0.0 119085.551 479685.148 0.0 119079.75 479671.892 0.0 119041.434 479688.631 0.0 119041.304 479688.334 0.0 119026.227 479653.788 0.0 118987.949 479670.58 0.0 118972.724 479635.727 0.0 118953.345 479644.165 0.0 118946.457 479628.345 0.0 118949.835 479626.321 0.0 118949.552 479625.582 0.0 118956.698 479622.455 0.0 118956.291 479621.541 0.0 118963.441 479618.425 0.0 118963.034 479617.512 0.0 118970.181 479614.387 0.0 118969.794 479613.465 0.0 118977.793 479609.973 0.0 118994.024 479602.887 0.0 119043.626 479581.244 0.0 119052.867 479577.178 0.0 119059.496 479592.377 0.0 119055.525 479594.109 0.0 119055.764 479594.655 0.0 119056.002 479595.2 0.0 119056.366 479595.041 0.0 119058.7 479600.383 0.0 119060.866 479605.34 0.0 119062.107 479608.181 0.0 119067.977 479621.616 0.0 119068.818 479623.541 0.0 119070.639 479627.71 0.0 119071.459 479629.588 0.0 119081.028 479651.491 0.0 + + + + + 1998 + Pand in gebruik + + 1996011700000000 + + N + + 19960117 + 17519 + + + + 0362100001121146 + N + 0 + N + + + + + 118848.781 481427.719 0.0 118844.228 481427.91 0.0 118844.232 481428.02 0.0 118840.453 481428.162 0.0 118840.36 481426.31 0.0 118842.73 481426.18 0.0 118842.68 481425.73 0.0 118845.5 
481425.63 0.0 118845.32 481424.36 0.0 118848.585 481424.176 0.0 118848.504 481422.702 0.0 118854.181 481422.388 0.0 118857.616 481422.198 0.0 118857.893 481427.214 0.0 118857.693 481427.225 0.0 118848.781 481427.719 0.0 + + + + + 1911 + Pand in gebruik + + 1909091000000000 + + N + + 19090910 + 123 + + + + 0362100001121155 + N + 0 + N + + + + + 119094.625 479708.276 0.0 119089.164 479710.684 0.0 119088.732 479709.701 0.0 119086.773 479710.562 0.0 119086.206 479709.271 0.0 119079.986 479712.017 0.0 119077.052 479705.382 0.0 119090.669 479699.348 0.0 119094.625 479708.276 0.0 + + + + + 1997 + Pand in gebruik + + 1996011000000000 + + N + + 19960110 + 12646 + + + + 0362100001121158 + N + 0 + N + + + + + 120141.064 480035.581 0.0 120145.852 480035.451 0.0 120145.868 480036.051 0.0 120145.928 480038.25 0.0 120141.14 480038.38 0.0 120141.08 480036.181 0.0 120141.064 480035.581 0.0 + + + + + 1975 + Pand in gebruik + + 1973022700000000 + + N + + 19730227 + 7049 + + + + 0362100001121162 + N + 0 + N + + + + + 117984.133 478580.075 0.0 117984.348 478581.317 0.0 117980.041 478582.053 0.0 117979.359 478578.269 0.0 117979.468 478578.25 0.0 117979.446 478578.132 0.0 117983.565 478577.433 0.0 117983.583 478577.532 0.0 117983.692 478577.513 0.0 117983.904 478578.745 0.0 117984.133 478580.075 0.0 + + + + + 1974 + Pand in gebruik + + 1973101900000000 + + N + + 19731019 + 7015 + + + + 0362100001121166 + N + 0 + N + + + + + 119360.636 478769.785 0.0 119361.339 478769.522 0.0 119359.822 478765.472 0.0 119359.752 478765.285 0.0 119359.225 478763.88 0.0 119359.155 478763.693 0.0 119359.076 478763.482 0.0 119358.373 478763.745 0.0 119353.222 478750.0 0.0 119353.004 478749.419 0.0 119360.843 478746.484 0.0 119362.071 478749.761 0.0 119366.208 478760.8 0.0 119367.526 478760.306 0.0 119370.058 478767.043 0.0 119374.527 478765.363 0.0 119375.496 478767.99 0.0 119377.506 478773.318 0.0 119363.881 478778.446 0.0 119360.636 478769.785 0.0 + + + + + 1978 + Pand in gebruik + + 1977062800000000 + + N 
+ + 19770628 + 8132 + + + + 0362100001121183 + N + 0 + N + + + + + 118645.762 478866.043 0.0 118642.753 478867.085 0.0 118640.156 478867.984 0.0 118639.812 478866.905 0.0 118638.909 478864.069 0.0 118644.466 478862.299 0.0 118645.762 478866.043 0.0 + + + + + 1956 + Pand in gebruik + + 1954041300000000 + + N + + 19540413 + 3610 + + + + 0362100001121195 + N + 0 + N + + + + + 118303.557 478146.314 0.0 118301.101 478146.879 0.0 118299.123 478147.335 0.0 118296.696 478147.892 0.0 118294.731 478139.345 0.0 118290.071 478140.416 0.0 118282.197 478142.226 0.0 118279.884 478132.169 0.0 118311.811 478124.829 0.0 118312.693 478128.659 0.0 118312.421 478128.722 0.0 118312.968 478131.1 0.0 118313.24 478131.037 0.0 118314.125 478134.886 0.0 118301.591 478137.767 0.0 118303.557 478146.314 0.0 + + + + + 1955 + Pand in gebruik + + 1954070200000000 + + N + + 19540702 + 3643 + + + + 0362100001121198 + N + 0 + N + + + + + 120779.102 477925.685 0.0 120776.641 477918.833 0.0 120776.597 477918.711 0.0 120781.451 477916.932 0.0 120781.413 477916.828 0.0 120783.464 477916.067 0.0 120786.062 477923.184 0.0 120779.102 477925.685 0.0 + + + + + 1970 + Pand in gebruik + + 1969082500000000 + + N + + 19690825 + 6395 + + + + 0362100001121205 + N + 0 + N + + + + + 116356.24 475406.914 0.0 116360.525 475416.067 0.0 116365.735 475427.197 0.0 116359.938 475429.911 0.0 116354.724 475418.783 0.0 116351.591 475420.25 0.0 116347.836 475412.281 0.0 116347.28 475411.102 0.0 116344.063 475404.203 0.0 116344.188 475404.119 0.0 116342.969 475401.573 0.0 116342.852 475401.614 0.0 116341.525 475398.781 0.0 116337.722 475390.658 0.0 116340.157 475389.553 0.0 116346.525 475386.575 0.0 116346.462 475386.44 0.0 116346.672 475386.342 0.0 116356.24 475406.914 0.0 + + + + + 1980 + Pand in gebruik + + 1979082300000000 + + N + + 19790823 + 8670 + + + + 0362100001190832 + N + 0 + N + + + + + 115803.282 475928.119 0.0 115798.329 475917.386 0.0 115784.807 475923.597 0.0 115789.704 475933.944 0.0 115747.391 475953.304 0.0 
115747.337 475953.186 0.0 115728.968 475913.219 0.0 115728.83 475912.92 0.0 115751.096 475902.517 0.0 115770.85 475893.28 0.0 115784.469 475886.958 0.0 115828.76 475866.4 0.0 115828.838 475866.569 0.0 115897.35 475834.51 0.0 115907.421 475856.357 0.0 115912.413 475867.187 0.0 115916.01 475874.99 0.0 115847.42 475906.74 0.0 115847.42 475906.781 0.0 115847.42 475906.844 0.0 115847.434 475906.874 0.0 115847.7 475907.45 0.0 115803.282 475928.119 0.0 + + + + + 1985 + Pand in gebruik + + 1984011900000000 + + N + + 19840119 + 9704A + + + + 0362100100084298 + N + 0 + N + + + + + 117283.951 475941.101 0.0 117284.742 475944.408 0.0 117280.949 475945.315 0.0 117280.344 475942.787 0.0 117283.951 475941.101 0.0 + + + + + 2011 + Bouwvergunning verleend + + 2011010500000000 + + N + + 20110105 + D-2011/023651 + + + + + + + diff --git a/test/mockups/config.json b/test/mockups/config.json new file mode 100644 index 0000000..6e5a823 --- /dev/null +++ b/test/mockups/config.json @@ -0,0 +1,8 @@ +{ + "baseUrlTest" : "http://data.nlextract.nl/bag/bron/", + "dataFileNameTest" : "BAG_Amstelveen_2011feb01.zip", + "feedURL" : "http://geodata.nationaalgeoregister.nl/inspireadressen/atom/inspireadressen.xml", + "data": { + "generatedDataDir": "./test/extract" + } +}