diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..6fd9a8f --- /dev/null +++ b/.npmignore @@ -0,0 +1,4 @@ +tsconfig.json +src +files +.vscode \ No newline at end of file diff --git a/package.json b/package.json index ba22484..6d7671b 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { - "name": "bt-scapacra", - "version": "1.0.0", - "description": "Bundestag scapacra", + "name": "@democracy-deutschland/scapacra-bt", + "version": "1.0.4", + "description": "Scapacra Bundestag", "main": "./dist/index.js", "typings": "./dist/index.d.ts", "author": "DEMOCRACY Deutschland e.V.", @@ -14,10 +14,12 @@ }, "scripts": { "build": "tsc", - "dev": "nodemon .\\src\\index.ts", + "link": "cd dist && npm link", + "dev": "nodemon .\\src\\run.ts", "lint": "tslint --project tsconfig.json && yarn typecheck", "typecheck": "tsc --noEmit", - "test": "mocha -r ts-node/register test/**/*.test.ts" + "test": "mocha -r ts-node/register test/**/*.test.ts", + "prepare": "npm run build" }, "bugs": { "url": "https://github.com/demokratie-live/bt-scapacra/issues" @@ -35,16 +37,16 @@ "xsd-schema-validator": "^0.6.0" }, "dependencies": { + "@democracy-deutschland/scapacra": "1.0.2", "@types/axios": "^0.14.0", "@types/xml2js": "^0.4.3", "@types/xmldom": "^0.1.29", "axios": "^0.18.0", "jsonschema": "^1.2.4", - "scapacra": "https://github.com/demokratie-live/scapacra.git#master", "typescript": "^3.1.6", "url": "^0.11.0", "xml2js": "^0.4.19", "xmldom": "^0.1.27", "xpath": "^0.0.27" } -} +} \ No newline at end of file diff --git a/src/browser/BundestagListBrowser.ts b/src/browser/BundestagListBrowser.ts index c33ad4e..6c36c05 100644 --- a/src/browser/BundestagListBrowser.ts +++ b/src/browser/BundestagListBrowser.ts @@ -1,4 +1,4 @@ -import { IDataPackage, DataType, IBrowser } from 'scapacra'; +import { IDataPackage, DataType, IBrowser } from '@democracy-deutschland/scapacra'; import { URL } from 'url'; diff --git a/src/browser/DeputyProfileBrowser.ts b/src/browser/DeputyProfileBrowser.ts index 3e1e32b..1a49395 100644 --- a/src/browser/DeputyProfileBrowser.ts +++ b/src/browser/DeputyProfileBrowser.ts @@ -1,4 +1,4 @@ -import { IDataPackage, DataType, IBrowser } from 'scapacra'; +import { IDataPackage, DataType, IBrowser } from '@democracy-deutschland/scapacra'; import { URL } from 'url'; @@ -57,7 +57,7 @@ namespace Deputy_Browser { if (blobUrl == undefined) { throw new Error("URL stack is empty."); } - console.log(blobUrl.toString()); + // console.log(blobUrl.toString()); let response = await axios.default.get( blobUrl.toString(), diff --git a/src/browser/PlenarProtocolBrowser.ts b/src/browser/PlenarProtocolBrowser.ts index 4f3cd9c..82784f1 100644 --- a/src/browser/PlenarProtocolBrowser.ts +++ b/src/browser/PlenarProtocolBrowser.ts @@ -1,4 +1,4 @@ -import { DataType, IBrowser } from 'scapacra'; +import { DataType, IBrowser } from '@democracy-deutschland/scapacra'; import { BundestagListBrowser } from './BundestagListBrowser'; export = Documents_Browser; diff --git a/src/browser/ProposedDecisionBrowser.ts b/src/browser/ProposedDecisionBrowser.ts index 8a48ce3..b10fb09 100644 --- a/src/browser/ProposedDecisionBrowser.ts +++ b/src/browser/ProposedDecisionBrowser.ts @@ -1,4 +1,4 @@ -import { DataType, IBrowser } from 'scapacra'; +import { DataType, IBrowser } from '@democracy-deutschland/scapacra'; import { BundestagListBrowser } from './BundestagListBrowser'; export = Documents_Browser; diff --git a/src/config/DeputyProfileScraperConfiguration.ts b/src/config/DeputyProfileScraperConfiguration.ts index c4a2027..2b89f3b 100644 --- a/src/config/DeputyProfileScraperConfiguration.ts +++ b/src/config/DeputyProfileScraperConfiguration.ts @@ -1,4 +1,4 @@ -import { IParser, IBrowser, IScraperConfiguration } from 'scapacra'; +import { IParser, IBrowser, IScraperConfiguration } from '@democracy-deutschland/scapacra'; import { DeputyProfile, DeputyProfileBrowser } from '../browser/DeputyProfileBrowser'; import { DeputyProfileParser } from '../parser/DeputyProfileParser'; export = Deputy_Config; diff --git a/src/config/ProposedDecisionScraperConfiguration.ts b/src/config/ProposedDecisionScraperConfiguration.ts index 949d650..afb4fdc 100644 --- a/src/config/ProposedDecisionScraperConfiguration.ts +++ b/src/config/ProposedDecisionScraperConfiguration.ts @@ -1,4 +1,4 @@ -import { IBrowser, IParser, IScraperConfiguration } from 'scapacra'; +import { IBrowser, IParser, IScraperConfiguration } from '@democracy-deutschland/scapacra'; import { URL } from 'url'; import { Pdf, ProposedDecisionBrowser } from '../browser/ProposedDecisionBrowser'; diff --git a/src/config/ProtocolScraperConfiguration.ts b/src/config/ProtocolScraperConfiguration.ts index 476ebb7..d9de683 100644 --- a/src/config/ProtocolScraperConfiguration.ts +++ b/src/config/ProtocolScraperConfiguration.ts @@ -1,4 +1,4 @@ -import { IBrowser, IParser, IScraperConfiguration } from 'scapacra'; +import { IBrowser, IParser, IScraperConfiguration } from '@democracy-deutschland/scapacra'; import { Xml, PlenarProtocolBrowser } from '../browser/PlenarProtocolBrowser'; export = Documents_Config; diff --git a/src/config/ProtocolSpeechScraperConfiguration.ts b/src/config/ProtocolSpeechScraperConfiguration.ts index 74da00b..f795471 100644 --- a/src/config/ProtocolSpeechScraperConfiguration.ts +++ b/src/config/ProtocolSpeechScraperConfiguration.ts @@ -1,5 +1,5 @@ import { ProtocolScraperConfiguration } from '../config/ProtocolScraperConfiguration'; -import { IParser } from 'scapacra'; +import { IParser } from '@democracy-deutschland/scapacra'; import { ProtocolSpeechesParser } from '../parser/ProtocolSpeechesParser'; import { Xml } from '../browser/PlenarProtocolBrowser'; diff --git a/src/config/ProtocolVotingScraperConfiguration.ts b/src/config/ProtocolVotingScraperConfiguration.ts index fe89f0b..2aeff05 100644 --- a/src/config/ProtocolVotingScraperConfiguration.ts +++ b/src/config/ProtocolVotingScraperConfiguration.ts @@ -1,6 +1,6 @@ import { Xml } from '../browser/PlenarProtocolBrowser'; import { ProtocolScraperConfiguration } from '../config/ProtocolScraperConfiguration'; -import { IParser } from 'scapacra'; +import { IParser } from '@democracy-deutschland/scapacra'; import { ProtocolVotingParser } from '../parser/ProtocolVotingParser'; export = Documents_Config; diff --git a/src/index.ts b/src/index.ts index 4687406..3d3a4a3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,35 +1,13 @@ -import * as fs from 'fs'; -import * as util from 'util'; - -import { Scraper } from 'scapacra'; - import { IProtocolScraperConfigurationOptions } from './config/ProtocolScraperConfiguration'; import { ProtocolSpeechScraperConfiguration } from './config/ProtocolSpeechScraperConfiguration'; import { ProtocolVotingScraperConfiguration } from './config/ProtocolVotingScraperConfiguration'; import { ProposedDecisionScraperConfiguration } from './config/ProposedDecisionScraperConfiguration'; import { DeputyProfileScraperConfiguration } from './config/DeputyProfileScraperConfiguration'; -async function scrape() { - let options: IProtocolScraperConfigurationOptions = { - maxCount: 2 - }; - - await Scraper.scrape([ - // new ProtocolSpeechScraperConfiguration(options), - new ProtocolVotingScraperConfiguration(options), - // new ProposedDecisionScraperConfiguration() - // new DeputyProfileScraperConfiguration() - ], ((dataPackages) => { - console.log(util.inspect(dataPackages, false, null, true)) - for (const dataPackage of dataPackages) { - let id = dataPackage.data.id; - if (id == null) { - id = dataPackage.data["top-id"]; - } - - fs.writeFileSync('out/scraperResult/deputies/' + id + '.json', JSON.stringify(dataPackage.data)); - } - })); +export { + IProtocolScraperConfigurationOptions, + ProtocolSpeechScraperConfiguration, + ProtocolVotingScraperConfiguration, + ProposedDecisionScraperConfiguration, + DeputyProfileScraperConfiguration } - -scrape().then(c => { }); \ No newline at end of file diff --git a/src/parser/DeputyProfileParser.ts b/src/parser/DeputyProfileParser.ts index cca56dd..652a5b8 100644 --- a/src/parser/DeputyProfileParser.ts +++ b/src/parser/DeputyProfileParser.ts @@ -1,7 +1,6 @@ -import { IDataPackage, IParser } from 'scapacra'; +import { IDataPackage, IParser } from '@democracy-deutschland/scapacra'; import { DeputyProfile } from '../browser/DeputyProfileBrowser'; -import { DeputyProfileEvaluator } from './evaluator/DeputyProfileEvaluator'; export = Deputy_Parser; @@ -28,19 +27,19 @@ namespace Deputy_Parser { let m; - //Img & Name - let img: string = ''; + //ImgURL & Name + let imgURL: string = ''; let name: string = ''; - const regex_img_name = /