diff --git a/README.md b/README.md index 3080e3a..d7919cd 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Coverage][cov-image]][cov-url] [![Tests][test-image]][test-url] -[![Badge Fury][fury-image]] +![Badge Fury][fury-image] This `node` command-line utility uses a headless browser (Puppeteer) to render a webpage and download all resources it may need. These resources including the @@ -49,6 +49,7 @@ Now let's see what the total download size was: Usage: wgrep [options] Options: + -V, --version output the version number -d, --directory The output directory (default: "output") -u, --username The user to authenticate as -h, --help output usage information diff --git a/index.js b/index.js index 82de76d..c4a1efb 100755 --- a/index.js +++ b/index.js @@ -19,11 +19,9 @@ const { version } = require('./package.json'); const options = program.opts() co(async function () { + wgrep.ensureOutput(options.directory) console.log(`Calling for "${regex}" in "${options.directory}" from "${url}" with user "${options.username}"`) - const errors = await wgrep.download( url, options.directory ); - if ( errors.flag ) { - console.log('Errors', errors) - } + await wgrep.download( url, options.directory ); wgrep.show(wgrep.find( options.directory, regex )) }); diff --git a/wgrep.js b/wgrep.js index e76877c..e1ea482 100644 --- a/wgrep.js +++ b/wgrep.js @@ -15,39 +15,58 @@ const download = async function ( url, directory ) { // console.log('* directory', directory) const browser = await puppeteer.launch(); const page = await browser.newPage(); - let errors = {flag: false, net: 0, buf: 0, main: 0, page: 0 }; + const errors = { net: 0, buf: 0, main: 0, page: 0, fs: 0 }; - page.on('error', e=> { errors.flag = true; errors.main++ }) - page.on('pageerror', e=> { errors.flag = true; errors.page++ }) + let payload = null; + let logStream = fs.createWriteStream(path.join( __dirname, directory, '.wgrep.log')); + // logStream.on('finish', () => { + // console.log('wrote all data to log file'); + // }); + page.on('error', e=> { errors.main++ }) + page.on('pageerror', e=> { errors.page++ }) page.on('response', async (response) => { const _url = new URL(response.url()); + let filePath = path.join( __dirname, directory, _url.pathname ); - // console.log('filePath', filePath) + // console.log('* filePath', filePath) if (path.extname(_url.pathname).trim() === '') { - filePath = `${filePath}/index.html`; + filePath = path.join(filePath, 'index.html'); } + try { - await fse.outputFile(filePath, await response.buffer()); + payload = await response.buffer(); } catch (e) { - console.warn(e.toString(), _url.href, filePath) - errors.flag = true; + logStream.write(`${e} (${_url.href})\n`) errors.buf++ } + try { + if (!payload) { + logStream.write('No payload for ' + _url + '\n') + } + else { + await fse.outputFile(filePath, payload); + } + } + catch (e) { + logStream.write(`${e} (${_url.href}) ${filePath}\n`) + errors.fs++ + } }); try { + // console.log('* goto', url) await page.goto( url, {waitUntil: 'networkidle2'}); await page.screenshot({path: 'screencap.png', fullPage: true}); } catch (e) { - console.log(e.toString()) - errors.flag = true; + logStream.write(e.toString()); errors.net++ } finally { - await browser.close(); - return errors + await browser.close() + logStream.write(`\n${JSON.stringify(errors, null, 2)}\n`) + logStream.end() } }; @@ -64,7 +83,9 @@ const find = ( directory, regex ) => { test = shell.test; if ( test('-d', directory) ) { console.log(`Looking in "${directory}" for '${regex}'`) - const files = find( directory ).filter( file=> test('-f', file) ); + const files = find( directory ) + .filter( file => test('-f', file) ) + .filter( file => !file.includes('.wgrep.log') ); return grep('-l', regex, files ).trim().split('\n').filter(_=>_) } else { @@ -83,6 +104,19 @@ const show = files => { } } +/** + * Create the output directory if it doesn't exist + */ +const ensureOutput = directory => { + const d = path.join( __dirname, directory ); + + if (!fs.existsSync(d)){ + console.log('Creating output directory', d) + fs.mkdirSync(d); + } +} + +exports.ensureOutput = ensureOutput; exports.download = download; exports.find = find; exports.show = show;