From d65256e722d3e5060907ec3fe75d1e0edb058d5d Mon Sep 17 00:00:00 2001 From: Scott Gilroy Date: Sun, 10 Nov 2024 13:12:51 -0500 Subject: [PATCH] fix: Fix search for 30+ results (#696) Fixes #642 --- .gitignore | 2 + lib/search.js | 147 ++++++++++++--------------------------------- test/lib.search.js | 122 ++++++++++++++++++++++++------------- 3 files changed, 123 insertions(+), 148 deletions(-) diff --git a/.gitignore b/.gitignore index b08e409d..e424e32d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ build/Release # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git node_modules .nyc_output + +.vscode \ No newline at end of file diff --git a/lib/search.js b/lib/search.js index a9e6bc8f..52ef4168 100644 --- a/lib/search.js +++ b/lib/search.js @@ -10,23 +10,11 @@ import scriptData from './utils/scriptData.js'; * process the next pages. */ function initialRequest (opts) { - // sometimes the first result page is a cluster of subsections, - // need to skip to the full results page - function skipClusterPage (html) { - const match = html.match(/href="\/store\/apps\/collection\/search_collection_more_results_cluster?(.*?)"/); - if (match) { - const innerUrl = BASE_URL + match[0].split(/"/)[1]; - return request(Object.assign({ - url: innerUrl - }, opts.requestOptions), opts.throttle); - } - return html; - } - - const url = `${BASE_URL}/store/search?c=apps&q=${opts.term}&hl=${opts.lang}&gl=${opts.country}&price=${opts.price}`; - return request(Object.assign({ url }, opts.requestOptions), opts.throttle) - .then(skipClusterPage) - .then((html) => processFirstPage(html, opts, [], INITIAL_MAPPINGS)); + const url = `${BASE_URL}/work/search?q=${opts.term}&hl=${opts.lang}&gl=${opts.country}&price=${opts.price}`; + return request( + Object.assign({ url }, opts.requestOptions), + opts.throttle + ).then((html) => processFirstPage(html, opts, [], INITIAL_MAPPINGS)); } function extaractDeveloperId (link) { @@ -38,124 +26,69 @@ async function processFirstPage (html, opts, savedApps, mappings) { html = scriptData.parse(html); } - const mainAppMapping = { - title: [16, 2, 0, 0], - appId: [16, 11, 0, 0], + const appsMapping = { + title: [2], + appId: [12, 0], url: { - path: [17, 0, 0, 4, 2], + path: [9, 4, 2], fun: (path) => new url.URL(path, BASE_URL).toString() }, - icon: [16, 2, 95, 0, 3, 2], - developer: [16, 2, 68, 0], + icon: [1, 1, 0, 3, 2], + developer: [4, 0, 0, 0], developerId: { - path: [16, 2, 68, 1, 4, 2], + path: [4, 0, 0, 1, 4, 2], fun: extaractDeveloperId }, - currency: [17, 0, 2, 0, 1, 0, 1], - price: { - path: [17, 0, 2, 0, 1, 0, 0], - fun: (price) => price / 1000000 - }, - free: { - path: [17, 0, 2, 0, 1, 0, 0], - fun: (price) => price === 0 - }, - summary: [16, 2, 73, 0, 1], - scoreText: [16, 2, 51, 0, 0], - score: [16, 2, 51, 0, 1] - }; - - const moreResultsMapping = { - title: [0, 3], - appId: [0, 0, 0], - url: { - path: [0, 10, 4, 2], - fun: (path) => new url.URL(path, BASE_URL).toString() - }, - icon: [0, 1, 3, 2], - developer: [0, 14], - currency: [0, 8, 1, 0, 1], + currency: [7, 0, 3, 2, 1, 0, 1], price: { - path: [0, 8, 1, 0, 0], + path: [7, 0, 3, 2, 1, 0, 0], fun: (price) => price / 1000000 }, free: { - path: [0, 8, 1, 0, 0], + path: [7, 0, 3, 2, 1, 0, 0], fun: (price) => price === 0 }, - summary: [0, 13, 1], - scoreText: [0, 4, 0], - score: [0, 4, 1] + summary: [4, 1, 1, 1, 1], + scoreText: [6, 0, 2, 1, 0], + score: [6, 0, 2, 1, 1] }; - removeUnneededSections(html, mappings); + const sections = R.path(mappings.sections, html) || []; + if (noResultsFound(sections)) return []; - const sections = R.path(mappings.sections, html); - if (noResultsFound(sections, opts)) return []; + const tokenSection = sections.filter((section) => isTokenSection(section))[0]; + const appsSection = R.path(mappings.apps, html); - const moreResultsSection = sections.filter(section => isMoreSection(section))[0]; - const mainAppSection = R.path(mappings.app, html); - - const processedApps = R.map(scriptData.extractor(moreResultsMapping), R.path(SECTIONS_MAPPING.apps, moreResultsSection)); - if (mainAppSection) { - processedApps.unshift(scriptData.extractor(mainAppMapping)(mainAppSection)); - } + // parse each item in appsSection array + const processedApps = R.map(scriptData.extractor(appsMapping), appsSection); const apps = opts.fullDetail ? await processFullDetailApps(processedApps, opts) : processedApps; - const token = R.path(SECTIONS_MAPPING.token, moreResultsSection); + const token = R.path(SECTIONS_MAPPING.token, tokenSection); return checkFinished(opts, [...savedApps, ...apps], token); } -function isMoreSection (section) { - const sectionTitle = R.path(SECTIONS_MAPPING.title, section); - return R.is(String, sectionTitle); +function isTokenSection (section) { + const sectionToken = + R.is(Array, section) && R.path(SECTIONS_MAPPING.token, section); + return R.is(String, sectionToken); } -/** - * Removes unused sections that contain no app informations - * Removed sections: - * * About these results - * * Suggested message - * - * Note: For EU countries the suggested message is shown before the About section - * while for no result it is reverted - */ -function removeUnneededSections (html, mappings) { - removeSectionsIfPathValueOfType(html, SECTIONS_MAPPING.aboutResultsTitle, String); - // if the search function does no longer throw when no result was returned - // mapping SECTIONS_MAPPING.suggestedResultDescritpion can be replaced with SECTIONS_MAPPING.noResult mapping - removeSectionsIfPathValueOfType(html, [...mappings.sections, 0, ...SECTIONS_MAPPING.suggestedResultDescritpion], String); - removeSectionsIfPathValueOfType(html, SECTIONS_MAPPING.aboutResultsTitle, String); -} - -function removeSectionsIfPathValueOfType (html, path, type) { - if (R.is(type, R.path(path, html))) { - R.path(INITIAL_MAPPINGS.sections, html).shift(); +function noResultsFound (sections) { + if (sections.length === 0) { + return true; } } -function noResultsFound (sections, opts) { - return sections.some(section => { - const noResults = R.path(SECTIONS_MAPPING.noResults, section); - return R.is(String, noResults) && noResults.endsWith(`${opts.term}`); - }); -} - const INITIAL_MAPPINGS = { - app: ['ds:4', 0, 1, 0, 23], - sections: ['ds:4', 0, 1] + apps: ['ds:1', 0, 1, 0, 0, 0], + sections: ['ds:1', 0, 1, 0, 0] }; const SECTIONS_MAPPING = { - title: [22, 1, 0], - token: [22, 1, 3, 1], - apps: [22, 0], - noResults: [25, 0, 0, 0, 1], - suggestedResultDescritpion: [25, 0, 0, 1, 1], - aboutResultsTitle: ['ds:4', 0, 1, 0, 31, 0] + token: [1] }; function getPriceGoogleValue (value) { @@ -177,7 +110,7 @@ function search (appData, opts) { } if (opts.num && opts.num > 250) { - throw Error('The number of results can\'t exceed 250'); + throw Error("The number of results can't exceed 250"); } opts = { @@ -192,13 +125,13 @@ function search (appData, opts) { requestOptions: opts.requestOptions }; - initialRequest(opts) - .then(resolve) - .catch(reject); + initialRequest(opts).then(resolve).catch(reject); }).then((results) => { if (opts.fullDetail) { // if full detail is wanted get it from the app module - return Promise.all(results.map((app) => appData({ ...opts, appId: app.appId }))); + return Promise.all( + results.map((app) => appData({ ...opts, appId: app.appId })) + ); } return results; }); diff --git a/test/lib.search.js b/test/lib.search.js index 53095c92..18b44c8b 100644 --- a/test/lib.search.js +++ b/test/lib.search.js @@ -4,58 +4,84 @@ import { assertValidApp, assertIdsInArray } from './common.js'; describe('Search method', () => { it('should fetch a valid application list', () => { - return gplay.search({ term: 'Panda vs Zombies' }) + return gplay + .search({ term: 'Panda vs Zombies' }) .then((apps) => apps.map(assertValidApp)); }); + describe('additional properties', () => { + it('should fetch a valid application list with developer property', () => { + return gplay + .search({ + term: 'com.google.android.gm' + }) + .then((apps) => apps.map((app) => assert.isString(app.developer))); + }); + + it('should fetch a valid application list with developerId property', () => { + return gplay + .search({ + term: 'com.google.android.gm' + }) + .then((apps) => apps.map((app) => assert.isString(app.developerId))); + }); + }); + it('should validate the results number', function () { const count = 5; - return gplay.search({ - term: 'vr', - num: count - }) + return gplay + .search({ + term: 'vr', + num: count + }) .then((apps) => { apps.map(assertValidApp); - assert(apps.length === count, `should return ${count} items but ${apps.length} returned`); + assert( + apps.length === count, + `should return ${count} items but ${apps.length} returned` + ); }); }); // preregister tend to have some fields missing, increasing chances of failure // by searching "preregister" we have more chances of getting some in the results it('should search for pre register', () => - gplay.search({ term: 'preregister', num: 10 }) + gplay + .search({ term: 'preregister', num: 10 }) .then((apps) => apps.map(assertValidApp))); it('should search for pre register with fullDetail', () => - gplay.search({ term: 'preregister', num: 10, fullDetail: true }) + gplay + .search({ term: 'preregister', num: 10, fullDetail: true }) .then((apps) => apps.map(assertValidApp))).timeout(5 * 1000); it('should fetch multiple pages of distinct results', () => - gplay.search({ term: 'p', num: 55 }) - .then((apps) => { - assert.equal(apps.length, 55, 'should return as many apps as requested'); - })); + gplay.search({ term: 'p', num: 55 }).then((apps) => { + assert.equal(apps.length, 55, 'should return as many apps as requested'); + })); it('should fetch multiple pages of when not starting from cluster of subsections', () => - gplay.search({ term: 'p', num: 65 }) - .then((apps) => { - assert.equal(apps.length, 65, 'should return as many apps as requested'); - })); + gplay.search({ term: 'p', num: 65 }).then((apps) => { + assert.equal(apps.length, 65, 'should return as many apps as requested'); + })); describe('country and language specific', () => { describe('without more results section', () => { it('should fetch a valid application list for eu country', () => { - return gplay.search({ term: 'Panda vs Zombies', country: 'GH' }) + return gplay + .search({ term: 'Panda vs Zombies', country: 'GH' }) .then((apps) => apps.map(assertValidApp)); }); it('should fetch a valid application list for non eu country', () => { - return gplay.search({ term: 'Facebook', country: 'GE' }) + return gplay + .search({ term: 'Facebook', country: 'GE' }) .then((apps) => apps.map(assertValidApp)); }); it('should fetch a valid application list for eu country with specific language', () => { - return gplay.search({ term: 'Panda vs Zombies', country: 'BE', lang: 'it' }) + return gplay + .search({ term: 'Panda vs Zombies', country: 'BE', lang: 'it' }) .then((apps) => apps.map(assertValidApp)); }); }); @@ -63,15 +89,15 @@ describe('Search method', () => { describe('more results mapping', () => { it('should return few netflix apps', () => { - return gplay.search({ term: 'netflix' }) - .then((apps) => { - assert.equal(apps[0].appId, 'com.netflix.mediaclient'); - assert.isAbove(apps.length, 0); - }); + return gplay.search({ term: 'netflix' }).then((apps) => { + assert.equal(apps[0].appId, 'com.netflix.mediaclient'); + assert.isAbove(apps.length, 0); + }); }); it('should return few netflix apps from german store with german language', () => { - return gplay.search({ term: 'netflix', lang: 'de', country: 'DE' }) + return gplay + .search({ term: 'netflix', lang: 'de', country: 'DE' }) .then((apps) => { assert.equal(apps[0].appId, 'com.netflix.mediaclient'); // Don't check specific ids, as results may vary @@ -80,48 +106,62 @@ describe('Search method', () => { }); it('should return few google mail apps', () => { - return gplay.search({ term: 'gmail' }) - .then((apps) => { - assert.equal(apps[0].appId, 'com.google.android.gm'); - assert.isTrue(apps.some((app) => app.appId === 'com.google.android.gm.lite')); - }); + return gplay.search({ term: 'gmail' }).then((apps) => { + assert.equal(apps[0].appId, 'com.google.android.gm'); + assert.isTrue( + apps.some((app) => app.appId === 'com.google.android.gm.lite') + ); + }); }); it('should return apps for search with a category as query', () => { - return gplay.search({ term: 'games' }) + return gplay + .search({ term: 'games' }) .then((apps) => assertIdsInArray(apps, 'com.kiloo.subwaysurf')); }); it('should return empty set when no results found', () => { - return gplay.search({ term: 'asdasdyxcnmjysalsaflaslf' }) + return gplay + .search({ term: 'asdasdyxcnmjysalsaflaslf' }) .then(assert.isEmpty); }); it('should return empty set when no results found in eu country store', () => { - return gplay.search({ term: 'ASyyDASDyyASDASD', country: 'DE', lang: 'SP' }) + return gplay + .search({ term: 'ASyyDASDyyASDASD', country: 'DE', lang: 'SP' }) .then(assert.isEmpty); }); it('should return empty set when no results found in us store with other language', () => { - return gplay.search({ term: 'ASyyDASDyyASDASD', country: 'US', lang: 'FR' }) + return gplay + .search({ term: 'ASyyDASDyyASDASD', country: 'US', lang: 'FR' }) .then(assert.isEmpty); }); }); describe('suggested search', () => { it('should return apps from suggested search', () => { - return gplay.search({ term: 'runing app' }) - .then((apps) => { - apps.map(assertValidApp); - assertIdsInArray(apps, 'com.runtastic.android', 'running.tracker.gps.map', 'com.google.android.apps.fitness'); - }); + return gplay.search({ term: 'runing app' }).then((apps) => { + apps.map(assertValidApp); + assertIdsInArray( + apps, + 'com.runtastic.android', + 'running.tracker.gps.map', + 'com.google.android.apps.fitness' + ); + }); }); it('should return apps from suggested search in european country', () => { - return gplay.search({ term: 'runing tracker', country: 'GR' }) + return gplay + .search({ term: 'runing tracker', country: 'GR' }) .then((apps) => { apps.map(assertValidApp); - assertIdsInArray(apps, 'com.runtastic.android', 'running.tracker.gps.map'); + assertIdsInArray( + apps, + 'com.runtastic.android', + 'running.tracker.gps.map' + ); }); }); });