From 9945fe2df594918a1bc5b62640d8486afdb888f0 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 2 Aug 2023 12:27:02 +0300 Subject: [PATCH 1/8] Migrate to MW Action API v2 --- src/MediaWiki.ts | 6 +++--- test/unit/downloader.test.ts | 9 +++++++-- test/unit/util.test.ts | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index c3ddb107c..d831da248 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -183,7 +183,7 @@ class MediaWiki { } // Getting token to login. - const { content, responseHeaders } = await downloader.downloadContent(url + 'action=query&meta=tokens&type=login&format=json') + const { content, responseHeaders } = await downloader.downloadContent(url + 'action=query&meta=tokens&type=login&format=json&formatversion=2') // Logging in await axios(this.apiUrl.href, { @@ -221,10 +221,10 @@ class MediaWiki { const entries = json.query[type] Object.keys(entries).forEach((key) => { const entry = entries[key] - const name = entry['*'] + const name = entry.alias const num = entry.id const allowedSubpages = 'subpages' in entry - const isContent = !!(entry.content !== undefined || util.contains(addNamespaces, num)) + const isContent = !!(entry.content || util.contains(addNamespaces, num)) const canonical = entry.canonical ? entry.canonical : '' const details = { num, allowedSubpages, isContent } /* Namespaces in local language */ diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 01e261ef3..ce9da8840 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -37,6 +37,11 @@ describe('Downloader class', () => { await downloader.setBaseUrls() }) + test('Test Action API version 2 response in comparison with version 1', async () => { + const actionAPIResV1 = await downloader.query('?action=parse&format=json&prop=modules|jsconfigvars|headhtml&page=Potato') + const actionAPIResV2 = await downloader.query('?action=parse&format=json&prop=modules|jsconfigvars|headhtml&formatversion=2&page=Potato') + }) + test('downloader.query returns valid JSON', async () => { const queryRet = await downloader.query() expect(typeof queryRet).toBe('object') @@ -235,7 +240,7 @@ describe('Downloader class', () => { }) test('Url is not image type', async () => { - const isnotImage = isImageUrl('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json') + const isnotImage = isImageUrl('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&formatversion=2') expect(isnotImage).not.toBeTruthy() }) @@ -339,7 +344,7 @@ describe('Downloader class', () => { async function getRandomImageUrl(): Promise { const resp = await Axios( - 'https://commons.wikimedia.org/w/api.php?action=query&generator=random&grnnamespace=6&prop=imageinfo&iiprop=url&formatversion=2&iiurlwidth=100&format=json', + 'https://commons.wikimedia.org/w/api.php?action=query&generator=random&grnnamespace=6&prop=imageinfo&iiprop=url&formatversion=2&iiurlwidth=100&format=json&formatversion=2', ) const url = resp.data.query.pages[0].imageinfo[0].url return isImageUrl(url) ? url : getRandomImageUrl() diff --git a/test/unit/util.test.ts b/test/unit/util.test.ts index 890a6d24e..75e2772c6 100644 --- a/test/unit/util.test.ts +++ b/test/unit/util.test.ts @@ -313,7 +313,7 @@ describe('Utils', () => { test('No title normalisation', async () => { const resp = await axios.get( - 'https://en.wiktionary.org/w/api.php?action=query&format=json&prop=redirects|revisions|pageimages&rdlimit=max&rdnamespace=0&redirects=true&titles=constructor', + 'https://en.wiktionary.org/w/api.php?action=query&format=json&prop=redirects|revisions|pageimages&rdlimit=max&rdnamespace=0&redirects=true&titles=constructor&formatversion=2', { responseType: 'json' }, ) const normalizedObject = normalizeMwResponse(resp.data.query) From ec26f5d6ae3d1de1839d4f027ad5a32b4191dafd Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 2 Aug 2023 13:23:06 +0300 Subject: [PATCH 2/8] Add formatversion=2 option to API url builder --- src/util/builders/url/api.director.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/util/builders/url/api.director.ts b/src/util/builders/url/api.director.ts index 477ee286e..1c6569b8b 100644 --- a/src/util/builders/url/api.director.ts +++ b/src/util/builders/url/api.director.ts @@ -19,6 +19,7 @@ export default class ApiURLDirector { cmtype: 'subcat', cmlimit: 'max', format: 'json', + formatversion: '2', cmtitle: articleId, cmcontinue: continueStr, }) @@ -28,7 +29,7 @@ export default class ApiURLDirector { buildSiteInfoQueryURL() { return urlBuilder .setDomain(this.baseDomain) - .setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json', siprop: 'general|namespaces|statistics|variables|category|wikidesc' }) + .setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json', formatversion: '2', siprop: 'general|namespaces|statistics|variables|category|wikidesc' }) .build() } @@ -37,11 +38,14 @@ export default class ApiURLDirector { } buildNamespacesURL() { - return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json' }).build() + return urlBuilder + .setDomain(this.baseDomain) + .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json', formatversion: '2' }) + .build() } buildSiteInfoURL() { - return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json' }).build() + return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'query', meta: 'siteinfo', format: 'json', formatversion: '2' }).build() } buildVisualEditorURL() { From 3f0afdbe9b439b04b2e630f4ba10dcc8b93a6393 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 2 Aug 2023 13:47:10 +0300 Subject: [PATCH 3/8] Add test for MW Action API v2 (partial-impl) --- src/util/builders/url/api.director.ts | 11 +++++++---- test/unit/downloader.test.ts | 5 +++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/util/builders/url/api.director.ts b/src/util/builders/url/api.director.ts index 1c6569b8b..529e36a3a 100644 --- a/src/util/builders/url/api.director.ts +++ b/src/util/builders/url/api.director.ts @@ -38,10 +38,13 @@ export default class ApiURLDirector { } buildNamespacesURL() { - return urlBuilder - .setDomain(this.baseDomain) - .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json', formatversion: '2' }) - .build() + return ( + urlBuilder + .setDomain(this.baseDomain) + // TODO: set formatversion: '2' here + .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json' }) + .build() + ) } buildSiteInfoURL() { diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index ce9da8840..3a15266b0 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -38,8 +38,9 @@ describe('Downloader class', () => { }) test('Test Action API version 2 response in comparison with version 1', async () => { - const actionAPIResV1 = await downloader.query('?action=parse&format=json&prop=modules|jsconfigvars|headhtml&page=Potato') - const actionAPIResV2 = await downloader.query('?action=parse&format=json&prop=modules|jsconfigvars|headhtml&formatversion=2&page=Potato') + const actionAPIResV1 = await downloader.getJSON('https://en.wikipedia.org/w/api.php?action=parse&format=json&prop=modules|jsconfigvars|headhtml&page=Potato') + const actionAPIResV2 = await downloader.getJSON('https://en.wikipedia.org/w/api.php?action=parse&format=json&prop=modules|jsconfigvars|headhtml&formatversion=2&page=Potato') + expect(actionAPIResV1).not.toEqual(actionAPIResV2) }) test('downloader.query returns valid JSON', async () => { From e2c015dbccbfb57084084b85b03e986d4c95c2e1 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 2 Aug 2023 14:00:57 +0300 Subject: [PATCH 4/8] Update api.director.test file --- test/unit/builders/url/api.director.test.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/builders/url/api.director.test.ts b/test/unit/builders/url/api.director.test.ts index c8be2653b..1cb50ef73 100644 --- a/test/unit/builders/url/api.director.test.ts +++ b/test/unit/builders/url/api.director.test.ts @@ -7,7 +7,7 @@ describe('ApiURLDirector', () => { it('should return a string URL to get article sub categories', () => { const url = apiUrlDirector.buildSubCategoriesURL('article-123') - expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtype=subcat&cmlimit=max&format=json&cmtitle=article-123&cmcontinue=') + expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtype=subcat&cmlimit=max&format=json&formatversion=2&cmtitle=article-123&cmcontinue=') }) }) @@ -15,7 +15,9 @@ describe('ApiURLDirector', () => { it('should return string URL to get site info', () => { const url = apiUrlDirector.buildSiteInfoQueryURL() - expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&siprop=general%7Cnamespaces%7Cstatistics%7Cvariables%7Ccategory%7Cwikidesc') + expect(url).toBe( + 'https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&formatversion=2&siprop=general%7Cnamespaces%7Cstatistics%7Cvariables%7Ccategory%7Cwikidesc', + ) }) }) @@ -47,7 +49,7 @@ describe('ApiURLDirector', () => { it('should return a string URL with predefined query params for retrieving site info', () => { const url = apiUrlDirector.buildSiteInfoURL() - expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json') + expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json&formatversion=2') }) }) From 63fbae46f015db9a7dbea26b82cf37a640585e4f Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 2 Aug 2023 17:26:24 +0300 Subject: [PATCH 5/8] Minor fixes for tests --- src/util/builders/url/api.director.ts | 2 +- test/unit/builders/url/api.director.test.ts | 2 +- test/unit/downloader.test.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/util/builders/url/api.director.ts b/src/util/builders/url/api.director.ts index 529e36a3a..9159c3b8c 100644 --- a/src/util/builders/url/api.director.ts +++ b/src/util/builders/url/api.director.ts @@ -62,6 +62,6 @@ export default class ApiURLDirector { } private buildBaseArticleURL() { - return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml' }).build() + return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml', formatversion: '2' }).build() } } diff --git a/test/unit/builders/url/api.director.test.ts b/test/unit/builders/url/api.director.test.ts index 1cb50ef73..8af73cc7a 100644 --- a/test/unit/builders/url/api.director.test.ts +++ b/test/unit/builders/url/api.director.test.ts @@ -33,7 +33,7 @@ describe('ApiURLDirector', () => { it('should return a string URL with predefined query params and provided page for retrieving article', () => { const url = apiUrlDirector.buildArticleApiURL('article-123') - expect(url).toBe('https://en.wikipedia.org/w/api.php?action=parse&format=json&prop=modules%7Cjsconfigvars%7Cheadhtml&page=article-123') + expect(url).toBe('https://en.wikipedia.org/w/api.php?action=parse&format=json&prop=modules%7Cjsconfigvars%7Cheadhtml&formatversion=2&page=article-123') }) }) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 3a15266b0..153666e05 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -94,7 +94,7 @@ describe('Downloader class', () => { expect(Paris).toBeDefined() expect(Zürich).toBeDefined() - expect(THISARTICLEDOESNTEXIST.missing).toBe('') + expect(THISARTICLEDOESNTEXIST.missing).toBe(true) }) test("getArticleDetailsNS query returns 'gapContinue' or 'multiple articles', ", async () => { From 61a416fe161308e2479c1826d974d50f68cb57d9 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 7 Sep 2023 16:24:34 +0300 Subject: [PATCH 6/8] Minor fixes after rebasing --- src/Downloader.ts | 2 +- src/MediaWiki.ts | 4 ++-- src/util/builders/url/api.director.ts | 16 ++++++++-------- test/unit/builders/url/api.director.test.ts | 2 +- test/unit/downloader.test.ts | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 73e5cf6a8..6369cf692 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -669,7 +669,7 @@ class Downloader { // Saving, as a js module, the jsconfigvars that are set in the header of a wikipedia page // the script below extracts the config with a regex executed on the page header returned from the api - const scriptTags = domino.createDocument(`${headhtml['*']}`).getElementsByTagName('script') + const scriptTags = domino.createDocument(`${headhtml}`).getElementsByTagName('script') const regex = /mw\.config\.set\(\{.*?\}\);/gm // eslint-disable-next-line @typescript-eslint/prefer-for-of for (let i = 0; i < scriptTags.length; i += 1) { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index d831da248..768e25c47 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -151,7 +151,7 @@ class MediaWiki { } const resp = await downloader.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts)) - const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates') + const isCoordinateWarning = JSON.stringify(resp?.warnings?.query ?? '').includes('coordinates') if (isCoordinateWarning) { logger.info('Coordinates not available on this wiki') return (this.#hasCoordinates = false) @@ -224,7 +224,7 @@ class MediaWiki { const name = entry.alias const num = entry.id const allowedSubpages = 'subpages' in entry - const isContent = !!(entry.content || util.contains(addNamespaces, num)) + const isContent = !!(entry.content !== undefined || util.contains(addNamespaces, num)) const canonical = entry.canonical ? entry.canonical : '' const details = { num, allowedSubpages, isContent } /* Namespaces in local language */ diff --git a/src/util/builders/url/api.director.ts b/src/util/builders/url/api.director.ts index 9159c3b8c..6651d7c6d 100644 --- a/src/util/builders/url/api.director.ts +++ b/src/util/builders/url/api.director.ts @@ -38,13 +38,10 @@ export default class ApiURLDirector { } buildNamespacesURL() { - return ( - urlBuilder - .setDomain(this.baseDomain) - // TODO: set formatversion: '2' here - .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json' }) - .build() - ) + return urlBuilder + .setDomain(this.baseDomain) + .setQueryParams({ action: 'query', meta: 'siteinfo', siprop: 'namespaces|namespacealiases', format: 'json', formatversion: '2' }) + .build() } buildSiteInfoURL() { @@ -52,7 +49,10 @@ export default class ApiURLDirector { } buildVisualEditorURL() { - return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'visualeditor', mobileformat: 'html', format: 'json', paction: 'parse', page: '' }).build(true) + return urlBuilder + .setDomain(this.baseDomain) + .setQueryParams({ action: 'visualeditor', mobileformat: 'html', format: 'json', paction: 'parse', formatversion: '2', page: '' }) + .build(true) } buildArticleApiURL(articleId: string) { diff --git a/test/unit/builders/url/api.director.test.ts b/test/unit/builders/url/api.director.test.ts index 8af73cc7a..ebac6726b 100644 --- a/test/unit/builders/url/api.director.test.ts +++ b/test/unit/builders/url/api.director.test.ts @@ -57,7 +57,7 @@ describe('ApiURLDirector', () => { it('should return base visual editor URL object with default query params', () => { const url = apiUrlDirector.buildVisualEditorURL() - expect(url.href).toBe('https://en.wikipedia.org/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page=') + expect(url.href).toBe('https://en.wikipedia.org/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2&page=') }) }) }) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 153666e05..3a15266b0 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -94,7 +94,7 @@ describe('Downloader class', () => { expect(Paris).toBeDefined() expect(Zürich).toBeDefined() - expect(THISARTICLEDOESNTEXIST.missing).toBe(true) + expect(THISARTICLEDOESNTEXIST.missing).toBe('') }) test("getArticleDetailsNS query returns 'gapContinue' or 'multiple articles', ", async () => { From aafeab197c1165b6f4342a0ca7c207a7dad33ae8 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 7 Sep 2023 17:57:18 +0300 Subject: [PATCH 7/8] Refactor Mediawiki.getNamespaces() (partial impl) --- src/Downloader.ts | 1 + src/MediaWiki.ts | 10 +++++++++- src/types.d.ts | 2 +- test/unit/builders/url/api.director.test.ts | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 6369cf692..bf40280d6 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -415,6 +415,7 @@ class Downloader { ...MediaWiki.queryOpts, prop: MediaWiki.queryOpts.prop.concat(prop), rdnamespace: validNamespaceIds.join('|'), + formatversion: '2', redirects: redirects ? true : undefined, } } diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 768e25c47..d392b0979 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -20,6 +20,7 @@ export interface QueryOpts { rdlimit: string rdnamespace: string | number redirects?: boolean + formatversion: string } class MediaWiki { @@ -115,6 +116,7 @@ class MediaWiki { rdlimit: 'max', rdnamespace: 0, redirects: false, + formatversion: '2', } this.#hasWikimediaDesktopRestApi = null @@ -213,15 +215,21 @@ class MediaWiki { } } + /* + TODO: fix this to handle formatversion=2 + Entries for namespaces and namespacealiases are different now. Make sure to distinguish 'alias' and 'name' properties for each of them + */ public async getNamespaces(addNamespaces: number[], downloader: Downloader) { const url = this.apiUrlDirector.buildNamespacesURL() const json: any = await downloader.getJSON(url) + console.log('json ', json) ;['namespaces', 'namespacealiases'].forEach((type) => { const entries = json.query[type] + console.log('entries ', entries) Object.keys(entries).forEach((key) => { const entry = entries[key] - const name = entry.alias + const name = entry.name const num = entry.id const allowedSubpages = 'subpages' in entry const isContent = !!(entry.content !== undefined || util.contains(addNamespaces, num)) diff --git a/src/types.d.ts b/src/types.d.ts index 50ffb8156..9e25f7526 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -124,7 +124,7 @@ interface MwApiQueryResponse { } interface MwApiResponse { - batchcomplete: string + batchcomplete: boolean query: MwApiQueryResponse continue?: { [key: string]: string diff --git a/test/unit/builders/url/api.director.test.ts b/test/unit/builders/url/api.director.test.ts index ebac6726b..993b9dfa9 100644 --- a/test/unit/builders/url/api.director.test.ts +++ b/test/unit/builders/url/api.director.test.ts @@ -41,7 +41,7 @@ describe('ApiURLDirector', () => { it('should return a string URL with predefined query params to get article namespaces', () => { const url = apiUrlDirector.buildNamespacesURL() - expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces%7Cnamespacealiases&format=json') + expect(url).toBe('https://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces%7Cnamespacealiases&format=json&formatversion=2') }) }) From 2b736c55a54b12716495079a0059ed2318a6c8fc Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 8 Sep 2023 10:23:54 +0300 Subject: [PATCH 8/8] Update Mediawiki.getNamespaces(), minor test fixes --- src/MediaWiki.ts | 10 ++-------- test/unit/downloader.test.ts | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index d392b0979..9f4951bb4 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -215,24 +215,18 @@ class MediaWiki { } } - /* - TODO: fix this to handle formatversion=2 - Entries for namespaces and namespacealiases are different now. Make sure to distinguish 'alias' and 'name' properties for each of them - */ public async getNamespaces(addNamespaces: number[], downloader: Downloader) { const url = this.apiUrlDirector.buildNamespacesURL() const json: any = await downloader.getJSON(url) - console.log('json ', json) ;['namespaces', 'namespacealiases'].forEach((type) => { const entries = json.query[type] - console.log('entries ', entries) Object.keys(entries).forEach((key) => { const entry = entries[key] - const name = entry.name + const name = type === 'namespaces' ? entry.name : entry.alias const num = entry.id const allowedSubpages = 'subpages' in entry - const isContent = !!(entry.content !== undefined || util.contains(addNamespaces, num)) + const isContent = type === 'namespaces' ? !!(entry.content || util.contains(addNamespaces, num)) : !!(entry.content !== undefined || util.contains(addNamespaces, num)) const canonical = entry.canonical ? entry.canonical : '' const details = { num, allowedSubpages, isContent } /* Namespaces in local language */ diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 3a15266b0..153666e05 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -94,7 +94,7 @@ describe('Downloader class', () => { expect(Paris).toBeDefined() expect(Zürich).toBeDefined() - expect(THISARTICLEDOESNTEXIST.missing).toBe('') + expect(THISARTICLEDOESNTEXIST.missing).toBe(true) }) test("getArticleDetailsNS query returns 'gapContinue' or 'multiple articles', ", async () => {