diff --git a/src/utils/scraper/SG/STB.ts b/src/utils/scraper/SG/STB.ts index 5493e12..575cd63 100644 --- a/src/utils/scraper/SG/STB.ts +++ b/src/utils/scraper/SG/STB.ts @@ -6,10 +6,9 @@ import Request from 'utils/Request' import Helpers from 'utils/Helpers' const BASE_URL = `https://www.stratatb.gov.sg` -const CURRENT_DECISIONS = `${BASE_URL}/resources-judgments.html` -const HISTORICAL_DECISIONS = `${BASE_URL}/resources-judgments-archives.html` +const CURRENT_DECISIONS = `${BASE_URL}/news-and-judgments/judgments/` -export const SGSTBlongFormatRegex = /stb(\sno\.?)?\s?(?\d{1,4}[a-z]?)((\s(and|&)\s\d{1,4}[a-z]?)|(\/\d{1,4}[a-z]?)+)?\sof\s(?[12]\d{3})/gi +export const SGSTBlongFormatRegex = /stb(\sno\.?)?\s?(?\d{1,4}[a-z]?)(,\s\d{1,4}){0,4}((\s(and|&)\s\d{1,4}[a-z]?)|(\/\d{1,4}[a-z]?)+)?\sof\s(?[12]\d{3})/gi const SGSTBsquareBracketRegex = /\[(?[12]\d{3})] sgstb (?\d{1,4}[a-z]?)/gi export const SGSTBIsSquareBracketFormat = (citation: string): boolean => (new RegExp(SGSTBsquareBracketRegex, `i`)).test(citation) export const SGSTBIsLongFormat = (citation: string): boolean => (new RegExp(SGSTBlongFormatRegex, `i`)).test(citation) @@ -17,61 +16,52 @@ export const SGSTBLongFormat = (squareBracketCitation: string) => { const [match] = [...squareBracketCitation.matchAll( SGSTBsquareBracketRegex, )] - return `STB ${match.groups.stbnumber} of ${match.groups.stbyear}` + return `STB ${match.groups.number} of ${match.groups.year}` } export const SGSTBSquareBracketFormat = (longCitation: string): string => { const [match] = [...longCitation.matchAll( SGSTBlongFormatRegex, )] - return `[${match.groups.year}] SGSTB ${match.groups.number}` + return `[${match.groups.stbyear}] SGSTB ${match.groups.stbnumber}` } const parseCasesPage = (html: string): Law.Case[] => { const $ = cheerio.load(html) - return $(`.main-cnt > .list.yearItm`).map((yearIndex, year) => { - return $(`.itm`, year).map((rowIndex, row) => { - const hyperlink = $(`.itm-desc a`, row) - const path = hyperlink.attr(`href`) - const hyperlinkText = hyperlink.attr(`title`).replaceAll(` `, ` `) - const matches = [...hyperlinkText.matchAll(/(stb.*\d{4})\s*[–-]\s*([\da-z].*)/gi)] - try { - const [[_, citation, name]] = matches - const judgmentLink: Law.Link = { - doctype: `Judgment`, - filetype: `PDF`, - url: `${BASE_URL}/${path}`, - } - return { - citation: citation.trim(), - database: Constants.DATABASES.SG_stb, - jurisdiction: Constants.JURISDICTIONS.SG.id, - links: [ - judgmentLink, - ], - name: name.trim(), - } - } catch (error) { - Logger.error( - error, - ) + return $(`.bp-container > .row > div.col.resource-card-element`).map((_, item) => { + const hyperlink = $(`a`, item) + const path = hyperlink.attr(`href`) + const title = $(`h5 > b`, item).text().trim() + const [match] = [...title.matchAll(SGSTBlongFormatRegex)] + const citation = match[0] + const name = title.replace(`${citation} – `, ``).trim() + + try { + const judgmentLink: Law.Link = { + doctype: `Judgment`, + filetype: `PDF`, + url: `${BASE_URL}/${path}`, + } + return { + citation: SGSTBSquareBracketFormat(citation), + database: Constants.DATABASES.SG_stb, + jurisdiction: Constants.JURISDICTIONS.SG.id, + links: [ + judgmentLink, + ], + name, } - return null - }).get().filter(c => c !== null) - }).get() + } catch (error) { + Logger.error( + error, + ) + } + return null + }).get().filter(c => c !== null) } const getAllCases = async (): Promise => { - const getCurrentCases = Request.get(CURRENT_DECISIONS) - const getHistoricalCases = Request.get(HISTORICAL_DECISIONS) - const results = (await Promise.allSettled([ - getCurrentCases, - getHistoricalCases, - ])).filter(({ status }) => status === `fulfilled`) - - return results.flatMap(({ value }: any) => { - const { data } = value - return parseCasesPage(data) - }) + const { data } = await Request.get(CURRENT_DECISIONS) + return parseCasesPage(data) } const makeSTBCitation = (citation: string): string => {