Skip to content

Commit

Permalink
fix: STB search
Browse files Browse the repository at this point in the history
  • Loading branch information
hueyy committed Mar 9, 2024
1 parent bab4d13 commit 0bbd88f
Showing 1 changed file with 35 additions and 45 deletions.
80 changes: 35 additions & 45 deletions src/utils/scraper/SG/STB.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,72 +6,62 @@ import Request from 'utils/Request'
import Helpers from 'utils/Helpers'

// Root of the Strata Titles Boards website; listing paths below are built on it.
const BASE_URL = `https://www.stratatb.gov.sg`
// Listing page that the scraper reads judgments from.
const CURRENT_DECISIONS = `${BASE_URL}/news-and-judgments/judgments/`
// Archive listing URL from the earlier page layout. NOTE(review): kept only so
// that any remaining reference still resolves — confirm whether it can be removed.
const HISTORICAL_DECISIONS = `${BASE_URL}/resources-judgments-archives.html`

/**
 * Long-form Strata Titles Board citation, for example `STB 12 of 2020`,
 * `STB No. 3A of 2019`, `STB 1, 2 and 3 of 2018` or `STB 4/5 of 2017`.
 *
 * Named groups: `stbnumber` (the first application number) and `stbyear`.
 * The pattern is global and case-insensitive so it can be used with `matchAll`.
 */
export const SGSTBlongFormatRegex = /stb(\sno\.?)?\s?(?<stbnumber>\d{1,4}[a-z]?)(,\s\d{1,4}){0,4}((\s(and|&)\s\d{1,4}[a-z]?)|(\/\d{1,4}[a-z]?)+)?\sof\s(?<stbyear>[12]\d{3})/gi

/**
 * Square-bracket citation, for example `[2020] SGSTB 12`.
 * Named groups: `year` and `number`.
 */
const SGSTBsquareBracketRegex = /\[(?<year>[12]\d{3})] sgstb (?<number>\d{1,4}[a-z]?)/gi

/**
 * Returns the named groups of the first match of `pattern` in `input`.
 *
 * @throws TypeError when `input` contains no match. This is the same error
 * class the previous unguarded `match.groups` access raised, with a message
 * that names the offending input.
 */
const firstSTBMatchGroups = (input: string, pattern: RegExp): Record<string, string> => {
  const [match] = [...input.matchAll(pattern)]
  if (!match?.groups) {
    throw new TypeError(`Not a recognisable STB citation: ${input}`)
  }
  return match.groups
}

/**
 * Reports whether `citation` contains a square-bracket STB citation.
 *
 * The pattern is re-compiled with only the `i` flag: dropping `g` means
 * `test` does not carry `lastIndex` from one call to the next.
 */
export const SGSTBIsSquareBracketFormat = (citation: string): boolean => (new RegExp(SGSTBsquareBracketRegex, `i`)).test(citation)

/**
 * Reports whether `citation` contains a long-form STB citation.
 * Re-compiled without the `g` flag for the same reason as above.
 */
export const SGSTBIsLongFormat = (citation: string): boolean => (new RegExp(SGSTBlongFormatRegex, `i`)).test(citation)

/**
 * Converts a square-bracket citation (`[2020] SGSTB 12`) into the long form
 * (`STB 12 of 2020`), using the first citation found in the input.
 *
 * @throws TypeError when the input holds no square-bracket citation.
 */
export const SGSTBLongFormat = (squareBracketCitation: string): string => {
  // The square-bracket pattern names its groups `year` / `number`.
  const { year, number } = firstSTBMatchGroups(squareBracketCitation, SGSTBsquareBracketRegex)
  return `STB ${number} of ${year}`
}

/**
 * Converts a long-form citation (`STB 12 of 2020`) into the square-bracket
 * form (`[2020] SGSTB 12`), using the first citation found in the input.
 * When several application numbers are listed, only the first is kept.
 *
 * @throws TypeError when the input holds no long-form citation.
 */
export const SGSTBSquareBracketFormat = (longCitation: string): string => {
  // The long-form pattern names its groups `stbyear` / `stbnumber`.
  const { stbyear, stbnumber } = firstSTBMatchGroups(longCitation, SGSTBlongFormatRegex)
  return `[${stbyear}] SGSTB ${stbnumber}`
}

/**
 * Extracts judgments from the HTML of the STB judgments listing page.
 *
 * Each `.resource-card-element` card is expected to hold a link to the
 * judgment and an `h5 > b` title of the form `<long-form citation> – <name>`.
 * Cards that cannot be parsed (no link, or no STB citation in the title) are
 * logged through `Logger.error` and left out, so one malformed card does not
 * abort the whole listing.
 *
 * @param html Raw HTML of the listing page.
 * @returns One case per parsable card, cited in square-bracket form.
 */
const parseCasesPage = (html: string): Law.Case[] => {
  const $ = cheerio.load(html)
  return $(`.bp-container > .row > div.col.resource-card-element`).map((_, item) => {
    try {
      const path = $(`a`, item).attr(`href`)
      if (!path) {
        throw new Error(`STB judgment card has no link`)
      }

      const title = $(`h5 > b`, item).text().trim()
      const [match] = [...title.matchAll(SGSTBlongFormatRegex)]
      if (!match) {
        throw new Error(`No STB citation found in title: ${title}`)
      }
      const citation = match[0]
      // The title separates citation and case name with an en dash.
      const name = title.replace(`${citation} – `, ``).trim()

      const judgmentLink: Law.Link = {
        doctype: `Judgment`,
        filetype: `PDF`,
        // NOTE(review): assumes `href` is site-relative without a leading slash — confirm.
        url: `${BASE_URL}/${path}`,
      }
      return {
        citation: SGSTBSquareBracketFormat(citation),
        database: Constants.DATABASES.SG_stb,
        jurisdiction: Constants.JURISDICTIONS.SG.id,
        links: [
          judgmentLink,
        ],
        name,
      }
    } catch (error) {
      Logger.error(
        error,
      )
      return null
    }
  }).get().filter(c => c !== null)
}

/**
 * Downloads the STB judgments listing page and parses every case on it.
 *
 * The request is not wrapped in a `try`/`catch`, so a failed request rejects
 * the returned promise.
 *
 * @returns The cases produced by `parseCasesPage` for the listing page.
 */
const getAllCases = async (): Promise<Law.Case[]> => {
  const { data } = await Request.get(CURRENT_DECISIONS)
  return parseCasesPage(data)
}

const makeSTBCitation = (citation: string): string => {
Expand Down

0 comments on commit 0bbd88f

Please sign in to comment.