-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.ts
97 lines (77 loc) · 3.21 KB
/
build.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import fs from 'node:fs'
import type * as recorderTypes from '@cityssm/pdf-metadata-recorder/types'
import { OPTIONS_ALL, textToSearchTerms } from '@cityssm/text-to-search-terms'
// Agenda repositories are published one per calendar year, and their URLs
// differ only by that year.
const newestAgendaRepositoryYear = 2023
const oldestAgendaRepositoryYear = 1999

/**
 * Base URLs of the yearly agenda repositories, ordered from the newest year
 * down to the oldest year (both inclusive).
 */
const repositoryURLs = Array.from(
  { length: newestAgendaRepositoryYear - oldestAgendaRepositoryYear + 1 },
  (_unused, yearOffset) =>
    `https://cityssm.github.io/council-agendas-${
      newestAgendaRepositoryYear - yearOffset
    }`
)
/**
 * Downloads the `metadata.json` file published under every URL in
 * `repositoryURLs` and merges the entries into a single list.
 *
 * For each PDF entry, the `author` and `title` fields are removed, an agenda
 * date and title are derived from the file name, and `fullContent` is
 * replaced by the space-joined output of `textToSearchTerms`.
 *
 * Repositories are fetched one at a time in `repositoryURLs` order, and the
 * entries of each repository are reversed before being appended.
 *
 * @returns One record per PDF entry across all repositories.
 * @throws {Error} If a metadata file responds with a non-2xx HTTP status.
 */
async function buildAgendaMetadata(): Promise<AgendaMetadata[]> {
  const allAgendaMetadata: AgendaMetadata[] = []

  for (const repositoryURL of repositoryURLs) {
    const metadataURL = repositoryURL + '/metadata.json'

    console.log(`Fetching ${metadataURL} ...`)

    const metadataResponse = await fetch(metadataURL)

    // Fail with the URL and status instead of letting an error page reach
    // the JSON parser, where it would surface as an opaque syntax error.
    if (!metadataResponse.ok) {
      throw new Error(
        `Unable to fetch ${metadataURL}: ${metadataResponse.status} ${metadataResponse.statusText}`
      )
    }

    const allPdfMetadata =
      (await metadataResponse.json()) as recorderTypes.PdfMetadata[]

    console.log(`- Processing ${allPdfMetadata.length} agendas.`)

    // Reversed in place; presumably so the newest agendas of the year come
    // first - confirm against the ordering of the published metadata.
    allPdfMetadata.reverse()

    for (const [pdfIndex, pdfMetadata] of allPdfMetadata.entries()) {
      console.log(
        `  - ${pdfIndex + 1} / ${allPdfMetadata.length} - ${
          pdfMetadata.fileName
        }`
      )

      // Delete unused fields
      delete pdfMetadata.author
      delete pdfMetadata.title

      // Parse the file name: drop the last four characters (assumed to be a
      // ".pdf" extension), then split on runs of spaces, underscores, and
      // hyphens.
      const fileNameSplit = pdfMetadata.fileName.slice(0, -4).split(/[ _-]+/)

      // The first three pieces form the date.
      const agendaDate =
        fileNameSplit[0] + '-' + fileNameSplit[1] + '-' + fileNameSplit[2]

      // Every remaining piece is a word of the title.
      let agendaTitle = fileNameSplit[3]

      for (const titleWord of fileNameSplit.slice(4)) {
        agendaTitle += ' ' + titleWord
      }

      // Clean up content
      pdfMetadata.fullContent = textToSearchTerms(
        pdfMetadata.fullContent ?? '',
        OPTIONS_ALL
      ).join(' ')

      // pdfMetadata is the assign source, so any property it shares with the
      // derived fields takes precedence over them.
      const agendaMetadata: AgendaMetadata = Object.assign(
        {
          url: repositoryURL + '/' + pdfMetadata.fileName,
          agendaDate,
          agendaTitle
        },
        pdfMetadata
      )

      allAgendaMetadata.push(agendaMetadata)
    }
  }

  return allAgendaMetadata
}
// Script entry point: gather the metadata from every repository.
const agendaMetadata = await buildAgendaMetadata()

// Serialize with two-space indentation.
const agendaMetadataJSON = JSON.stringify(agendaMetadata, undefined, 2)

// Write the combined index to metadata.json (relative path).
fs.writeFileSync('metadata.json', agendaMetadataJSON)