Skip to content

Commit

Permalink
Merge pull request #2915 from owid/svg-tester-perf
Browse files Browse the repository at this point in the history
enhance(svg-tester): capture performance data
  • Loading branch information
larsyencken authored Nov 21, 2023
2 parents 8ddcbaa + c399fb3 commit ed4f195
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 19 deletions.
44 changes: 36 additions & 8 deletions devTools/svgTester/export-graphs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ async function main(parsedArgs: parseArgs.ParsedArgs) {
let outDir = parsedArgs["o"] ?? "../owid-grapher-svgs/svg"
const targetConfigs: string[] = parseArgAsList(parsedArgs["c"])
const targetChartTypes: string[] = parseArgAsList(parsedArgs["t"])
const isolate = parsedArgs["isolate"] ?? false

if (isolate) {
console.info(
"Running in 'isolate' mode. This will be slower, but heap usage readouts will be accurate."
)
} else {
console.info(
"Not running in 'isolate'. Reported heap usage readouts will be inaccurate. Run in --isolate mode (way slower!) for accurate heap usage readouts."
)
}

// create a directory that contains the old and new svgs for easy comparing
const enableComparisons =
Expand Down Expand Up @@ -67,14 +78,30 @@ async function main(parsedArgs: parseArgs.ParsedArgs) {
const jobDescriptions: utils.RenderSvgAndSaveJobDescription[] =
directories.map((dir) => ({ dir: path.join(inDir, dir), outDir }))

const pool = workerpool.pool(__dirname + "/worker.js", {
minWorkers: 2,
})

// Parallelize the CPU heavy rendering jobs
const svgRecords: utils.SvgRecord[] = await Promise.all(
jobDescriptions.map((job) => pool.exec("renderSvgAndSave", [job]))
)
let svgRecords: utils.SvgRecord[] = []
if (!isolate) {
const pool = workerpool.pool(__dirname + "/worker.js", {
minWorkers: 2,
})

// Parallelize the CPU heavy rendering jobs
svgRecords = await Promise.all(
jobDescriptions.map((job) =>
pool.exec("renderSvgAndSave", [job])
)
)
} else {
let i = 1
for (const job of jobDescriptions) {
const pool = workerpool.pool(__dirname + "/worker.js", {
maxWorkers: 1,
})
const svgRecord = await pool.exec("renderSvgAndSave", [job])
pool.terminate()
svgRecords.push(svgRecord)
console.log(i++, "/", n)
}
}

// Copy over copies from master for easy comparing
if (enableComparisons) {
Expand Down Expand Up @@ -125,6 +152,7 @@ Options:
-o DIR Output directory that will contain the csv file and one svg file per grapher [default: ../owid-grapher-svgs/svg]
-c ID A comma-separated list of config IDs that you want to run instead of generating SVGs from all configs [default: undefined]
-t TYPE A comma-separated list of chart types that you want to run instead of generating SVGs from all configs [default: undefined]
--isolate Run each export in a separate process. This yields accurate heap usage measurements, but is slower. [default: false]
`)
process.exit(0)
} else {
Expand Down
62 changes: 51 additions & 11 deletions devTools/svgTester/utils.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import { ChartTypeName } from "@ourworldindata/grapher"
import { ChartTypeName, GrapherTabOption } from "@ourworldindata/grapher"
import {
MultipleOwidVariableDataDimensionsMap,
OwidVariableDataMetadataDimensions,
OwidVariableMixedData,
OwidVariableWithSourceAndDimension,
TESTING_ONLY_reset_guid,
} from "@ourworldindata/utils"
import fs from "fs-extra"
import fs, { stat } from "fs-extra"
import md5 from "md5"
import path from "path"
import stream from "stream"
Expand All @@ -23,10 +22,11 @@ import {
BAKED_GRAPHER_URL,
BAKED_BASE_URL,
} from "../../settings/serverSettings.js"
import { getHeapStatistics } from "v8"

export const CONFIG_FILENAME: string = "config.json"
const RESULTS_FILENAME = "results.csv"
export const SVG_CSV_HEADER = `grapherId,slug,chartType,md5,svgFilename`
export const SVG_CSV_HEADER = `grapherId,slug,chartType,md5,svgFilename,durationReceiveData,durationTotal,heapUsed,totalDataFileSize`

/** Promise-returning wrapper around the callback-style `stream.finished`, so callers can `await` the end of a stream. */
export const finished = util.promisify(stream.finished) // (A)

Expand Down Expand Up @@ -65,12 +65,20 @@ const resultDifference = (difference: SvgDifference): VerifyResult => ({
difference: difference,
})

/**
 * Performance measurements captured while rendering a single grapher to SVG.
 * These values are appended as extra columns to each row of the results CSV.
 */
export type SvgRenderPerformance = {
// Milliseconds (Date.now() delta) from the start of the render until the
// grapher has received its variable data; includes grapher initialisation.
durationReceiveData: number
// Milliseconds (Date.now() delta) from the start of the render until the
// static SVG has been produced.
durationTotal: number
// `used_heap_size` as reported by v8's getHeapStatistics() after rendering.
// Only meaningful when every chart is rendered in its own worker process
// (`--isolate`); otherwise heap usage accumulates across charts.
heapUsed: number
// Sum of the on-disk sizes of the chart's `<variableId>.data.json` files.
totalDataFileSize: number
}

/**
 * One row of the SVG export results: identifies the exported chart, the hash
 * of its rendered SVG and, optionally, how expensive the render was.
 */
export type SvgRecord = {
    /** Numeric id of the grapher config the SVG was rendered from. */
    chartId: number
    /** Slug of the grapher config. */
    slug: string
    /**
     * The chart type when the chart tab was rendered, otherwise the name of
     * the tab that was rendered instead. May be undefined.
     */
    chartType: ChartTypeName | GrapherTabOption | undefined
    /** Hash of the processed SVG output, used to detect rendering changes. */
    md5: string
    /** File name the SVG is stored under. */
    svgFilename: string
    /** Timing and memory measurements; absent for records without perf data. */
    performance?: SvgRenderPerformance
}

export interface SvgDifference {
Expand All @@ -88,6 +96,7 @@ export interface JobDirectory {
/**
 * Everything needed to render one grapher: its config, the data for each
 * variable it uses, and the combined size of the data files that were read.
 */
export interface JobConfigAndData {
// The grapher configuration to render.
config: GrapherInterface
// Per-variable data and metadata, keyed by variable id.
variableData: MultipleOwidVariableDataDimensionsMap
// Sum of the sizes of the `<variableId>.data.json` files that were loaded
// (metadata files are not counted).
totalDataFileSize: number
}

export function logIfVerbose(verbose: boolean, message: string, param?: any) {
Expand Down Expand Up @@ -250,6 +259,9 @@ export async function renderSvg(dir: string): Promise<[string, SvgRecord]> {
// they keep a stateful variable in clientutils. To minimize differences
// between consecutive runs we reset this id here before every export
TESTING_ONLY_reset_guid()

const timeStart = Date.now()

const grapher = initGrapherForSvgExport({
...configAndData.config,
adminBaseUrl: BAKED_BASE_URL,
Expand All @@ -264,13 +276,25 @@ export async function renderSvg(dir: string): Promise<[string, SvgRecord]> {
)

grapher.receiveOwidData(configAndData.variableData)
const durationReceiveData = Date.now() - timeStart

const svg = grapher.staticSVG
const durationTotal = Date.now() - timeStart

const svgRecord = {
chartId: configAndData.config.id!,
slug: configAndData.config.slug!,
chartType: configAndData.config.type,
chartType: grapher.tab === "chart" ? grapher.type : grapher.tab,
md5: processSvgAndCalculateHash(svg),
svgFilename: outFilename,
performance: {
durationReceiveData,
durationTotal,
// The heap size measurement is only accurate if the parent process is run with `--isolate`, otherwise the same
// process is used for multiple graphs and the heap size accumulates
heapUsed: getHeapStatistics().used_heap_size,
totalDataFileSize: configAndData.totalDataFileSize,
},
}
return Promise.resolve([svg, svgRecord])
}
Expand Down Expand Up @@ -345,19 +369,20 @@ export async function loadGrapherConfigAndData(
const loadDataPromises = variableIds.map(async (variableId) => {
const dataPath = path.join(inputDir, `${variableId}.data.json`)
const metadataPath = path.join(inputDir, `${variableId}.metadata.json`)
const dataFileSize = await stat(dataPath).then((stats) => stats.size)
const data = (await readJsonFile(dataPath)) as OwidVariableMixedData
const metadata = (await readJsonFile(
metadataPath
)) as OwidVariableWithSourceAndDimension
return { data, metadata }
return { data, metadata, dataFileSize }
})

const data: OwidVariableDataMetadataDimensions[] =
await Promise.all(loadDataPromises)
const data = await Promise.all(loadDataPromises)

const variableData = new Map(data.map((d) => [d.metadata.id, d]))
const totalDataFileSize = _.sum(data.map((d) => d.dataFileSize))

return { config, variableData }
return { config, variableData, totalDataFileSize }
}

export function logDifferencesToConsole(
Expand Down Expand Up @@ -407,7 +432,22 @@ export async function writeResultsCsvFile(
const csvFileStream = fs.createWriteStream(resultsPath)
csvFileStream.write(SVG_CSV_HEADER + "\n")
for (const row of svgRecords) {
const line = `${row.chartId},${row.slug},${row.chartType},${row.md5},${row.svgFilename}`
const line = [
row.chartId,
row.slug,
row.chartType,
row.md5,
row.svgFilename,

// Perf
row.performance?.durationReceiveData,
row.performance?.durationTotal,
row.performance?.heapUsed,
row.performance?.totalDataFileSize,
]
.map((item) => item ?? "")
.join(",")

csvFileStream.write(line + "\n")
}
csvFileStream.end()
Expand Down

0 comments on commit ed4f195

Please sign in to comment.