From ea62440f0b5dab19a11e341526b59ff207597899 Mon Sep 17 00:00:00 2001 From: Alyssa Chvasta Date: Wed, 11 Dec 2024 11:03:07 -0800 Subject: [PATCH] Internal change GitOrigin-RevId: e0f6d40a13463dd73d6a0b228a7701ba9d6263b2 --- package-lock.json | 19 +++++++ package.json | 1 + runner-cli/rerunner.ts | 51 +++++++++++++++++ runner-cli/runner.ts | 47 ++++++++++++++++ .../rerunner.ts => runner-cli/runner_utils.ts | 56 +------------------ 5 files changed, 121 insertions(+), 53 deletions(-) create mode 100644 runner-cli/rerunner.ts create mode 100644 runner-cli/runner.ts rename evaluations/rerunner.ts => runner-cli/runner_utils.ts (51%) diff --git a/package-lock.json b/package-lock.json index 4bc4933..020db4a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -29,6 +29,7 @@ "husky": "^9.1.6", "jest": "^29.7.0", "lint-staged": "^15.2.10", + "marked": "^15.0.3", "nodemon": "^3.1.4", "papaparse": "^5.4.1", "prettier": "^3.3.3", @@ -6115,6 +6116,18 @@ "dev": true, "license": "Python-2.0" }, + "node_modules/marked": { + "version": "15.0.3", + "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.3.tgz", + "integrity": "sha512-Ai0cepvl2NHnTcO9jYDtcOEtVBNVYR31XnEA3BndO7f5As1wzpcOceSUM8FDkNLJNIODcLpDTWay/qQhqbuMvg==", + "dev": true, + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/mdast-util-to-hast": { "version": "13.2.0", "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", @@ -12204,6 +12217,12 @@ } } }, + "marked": { + "version": "15.0.3", + "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.3.tgz", + "integrity": "sha512-Ai0cepvl2NHnTcO9jYDtcOEtVBNVYR31XnEA3BndO7f5As1wzpcOceSUM8FDkNLJNIODcLpDTWay/qQhqbuMvg==", + "dev": true + }, "mdast-util-to-hast": { "version": "13.2.0", "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", diff --git a/package.json b/package.json index 63d4687..911daf9 100644 --- a/package.json +++ 
b/package.json @@ -29,6 +29,7 @@ "husky": "^9.1.6", "jest": "^29.7.0", "lint-staged": "^15.2.10", + "marked": "^15.0.3", "nodemon": "^3.1.4", "papaparse": "^5.4.1", "prettier": "^3.3.3", diff --git a/runner-cli/rerunner.ts b/runner-cli/rerunner.ts new file mode 100644 index 0000000..c8405e4 --- /dev/null +++ b/runner-cli/rerunner.ts @@ -0,0 +1,51 @@ +// Rerun summarize --rerunCount times using a CSV file as input, writing the summaries to another CSV. +// Run like: +// npx ts-node ./runner-cli/rerunner.ts --outputFile "data1.csv" \ +// --vertexProject "" \ +// --inputFile "/usr/local/google/home/achvasta/Downloads/comments-with-vote-tallies.csv" \ +// --rerunCount 3 + +import { Command } from "commander"; +import { createObjectCsvWriter } from "csv-writer"; +import { getCommentsFromCsv, getSummary } from "./runner_utils"; + +interface outputCsvFormat { + run: number; + summaryType: string; + text: string; +} + +async function main(): Promise { + // Parse command line arguments. + const program = new Command(); + program + .option("-o, --outputFile ", "The output file name.") + .option("-i, --inputFile ", "The input file name.") + .option("-r, --rerunCount ", "The number of times to rerun.") + .option("-v, --vertexProject ", "The Vertex Project name."); + program.parse(process.argv); + const options = program.opts(); + + const comments = await getCommentsFromCsv(options.inputFile); + + let outputTexts: outputCsvFormat[] = []; + const csvWriter = createObjectCsvWriter({ + path: options.outputFile, + header: ["run", "summaryType", "text"], + }); + + for (let i = 0; i < options.rerunCount; i++) { + const summary = await getSummary(options.vertexProject, comments); + outputTexts = outputTexts.concat([ + { + run: i, + summaryType: "VoteTally", + text: summary.getText("MARKDOWN"), + }, + ]); + } + + csvWriter.writeRecords(outputTexts).then(() => console.log("CSV file written successfully.")); +} + +main(); diff --git a/runner-cli/runner.ts b/runner-cli/runner.ts new file mode 100644 
index 0000000..1a2e014 --- /dev/null +++ b/runner-cli/runner.ts @@ -0,0 +1,47 @@ +// Run the summarizer based on a CSV input and output the result as an HTML page. + +import { Command } from "commander"; +import * as fs from "fs"; +import { marked } from "marked"; +import { getCommentsFromCsv, getSummary } from "./runner_utils"; + +async function main(): Promise { + // Parse command line arguments. + const program = new Command(); + program + .option("-o, --outputFile ", "The output file name.") + .option("-i, --inputFile ", "The input file name.") + .option("-v, --vertexProject ", "The Vertex Project name."); + program.parse(process.argv); + const options = program.opts(); + + const comments = await getCommentsFromCsv(options.inputFile); + + const summary = await getSummary(options.vertexProject, comments); + const markdownContent = summary.getText("MARKDOWN"); + const htmlContent = ` + + + + Summary + + + + ${marked(markdownContent)} + +`; + + const outputPath = `${options.outputFile}.html`; + fs.writeFileSync(outputPath, htmlContent); + console.log(`Written summary to ${outputPath}`); +} + +main(); diff --git a/evaluations/rerunner.ts b/runner-cli/runner_utils.ts similarity index 51% rename from evaluations/rerunner.ts rename to runner-cli/runner_utils.ts index 54a0686..a892309 100644 --- a/evaluations/rerunner.ts +++ b/runner-cli/runner_utils.ts @@ -1,25 +1,10 @@ -// Rerun summarize 5x using a CSV file as input and outputting the summaries to another CSV. 
-// Run like: -// npx ts-node ./evaluations/rerunner.ts --outputFile "data1.csv" \ -// --vertexProject "" \ -// --inputFile "/usr/local/google/home/achvasta/Downloads/comments-with-vote-tallies.csv" -// --rerunCount 3 - -import { Command } from "commander"; import { Sensemaker } from "../src/sensemaker"; import { VertexModel } from "../src/models/vertex_model"; -import { Comment, SummarizationType, Summary, VoteTally } from "../src/types"; -import { createObjectCsvWriter } from "csv-writer"; +import { Summary, VoteTally, Comment, SummarizationType } from "../src/types"; import * as path from "path"; import * as fs from "fs"; import { parse } from "csv-parse"; -interface outputCsvFormat { - run: number; - summaryType: string; - text: string; -} - // TODO: remove this and make it more general type VoteTallyCsvRow = { index: number; @@ -40,14 +25,14 @@ type VoteTallyCsvRow = { "group-1-agree-count": number; }; -async function getSummary(project: string, comments: Comment[]): Promise { +export async function getSummary(project: string, comments: Comment[]): Promise { const sensemaker = new Sensemaker({ defaultModel: new VertexModel(project, "us-central1", "gemini-1.5-pro-002"), }); return await sensemaker.summarize(comments, SummarizationType.VOTE_TALLY); } -async function getCommentsFromCsv(inputFilePath: string): Promise { +export async function getCommentsFromCsv(inputFilePath: string): Promise { const filePath = path.resolve(inputFilePath); const fileContent = fs.readFileSync(filePath, { encoding: "utf-8" }); @@ -85,38 +70,3 @@ async function getCommentsFromCsv(inputFilePath: string): Promise { .on("end", () => resolve(data)); }); } - -async function main(): Promise { - // Parse command line arguments. 
- const program = new Command(); - program - .option("-o, --outputFile ", "The output file name.") - .option("-i, --inputFile ", "The input file name.") - .option("-r, --rerunCount ", "The number of times to rerun.") - .option("-v, --vertexProject ", "The Vertex Project name."); - program.parse(process.argv); - const options = program.opts(); - - const comments = await getCommentsFromCsv(options.inputFile); - - let outputTexts: outputCsvFormat[] = []; - const csvWriter = createObjectCsvWriter({ - path: options.outputFile, - header: ["run", "summaryType", "text"], - }); - - for (let i = 0; i < options.rerunCount; i++) { - const summary = await getSummary(options.vertexProject, comments); - outputTexts = outputTexts.concat([ - { - run: i, - summaryType: "VoteTally", - text: summary.getText("MARKDOWN"), - }, - ]); - } - - csvWriter.writeRecords(outputTexts).then(() => console.log("CSV file written successfully.")); -} - -main();