diff --git a/src/sensemaker_utils.test.ts b/src/sensemaker_utils.test.ts index fb2c90f..19c51d5 100644 --- a/src/sensemaker_utils.test.ts +++ b/src/sensemaker_utils.test.ts @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -import { getPrompt } from "./sensemaker_utils"; +import { getPrompt, groupCommentsBySubtopic } from "./sensemaker_utils"; +import { Comment } from "./types"; describe("SensemakerUtilsTest", () => { it("should create a prompt", () => { @@ -45,4 +46,58 @@ comment1 comment2` ); }); + describe("groupCommentsByTopic", () => { + it("should group comments by topic and subtopic", () => { + const categorizedComments: Comment[] = [ + { + id: "1", + text: "Comment 1", + topics: [ + { name: "Topic 1", subtopics: [{ name: "Subtopic 1.1" }] }, + { name: "Topic 2", subtopics: [{ name: "Subtopic 2.1" }] }, + ], + }, + { + id: "2", + text: "Comment 2", + topics: [ + { name: "Topic 1", subtopics: [{ name: "Subtopic 1.1" }] }, + { name: "Topic 1", subtopics: [{ name: "Subtopic 1.2" }] }, + ], + }, + ]; + + const expectedOutput = { + "Topic 1": { + "Subtopic 1.1": { + "1": "Comment 1", + "2": "Comment 2", + }, + "Subtopic 1.2": { + "2": "Comment 2", + }, + }, + "Topic 2": { + "Subtopic 2.1": { + "1": "Comment 1", + }, + }, + }; + + const result = groupCommentsBySubtopic(categorizedComments); + expect(result).toEqual(expectedOutput); + }); + + it("should skip comment if it has no topics", () => { + const categorizedComments: Comment[] = [ + { + id: "1", + text: "Comment 1", + topics: [], // No topics assigned + }, + ]; + + expect(groupCommentsBySubtopic(categorizedComments)).toEqual({}); + }); + }); }); diff --git a/src/sensemaker_utils.ts b/src/sensemaker_utils.ts index 178a20c..fe17ad7 100644 --- a/src/sensemaker_utils.ts +++ b/src/sensemaker_utils.ts @@ -75,7 +75,11 @@ export function hydrateCommentRecord( * * TODO: create a similar function to group comments by topics only. 
*/ -export function groupCommentsBySubtopic(categorized: Comment[]) { +export function groupCommentsBySubtopic(categorized: Comment[]): { + [topicName: string]: { + [subtopicName: string]: { [commentId: string]: string }; + }; +} { const groupedComments: { [topicName: string]: { [subtopicName: string]: { [commentId: string]: string }; @@ -83,7 +87,8 @@ export function groupCommentsBySubtopic(categorized: Comment[]) { } = {}; for (const comment of categorized) { if (!comment.topics || comment.topics.length === 0) { - throw new Error(`Comment with ID ${comment.id} has no topics assigned.`); + console.log(`Comment with ID ${comment.id} has no topics assigned.`); + continue; } for (const topic of comment.topics) { if (!groupedComments[topic.name]) { diff --git a/src/stats_util.test.ts b/src/stats_util.test.ts new file mode 100644 index 0000000..1442201 --- /dev/null +++ b/src/stats_util.test.ts @@ -0,0 +1,99 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Builds the vote tally of a single group for the fixtures below.
function groupTally(
  agreeCount: number,
  disagreeCount: number,
  passCount: number,
  totalCount: number
) {
  return { agreeCount, disagreeCount, passCount, totalCount };
}

// Two comments, each voted on by groups "0" and "1"; the totals add up to 55 votes.
const TEST_COMMENTS = [
  {
    id: "1",
    text: "comment1",
    voteTalliesByGroup: {
      "0": groupTally(10, 5, 0, 15),
      "1": groupTally(5, 10, 5, 20),
    },
  },
  {
    id: "2",
    text: "comment2",
    voteTalliesByGroup: {
      "0": groupTally(2, 5, 3, 10),
      "1": groupTally(5, 3, 2, 10),
    },
  },
];

// Builds a comment that is categorized under "Topic A" and the given subtopic.
function topicAComment(id: string, text: string, subtopicName: string): Comment {
  return {
    id,
    text,
    topics: [{ name: "Topic A", subtopics: [{ name: subtopicName }] }],
  };
}

describe("StatsUtilTest", () => {
  it("should get the total number of votes from multiple comments", () => {
    const { voteCount } = new SummaryStats(TEST_COMMENTS);
    expect(voteCount).toEqual(55);
  });

  it("SummaryStats should get the total number of comments", () => {
    const { commentCount } = new SummaryStats(TEST_COMMENTS);
    expect(commentCount).toEqual(2);
  });

  it("should count comments by topic", () => {
    // Two comments fall under Subtopic A.1 and one under Subtopic A.2.
    const categorized: Comment[] = [
      topicAComment("1", "comment 1", "Subtopic A.1"),
      topicAComment("2", "comment 2", "Subtopic A.1"),
      topicAComment("3", "comment 3", "Subtopic A.2"),
    ];

    const actualTopicStats = new SummaryStats(categorized).getStatsByTopic();

    expect(actualTopicStats).toEqual([
      {
        name: "Topic A",
        commentCount: 3,
        subtopicStats: [
          { name: "Subtopic A.1", commentCount: 2 },
          { name: "Subtopic A.2", commentCount: 1 },
        ],
      },
    ]);
  });
});
/**
 * Statistics to include in the summary of a deliberation: the total number of votes, the total
 * number of comments, and the number of comments per topic and subtopic.
 */
export class SummaryStats {
  comments: Comment[];

  /**
   * @param comments The comments the statistics are computed over.
   */
  constructor(comments: Comment[]) {
    this.comments = comments;
  }

  /**
   * Adds up the `totalCount` of every group's vote tally on one comment.
   *
   * @param comment The comment whose votes are counted.
   * @returns The number of votes on the comment, or 0 when it carries no vote tallies.
   */
  private getCommentVoteCount(comment: Comment): number {
    // Comments without vote tallies (e.g. comments that were only categorized) have no votes.
    const talliesByGroup = comment.voteTalliesByGroup ?? {};
    let count = 0;
    for (const tally of Object.values(talliesByGroup)) {
      // Only positive totals contribute, so a missing or non-positive total is ignored.
      if (tally.totalCount > 0) {
        count += tally.totalCount;
      }
    }
    return count;
  }

  /** The total number of votes in all comments in a deliberation. */
  get voteCount(): number {
    return this.comments.reduce((sum: number, comment: Comment) => {
      return sum + this.getCommentVoteCount(comment);
    }, 0);
  }

  /** The total number of comments in a deliberation. */
  get commentCount(): number {
    return this.comments.length;
  }

  /**
   * Counts the number of comments associated with each topic and subtopic.
   *
   * The comments are grouped with `groupCommentsBySubtopic`, which yields a nested map of
   * topic name -> subtopic name -> comment ID -> comment text.
   *
   * @returns One `TopicStats` per topic, each listing its subtopics in `subtopicStats`. A
   *     topic's `commentCount` is the number of distinct comments in the topic, so a comment
   *     assigned to several subtopics of the same topic is counted once for the topic and once
   *     for each of those subtopics.
   */
  getStatsByTopic(): TopicStats[] {
    const commentsByTopic = groupCommentsBySubtopic(this.comments);
    const topicStats: TopicStats[] = [];

    for (const [topicName, subtopics] of Object.entries(commentsByTopic)) {
      // Distinct IDs across all subtopics; summing the subtopic counts instead would count a
      // comment once per subtopic it belongs to.
      const topicCommentIds = new Set<string>();
      const subtopicStats: TopicStats[] = [];

      for (const [subtopicName, commentsById] of Object.entries(subtopics)) {
        const commentIds = Object.keys(commentsById);
        for (const commentId of commentIds) {
          topicCommentIds.add(commentId);
        }
        subtopicStats.push({ name: subtopicName, commentCount: commentIds.length });
      }

      topicStats.push({
        name: topicName,
        commentCount: topicCommentIds.size,
        subtopicStats: subtopicStats,
      });
    }

    return topicStats;
  }
}

/**
 * Represents statistics about a topic and its subtopics.
 */
export interface TopicStats {
  /** The topic or subtopic name. */
  name: string;
  /** The number of comments assigned to the topic or subtopic. */
  commentCount: number;
  /** Statistics of the subtopics; set for topics and left out for subtopics. */
  subtopicStats?: TopicStats[];
}
let mockGenerateData: jest.SpyInstance; @@ -495,61 +494,4 @@ describe("findMissingComments", () => { { id: "2", text: "Comment 2" }, ]); }); - - describe("groupCommentsByTopic", () => { - it("should group comments by topic and subtopic", () => { - const categorizedComments: Comment[] = [ - { - id: "1", - text: "Comment 1", - topics: [ - { name: "Topic 1", subtopics: [{ name: "Subtopic 1.1" }] }, - { name: "Topic 2", subtopics: [{ name: "Subtopic 2.1" }] }, - ], - }, - { - id: "2", - text: "Comment 2", - topics: [ - { name: "Topic 1", subtopics: [{ name: "Subtopic 1.1" }] }, - { name: "Topic 1", subtopics: [{ name: "Subtopic 1.2" }] }, - ], - }, - ]; - - const expectedOutput = { - "Topic 1": { - "Subtopic 1.1": { - "1": "Comment 1", - "2": "Comment 2", - }, - "Subtopic 1.2": { - "2": "Comment 2", - }, - }, - "Topic 2": { - "Subtopic 2.1": { - "1": "Comment 1", - }, - }, - }; - - const result = groupCommentsBySubtopic(categorizedComments); - expect(result).toEqual(expectedOutput); - }); - - it("should throw an error if a comment has no topics", () => { - const categorizedComments: Comment[] = [ - { - id: "1", - text: "Comment 1", - topics: [], // No topics assigned - }, - ]; - - expect(() => groupCommentsBySubtopic(categorizedComments)).toThrow( - "Comment with ID 1 has no topics assigned." - ); - }); - }); }); diff --git a/src/tasks/summarization.test.ts b/src/tasks/summarization.test.ts index 84ac44e..0e6af2d 100644 --- a/src/tasks/summarization.test.ts +++ b/src/tasks/summarization.test.ts @@ -13,53 +13,65 @@ // limitations under the License. 
import { formatCommentsWithVotes, - _countCommentsByTopic, + getSummarizationInstructions, _sortTopicsByComments, _quantifyTopicNames, } from "./summarization"; +const TEST_COMMENTS = [ + { + id: "1", + text: "comment1", + voteTalliesByGroup: { + "0": { + agreeCount: 10, + disagreeCount: 5, + passCount: 0, + totalCount: 15, + }, + "1": { + agreeCount: 5, + disagreeCount: 10, + passCount: 5, + totalCount: 20, + }, + }, + }, + { + id: "2", + text: "comment2", + voteTalliesByGroup: { + "0": { + agreeCount: 2, + disagreeCount: 5, + passCount: 3, + totalCount: 10, + }, + "1": { + agreeCount: 5, + disagreeCount: 3, + passCount: 2, + totalCount: 10, + }, + }, + }, +]; + describe("SummaryTest", () => { + it("prompt should include the comment count and the vote count", () => { + // Has 2 comments and 55 votes. + expect(getSummarizationInstructions(true, TEST_COMMENTS)).toContain("2 statements"); + expect(getSummarizationInstructions(true, TEST_COMMENTS)).toContain("55 votes"); + }); + + it("prompt shouldn't include votes if groups aren't included", () => { + // Has 2 comments and 55 votes. 
+ expect(getSummarizationInstructions(false, TEST_COMMENTS)).toContain("2 statements"); + expect(getSummarizationInstructions(false, TEST_COMMENTS)).not.toContain("55 votes"); + }); + it("should format comments with vote tallies via formatCommentsWithVotes", () => { - expect( - formatCommentsWithVotes([ - { - id: "1", - text: "comment1", - voteTalliesByGroup: { - "0": { - agreeCount: 10, - disagreeCount: 5, - passCount: 0, - totalCount: 15, - }, - "1": { - agreeCount: 5, - disagreeCount: 10, - passCount: 5, - totalCount: 20, - }, - }, - }, - { - id: "2", - text: "comment2", - voteTalliesByGroup: { - "0": { - agreeCount: 2, - disagreeCount: 5, - passCount: 3, - totalCount: 10, - }, - "1": { - agreeCount: 5, - disagreeCount: 3, - passCount: 2, - totalCount: 10, - }, - }, - }, - ]) - ).toEqual([ + expect(formatCommentsWithVotes(TEST_COMMENTS)).toEqual([ `comment1 vote info per group: {"0":{"agreeCount":10,"disagreeCount":5,"passCount":0,"totalCount":15},"1":{"agreeCount":5,"disagreeCount":10,"passCount":5,"totalCount":20}}`, `comment2 @@ -67,27 +79,6 @@ describe("SummaryTest", () => { ]); }); - it("should count comments by topic", () => { - const commentsByTopic = { - "Topic A": { - "Subtopic A.1": { c1: "comment 1", c2: "comment 2" }, - "Subtopic A.2": { c3: "comment 3" }, - }, - }; - - const expectedTopicStats = [ - { - name: "Topic A", - commentCount: 3, - subtopicStats: [ - { name: "Subtopic A.1", commentCount: 2 }, - { name: "Subtopic A.2", commentCount: 1 }, - ], - }, - ]; - expect(_countCommentsByTopic(commentsByTopic)).toEqual(expectedTopicStats); - }); - it("should sort topics by comment count and put 'Other' topics and subtopics last", () => { const topicStats = [ { diff --git a/src/tasks/summarization.ts b/src/tasks/summarization.ts index 6565d62..8c15083 100644 --- a/src/tasks/summarization.ts +++ b/src/tasks/summarization.ts @@ -16,12 +16,14 @@ import { Model } from "../models/model"; import { Comment, SummarizationType } from "../types"; -import { 
getPrompt, groupCommentsBySubtopic } from "../sensemaker_utils"; - -function getSummarizationInstructions(comments: Comment[], includeGroups: boolean): string { - // group comments by topics, count stats, add it to topics, and sort topics by # of comments - const commentsByTopic = groupCommentsBySubtopic(comments); - const topicStats = _countCommentsByTopic(commentsByTopic); +import { getPrompt } from "../sensemaker_utils"; +import { SummaryStats, TopicStats } from "../stats_util"; + +export function getSummarizationInstructions(includeGroups: boolean, comments: Comment[]): string { + // Prepare statistics like vote count and number of comments per topic for injecting in prompt as + // well as sorts topics based on count. + const summaryStats = new SummaryStats(comments); + const topicStats = summaryStats.getStatsByTopic(); const sortedTopics = _sortTopicsByComments(topicStats); const quantifiedTopics = _quantifyTopicNames(sortedTopics); @@ -63,7 +65,19 @@ ${includeGroups ? "## Description of Groups" : ""} * _Low consensus:_ ## Conclusion -${includeGroups ? "There should be a one-paragraph section describing the two voting groups, focusing on their expressed views without guessing demographics." : ""} +The introduction should be one paragraph long and contain ${includeGroups ? "five" : "four"} sentences. +The first sentence should include the information that there were ${summaryStats.commentCount} statements ${includeGroups ? `that had ${summaryStats.voteCount} votes` : ""}. +The second sentence should include what topics were discussed. +${ + includeGroups + ? "The third sentence should include information on the groups such " + + "as their similarities and differences. " + : "" +} +The next sentence should list topics with consensus. +The last sentence should list topics without consensus. + +${includeGroups ? "There should be a one-paragraph section describing the voting groups, focusing on their expressed views without guessing demographics." 
: ""} `; } @@ -106,7 +120,7 @@ export async function basicSummarize( ): Promise { const commentTexts = comments.map((comment) => comment.text); return await model.generateText( - getPrompt(getSummarizationInstructions(comments, false), commentTexts, additionalInstructions) + getPrompt(getSummarizationInstructions(false, comments), commentTexts, additionalInstructions) ); } @@ -136,56 +150,13 @@ export async function voteTallySummarize( ): Promise { return await model.generateText( getPrompt( - getSummarizationInstructions(comments, true), + getSummarizationInstructions(true, comments), formatCommentsWithVotes(comments), additionalInstructions ) ); } -/** - * Represents statistics about a topic and its subtopics. - */ -interface TopicStats { - name: string; - commentCount: number; - subtopicStats?: TopicStats[]; -} - -/** - * Counts the number of comments associated with each topic and subtopic. - * - * @param commentsByTopic A nested map where keys are topic names, values are maps - * where keys are subtopic names, and values are maps where - * keys are comment IDs and values are comment texts. - * @returns An array of `TopicStats` objects. 
- */ -export function _countCommentsByTopic(commentsByTopic: { - [key: string]: { [key: string]: { [key: string]: string } }; -}): TopicStats[] { - const topicStats: TopicStats[] = []; - - for (const topicName in commentsByTopic) { - const subtopics = commentsByTopic[topicName]; - const subtopicStats: TopicStats[] = []; - let totalTopicComments = 0; - - for (const subtopicName in subtopics) { - const commentCount = Object.keys(subtopics[subtopicName]).length; - totalTopicComments += commentCount; - subtopicStats.push({ name: subtopicName, commentCount }); - } - - topicStats.push({ - name: topicName, - commentCount: totalTopicComments, - subtopicStats: subtopicStats, - }); - } - - return topicStats; -} - /** * Sorts topics and their subtopics based on comment count in descending order, with "Other" topics and subtopics going last. *