Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: relevance for img and a tags #227

Open
wants to merge 5 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
"Rpcs",
"sonarjs",
"pico",
"timespan"
"timespan",
"mistralai"
],
"dictionaries": ["typescript", "node", "software-terms"],
"import": [
Expand Down
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ PERMIT_ERC20_TOKENS_NO_FEE_WHITELIST=""
KERNEL_PUBLIC_KEY=""

# Logger level, default is INFO
LOG_LEVEL=""
LOG_LEVEL=""
25 changes: 25 additions & 0 deletions src/configuration/data-purge-config.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
import { Type, Static } from "@sinclair/typebox";

// Schema for the model name; falls back to a pinned GPT-4o snapshot.
const openAiModelType = Type.String({
  default: "gpt-4o-2024-08-06",
  description: "OpenAI model, e.g. gpt-4o",
  examples: ["gpt-4o"],
});

// Schema for the API base URL. Only the `.source` of the regex literal is passed on,
// so the literal's `i` flag is not part of the resulting pattern string.
const openAiEndpointType = Type.String({
  default: "https://api.openai.com/v1",
  pattern: /^(https?:\/\/[^\s$.?#].\S*)$/i.source,
  description: "OpenAI endpoint for requests",
  examples: ["https://api.openai.com/v1"],
});

/**
 * OpenAI settings of the data purge module. The object itself defaults to `{}`,
 * and each property declares its own default.
 */
const openAiType = Type.Object(
  {
    /**
     * AI model to use for comment evaluation.
     */
    model: openAiModelType,
    /**
     * Specific endpoint to send the comments to.
     */
    endpoint: openAiEndpointType,
  },
  { default: {} }
);


export const dataPurgeConfigurationType = Type.Object({
skipCommentsWhileAssigned: Type.Union([Type.Literal("all"), Type.Literal("exact"), Type.Literal("none")], {
default: "all",
Expand All @@ -10,6 +34,7 @@ export const dataPurgeConfigurationType = Type.Object({
"- 'none': Includes all comments, regardless of assignment status or timing.",
examples: ["all", "exact", "none"],
}),
openAi: openAiType,
});

export type DataPurgeConfiguration = Static<typeof dataPurgeConfigurationType>;
144 changes: 143 additions & 1 deletion src/parser/data-purge-module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { IssueActivity } from "../issue-activity";
import { parseGitHubUrl } from "../start";
import { BaseModule } from "../types/module";
import { Result } from "../types/results";
import OpenAI from 'openai';

/**
* Removes the data in the comments that we do not want to be processed.
Expand All @@ -13,6 +14,11 @@ export class DataPurgeModule extends BaseModule {
readonly _configuration: DataPurgeConfiguration | null = this.context.config.incentives.dataPurge;
_assignmentPeriods: UserAssignments = {};

/**
 * OpenAI client used by this module. The API key is read from the environment, and
 * `baseURL` is passed only when the module configuration holds a non-empty endpoint.
 */
readonly _openAi = new OpenAI({
apiKey: this.context.env.OPENAI_API_KEY,
// Spread in `baseURL` conditionally so the option is omitted when there is no configured endpoint.
...(this._configuration?.openAi.endpoint && { baseURL: this._configuration.openAi.endpoint }),
});

get enabled(): boolean {
if (!this._configuration) {
this.context.logger.error("Invalid / missing configuration detected for DataPurgeModule, disabling.");
Expand Down Expand Up @@ -44,6 +50,140 @@ export class DataPurgeModule extends BaseModule {
return false;
}



/**
 * Downloads an image and asks the OpenAI chat completion API for a one-paragraph description.
 *
 * @param imageUrl - Address of the image to download and describe.
 * @returns The generated description, or `null` when the download is not successful,
 * the model returns no content, or any step throws.
 */
async _generateImageDescription(imageUrl: string): Promise<string | null> {
  try {
    const imageResponse = await fetch(imageUrl);
    // A 4xx/5xx body is an error page, not an image: do not spend a model call on it.
    if (!imageResponse.ok) {
      this.context.logger.info(`Skipping image that could not be downloaded (${imageResponse.status}): ${imageUrl}`);
      return null;
    }

    // Label the data URL with the served media type; fall back to JPEG when the
    // header is missing or does not announce an image.
    const servedType = imageResponse.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase();
    const mimeType = servedType?.startsWith("image/") ? servedType : "image/jpeg";

    const imageData = await imageResponse.arrayBuffer();
    const base64Image = Buffer.from(imageData).toString("base64");

    // NOTE(review): the model is hard-coded here and does not follow `openAi.model`;
    // confirm whether a custom endpoint is expected to serve this model name.
    const response = await this._openAi.chat.completions.create({
      model: "chatgpt-4o-latest",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: "Describe this image concisely in one paragraph." },
            { type: "image_url", image_url: { url: `data:${mimeType};base64,${base64Image}` } },
          ],
        },
      ],
      max_tokens: 300,
    });

    // An empty completion is treated the same as no completion.
    return response.choices[0]?.message?.content || null;
  } catch (error) {
    this.context.logger.error(`Failed to generate image description: ${error}`);
    return null;
  }
}

/**
 * Sends a single user message to the chat completion API and returns the reply.
 *
 * @param userMessage - Text sent as the only message of the conversation.
 * @returns The reply text, or `null` when the model returns no content or the request throws.
 */
async _generateChatResponse(userMessage: string): Promise<string | null> {
  try {
    // Honour the configured model (the endpoint is already configurable); the fallback
    // is the value that used to be hard-coded, which is also the configuration default.
    const model = this._configuration?.openAi.model ?? "gpt-4o-2024-08-06";
    const response = await this._openAi.chat.completions.create({
      model,
      messages: [
        {
          role: "user",
          content: userMessage,
        },
      ],
      max_tokens: 500,
    });

    // An empty completion is treated the same as no completion.
    return response.choices[0]?.message?.content || null;
  } catch (error) {
    this.context.logger.error(`Failed to generate chat response: ${error}`);
    return null;
  }
}

/**
 * Fetches a linked page, strips its markup and asks the chat completion API for a
 * one-paragraph summary.
 *
 * @param linkUrl - Address of the page to fetch and summarize.
 * @returns The summary, or `null` when the request is not successful, the content is
 * neither `text/html` nor `text/plain`, the model returns no content, or any step throws.
 */
async _generateLinkDescription(linkUrl: string): Promise<string | null> {
  try {
    const linkResponse = await fetch(linkUrl);
    // Error pages (404, 500, ...) usually come back as HTML too; summarizing them would be misleading.
    if (!linkResponse.ok) {
      this.context.logger.info(`Skipping link that could not be fetched (${linkResponse.status}): ${linkUrl}`);
      return null;
    }

    const contentType = linkResponse.headers.get("content-type");
    if (!contentType || (!contentType.includes("text/html") && !contentType.includes("text/plain"))) {
      this.context.logger.info(`Skipping non-HTML content: ${contentType}, ${linkUrl}`);
      return null;
    }

    const linkData = await linkResponse.text();
    const cleanText = linkData
      // Drop script and style elements together with their content
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "")
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "")
      // Replace the remaining tags with spaces, then collapse whitespace
      .replace(/<[^>]+>/g, " ")
      .replace(/\s+/g, " ")
      // Cut everything from the first `{"props"` JSON payload to the end of the text
      .replace(/{\s*"props".*$/s, "")
      .trim();

    // Honour the configured model (the endpoint is already configurable); the fallback
    // is the value that used to be hard-coded, which is also the configuration default.
    const model = this._configuration?.openAi.model ?? "gpt-4o-2024-08-06";
    const response = await this._openAi.chat.completions.create({
      model,
      messages: [
        {
          role: "user",
          content: `Summarize the following webpage code into a concise and easy-to-understand text explanation of one paragraph with no bullet points. Focus on describing the purpose, structure, and functionality of the code, including key elements such as layout, styles, scripts, and any interactive features. Avoid technical jargon unless necessary: ${cleanText}`,
        },
      ],
      max_tokens: 500,
    });

    // An empty completion is treated the same as no completion.
    return response.choices[0]?.message?.content || null;
  } catch (error) {
    this.context.logger.error(`Failed to generate link description: ${error}`);
    return null;
  }
}

/**
 * Describes the first image of a comment and stores the description in its `alt` text.
 *
 * A Markdown image takes precedence over an HTML `src` attribute. Only the image that
 * was actually described is rewritten; other images are left untouched so that they do
 * not receive a description that belongs to a different picture.
 *
 * @param commentBody - Raw comment body (Markdown, possibly containing HTML).
 * @returns The comment body with the description applied, or the unchanged body when
 * there is no image or no description could be generated.
 */
private async _processCommentBody(commentBody: string): Promise<string> {
  const markdownMatch = /!\[.*?\]\((.*?)\)/.exec(commentBody);
  const htmlMatch = markdownMatch ? null : /src="([^"]*)"/.exec(commentBody);

  // A Markdown target may carry a title (`url "title"`); only the first token is the URL.
  const imageUrl = markdownMatch ? (markdownMatch[1] ?? "").trim().split(/\s+/)[0] : htmlMatch?.[1];
  if (!imageUrl) {
    return commentBody;
  }

  const description = await this._generateImageDescription(imageUrl);
  if (!description) {
    return commentBody;
  }
  this.context.logger.info(`Generated description: ${description}`);

  // The description is model output: escape it so quotes or angle brackets cannot break the attribute.
  const altText = description
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

  if (markdownMatch) {
    // Splice by position instead of String.replace so `$` sequences in the text are not expanded.
    const start = markdownMatch.index;
    const end = start + markdownMatch[0].length;
    return `${commentBody.slice(0, start)}<img src="${imageUrl}" alt="${altText}">${commentBody.slice(end)}`;
  }

  if (/alt="[^"]*"/.test(commentBody)) {
    // Replacer function: the returned string is inserted verbatim.
    return commentBody.replace(/alt="[^"]*"/, () => `alt="${altText}"`);
  }

  if (htmlMatch) {
    // No alt attribute to overwrite: add one right after the matched src attribute.
    const insertAt = htmlMatch.index + htmlMatch[0].length;
    return `${commentBody.slice(0, insertAt)} alt="${altText}"${commentBody.slice(insertAt)}`;
  }

  return commentBody;
}

/**
 * Adds a generated summary as `title` to the links of a comment.
 *
 * Three link forms are recognized: Markdown links, HTML anchors with a double-quoted
 * `href`, and bare `http(s)` URLs. Markdown links and bare URLs are rewritten to
 * `<a href="..." title="...">`; existing anchors get a `title` attribute appended after
 * their `href`. Links without a summary are left untouched.
 *
 * @param commentBody - Comment body to process.
 * @returns The comment body with every summarized link annotated.
 */
private async _processCommentBodyLink(commentBody: string): Promise<string> {
  const linkRegex = /\[([^\]]+)\]\(([^)]+)\)|<a[^>]*href="([^"]+)"|https?:\/\/\S+/g;

  // Summaries are model output: escape them before placing them inside an attribute.
  const escapeAttribute = (value: string): string =>
    value.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  // Pass 1: locate every link once, remembering where it is and how to rewrite it.
  const occurrences: { start: number; length: number; url: string; render: (title: string) => string }[] = [];
  for (const match of commentBody.matchAll(linkRegex)) {
    const whole = match[0];
    const start = match.index ?? 0;
    const precedingChar = start > 0 ? commentBody.charAt(start - 1) : "";
    const markdownText = match[1];
    const markdownTarget = match[2];
    const hrefUrl = match[3];

    if (markdownTarget) {
      // `![alt](url)` is an image, not a link.
      if (precedingChar === "!") {
        continue;
      }
      // A Markdown target may carry a title (`url "title"`); only the first token is the URL.
      const url = (markdownTarget.trim().split(/\s+/)[0] ?? "").replace(/"/g, "");
      if (!url) {
        continue;
      }
      occurrences.push({
        start,
        length: whole.length,
        url,
        render: (title) => `<a href="${url}" title="${title}">${markdownText || url}</a>`,
      });
    } else if (hrefUrl) {
      // The match ends right after the href attribute, so the title can simply be appended.
      occurrences.push({
        start,
        length: whole.length,
        url: hrefUrl,
        render: (title) => `${whole} title="${title}"`,
      });
    } else {
      // A URL that is an attribute value (src="...", href='...') must not be wrapped in a tag.
      if (precedingChar === '"' || precedingChar === "'" || precedingChar === "=") {
        continue;
      }
      // `\S+` may run into adjacent markup; the URL ends at the first quote or angle bracket.
      const url = whole.split(/["'<>]/)[0] ?? whole;
      const trailing = whole.slice(url.length);
      // The URL is the text of an existing anchor: wrapping it would nest anchors.
      if (!url || trailing.startsWith("</a>")) {
        continue;
      }
      occurrences.push({
        start,
        length: whole.length,
        url,
        render: (title) => `<a href="${url}" title="${title}">${url}</a>${trailing}`,
      });
    }
  }

  // Pass 2: summarize each distinct URL once. The helper already fetches the page and
  // returns null for unsupported content types, so no second request is needed here.
  const descriptions = new Map<string, string>();
  for (const url of new Set(occurrences.map((occurrence) => occurrence.url))) {
    const description = await this._generateLinkDescription(url);
    if (description) {
      descriptions.set(url, description);
    }
  }

  // Pass 3: rebuild the body by position so that every summarized link is rewritten
  // exactly once and text inserted for one link is never rescanned for another.
  let updatedContent = "";
  let cursor = 0;
  for (const occurrence of occurrences) {
    const description = descriptions.get(occurrence.url);
    if (!description) {
      continue;
    }
    updatedContent += commentBody.slice(cursor, occurrence.start) + occurrence.render(escapeAttribute(description));
    cursor = occurrence.start + occurrence.length;
  }
  return updatedContent + commentBody.slice(cursor);
}

async transform(data: Readonly<IssueActivity>, result: Result) {
this._assignmentPeriods = await getAssignmentPeriods(
this.context.octokit,
Expand All @@ -54,7 +194,9 @@ export class DataPurgeModule extends BaseModule {
continue;
}
if (comment.body && comment.user?.login && result[comment.user.login]) {
const newContent = comment.body
const processedCommentBody = await this._processCommentBody(comment.body);
const processedCommentBodyLink = await this._processCommentBodyLink(processedCommentBody);
const newContent = processedCommentBodyLink
// Remove quoted text
.replace(/^>.*$/gm, "")
// Remove commands such as /start
Expand Down
Loading