Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embedding Ignore Filters #389

Merged
merged 11 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions frontend/src/app/assignment/model/assignment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export interface ClassroomInfo {
mossResult?: string;
openaiApiKey?: string;
openaiConsent?: boolean;
openaiIgnore?: string;
}

export default class Assignment {
Expand Down
3 changes: 3 additions & 0 deletions frontend/src/app/assignment/model/solution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ export interface EstimatedCosts {
files: number;
tokens: number;
estimatedCost: number;
functions: string[];
ignoredFiles: string[];
ignoredFunctions: string[];
}

export class Feedback {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,40 @@
If disabled, the students' consent will be ignored and all submissions will be indexed.
</div>
</div>
<div class="mb-3">
<label class="form-label" for="openaiIgnore">
OpenAI Ignore
</label>
<textarea class="form-control" id="openaiIgnore" rows="10"
[(ngModel)]="classroom.openaiIgnore" (change)="context.saveDraft()"></textarea>
<div class="form-text">
A gitignore-like list of directories, files and methods to ignore when indexing code.
<details>
<summary>More Info</summary>
<ul>
<li>
Supports wildcards, negation, and other <a href="https://github.com/isaacs/minimatch" target="_blank">minimatch</a> patterns.
</li>
<li>
To ignore a file, use <code>Foo.java</code>.
</li>
<li>
To ignore a directory, use <code>bar/</code>.
</li>
<li>
To ignore a directory except for a specific file, use <code>bar/</code> and <code>!bar/Foo.java</code>.
</li>
<li>
To ignore a method or function, use <code>Foo.java#baz</code>.
</li>
<li>
To ignore all methods and functions except for a specific one, use <code>Foo.java#*</code> and <code>!Foo.java#baz</code>.
</li>
<li>
If a file is already ignored, all method rules for that file, even allow rules, have no effect.
</li>
</ul>
</details>
</div>
</div>
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,45 @@
<p>
After importing from GitHub or Files, you can import embeddings.
</p>
<p>
Here is an estimate of the token costs.
Actual costs may be <strong>lower</strong> if the embeddings are already cached,
or <strong>slightly higher</strong> due to some tokens like filenames being added to improve results.
Charges will be applied to your OpenAI account.
</p>
<p>
Please review the results below and click <b>Import</b> to confirm.
</p>
@if (estimatedCosts) {
<div class="row">
<app-statistic-value class="col" label="Solutions" [value]="estimatedCosts.solutions" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Files" [value]="estimatedCosts.files" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Tokens" [value]="estimatedCosts.tokens" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Estimated Cost" [value]="estimatedCosts.estimatedCost | currency:'USD':true:'0.7'" [standalone]="true"></app-statistic-value>
</div>
}
@if (finalCosts) {
@if (costsAreFinal) {
<hr/>
<p>
The import has been completed.
The following costs have been applied to your OpenAI account.
</p>
} @else {
<p>
Here is an estimate of the token costs.
Actual costs may be <strong>lower</strong> if the embeddings are already cached,
or <strong>slightly higher</strong> due to some tokens like filenames being added to improve results.
Charges will be applied to your OpenAI account.
</p>
<p>
Please review the results below and click <b>Import</b> to confirm.
</p>
}
@if (costs) {
<div class="row">
<app-statistic-value class="col" label="Tokens" [value]="finalCosts.tokens" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Cost" [value]="finalCosts.estimatedCost | currency:'USD':true:'0.7'" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Solutions" [value]="costs.solutions" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Files" [value]="costs.files" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Functions" [value]="costs.functions.length" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" label="Tokens" [value]="costs.tokens" [standalone]="true"></app-statistic-value>
<app-statistic-value class="col" [label]="costsAreFinal ? 'Total Cost' : 'Estimated Cost'" [value]="costs.estimatedCost | currency:'USD':true:'0.7'" [standalone]="true"></app-statistic-value>
</div>
<div class="mb-3">
<label for="functions">Imported Functions</label>
<textarea class="form-control" id="functions" rows="6" readonly>{{ costs.functions.join('\n') }}</textarea>
</div>
@if (costs.ignoredFiles.length > 0) {
<div class="mb-3">
<label for="ignoredFiles">Ignored Files</label>
<textarea class="form-control" id="ignoredFiles" rows="6" readonly>{{ costs.ignoredFiles.join('\n') }}</textarea>
</div>
}
@if (costs.ignoredFunctions.length > 0) {
<div class="mb-3">
<label for="ignoredFunctions">Ignored Functions</label>
<textarea class="form-control" id="ignoredFunctions" rows="6" readonly>{{ costs.ignoredFunctions.join('\n') }}</textarea>
</div>
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import {EmbeddingService} from "../../../services/embedding.service";
styleUrls: ['./import-embeddings.component.scss']
})
export class ImportEmbeddingsComponent implements OnInit {
estimatedCosts?: EstimatedCosts;
finalCosts?: EstimatedCosts;
costs?: EstimatedCosts;
costsAreFinal = false;

constructor(
private embeddingService: EmbeddingService,
Expand All @@ -22,13 +22,16 @@ export class ImportEmbeddingsComponent implements OnInit {
ngOnInit() {
this.route.params.pipe(
switchMap(({aid}) => this.embeddingService.import(aid, true)),
).subscribe(costs => this.estimatedCosts = costs);
).subscribe(costs => this.costs = costs);
}

import() {
const assignmentId = this.route.snapshot.params.aid;
return this.embeddingService.import(assignmentId).pipe(
tap(result => this.finalCosts = result),
tap(result => {
this.costs = result;
this.costsAreFinal = true;
}),
);
}
}
6 changes: 6 additions & 0 deletions services/apps/assignments/src/assignment/assignment.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ export class ClassroomInfo {
@IsOptional()
@IsBoolean()
openaiConsent?: boolean;

@Prop()
@ApiPropertyOptional()
@IsOptional()
@IsString()
openaiIgnore?: string;
}

@Schema()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ export class EmbeddingController {
@Query('estimate', new ParseBoolPipe({optional: true})) estimate?: boolean,
): Promise<EmbeddingEstimate> {
const assignment = await this.assignmentService.find(assignmentId) || notFound(assignmentId);
return estimate
? this.embeddingService.estimateEmbeddings(assignment)
: this.embeddingService.createEmbeddings(assignment);
return this.embeddingService.createEmbeddings(assignment, estimate);
}

@Get('embeddings')
Expand Down
27 changes: 26 additions & 1 deletion services/apps/assignments/src/embedding/embedding.dto.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
import {ApiProperty, ApiPropertyOptional} from "@nestjs/swagger";
import {ArrayMaxSize, ArrayMinSize, IsIn, IsInt, IsMongoId, IsNumber, IsOptional, IsString} from "class-validator";
import {
ArrayMaxSize,
ArrayMinSize,
IsArray,
IsIn,
IsInt,
IsMongoId,
IsNumber,
IsOptional,
IsString
} from "class-validator";

export class EmbeddingEstimate {
@ApiProperty()
Expand All @@ -17,6 +27,21 @@ export class EmbeddingEstimate {
@ApiProperty()
@IsNumber({maxDecimalPlaces: 2})
estimatedCost: number;

@ApiProperty()
@IsArray()
@IsString({each: true})
functions: string[];

@ApiProperty()
@IsArray()
@IsString({each: true})
ignoredFiles: string[];

@ApiProperty()
@IsArray()
@IsString({each: true})
ignoredFunctions: string[];
}

export class EmbeddableBase {
Expand Down
78 changes: 73 additions & 5 deletions services/apps/assignments/src/embedding/embedding.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ import {
PYTHON_FUNCTION_HEADER
} from './embedding.service';
import {ElasticsearchService} from "@nestjs/elasticsearch";
import {SearchService} from "../search/search.service";
import {FileDocument, SearchService} from "../search/search.service";
import {OpenAIService} from "./openai.service";
import {SolutionService} from "../solution/solution.service";
import * as ignore from 'ignore-file';

describe('EmbeddingService', () => {
let service: EmbeddingService;
Expand Down Expand Up @@ -41,9 +42,21 @@ class Foo {
}
}
`;

expect(service.getFunctions(code, CLIKE_FUNCTION_HEADER, findClosingBrace)).toEqual([
const doc: FileDocument = {
assignment: 'a1',
solution: 's1',
file: 'Foo.java',
content: code,
};

expect(service.getFunctions(doc, CLIKE_FUNCTION_HEADER, findClosingBrace)).toEqual([
{
assignment: 'a1',
solution: 's1',
file: 'Foo.java',
id: 's1-Foo.java-1',
type: 'snippet',
embedding: [],
name: 'bar',
line: 1,
text: `\
Expand All @@ -52,6 +65,12 @@ class Foo {
}`,
},
{
assignment: 'a1',
solution: 's1',
file: 'Foo.java',
id: 's1-Foo.java-5',
type: 'snippet',
embedding: [],
name: 'baz',
line: 5,
text: `\
Expand All @@ -77,9 +96,21 @@ class Foo:
if i != 0:
i = i + 1
`;

expect(service.getFunctions(code, PYTHON_FUNCTION_HEADER, findIndentEnd)).toEqual([
const doc: FileDocument = {
assignment: 'a1',
solution: 's1',
file: 'Foo.py',
content: code,
};

expect(service.getFunctions(doc, PYTHON_FUNCTION_HEADER, findIndentEnd)).toEqual([
{
assignment: 'a1',
solution: 's1',
file: 'Foo.py',
id: 's1-Foo.py-1',
type: 'snippet',
embedding: [],
name: 'bar',
line: 1,
text: `\
Expand All @@ -91,6 +122,12 @@ class Foo:
`,
},
{
assignment: 'a1',
solution: 's1',
file: 'Foo.py',
id: 's1-Foo.py-6',
type: 'snippet',
embedding: [],
name: 'baz',
line: 6,
text: `\
Expand Down Expand Up @@ -119,3 +156,34 @@ def baz():
expect(findIndentEnd(code.trim(), 35, 45)).toEqual(51);
});
});

describe('Ignore snippets', () => {
  type IgnorePredicate = (path: string) => boolean;

  /** Compiles a gitignore-like rule list into a predicate; `true` means the path is ignored. */
  const compileRules = (rules: string): IgnorePredicate => ignore.compile(rules) as IgnorePredicate;

  /** Checks each `[path, expectedVerdict]` pair against the predicate, in the order given. */
  const expectVerdicts = (
    isIgnored: IgnorePredicate,
    verdicts: readonly (readonly [string, boolean])[],
  ): void => {
    for (const [path, ignored] of verdicts) {
      expect(isIgnored(path)).toEqual(ignored);
    }
  };

  it('should ignore files', () => {
    // A directory rule, followed by a negation that re-includes a single file from it.
    const isIgnored = compileRules(`\
foo/
!foo/Bar.java
`);

    expectVerdicts(isIgnored, [
      ['foo/Foo.java', true],
      ['foo/Bar.java', false],
    ]);
  });

  it('should ignore methods', () => {
    // Foo.java: every method is ignored except `bar`.
    // Bar.java: the file itself is ignored, plus an allow rule for one of its methods.
    const isIgnored = compileRules(`\
Foo.java#*
!Foo.java#bar

Bar.java
!Bar.java#baz
`);

    expectVerdicts(isIgnored, [
      // The bare file name must NOT match the method rules; otherwise the
      // whole document would be pre-filtered before its methods are looked at.
      ['Foo.java', false],
      ['Foo.java#bar', false],
      ['Foo.java#baz', true],
    ]);

    expectVerdicts(isIgnored, [
      ['Bar.java', true],
      ['Bar.java#bar', false],
      ['Bar.java#baz', false],
    ]);
  });
});
Loading