Skip to content

Commit

Permalink
only delete files that aren't referenced elsewhere
Browse files Browse the repository at this point in the history
  • Loading branch information
snopoke committed Dec 13, 2024
1 parent 21dc039 commit 8bcbc27
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 2 deletions.
20 changes: 18 additions & 2 deletions apps/assistants/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from io import BytesIO

import openai
from django.db.models import Count, Subquery
from langchain_core.utils.function_calling import convert_to_openai_tool as lc_convert_to_openai_tool
from openai import OpenAI
from openai.types.beta import Assistant
Expand Down Expand Up @@ -178,15 +179,30 @@ def delete_openai_assistant(assistant: OpenAiAssistant):
except openai.NotFoundError:
pass

for resource in assistant.tool_resources.all():
tool_resources = list(assistant.tool_resources.all())
for resource in tool_resources:
if resource.tool_type == "file_search" and "vector_store_id" in resource.extra:
vector_store_id = resource.extra.pop("vector_store_id")
client.beta.vector_stores.delete(vector_store_id=vector_store_id)

for file in resource.files.all():
files_to_delete = _get_files_to_delete(assistant.team, resource.id)
for file in files_to_delete:
delete_file_from_openai(client, file)


def _get_files_to_delete(team, tool_resource_id):
    """Return the files of a tool resource that no other tool resource references.

    Reference counts are computed over the ``ToolResources.files`` through-table,
    restricted to tool resources whose assistant belongs to ``team``. A file
    qualifies only when exactly one such link exists for it.

    Args:
        team: Team used to scope the reference count.
        tool_resource_id: Primary key of the tool resource whose files are examined.

    Returns:
        An iterator (``QuerySet.iterator()``) over the matching ``File`` rows.
    """
    link_model = ToolResources.files.through
    team_links = link_model.objects.filter(toolresources__assistant__team=team)

    # Group the link rows by file and count how many tool resources point at each one.
    references_per_file = team_links.values("file").annotate(count=Count("toolresources"))
    singly_referenced_ids = references_per_file.filter(count=1).values("file_id")

    # Keep only the files attached to this resource that have no other reference.
    candidates = File.objects.filter(
        toolresources=tool_resource_id,
        id__in=Subquery(singly_referenced_ids),
    )
    return candidates.iterator()


def is_tool_configured_remotely_but_missing_locally(assistant_data, local_tool_types, tool_name: str) -> bool:
"""Checks if a tool is configured in OpenAI but missing in OCS."""
tool_configured_in_openai = hasattr(assistant_data.tool_resources, tool_name) and getattr(
Expand Down
42 changes: 42 additions & 0 deletions apps/assistants/tests/test_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest

from apps.assistants.models import ToolResources
from apps.assistants.sync import _get_files_to_delete
from apps.utils.factories.assistants import OpenAiAssistantFactory
from apps.utils.factories.files import FileFactory


@pytest.fixture()
def assistant():
    """Provide a factory-built assistant with the code interpreter and file search tools."""
    enabled_tools = ["code_interpreter", "file_search"]
    return OpenAiAssistantFactory(assistant_id="test_id", builtin_tools=enabled_tools)


@pytest.fixture()
def code_resource(assistant):
    """Provide a ``code_interpreter`` tool resource for ``assistant`` with two files attached.

    The files are created for the assistant's team.
    """
    team_files = FileFactory.create_batch(2, team=assistant.team)

    resource = ToolResources.objects.create(tool_type="code_interpreter", assistant=assistant)
    resource.files.set(team_files)
    return resource


@pytest.mark.django_db()
def test_files_to_delete_when_only_referenced_by_one_resource(code_resource):
    """All files of a resource are returned when nothing else references them."""
    team = code_resource.assistant.team
    deletable_ids = [file.id for file in _get_files_to_delete(team, code_resource.id)]
    expected_ids = {file.id for file in code_resource.files.all()}

    assert len(deletable_ids) == 2
    assert set(deletable_ids) == expected_ids


@pytest.mark.django_db()
def test_files_not_to_delete_when_referenced_by_multiple_resources(code_resource):
    """A file attached to two tool resources is returned for neither of them."""
    attached_files = list(code_resource.files.all())
    shared_file, unshared_file = attached_files[0], attached_files[1]

    # Attach the first file to a second resource so it has two references.
    search_resource = ToolResources.objects.create(tool_type="file_search", assistant=code_resource.assistant)
    search_resource.files.set([shared_file])

    # only the second file should be deleted
    deletable_for_code = list(_get_files_to_delete(code_resource.assistant.team, code_resource.id))
    assert len(deletable_for_code) == 1
    assert deletable_for_code[0].id == unshared_file.id

    # The second resource only holds the shared file, so nothing is deletable.
    deletable_for_search = list(_get_files_to_delete(search_resource.assistant.team, search_resource.id))
    assert len(deletable_for_search) == 0

0 comments on commit 8bcbc27

Please sign in to comment.