Skip to content

Commit

Permalink
cherry pick
Browse files: browse the repository at this point in the history
  • Loading branch information
LostVector committed Sep 30, 2024
1 parent 427c86e commit 2c61f91
Showing 1 changed file with 0 additions and 97 deletions.
97 changes: 0 additions & 97 deletions backend/danswer/background/celery/tasks/pruning/tasks.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,103 +154,6 @@ def try_creating_prune_generator_task(
return 1


# @shared_task(
# name="check_for_prune_task",
# soft_time_limit=JOB_TIMEOUT,
# )
# def check_for_prune_task() -> None:
# """Runs periodically to check if any prune tasks should be run and adds them
# to the queue"""

# with Session(get_sqlalchemy_engine()) as db_session:
# all_cc_pairs = get_connector_credential_pairs(db_session)

# for cc_pair in all_cc_pairs:
# if should_prune_cc_pair(
# connector=cc_pair.connector,
# credential=cc_pair.credential,
# db_session=db_session,
# ):
# task_logger.info(f"Pruning the {cc_pair.connector.name} connector")

# prune_documents_task.apply_async(
# kwargs=dict(
# connector_id=cc_pair.connector.id,
# credential_id=cc_pair.credential.id,
# )
# )


# @build_celery_task_wrapper(name_cc_prune_task)
# @shared_task(soft_time_limit=JOB_TIMEOUT)
# def prune_documents_task(connector_id: int, credential_id: int) -> None:
# """connector pruning task. For a cc pair, this task pulls all document IDs from the source
# and compares those IDs to locally stored documents and deletes all locally stored IDs missing
# from the most recently pulled document ID list"""
# with Session(get_sqlalchemy_engine()) as db_session:
# try:
# cc_pair = get_connector_credential_pair(
# db_session=db_session,
# connector_id=connector_id,
# credential_id=credential_id,
# )

# if not cc_pair:
# task_logger.warning(
# f"ccpair not found for {connector_id} {credential_id}"
# )
# return

# runnable_connector = instantiate_connector(
# db_session,
# cc_pair.connector.source,
# InputType.PRUNE,
# cc_pair.connector.connector_specific_config,
# cc_pair.credential,
# )

# all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector(
# runnable_connector
# )

# all_indexed_document_ids = {
# doc.id
# for doc in get_documents_for_connector_credential_pair(
# db_session=db_session,
# connector_id=connector_id,
# credential_id=credential_id,
# )
# }

# doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)

# curr_ind_name, sec_ind_name = get_both_index_names(db_session)
# document_index = get_default_document_index(
# primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
# )

# if len(doc_ids_to_remove) == 0:
# task_logger.info(
# f"No docs to prune from {cc_pair.connector.source} connector"
# )
# return

# task_logger.info(
# f"pruning {len(doc_ids_to_remove)} doc(s) from {cc_pair.connector.source} connector"
# )
# delete_connector_credential_pair_batch(
# document_ids=doc_ids_to_remove,
# connector_id=connector_id,
# credential_id=credential_id,
# document_index=document_index,
# )
# except Exception as e:
# task_logger.exception(
# f"Failed to run pruning for connector id {connector_id}."
# )
# raise e


@shared_task(name="connector_pruning_generator_task", soft_time_limit=JOB_TIMEOUT)
def connector_pruning_generator_task(connector_id: int, credential_id: int) -> None:
"""connector pruning task. For a cc pair, this task pulls all document IDs from the source
Expand Down

0 comments on commit 2c61f91

Please sign in to comment.