From 3cb6f6f80f40f974c2208b8723daa87722d6b3ab Mon Sep 17 00:00:00 2001 From: jahwag <540380+jahwag@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:52:41 +0200 Subject: [PATCH] feat: added logging of token count on push --- pyproject.toml | 5 ++-- requirements.txt | 3 +- src/claudesync/syncmanager.py | 56 +++++++++++++++++++++++++++-------- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index facda6d..b3bab46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "claudesync" -version = "0.5.7" +version = "0.5.8" authors = [ {name = "Jahziah Wagner", email = "jahziah.wagner+pypi@gmail.com"}, ] @@ -26,7 +26,8 @@ dependencies = [ "claudesync>=0.5.4", "crontab>=1.0.1", "python-crontab>=3.2.0", - "Brotli>=1.1.0" + "Brotli>=1.1.0", + "anthropic>=0.34.2" ] keywords = [ "sync", diff --git a/requirements.txt b/requirements.txt index 74ac917..23a627a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ pytest-cov>=5.0.0 claudesync>=0.5.4 crontab>=1.0.1 python-crontab>=3.2.0 -Brotli>=1.1.0 \ No newline at end of file +Brotli>=1.1.0 +anthropic>=0.34.2 \ No newline at end of file diff --git a/src/claudesync/syncmanager.py b/src/claudesync/syncmanager.py index d4bf4c5..ebe82ec 100644 --- a/src/claudesync/syncmanager.py +++ b/src/claudesync/syncmanager.py @@ -4,8 +4,8 @@ import logging from datetime import datetime, timezone import io +from anthropic import Anthropic -import click from tqdm import tqdm from claudesync.utils import compute_md5_hash @@ -54,12 +54,16 @@ def __init__(self, provider, config, local_path): self.max_retries = 3 self.retry_delay = 1 self.compression_algorithm = config.get("compression_algorithm", "none") + self.synced_files = {} + self.anthropic_client = Anthropic() def sync(self, local_files, remote_files): + self.synced_files = {} # Reset synced files at the start of sync if self.compression_algorithm == "none": self._sync_without_compression(local_files, 
remote_files) else: self._sync_with_compression(local_files, remote_files) + self.log_token_count() def _sync_without_compression(self, local_files, remote_files): remote_files_to_delete = set(rf["file_name"] for rf in remote_files) @@ -94,6 +98,10 @@ def _sync_without_compression(self, local_files, remote_files): self.prune_remote_files(remote_files, remote_files_to_delete) + # Count tokens for synced files + for local_file in synced_files: + self.count_tokens_for_file(local_file) + def _sync_with_compression(self, local_files, remote_files): packed_content = self._pack_files(local_files) compressed_content = compress_content( @@ -115,6 +123,10 @@ def _sync_with_compression(self, local_files, remote_files): self._cleanup_old_remote_files(remote_files) + # Count tokens for all local files (since they're all included in the compressed file) + for local_file in local_files: + self.count_tokens_for_file(local_file) + def _pack_files(self, local_files): packed_content = io.StringIO() for file_path, file_hash in local_files.items(): @@ -191,12 +203,12 @@ def _cleanup_old_remote_files(self, remote_files): @retry_on_403() def update_existing_file( - self, - local_file, - local_checksum, - remote_file, - remote_files_to_delete, - synced_files, + self, + local_file, + local_checksum, + remote_file, + remote_files_to_delete, + synced_files, ): remote_content = remote_file["content"] remote_checksum = compute_md5_hash(remote_content) @@ -210,7 +222,7 @@ def update_existing_file( ) pbar.update(1) with open( - os.path.join(self.local_path, local_file), "r", encoding="utf-8" + os.path.join(self.local_path, local_file), "r", encoding="utf-8" ) as file: content = file.read() self.provider.upload_file( @@ -228,7 +240,7 @@ def update_existing_file( def upload_new_file(self, local_file, synced_files): logger.debug(f"Uploading new file {local_file} to remote...") with open( - os.path.join(self.local_path, local_file), "r", encoding="utf-8" + os.path.join(self.local_path, local_file), 
"r", encoding="utf-8" ) as file: content = file.read() with tqdm(total=1, desc=f"Uploading {local_file}", leave=False) as pbar: @@ -264,7 +276,7 @@ def sync_remote_to_local(self, remote_file, remote_files_to_delete, synced_files ) def update_existing_local_file( - self, local_file_path, remote_file, remote_files_to_delete, synced_files + self, local_file_path, remote_file, remote_files_to_delete, synced_files ): local_mtime = datetime.fromtimestamp( os.path.getmtime(local_file_path), tz=timezone.utc @@ -284,14 +296,14 @@ def update_existing_local_file( remote_files_to_delete.remove(remote_file["file_name"]) def create_new_local_file( - self, local_file_path, remote_file, remote_files_to_delete, synced_files + self, local_file_path, remote_file, remote_files_to_delete, synced_files ): logger.debug( f"Creating new local file {remote_file['file_name']} from remote..." ) content = remote_file["content"] with tqdm( - total=1, desc=f"Creating {remote_file['file_name']}", leave=False + total=1, desc=f"Creating {remote_file['file_name']}", leave=False ) as pbar: with open(local_file_path, "w", encoding="utf-8") as file: file.write(content) @@ -302,7 +314,7 @@ def create_new_local_file( def prune_remote_files(self, remote_files, remote_files_to_delete): if not self.config.get("prune_remote_files"): - click.echo("Remote pruning is not enabled.") + logger.info("Remote pruning is not enabled.") return for file_to_delete in list(remote_files_to_delete): @@ -320,3 +332,21 @@ def delete_remote_files(self, file_to_delete, remote_files): ) pbar.update(1) time.sleep(self.upload_delay) + + def count_tokens_for_file(self, file_path): + full_path = os.path.join(self.local_path, file_path) + with open(full_path, 'r', encoding='utf-8', errors='ignore') as file: + content = file.read() + token_count = self.anthropic_client.count_tokens(content) + self.synced_files[file_path] = token_count + + def get_total_token_count(self): + return sum(self.synced_files.values()) + + def 
get_synced_file_count(self): + return len(self.synced_files) + + def log_token_count(self): + total_tokens = self.get_total_token_count() + synced_file_count = self.get_synced_file_count() + logger.info(f"Total tokens in synced files: {total_tokens:,} across {synced_file_count} files") \ No newline at end of file