From d51034b4e17d27663e7e7a0ea4a2077e596f9140 Mon Sep 17 00:00:00 2001 From: Jahziah Wagner Date: Tue, 6 Aug 2024 17:44:59 +0200 Subject: [PATCH] Support correct destination for two way sync --- .gitignore | 5 +- pyproject.toml | 2 +- src/claudesync/base_syncmanager.py | 240 ++++++++++++++++ src/claudesync/cli/project.py | 49 +++- src/claudesync/cli/sync.py | 9 +- src/claudesync/config_manager.py | 20 ++ src/claudesync/one_way_syncmanager.py | 66 +++++ src/claudesync/providers/base_claude_ai.py | 7 + src/claudesync/syncmanager.py | 320 ++++++++++++++++----- src/claudesync/two_way_syncmanager.py | 283 ++++++++++++++++++ src/claudesync/utils.py | 11 +- 11 files changed, 931 insertions(+), 81 deletions(-) create mode 100644 src/claudesync/base_syncmanager.py create mode 100644 src/claudesync/one_way_syncmanager.py create mode 100644 src/claudesync/two_way_syncmanager.py diff --git a/.gitignore b/.gitignore index 421f4ab..f21d097 100644 --- a/.gitignore +++ b/.gitignore @@ -165,6 +165,8 @@ cython_debug/ **/*.egg-info __pycache__ +*.iml + # claude claude.sync config.json @@ -172,4 +174,5 @@ claudesync.log claude_chats some_value -ROADMAP.md \ No newline at end of file +ROADMAP.md +.claudesync \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index cc3d40f..b05d3bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "claudesync" -version = "0.4.9" +version = "0.5.0" authors = [ {name = "Jahziah Wagner", email = "jahziah.wagner+pypi@gmail.com"}, ] diff --git a/src/claudesync/base_syncmanager.py b/src/claudesync/base_syncmanager.py new file mode 100644 index 0000000..4d2ae8d --- /dev/null +++ b/src/claudesync/base_syncmanager.py @@ -0,0 +1,240 @@ +import os +import json +import logging +import time +from datetime import datetime +from tqdm import tqdm +from claudesync.utils import compute_md5_hash + +logger = logging.getLogger(__name__) + +CLAUDESYNC_PATH_COMMENT = "// CLAUDESYNC_PATH: {}\n" + + +class BaseSyncManager: + """ + Base class for managing synchronization between local files and remote Claude.ai projects. + """ + + def __init__(self, provider, config): + """ + Initialize the BaseSyncManager with the given provider and configuration. + + Args: + provider (Provider): The provider instance to interact with the remote storage. + config (dict): Configuration dictionary containing sync settings. + """ + self.provider = provider + self.config = config + self.active_organization_id = config.get("active_organization_id") + self.active_project_id = config.get("active_project_id") + self.local_path = config.get("local_path") + self.upload_delay = config.get("upload_delay", 0.5) + self.autocrlf = config.get("autocrlf", "true") + self.last_known_times_file = os.path.join( + self.local_path, ".claudesync", "last_known_times.json" + ) + self.prune_remote_files = config.get("prune_remote_files", False) + + def load_last_known_times(self): + """ + Load the last known modification times of files from a JSON file. + + Returns: + dict: A dictionary of file names to their last known modification times. + """ + os.makedirs(os.path.dirname(self.last_known_times_file), exist_ok=True) + if os.path.exists(self.last_known_times_file): + with open(self.last_known_times_file, "r") as f: + return {k: datetime.fromisoformat(v) for k, v in json.load(f).items()} + return {} + + def save_last_known_times(self, last_known_times): + """ + Save the last known modification times of files to a JSON file. + + Args: + last_known_times (dict): A dictionary of file names to their last known modification times. + """ + os.makedirs(os.path.dirname(self.last_known_times_file), exist_ok=True) + with open(self.last_known_times_file, "w") as f: + json.dump({k: v.isoformat() for k, v in last_known_times.items()}, f) + + def normalize_line_endings(self, content, for_local=True): + """ + Normalize line endings based on the autocrlf setting. + + Args: + content (str): The content to normalize. + for_local (bool): True if normalizing for local file, False for remote. + + Returns: + str: The content with normalized line endings. + """ + # First, standardize to LF + content = content.replace("\r\n", "\n").replace("\r", "\n") + + if for_local: + if self.autocrlf == "true" and os.name == "nt": + # Convert to CRLF for Windows when autocrlf is true + content = content.replace("\n", "\r\n") + else: # for remote + if self.autocrlf == "input": + # Keep LF for remote when autocrlf is input + pass + elif self.autocrlf == "true": + # Convert to LF for remote when autocrlf is true + content = content.replace("\r\n", "\n") + + return content + + def _add_path_comment(self, content, file_path): + """ + Add a path comment to the content if it doesn't already exist. + + Args: + content (str): The file content. + file_path (str): The full path of the file. + + Returns: + str: The content with the path comment added. + """ + relative_path = os.path.relpath(file_path, self.local_path) + if not content.startswith("// CLAUDESYNC_PATH:"): + return CLAUDESYNC_PATH_COMMENT.format(relative_path) + content + return content + + def _remove_path_comment(self, content): + """ + Remove the path comment from the content if it exists. + + Args: + content (str): The file content. + + Returns: + str: The content with the path comment removed. + """ + lines = content.split("\n", 1) + if lines and lines[0].startswith("// CLAUDESYNC_PATH:"): + return lines[1] if len(lines) > 1 else "" + return content + + def _extract_path_from_comment(self, content): + """ + Extract the file path from the path comment if it exists. + + Args: + content (str): The file content. + + Returns: + str or None: The extracted file path, or None if no path comment is found. + """ + lines = content.split("\n", 1) + if lines and lines[0].startswith("// CLAUDESYNC_PATH:"): + return lines[0].split(": ", 1)[1].strip() + return None + + def sync(self, local_files, remote_files): + """ + Main synchronization method that orchestrates the sync process. + This method should be implemented by derived classes. + + Args: + local_files (dict): Dictionary of local file names and their corresponding checksums. + remote_files (list): List of dictionaries representing remote files. + """ + raise NotImplementedError("Sync method must be implemented by derived classes.") + + def get_all_local_files(self): + """ + Get a set of all files in the local directory. + + Returns: + set: A set of all file paths relative to the local_path. + """ + all_files = set() + for root, _, files in os.walk(self.local_path): + for file in files: + relative_path = os.path.relpath( + os.path.join(root, file), self.local_path + ) + all_files.add(relative_path) + return all_files + + def update_existing_file( + self, + local_file, + local_checksum, + remote_file, + remote_files_to_delete, + synced_files, + ): + """ + Update an existing file on the remote if it has changed locally or if the path comment needs to be added. + + Args: + local_file (str): Name of the local file. + local_checksum (str): MD5 checksum of the local file content. + remote_file (dict): Dictionary representing the remote file. + remote_files_to_delete (set): Set of remote file names to be considered for deletion. + synced_files (set): Set of file names that have been synchronized. + """ + file_path = os.path.join(self.local_path, local_file) + with open(file_path, "r", encoding="utf-8") as file: + local_content = file.read() + + local_content_with_comment = self._add_path_comment(local_content, file_path) + local_content_normalized = self.normalize_line_endings( + local_content_with_comment, for_local=False + ) + local_checksum_with_comment = compute_md5_hash(local_content_normalized) + + remote_content = remote_file["content"] + remote_checksum = compute_md5_hash(remote_content) + + if local_checksum_with_comment != remote_checksum: + logger.debug(f"Updating {local_file} on remote...") + with tqdm(total=2, desc=f"Updating {local_file}", leave=False) as pbar: + self.provider.delete_file( + self.active_organization_id, + self.active_project_id, + remote_file["uuid"], + ) + pbar.update(1) + self.provider.upload_file( + self.active_organization_id, + self.active_project_id, + local_file, + local_content_normalized, + ) + pbar.update(1) + time.sleep(self.upload_delay) + synced_files.add(local_file) + remote_files_to_delete.remove(local_file) + + def upload_new_file(self, local_file, synced_files): + """ + Upload a new file to the remote project. + + Args: + local_file (str): Name of the local file to be uploaded. + synced_files (set): Set of file names that have been synchronized. + """ + logger.debug(f"Uploading new file {local_file} to remote...") + file_path = os.path.join(self.local_path, local_file) + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + content_with_comment = self._add_path_comment(content, file_path) + normalized_content = self.normalize_line_endings( + content_with_comment, for_local=False + ) + with tqdm(total=1, desc=f"Uploading {local_file}", leave=False) as pbar: + self.provider.upload_file( + self.active_organization_id, + self.active_project_id, + local_file, + normalized_content, + ) + pbar.update(1) + time.sleep(self.upload_delay) + synced_files.add(local_file) diff --git a/src/claudesync/cli/project.py b/src/claudesync/cli/project.py index 98a771a..d560303 100644 --- a/src/claudesync/cli/project.py +++ b/src/claudesync/cli/project.py @@ -1,14 +1,15 @@ import os - import click - from claudesync.exceptions import ProviderError -from ..syncmanager import SyncManager +from ..one_way_syncmanager import OneWaySyncManager +from ..two_way_syncmanager import TwoWaySyncManager +from ..utils import ( + get_local_files, +) from ..utils import ( handle_errors, validate_and_get_provider, validate_and_store_local_path, - get_local_files, ) @@ -46,6 +47,33 @@ def create(config): validate_and_store_local_path(config) + # Reset sync state + sync_state_file = os.path.join(config.get("local_path"), ".claudesync", "sync_state.json") + if os.path.exists(sync_state_file): + os.remove(sync_state_file) + click.echo("Sync state has been reset for the new project.") + + # Prompt for custom instruction / system prompt + if click.confirm( + "Would you like to configure our recommended custom instruction / system prompt?", + default=True, + ): + prompt_template = ( + 'When processing files, prepend "// CLAUDESYNC_PATH: {relative_path}\\n" to the start of each file\'s ' + "content, where {relative_path} is the file's path relative to the project root." + ) + try: + provider.set_project_prompt_template( + active_organization_id, new_project["uuid"], prompt_template + ) + click.echo( + "Custom instruction / system prompt has been set successfully." + ) + except Exception as e: + click.echo( + f"Failed to set custom instruction / system prompt: {str(e)}" + ) + except ProviderError as e: click.echo(f"Failed to create project: {str(e)}") @@ -101,6 +129,12 @@ def select(ctx): ) validate_and_store_local_path(config) + + # Reset sync state + sync_state_file = os.path.join(config.get("local_path"), ".claudesync", "sync_state.json") + if os.path.exists(sync_state_file): + os.remove(sync_state_file) + click.echo("Sync state has been reset for the new project.") else: click.echo("Invalid selection. Please try again.") @@ -136,11 +170,16 @@ def sync(config): """Synchronize only the project files.""" provider = validate_and_get_provider(config, require_project=True) - sync_manager = SyncManager(provider, config) + if config.get("two_way_sync", False): + sync_manager = TwoWaySyncManager(provider, config) + else: + sync_manager = OneWaySyncManager(provider, config) + remote_files = provider.list_files( sync_manager.active_organization_id, sync_manager.active_project_id ) local_files = get_local_files(config.get("local_path")) + sync_manager.sync(local_files, remote_files) click.echo("Project sync completed successfully.") diff --git a/src/claudesync/cli/sync.py b/src/claudesync/cli/sync.py index f6f5c08..f50b656 100644 --- a/src/claudesync/cli/sync.py +++ b/src/claudesync/cli/sync.py @@ -5,8 +5,9 @@ from crontab import CronTab from claudesync.utils import get_local_files +from ..one_way_syncmanager import OneWaySyncManager +from ..two_way_syncmanager import TwoWaySyncManager from ..utils import handle_errors, validate_and_get_provider -from ..syncmanager import SyncManager from ..chat_sync import sync_chats @@ -39,7 +40,11 @@ def sync(config): provider = validate_and_get_provider(config, require_project=True) # Sync projects - sync_manager = SyncManager(provider, config) + if config.get("two_way_sync", False): + sync_manager = TwoWaySyncManager(provider, config) + else: + sync_manager = OneWaySyncManager(provider, config) + remote_files = provider.list_files( sync_manager.active_organization_id, sync_manager.active_project_id ) diff --git a/src/claudesync/config_manager.py b/src/claudesync/config_manager.py index 1be2da5..2c59aac 100644 --- a/src/claudesync/config_manager.py +++ b/src/claudesync/config_manager.py @@ -1,5 +1,7 @@ import datetime import json +import os +import subprocess from pathlib import Path @@ -42,8 +44,26 @@ def _get_default_config(self): "max_file_size": 32 * 1024, # Default 32 KB "two_way_sync": False, # Default to False "curl_use_file_input": False, + "autocrlf": self._get_git_autocrlf(), + "prune_remote_files": False, } + def _get_git_autocrlf(self): + try: + result = subprocess.run( + ["git", "config", "--get", "core.autocrlf"], + capture_output=True, + text=True, + check=True, + ) + value = result.stdout.strip().lower() + if value in ["true", "false", "input"]: + return value + except subprocess.CalledProcessError: + pass + # Default to 'true' on Windows, 'input' on other systems if git config is not available + return "true" if os.name == "nt" else "input" + def _load_config(self): """ Loads the configuration from the JSON file, applying default values for missing keys. diff --git a/src/claudesync/one_way_syncmanager.py b/src/claudesync/one_way_syncmanager.py new file mode 100644 index 0000000..924d6e6 --- /dev/null +++ b/src/claudesync/one_way_syncmanager.py @@ -0,0 +1,66 @@ +import time +import logging +from tqdm import tqdm +from .base_syncmanager import BaseSyncManager + +logger = logging.getLogger(__name__) + + +class OneWaySyncManager(BaseSyncManager): + """ + Manages one-way synchronization from local files to remote Claude.ai projects. + """ + + def sync(self, local_files, remote_files): + """ + Main synchronization method for one-way sync (local to remote). + + Args: + local_files (dict): Dictionary of local file names and their corresponding checksums. + remote_files (list): List of dictionaries representing remote files. + """ + remote_files_to_delete = set(rf["file_name"] for rf in remote_files) + synced_files = set() + + # Sync local files to remote + with tqdm(total=len(local_files), desc="Local → Remote") as pbar: + for local_file, local_checksum in local_files.items(): + remote_file = next( + (rf for rf in remote_files if rf["file_name"] == local_file), None + ) + if remote_file: + self.update_existing_file( + local_file, + local_checksum, + remote_file, + remote_files_to_delete, + synced_files, + ) + else: + self.upload_new_file(local_file, synced_files) + pbar.update(1) + + # Delete remaining files that weren't synced + if self.prune_remote_files: + for file_to_delete in list(remote_files_to_delete): + remote_file = next( + rf for rf in remote_files if rf["file_name"] == file_to_delete + ) + self.delete_remote_file(remote_file) + + def delete_remote_file(self, remote_file): + """ + Delete a file from the remote project. + + Args: + remote_file (dict): Dictionary representing the remote file to be deleted. + """ + logger.debug(f"Deleting {remote_file['file_name']} from remote...") + with tqdm( + total=1, desc=f"Deleting {remote_file['file_name']}", leave=False + ) as pbar: + self.provider.delete_file( + self.active_organization_id, self.active_project_id, remote_file["uuid"] + ) + pbar.update(1) + time.sleep(self.upload_delay) diff --git a/src/claudesync/providers/base_claude_ai.py b/src/claudesync/providers/base_claude_ai.py index b1b1c89..0f81c83 100644 --- a/src/claudesync/providers/base_claude_ai.py +++ b/src/claudesync/providers/base_claude_ai.py @@ -193,3 +193,10 @@ def delete_chat(self, organization_id, conversation_uuids): def _make_request(self, method, endpoint, data=None): raise NotImplementedError("This method should be implemented by subclasses") + + def set_project_prompt_template(self, organization_id, project_id, prompt_template): + """Set the prompt template for a project.""" + data = {"prompt_template": prompt_template} + return self._make_request( + "PUT", f"/organizations/{organization_id}/projects/{project_id}", data + ) diff --git a/src/claudesync/syncmanager.py b/src/claudesync/syncmanager.py index 8c2e262..30de71b 100644 --- a/src/claudesync/syncmanager.py +++ b/src/claudesync/syncmanager.py @@ -1,28 +1,29 @@ import os import time +import json import logging from datetime import datetime, timezone - from tqdm import tqdm - from claudesync.utils import compute_md5_hash logger = logging.getLogger(__name__) +CLAUDESYNC_PATH_COMMENT = "// CLAUDESYNC_PATH: {}\n" + class SyncManager: + """ + Manages the synchronization process between local files and remote Claude.ai projects. + Implements timestamp-based deletion for two-way sync. + """ + def __init__(self, provider, config): """ Initialize the SyncManager with the given provider and configuration. Args: provider (Provider): The provider instance to interact with the remote storage. - config (dict): Configuration dictionary containing sync settings such as: - - active_organization_id (str): ID of the active organization. - - active_project_id (str): ID of the active project. - - local_path (str): Path to the local directory to be synchronized. - - upload_delay (float, optional): Delay between upload operations in seconds. Defaults to 0.5. - - two_way_sync (bool, optional): Flag to enable two-way synchronization. Defaults to False. + config (dict): Configuration dictionary containing sync settings. """ self.provider = provider self.config = config @@ -31,26 +32,56 @@ def __init__(self, provider, config): self.local_path = config.get("local_path") self.upload_delay = config.get("upload_delay", 0.5) self.two_way_sync = config.get("two_way_sync", False) + self.autocrlf = config.get("autocrlf", "true") + self.last_known_times_file = os.path.join( + self.local_path, ".claudesync", "last_known_times.json" + ) + self.prune_remote_files = config.get("prune_remote_files", False) + + def load_last_known_times(self): + """ + Load the last known modification times of files from a JSON file. + + Returns: + dict: A dictionary of file names to their last known modification times. + """ + os.makedirs(os.path.dirname(self.last_known_times_file), exist_ok=True) + if os.path.exists(self.last_known_times_file): + with open(self.last_known_times_file, "r") as f: + return {k: datetime.fromisoformat(v) for k, v in json.load(f).items()} + return {} + + def save_last_known_times(self, last_known_times): + """ + Save the last known modification times of files to a JSON file. + + Args: + last_known_times (dict): A dictionary of file names to their last known modification times. + """ + os.makedirs(os.path.dirname(self.last_known_times_file), exist_ok=True) + with open(self.last_known_times_file, "w") as f: + json.dump({k: v.isoformat() for k, v in last_known_times.items()}, f) def sync(self, local_files, remote_files): """ Main synchronization method that orchestrates the sync process. - This method manages the synchronization between local and remote files. It handles the - synchronization from local to remote, updates local timestamps, performs two-way sync if enabled, - and deletes remote files that are no longer present locally. - Args: local_files (dict): Dictionary of local file names and their corresponding checksums. - remote_files (list): List of dictionaries representing remote files, each containing: - - "file_name" (str): Name of the file. - - "content" (str): Content of the file. - - "created_at" (str): Timestamp when the file was created in ISO format. - - "uuid" (str): Unique identifier of the remote file. + remote_files (list): List of dictionaries representing remote files. """ + last_known_times = self.load_last_known_times() remote_files_to_delete = set(rf["file_name"] for rf in remote_files) synced_files = set() + # Update last known times for existing local files + for local_file in local_files: + file_path = os.path.join(self.local_path, local_file) + last_known_times[local_file] = datetime.fromtimestamp( + os.path.getmtime(file_path), tz=timezone.utc + ) + + # Sync local files to remote with tqdm(total=len(local_files), desc="Local → Remote") as pbar: for local_file, local_checksum in local_files.items(): remote_file = next( @@ -70,16 +101,114 @@ def sync(self, local_files, remote_files): self.update_local_timestamps(remote_files, synced_files) + # Two-way sync: handle remote files if self.two_way_sync: with tqdm(total=len(remote_files), desc="Local ← Remote") as pbar: for remote_file in remote_files: - self.sync_remote_to_local( - remote_file, remote_files_to_delete, synced_files - ) + if remote_file["file_name"] not in local_files: + remote_time = datetime.fromisoformat( + remote_file["created_at"].replace("Z", "+00:00") + ) + last_known_time = last_known_times.get(remote_file["file_name"]) + if last_known_time and remote_time <= last_known_time: + # File was deleted locally, delete from remote + self.delete_remote_file(remote_file) + remote_files_to_delete.remove(remote_file["file_name"]) + else: + # New remote file, sync to local + self.sync_remote_to_local( + remote_file, remote_files_to_delete, synced_files + ) + else: + self.sync_remote_to_local( + remote_file, remote_files_to_delete, synced_files + ) pbar.update(1) - for file_to_delete in list(remote_files_to_delete): - self.delete_remote_files(file_to_delete, remote_files) - pbar.update(1) + + # Delete remaining files that weren't synced or already deleted + if self.prune_remote_files: + for file_to_delete in list(remote_files_to_delete): + remote_file = next( + rf for rf in remote_files if rf["file_name"] == file_to_delete + ) + self.delete_remote_file(remote_file) + + # Save the updated last known times + self.save_last_known_times(last_known_times) + + def normalize_line_endings(self, content, for_local=True): + """ + Normalize line endings based on the autocrlf setting. + + Args: + content (str): The content to normalize. + for_local (bool): True if normalizing for local file, False for remote. + + Returns: + str: The content with normalized line endings. + """ + # First, standardize to LF + content = content.replace("\r\n", "\n").replace("\r", "\n") + + if for_local: + if self.autocrlf == "true" and os.name == "nt": + # Convert to CRLF for Windows when autocrlf is true + content = content.replace("\n", "\r\n") + else: # for remote + if self.autocrlf == "input": + # Keep LF for remote when autocrlf is input + pass + elif self.autocrlf == "true": + # Convert to LF for remote when autocrlf is true + content = content.replace("\r\n", "\n") + + return content + + def _add_path_comment(self, content, file_path): + """ + Add a path comment to the content if it doesn't already exist. + + Args: + content (str): The file content. + file_path (str): The full path of the file. + + Returns: + str: The content with the path comment added. + """ + relative_path = os.path.relpath(file_path, self.local_path) + if not content.startswith("// CLAUDESYNC_PATH:"): + return CLAUDESYNC_PATH_COMMENT.format(relative_path) + content + return content + + def _remove_path_comment(self, content): + """ + Remove the path comment from the content if it exists. + + Args: + content (str): The file content. + + Returns: + str: The content with the path comment removed. + """ + lines = content.split("\n", 1) + if lines and lines[0].startswith("// CLAUDESYNC_PATH:"): + return lines[1] if len(lines) > 1 else "" + return content + + def _extract_path_from_comment(self, content): + """ + Extract the file path from the path comment if it exists. + + Args: + content (str): The file content. + + Returns: + str or None: The extracted file path, or None if no path comment is found. + """ + lines = content.split("\n", 1) + if lines and lines[0].startswith("// CLAUDESYNC_PATH:"): + return lines[0].split(": ", 1)[1].strip() + return None def update_existing_file( self, @@ -90,10 +219,7 @@ def update_existing_file( synced_files, ): """ - Update an existing file on the remote if it has changed locally. - - This method compares the local and remote file checksums. If they differ, it deletes the old remote file - and uploads the new version from the local file. + Update an existing file on the remote if it has changed locally or if the path comment needs to be added. Args: local_file (str): Name of the local file. @@ -102,8 +228,20 @@ def update_existing_file( remote_files_to_delete (set): Set of remote file names to be considered for deletion. synced_files (set): Set of file names that have been synchronized. """ - remote_checksum = compute_md5_hash(remote_file["content"]) - if local_checksum != remote_checksum: + file_path = os.path.join(self.local_path, local_file) + with open(file_path, "r", encoding="utf-8") as file: + local_content = file.read() + + local_content_with_comment = self._add_path_comment(local_content, file_path) + local_content_normalized = self.normalize_line_endings( + local_content_with_comment, for_local=False + ) + local_checksum_with_comment = compute_md5_hash(local_content_normalized) + + remote_content = remote_file["content"] + remote_checksum = compute_md5_hash(remote_content) + + if local_checksum_with_comment != remote_checksum: logger.debug(f"Updating {local_file} on remote...") with tqdm(total=2, desc=f"Updating {local_file}", leave=False) as pbar: self.provider.delete_file( @@ -112,15 +250,11 @@ def update_existing_file( remote_file["uuid"], ) pbar.update(1) - with open( - os.path.join(self.local_path, local_file), "r", encoding="utf-8" - ) as file: - content = file.read() self.provider.upload_file( self.active_organization_id, self.active_project_id, local_file, - content, + local_content_normalized, ) pbar.update(1) time.sleep(self.upload_delay) @@ -131,20 +265,24 @@ def upload_new_file(self, local_file, synced_files): """ Upload a new file to the remote project. - This method reads the content of the local file and uploads it to the remote project. - Args: local_file (str): Name of the local file to be uploaded. synced_files (set): Set of file names that have been synchronized. """ logger.debug(f"Uploading new file {local_file} to remote...") - with open( - os.path.join(self.local_path, local_file), "r", encoding="utf-8" - ) as file: + file_path = os.path.join(self.local_path, local_file) + with open(file_path, "r", encoding="utf-8") as file: content = file.read() + content_with_comment = self._add_path_comment(content, file_path) + normalized_content = self.normalize_line_endings( + content_with_comment, for_local=False + ) with tqdm(total=1, desc=f"Uploading {local_file}", leave=False) as pbar: self.provider.upload_file( - self.active_organization_id, self.active_project_id, local_file, content + self.active_organization_id, + self.active_project_id, + local_file, + normalized_content, ) pbar.update(1) time.sleep(self.upload_delay) @@ -154,9 +292,6 @@ def update_local_timestamps(self, remote_files, synced_files): """ Update local file timestamps to match the remote timestamps. - This method updates the modification timestamps of local files to match their corresponding - remote file timestamps if they have been synchronized. - Args: remote_files (list): List of dictionaries representing remote files. synced_files (set): Set of file names that have been synchronized. @@ -177,38 +312,60 @@ def sync_remote_to_local(self, remote_file, remote_files_to_delete, synced_files """ Synchronize a remote file to the local project (two-way sync). - This method checks if the remote file exists locally. If it does, it updates the file - if the remote version is newer. If it doesn't exist locally, it creates a new local file. - Args: remote_file (dict): Dictionary representing the remote file. remote_files_to_delete (set): Set of remote file names to be considered for deletion. synced_files (set): Set of file names that have been synchronized. """ - local_file_path = os.path.join(self.local_path, remote_file["file_name"]) + content = remote_file["content"] + file_path = self._extract_path_from_comment(content) + if file_path: + local_file_path = os.path.join(self.local_path, file_path) + else: + # If no path comment, use the remote file name + local_file_path = os.path.join(self.local_path, remote_file["file_name"]) + + content_without_comment = self._remove_path_comment(content) + normalized_content = self.normalize_line_endings( + content_without_comment, for_local=True + ) + + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + if os.path.exists(local_file_path): self.update_existing_local_file( - local_file_path, remote_file, remote_files_to_delete, synced_files + local_file_path, + remote_file, + remote_files_to_delete, + synced_files, + normalized_content, ) else: self.create_new_local_file( - local_file_path, remote_file, remote_files_to_delete, synced_files + local_file_path, + remote_file, + remote_files_to_delete, + synced_files, + normalized_content, ) def update_existing_local_file( - self, local_file_path, remote_file, remote_files_to_delete, synced_files + self, + local_file_path, + remote_file, + remote_files_to_delete, + synced_files, + content, ): """ Update an existing local file if the remote version is newer. - This method compares the local file's modification time with the remote file's creation time. - If the remote file is newer, it updates the local file with the remote content. - Args: local_file_path (str): Path to the local file. remote_file (dict): Dictionary representing the remote file. remote_files_to_delete (set): Set of remote file names to be considered for deletion. synced_files (set): Set of file names that have been synchronized. + content (str): Content of the remote file without the path comment. """ local_mtime = datetime.fromtimestamp( os.path.getmtime(local_file_path), tz=timezone.utc @@ -220,25 +377,29 @@ def update_existing_local_file( logger.debug( f"Updating local file {remote_file['file_name']} from remote..." ) - with open(local_file_path, "w", encoding="utf-8") as file: - file.write(remote_file["content"]) + with open(local_file_path, "w", newline="", encoding="utf-8") as file: + file.write(content) synced_files.add(remote_file["file_name"]) if remote_file["file_name"] in remote_files_to_delete: remote_files_to_delete.remove(remote_file["file_name"]) def create_new_local_file( - self, local_file_path, remote_file, remote_files_to_delete, synced_files + self, + local_file_path, + remote_file, + remote_files_to_delete, + synced_files, + content, ): """ Create a new local file from a remote file. - This method creates a new local file with the content from the remote file. - Args: local_file_path (str): Path to the new local file. remote_file (dict): Dictionary representing the remote file. remote_files_to_delete (set): Set of remote file names to be considered for deletion. synced_files (set): Set of file names that have been synchronized. + content (str): Content of the remote file without the path comment. """ logger.debug( f"Creating new local file {remote_file['file_name']} from remote..." @@ -246,30 +407,47 @@ def create_new_local_file( with tqdm( total=1, desc=f"Creating {remote_file['file_name']}", leave=False ) as pbar: - with open(local_file_path, "w", encoding="utf-8") as file: - file.write(remote_file["content"]) + with open(local_file_path, "w", newline="", encoding="utf-8") as file: + file.write(content) pbar.update(1) synced_files.add(remote_file["file_name"]) if remote_file["file_name"] in remote_files_to_delete: remote_files_to_delete.remove(remote_file["file_name"]) - def delete_remote_files(self, file_to_delete, remote_files): + def delete_remote_file(self, remote_file): """ - Delete a file from the remote project that no longer exists locally. - - This method deletes a remote file that is not present in the local directory. + Delete a file from the remote project. Args: - file_to_delete (str): Name of the remote file to be deleted. - remote_files (list): List of dictionaries representing remote files. + remote_file (dict): Dictionary representing the remote file to be deleted. """ - logger.debug(f"Deleting {file_to_delete} from remote...") - remote_file = next( - rf for rf in remote_files if rf["file_name"] == file_to_delete - ) - with tqdm(total=1, desc=f"Deleting {file_to_delete}", leave=False) as pbar: + logger.debug(f"Deleting {remote_file['file_name']} from remote...") + with tqdm( + total=1, desc=f"Deleting {remote_file['file_name']}", leave=False + ) as pbar: self.provider.delete_file( self.active_organization_id, self.active_project_id, remote_file["uuid"] ) pbar.update(1) time.sleep(self.upload_delay) + + # Remove the file from last_known_times + last_known_times = self.load_last_known_times() + last_known_times.pop(remote_file["file_name"], None) + self.save_last_known_times(last_known_times) + + def get_all_local_files(self): + """ + Get a set of all files in the local directory. + + Returns: + set: A set of all file paths relative to the local_path. + """ + all_files = set() + for root, _, files in os.walk(self.local_path): + for file in files: + relative_path = os.path.relpath( + os.path.join(root, file), self.local_path + ) + all_files.add(relative_path) + return all_files diff --git a/src/claudesync/two_way_syncmanager.py b/src/claudesync/two_way_syncmanager.py new file mode 100644 index 0000000..7a6199f --- /dev/null +++ b/src/claudesync/two_way_syncmanager.py @@ -0,0 +1,283 @@ +import json +import os +import time +import logging +from datetime import datetime, timezone +from tqdm import tqdm +from claudesync.utils import compute_md5_hash +from .base_syncmanager import BaseSyncManager + + +class TwoWaySyncManager(BaseSyncManager): + """ + Manages two-way synchronization between local files and remote Claude.ai projects. + """ + + def __init__(self, provider, config): + super().__init__(provider, config) + self.logger = logging.getLogger(__name__) + self._configure_logging() + + def _configure_logging(self): + log_level = self.config.get("log_level", "INFO") + logging.basicConfig(level=getattr(logging, log_level)) + self.logger.setLevel(getattr(logging, log_level)) + + def sync(self, local_files, remote_files): + """ + Main synchronization method for two-way sync. + + Args: + local_files (dict): Dictionary of local file names and their corresponding checksums. + remote_files (list): List of dictionaries representing remote files. + """ + sync_state = self.load_sync_state() + local_changes = self.detect_local_changes(local_files, sync_state) + remote_changes = self.detect_remote_changes(remote_files, sync_state) + + self.apply_changes( + local_changes, remote_changes, local_files, remote_files, sync_state + ) + + if self.prune_remote_files: + remote_file_names = set(rf['file_name'] for rf in remote_files) + files_to_delete = remote_file_names - set(local_files.keys()) + for file_to_delete in files_to_delete: + remote_file = next(rf for rf in remote_files if rf['file_name'] == file_to_delete) + self.delete_remote_file(remote_file['uuid']) + self.logger.debug(f"Deleted remote file: {file_to_delete}") + + self.save_sync_state(sync_state) + + def load_sync_state(self): + """Load the previous sync state from a file.""" + sync_state_file = os.path.join( + self.local_path, ".claudesync", "sync_state.json" + ) + if os.path.exists(sync_state_file): + with open(sync_state_file, "r") as f: + return json.load(f) + return {} + + def save_sync_state(self, sync_state): + """Save the current sync state to a file.""" + sync_state_file = os.path.join( + self.local_path, ".claudesync", "sync_state.json" + ) + os.makedirs(os.path.dirname(sync_state_file), exist_ok=True) + with open(sync_state_file, "w") as f: + json.dump(sync_state, f) + + def detect_local_changes(self, local_files, sync_state): + """Detect changes in local files since the last sync.""" + changes = {} + for file_name, checksum in local_files.items(): + if file_name not in sync_state: + changes[file_name] = "new" + elif sync_state[file_name]["local_checksum"] != checksum: + changes[file_name] = "modified" + + for file_name in sync_state: + if file_name not in local_files: + changes[file_name] = "deleted" + + return changes + + def detect_remote_changes(self, remote_files, sync_state): + """Detect changes in remote files since the last sync.""" + changes = {} + remote_dict = {rf["file_name"]: rf for rf in remote_files} + + for file_name, remote_file in remote_dict.items(): + if file_name not in sync_state: + changes[file_name] = "new" + elif sync_state[file_name]["remote_checksum"] != compute_md5_hash( + remote_file["content"] + ): + changes[file_name] = "modified" + + for file_name in sync_state: + if file_name not in remote_dict: + changes[file_name] = "deleted" + + return changes + + def apply_changes( + self, local_changes, remote_changes, local_files, remote_files, sync_state + ): + """Apply the detected changes to both local and remote systems.""" + remote_dict = {rf["file_name"]: rf for rf in remote_files} + + with tqdm( + total=len(set(local_changes.keys()) | set(remote_changes.keys())), + desc="Syncing files", + ) as pbar: + for file_name in set(local_changes.keys()) | set(remote_changes.keys()): + local_change = local_changes.get(file_name) + remote_change = remote_changes.get(file_name) + + self._handle_file_changes( + file_name, + local_change, + remote_change, + local_files, + remote_dict, + sync_state, + ) + + self._update_sync_state( + file_name, + local_change, + remote_change, + local_files, + remote_dict, + sync_state, + ) + + pbar.update(1) + + def _handle_file_changes( + self, + file_name, + local_change, + remote_change, + local_files, + remote_dict, + sync_state, + ): + if local_change == "deleted" and remote_change == "deleted": + self._handle_both_deleted(file_name, sync_state) + elif local_change == "new" and remote_change is None: + self.upload_file(file_name, local_files[file_name]) + elif remote_change == "new" and local_change is None: + self.download_file(remote_dict[file_name]) + elif local_change == "modified" and remote_change is None: + self.upload_file(file_name, local_files[file_name]) + elif remote_change == "modified" and local_change is None: + self._handle_remote_modified(file_name, remote_dict) + elif local_change == "deleted" and remote_change is None: + self.delete_remote_file(sync_state[file_name]["remote_uuid"]) + elif remote_change == "deleted" and local_change is None: + self.delete_local_file(file_name) + elif local_change and remote_change: + self.resolve_conflict( + file_name, + local_files.get(file_name), + remote_dict.get(file_name), + ) + + def _handle_both_deleted(self, file_name, sync_state): + self.logger.debug( + f"File {file_name} has been deleted from both local and remote. Removing from sync state." + ) + sync_state.pop(file_name, None) + + def _handle_remote_modified(self, file_name, remote_dict): + remote_file = remote_dict.get(file_name) + if remote_file: + self.download_file(remote_file) + else: + self.logger.warning( + f"Remote file {file_name} not found, but marked as modified. Skipping download." + ) + + def _update_sync_state( + self, + file_name, + local_change, + remote_change, + local_files, + remote_dict, + sync_state, + ): + if file_name in sync_state or ( + local_change != "deleted" or remote_change != "deleted" + ): + sync_state[file_name] = { + "local_checksum": local_files.get(file_name, None), + "remote_checksum": ( + compute_md5_hash(remote_dict[file_name]["content"]) + if file_name in remote_dict + else None + ), + "remote_uuid": ( + remote_dict[file_name]["uuid"] if file_name in remote_dict else None + ), + "last_sync": datetime.now(timezone.utc).isoformat(), + } + + def upload_file(self, file_name, checksum): + """Upload a file to the remote project.""" + file_path = os.path.join(self.local_path, file_name) + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + content_with_comment = self._add_path_comment(content, file_path) + normalized_content = self.normalize_line_endings( + content_with_comment, for_local=False + ) + + self.provider.upload_file( + self.active_organization_id, + self.active_project_id, + file_name, + normalized_content, + ) + self.logger.debug(f"Uploaded file: {file_name}") + time.sleep(self.upload_delay) + + def download_file(self, remote_file): + """Download a file from the remote project.""" + content = remote_file["content"] + file_path = self._extract_path_from_comment(content) or remote_file["file_name"] + local_file_path = os.path.join(self.local_path, file_path) + + content_without_comment = self._remove_path_comment(content) + normalized_content = self.normalize_line_endings( + content_without_comment, for_local=True + ) + + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + with open(local_file_path, "w", newline="", encoding="utf-8") as f: + f.write(normalized_content) + + self.logger.debug(f"Downloaded file: {file_path}") + + def delete_remote_file(self, file_uuid): + """Delete a file from the remote project.""" + self.provider.delete_file( + self.active_organization_id, self.active_project_id, file_uuid + ) + self.logger.debug(f"Deleted remote file: {file_uuid}") + time.sleep(self.upload_delay) + + def delete_local_file(self, file_name): + """Delete a file from the local project.""" + file_path = os.path.join(self.local_path, file_name) + if os.path.exists(file_path): + os.remove(file_path) + self.logger.debug(f"Deleted local file: {file_name}") + + def resolve_conflict(self, file_name, local_checksum, remote_file): + """Resolve conflicts when both local and remote files have changed.""" + if not local_checksum: + if remote_file: + self.download_file(remote_file) + else: + self.logger.debug( + f"Remote file {file_name} not found, skipping download" + ) + elif not remote_file: + self.upload_file(file_name, local_checksum) + else: + local_mtime = os.path.getmtime(os.path.join(self.local_path, file_name)) + remote_mtime = datetime.fromisoformat( + remote_file["created_at"].replace("Z", "+00:00") + ).timestamp() + + if local_mtime > remote_mtime: + self.upload_file(file_name, local_checksum) + else: + self.download_file(remote_file) + + self.logger.debug(f"Resolved conflict for file: {file_name}") diff --git a/src/claudesync/utils.py b/src/claudesync/utils.py index 512796d..eced9ca 100644 --- a/src/claudesync/utils.py +++ b/src/claudesync/utils.py @@ -186,7 +186,16 @@ def get_local_files(local_path): gitignore = load_gitignore(local_path) claudeignore = load_claudeignore(local_path) files = {} - exclude_dirs = {".git", ".svn", ".hg", ".bzr", "_darcs", "CVS", "claude_chats"} + exclude_dirs = { + ".git", + ".svn", + ".hg", + ".bzr", + "_darcs", + "CVS", + "claude_chats", + ".claudesync", + } for root, dirs, filenames in os.walk(local_path): dirs[:] = [d for d in dirs if d not in exclude_dirs]