diff --git a/pyproject.toml b/pyproject.toml
index 733216e..07e89f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "claudesync"
-version = "0.5.4"
+version = "0.5.5"
 authors = [
     {name = "Jahziah Wagner", email = "jahziah.wagner+pypi@gmail.com"},
 ]
@@ -25,7 +25,8 @@ dependencies = [
     "pytest-cov>=5.0.0",
     "claudesync>=0.5.4",
     "crontab>=1.0.1",
-    "python-crontab>=3.2.0"
+    "python-crontab>=3.2.0",
+    "Brotli>=1.1.0"
 ]
 keywords = [
     "sync",
diff --git a/requirements.txt b/requirements.txt
index a544172..74ac917 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ tqdm>=4.66.5
 pytest-cov>=5.0.0
 claudesync>=0.5.4
 crontab>=1.0.1
-python-crontab>=3.2.0
\ No newline at end of file
+python-crontab>=3.2.0
+Brotli>=1.1.0
\ No newline at end of file
diff --git a/src/claudesync/compression.py b/src/claudesync/compression.py
new file mode 100644
index 0000000..3cfd068
--- /dev/null
+++ b/src/claudesync/compression.py
@@ -0,0 +1,448 @@
+"""Pack, compress and unpack project files for compressed sync.
+
+Every ``*_compress`` function takes a ``str`` and returns a ``str``; the
+matching ``*_decompress`` function restores the original text from it.
+"""
+
+import base64
+import bz2
+import heapq
+import io
+import itertools
+import json
+import lzma
+import os
+import re
+import zlib
+from collections import Counter
+
+import brotli
+
+_BEGIN_MARKER = "--- BEGIN FILE:"
+_END_MARKER = "--- END FILE:"
+_MARKER_SUFFIX = "---"
+# Tags the varint based LZW payload; ":" never occurs in base64 output, so
+# payloads written by the first (byte-per-code) format stay recognisable.
+_LZW_PREFIX = "lzw2:"
+
+
+def compress_files(local_path, local_files, algorithm):
+    """Pack the given files into one text and compress it.
+
+    Args:
+        local_path: Directory the relative file paths are resolved against.
+        local_files: Mapping of relative file path to checksum; only the
+            paths are used.
+        algorithm: Algorithm name, see :func:`compress_content`.
+
+    Returns:
+        The compressed payload as a string.
+    """
+    packed_content = _pack_files(local_path, local_files)
+    return compress_content(packed_content, algorithm)
+
+
+def decompress_files(local_path, compressed_content, algorithm):
+    """Decompress a payload and write the files it holds below ``local_path``."""
+    decompressed_content = decompress_content(compressed_content, algorithm)
+    _unpack_files(local_path, decompressed_content)
+
+
+def _pack_files(local_path, local_files):
+    """Concatenate the files, wrapping each one in BEGIN/END marker lines."""
+    packed_content = io.StringIO()
+    for file_path in local_files:
+        full_path = os.path.join(local_path, file_path)
+        with open(full_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        packed_content.write(f"{_BEGIN_MARKER} {file_path} {_MARKER_SUFFIX}\n")
+        packed_content.write(content)
+        packed_content.write(f"\n{_END_MARKER} {file_path} {_MARKER_SUFFIX}\n")
+    return packed_content.getvalue()
+
+
+def _marker_path(line, marker):
+    """Return the file path named by a BEGIN/END marker line."""
+    path = line[len(marker):].strip()
+    # Drop the closing "---"; it used to end up in the file name on unpack.
+    if path.endswith(_MARKER_SUFFIX):
+        path = path[: -len(_MARKER_SUFFIX)].rstrip()
+    return path
+
+
+def _unpack_files(local_path, decompressed_content):
+    """Write every file found in packed text below ``local_path``.
+
+    Content is restored exactly as packed. A file whose own lines start with
+    one of the markers cannot be represented by this format.
+    """
+    current_file = None
+    current_lines = []
+
+    # split("\n") rather than splitlines(): the latter also breaks on "\r",
+    # form feeds and Unicode separators, which are file content here.
+    for line in decompressed_content.split("\n"):
+        if line.startswith(_BEGIN_MARKER):
+            if current_file is not None:
+                # Previous file has no END marker; keep what was collected.
+                _write_file(local_path, current_file, "\n".join(current_lines))
+            current_file = _marker_path(line, _BEGIN_MARKER)
+            current_lines = []
+        elif line.startswith(_END_MARKER):
+            if current_file is not None:
+                # Joining (not appending "\n" per line) drops exactly the
+                # newline that packing put in front of the END marker.
+                _write_file(local_path, current_file, "\n".join(current_lines))
+            current_file = None
+            current_lines = []
+        elif current_file is not None:
+            current_lines.append(line)
+
+    if current_file is not None:
+        _write_file(local_path, current_file, "\n".join(current_lines))
+
+
+def _write_file(local_path, file_path, content):
+    """Write ``content`` to ``file_path`` below ``local_path``.
+
+    Raises:
+        ValueError: If ``file_path`` does not name a file inside ``local_path``.
+    """
+    root = os.path.abspath(local_path)
+    full_path = os.path.abspath(os.path.join(root, file_path))
+    # The path comes out of a downloaded payload: never write outside the root.
+    if full_path == root or os.path.commonpath([root, full_path]) != root:
+        raise ValueError(f"Refusing to write {file_path!r} outside {root!r}")
+    os.makedirs(os.path.dirname(full_path), exist_ok=True)
+    with open(full_path, "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def compress_content(content, algorithm):
+    """Compress ``content`` with the named algorithm.
+
+    A name without a registered compressor (for example ``"none"``) returns
+    ``content`` unchanged.
+    """
+    compressors = {
+        "zlib": zlib_compress,
+        "bz2": bz2_compress,
+        "lzma": lzma_compress,
+        "brotli": brotli_compress,
+        "dictionary": dictionary_compress,
+        "rle": rle_compress,
+        "huffman": huffman_compress,
+        "lzw": lzw_compress,
+        "pack": no_compress,
+    }
+    if algorithm in compressors:
+        return compressors[algorithm](content)
+    return content
+
+
+def decompress_content(compressed_content, algorithm):
+    """Reverse :func:`compress_content` for the same ``algorithm`` name."""
+    decompressors = {
+        "zlib": zlib_decompress,
+        "bz2": bz2_decompress,
+        "lzma": lzma_decompress,
+        "brotli": brotli_decompress,
+        "dictionary": dictionary_decompress,
+        "rle": rle_decompress,
+        "huffman": huffman_decompress,
+        "lzw": lzw_decompress,
+        "pack": no_decompress,
+    }
+    if algorithm in decompressors:
+        return decompressors[algorithm](compressed_content)
+    return compressed_content
+
+
+# Pack compression
+def no_compress(text):
+    """Return ``text`` unchanged; files are packed but not compressed."""
+    return text
+
+
+def no_decompress(compressed_text):
+    """Return ``compressed_text`` unchanged."""
+    return compressed_text
+
+
+# Brotli compression
+def brotli_compress(text):
+    """Brotli-compress ``text`` and return the result base64 encoded."""
+    compressed = brotli.compress(text.encode("utf-8"))
+    return base64.b64encode(compressed).decode("ascii")
+
+
+def brotli_decompress(compressed_text):
+    """Reverse :func:`brotli_compress`."""
+    decoded = base64.b64decode(compressed_text.encode("ascii"))
+    return brotli.decompress(decoded).decode("utf-8")
+
+
+# Zlib compression
+def zlib_compress(text):
+    """Zlib-compress ``text`` and return the result base64 encoded."""
+    compressed = zlib.compress(text.encode("utf-8"))
+    return base64.b64encode(compressed).decode("ascii")
+
+
+def zlib_decompress(compressed_text):
+    """Reverse :func:`zlib_compress`."""
+    decoded = base64.b64decode(compressed_text.encode("ascii"))
+    return zlib.decompress(decoded).decode("utf-8")
+
+
+# BZ2 compression
+def bz2_compress(text):
+    """Bzip2-compress ``text`` and return the result base64 encoded."""
+    compressed = bz2.compress(text.encode("utf-8"))
+    return base64.b64encode(compressed).decode("ascii")
+
+
+def bz2_decompress(compressed_text):
+    """Reverse :func:`bz2_compress`."""
+    decoded = base64.b64decode(compressed_text.encode("ascii"))
+    return bz2.decompress(decoded).decode("utf-8")
+
+
+# LZMA compression
+def lzma_compress(text):
+    """LZMA-compress ``text`` and return the result base64 encoded."""
+    compressed = lzma.compress(text.encode("utf-8"))
+    return base64.b64encode(compressed).decode("ascii")
+
+
+def lzma_decompress(compressed_text):
+    """Reverse :func:`lzma_compress`."""
+    decoded = base64.b64decode(compressed_text.encode("ascii"))
+    return lzma.decompress(decoded).decode("utf-8")
+
+
+# Dictionary-based compression
+def dictionary_compress(text):
+    """Replace every token of ``text`` with an index into a token table.
+
+    Whitespace runs are tokens as well, so indentation and line breaks
+    survive the round trip.
+    """
+    dictionary = {}
+    compressed = []
+
+    for token in re.findall(r"\s+|\S+", text):
+        if token not in dictionary:
+            dictionary[token] = str(len(dictionary))
+        compressed.append(dictionary[token])
+
+    return json.dumps(
+        {"version": 2, "dict": dictionary, "compressed": " ".join(compressed)}
+    )
+
+
+def dictionary_decompress(compressed_text):
+    """Reverse :func:`dictionary_compress`."""
+    data = json.loads(compressed_text)
+    dictionary = {v: k for k, v in data["dict"].items()}
+    # Payloads without "version" hold whitespace-split words only; join those
+    # with single spaces as before.
+    separator = "" if data.get("version", 1) >= 2 else " "
+    return separator.join(dictionary[token] for token in data["compressed"].split())
+
+
+# Run-length encoding (RLE)
+def rle_compress(text):
+    """Encode ``text`` as a JSON list of ``[char, run_length]`` pairs."""
+    # groupby yields nothing for "", which used to crash on text[-1].
+    runs = [(char, len(list(group))) for char, group in itertools.groupby(text)]
+    return json.dumps(runs)
+
+
+def rle_decompress(compressed_text):
+    """Reverse :func:`rle_compress`."""
+    compressed = json.loads(compressed_text)
+    return "".join(char * count for char, count in compressed)
+
+
+# Huffman coding
+class HuffmanNode:
+    """Huffman tree node; leaves carry ``char``, inner nodes carry ``None``."""
+
+    def __init__(self, char, freq):
+        self.char = char
+        self.freq = freq
+        self.left = None
+        self.right = None
+
+    def __lt__(self, other):
+        return self.freq < other.freq
+
+
+def huffman_compress(text):
+    """Huffman-encode ``text``.
+
+    Returns:
+        JSON with the code table (``tree``), the number of zero bits appended
+        to fill the last byte (``padding``) and the base64 encoded bits
+        (``data``).
+    """
+    frequencies = Counter(text)
+    heap = [HuffmanNode(char, count) for char, count in frequencies.items()]
+    heapq.heapify(heap)
+
+    while len(heap) > 1:
+        left = heapq.heappop(heap)
+        right = heapq.heappop(heap)
+        merged = HuffmanNode(None, left.freq + right.freq)
+        merged.left = left
+        merged.right = right
+        heapq.heappush(heap, merged)
+
+    codes = {}
+    # Explicit stack instead of recursion; empty text leaves nothing to walk.
+    pending = [(heap[0], "")] if heap else []
+    while pending:
+        node, code = pending.pop()
+        if node.char is not None:
+            # A lone symbol is the root and would get the empty code, which
+            # encodes to nothing at all; give it one bit instead.
+            codes[node.char] = code or "0"
+        else:
+            pending.append((node.right, code + "1"))
+            pending.append((node.left, code + "0"))
+
+    encoded = "".join(codes[char] for char in text)
+    # Pad only up to the next byte boundary (0 when already aligned).
+    padding = -len(encoded) % 8
+    encoded += "0" * padding
+
+    compressed = bytearray(
+        int(encoded[i: i + 8], 2) for i in range(0, len(encoded), 8)
+    )
+
+    return json.dumps(
+        {
+            "tree": codes,
+            "padding": padding,
+            "data": base64.b64encode(compressed).decode("ascii"),
+        }
+    )
+
+
+def huffman_decompress(compressed_text):
+    """Reverse :func:`huffman_compress`."""
+    data = json.loads(compressed_text)
+    tree = {code: char for char, code in data["tree"].items()}
+    padding = data["padding"]
+    compressed = base64.b64decode(data["data"].encode("ascii"))
+
+    binary = "".join(f"{byte:08b}" for byte in compressed)
+    binary = binary[:-padding] if padding else binary
+
+    decoded = []
+    code = ""
+    for bit in binary:
+        code += bit
+        if code in tree:
+            decoded.append(tree[code])
+            code = ""
+
+    return "".join(decoded)
+
+
+# LZW compression
+def _encode_varints(codes):
+    """Serialise non-negative integers as LEB128 variable-length bytes."""
+    out = bytearray()
+    for code in codes:
+        while code >= 0x80:
+            out.append((code & 0x7F) | 0x80)
+            code >>= 7
+        out.append(code)
+    return bytes(out)
+
+
+def _decode_varints(data):
+    """Reverse :func:`_encode_varints`."""
+    codes = []
+    value = 0
+    shift = 0
+    for byte in data:
+        value |= (byte & 0x7F) << shift
+        if byte & 0x80:
+            shift += 7
+        else:
+            codes.append(value)
+            value = 0
+            shift = 0
+    return codes
+
+
+def lzw_compress(text):
+    """LZW-compress the UTF-8 bytes of ``text``.
+
+    Codes are stored as varints: a code is 256 or larger as soon as a byte
+    pair repeats, so one byte per code cannot hold them.
+
+    Returns:
+        ``"lzw2:"`` followed by the base64 encoded varint stream.
+    """
+    dictionary = {bytes([i]): i for i in range(256)}
+    result = []
+    w = b""
+    for byte in text.encode("utf-8"):
+        c = bytes([byte])
+        wc = w + c
+        if wc in dictionary:
+            w = wc
+        else:
+            result.append(dictionary[w])
+            dictionary[wc] = len(dictionary)
+            w = c
+    if w:
+        result.append(dictionary[w])
+    packed = base64.b64encode(_encode_varints(result)).decode("ascii")
+    return _LZW_PREFIX + packed
+
+
+def _lzw_expand(codes, dictionary):
+    """Expand LZW ``codes``, growing ``dictionary`` (code -> sequence) in place."""
+    if not codes:
+        return []
+    if codes[0] not in dictionary:
+        raise ValueError("Bad compressed k: %s" % codes[0])
+    w = dictionary[codes[0]]
+    result = [w]
+    for k in codes[1:]:
+        if k in dictionary:
+            entry = dictionary[k]
+        elif k == len(dictionary):
+            # The code being read is the one this very step defines.
+            entry = w + w[:1]
+        else:
+            raise ValueError("Bad compressed k: %s" % k)
+        result.append(entry)
+        dictionary[len(dictionary)] = w + entry[:1]
+        w = entry
+    return result
+
+
+def lzw_decompress(compressed_text):
+    """Reverse :func:`lzw_compress`.
+
+    Payloads without the ``"lzw2:"`` prefix are read in the original format
+    (one byte per code, characters limited to code points below 256).
+
+    Raises:
+        ValueError: If the payload holds a code that was never defined.
+    """
+    if compressed_text.startswith(_LZW_PREFIX):
+        raw = base64.b64decode(compressed_text[len(_LZW_PREFIX):].encode("ascii"))
+        codes = _decode_varints(raw)
+        entries = _lzw_expand(codes, {i: bytes([i]) for i in range(256)})
+        return b"".join(entries).decode("utf-8")
+
+    raw = base64.b64decode(compressed_text.encode("ascii"))
+    entries = _lzw_expand(list(raw), {i: chr(i) for i in range(256)})
+    return "".join(entries)
diff --git a/src/claudesync/configmanager/base_config_manager.py b/src/claudesync/configmanager/base_config_manager.py
index 0b588a3..bdb6530 100644
--- a/src/claudesync/configmanager/base_config_manager.py
+++ b/src/claudesync/configmanager/base_config_manager.py
@@ -37,9 +37,9 @@ def _get_default_config(self):
             "upload_delay": 0.5,
             "max_file_size": 32 * 1024,
             "two_way_sync": False,
-            "curl_use_file_input": False,
             "prune_remote_files": True,
             "claude_api_url": "https://api.claude.ai/api",
+            "compression_algorithm": "none",
             "submodule_detect_filenames": [
                 "pom.xml",
                 "build.gradle",
@@ -71,17 +71,18 @@ def _get_default_config(self):
                 "production_code": {
                     "description": "Production source code",
                     "patterns": [
-                        "src/**/*.java",
-                        "src/**/*.py",
-                        "src/**/*.js",
-                        "src/**/*.ts",
+                        "**/src/**/*.java",
+                        "**/*.py",
+                        "**/*.js",
+                        "**/*.ts",
+                        "**/*.vue",
                     ],
                 },
                 "test_code": {
                     "description": "Test source code",
                     "patterns": [
-                        "test/**/*.java",
-                        "tests/**/*.py",
+                        "**/test/**/*.java",
+                        "**/tests/**/*.py",
                         "**/test_*.py",
                         "**/*Test.java",
                     ],
@@ -89,14 +90,40 @@ def _get_default_config(self):
                 "build_config": {
                     "description": "Build configuration files",
                     "patterns": [
-                        "pom.xml",
-
"build.gradle", - "package.json", - "setup.py", - "Cargo.toml", - "go.mod", - "pyproject.toml", - "requirements.txt", + "**/pom.xml", + "**/build.gradle", + "**/package.json", + "**/setup.py", + "**/Cargo.toml", + "**/go.mod", + "**/pyproject.toml", + "**/requirements.txt", + "**/*.tf", + "**/*.yaml", + "**/*.yml", + "**/*.properties", + ], + }, + "uberproject_java": { + "description": "Uberproject Java + Javascript", + "patterns": [ + "**/src/**/*.java", + "**/*.py", + "**/*.js", + "**/*.ts", + "**/*.vue", + "**/pom.xml", + "**/build.gradle", + "**/package.json", + "**/setup.py", + "**/Cargo.toml", + "**/go.mod", + "**/pyproject.toml", + "**/requirements.txt", + "**/*.tf", + "**/*.yaml", + "**/*.yml", + "**/*.properties", ], }, }, diff --git a/src/claudesync/syncmanager.py b/src/claudesync/syncmanager.py index 0d67881..d4bf4c5 100644 --- a/src/claudesync/syncmanager.py +++ b/src/claudesync/syncmanager.py @@ -3,12 +3,14 @@ import time import logging from datetime import datetime, timezone +import io import click from tqdm import tqdm from claudesync.utils import compute_md5_hash from claudesync.exceptions import ProviderError +from .compression import compress_content, decompress_content logger = logging.getLogger(__name__) @@ -28,7 +30,7 @@ def wrapper(*args, **kwargs): f"Received 403 error. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})" ) else: - self.logger.warning( + logger.warning( f"Received 403 error. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})" ) time.sleep(delay) @@ -41,18 +43,7 @@ def wrapper(*args, **kwargs): class SyncManager: - """ - Manages the synchronization process between local and remote files. - """ - def __init__(self, provider, config, local_path): - """ - Initialize the SyncManager with the given provider and configuration. - - Args: - provider (Provider): The provider instance to interact with the remote storage. - config (dict): Configuration dictionary containing sync settings. 
- """ self.provider = provider self.config = config self.active_organization_id = config.get("active_organization_id") @@ -60,17 +51,17 @@ def __init__(self, provider, config, local_path): self.local_path = local_path self.upload_delay = config.get("upload_delay", 0.5) self.two_way_sync = config.get("two_way_sync", False) - self.max_retries = 3 # Maximum number of retries for 403 errors - self.retry_delay = 1 # Delay between retries in seconds + self.max_retries = 3 + self.retry_delay = 1 + self.compression_algorithm = config.get("compression_algorithm", "none") def sync(self, local_files, remote_files): - """ - Main synchronization method that orchestrates the sync process. + if self.compression_algorithm == "none": + self._sync_without_compression(local_files, remote_files) + else: + self._sync_with_compression(local_files, remote_files) - Args: - local_files (dict): Dictionary of local file names and their corresponding checksums. - remote_files (list): List of dictionaries representing remote files. 
- """ + def _sync_without_compression(self, local_files, remote_files): remote_files_to_delete = set(rf["file_name"] for rf in remote_files) synced_files = set() @@ -103,6 +94,101 @@ def sync(self, local_files, remote_files): self.prune_remote_files(remote_files, remote_files_to_delete) + def _sync_with_compression(self, local_files, remote_files): + packed_content = self._pack_files(local_files) + compressed_content = compress_content( + packed_content, self.compression_algorithm + ) + + remote_file_name = ( + f"claudesync_packed_{datetime.now().strftime('%Y%m%d%H%M%S')}.dat" + ) + self._upload_compressed_file(compressed_content, remote_file_name) + + if self.two_way_sync: + remote_compressed_content = self._download_compressed_file() + if remote_compressed_content: + remote_packed_content = decompress_content( + remote_compressed_content, self.compression_algorithm + ) + self._unpack_files(remote_packed_content) + + self._cleanup_old_remote_files(remote_files) + + def _pack_files(self, local_files): + packed_content = io.StringIO() + for file_path, file_hash in local_files.items(): + full_path = os.path.join(self.local_path, file_path) + with open(full_path, "r", encoding="utf-8") as f: + content = f.read() + packed_content.write(f"--- BEGIN FILE: {file_path} ---\n") + packed_content.write(content) + packed_content.write(f"\n--- END FILE: {file_path} ---\n") + return packed_content.getvalue() + + @retry_on_403() + def _upload_compressed_file(self, compressed_content, file_name): + logger.debug(f"Uploading compressed file {file_name} to remote...") + self.provider.upload_file( + self.active_organization_id, + self.active_project_id, + file_name, + compressed_content, + ) + time.sleep(self.upload_delay) + + @retry_on_403() + def _download_compressed_file(self): + logger.debug("Downloading latest compressed file from remote...") + remote_files = self.provider.list_files( + self.active_organization_id, self.active_project_id + ) + compressed_files = [ + rf + for rf 
in remote_files + if rf["file_name"].startswith("claudesync_packed_") + ] + if compressed_files: + latest_file = max(compressed_files, key=lambda x: x["file_name"]) + return latest_file["content"] + return None + + def _unpack_files(self, packed_content): + current_file = None + current_content = io.StringIO() + + for line in packed_content.splitlines(): + if line.startswith("--- BEGIN FILE:"): + if current_file: + self._write_file(current_file, current_content.getvalue()) + current_content = io.StringIO() + current_file = line.split("--- BEGIN FILE:")[1].strip() + elif line.startswith("--- END FILE:"): + if current_file: + self._write_file(current_file, current_content.getvalue()) + current_file = None + current_content = io.StringIO() + else: + current_content.write(line + "\n") + + if current_file: + self._write_file(current_file, current_content.getvalue()) + + def _write_file(self, file_path, content): + full_path = os.path.join(self.local_path, file_path) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "w", encoding="utf-8") as f: + f.write(content) + + def _cleanup_old_remote_files(self, remote_files): + for remote_file in remote_files: + if remote_file["file_name"].startswith("claudesync_packed_"): + self.provider.delete_file( + self.active_organization_id, + self.active_project_id, + remote_file["uuid"], + ) + @retry_on_403() def update_existing_file( self, @@ -112,17 +198,8 @@ def update_existing_file( remote_files_to_delete, synced_files, ): - """ - Update an existing file on the remote if it has changed locally. - - Args: - local_file (str): Name of the local file. - local_checksum (str): MD5 checksum of the local file content. - remote_file (dict): Dictionary representing the remote file. - remote_files_to_delete (set): Set of remote file names to be considered for deletion. - synced_files (set): Set of file names that have been synchronized. 
- """ - remote_checksum = compute_md5_hash(remote_file["content"]) + remote_content = remote_file["content"] + remote_checksum = compute_md5_hash(remote_content) if local_checksum != remote_checksum: logger.debug(f"Updating {local_file} on remote...") with tqdm(total=2, desc=f"Updating {local_file}", leave=False) as pbar: @@ -149,13 +226,6 @@ def update_existing_file( @retry_on_403() def upload_new_file(self, local_file, synced_files): - """ - Upload a new file to the remote project. - - Args: - local_file (str): Name of the local file to be uploaded. - synced_files (set): Set of file names that have been synchronized. - """ logger.debug(f"Uploading new file {local_file} to remote...") with open( os.path.join(self.local_path, local_file), "r", encoding="utf-8" @@ -170,13 +240,6 @@ def upload_new_file(self, local_file, synced_files): synced_files.add(local_file) def update_local_timestamps(self, remote_files, synced_files): - """ - Update local file timestamps to match the remote timestamps. - - Args: - remote_files (list): List of dictionaries representing remote files. - synced_files (set): Set of file names that have been synchronized. - """ for remote_file in remote_files: if remote_file["file_name"] in synced_files: local_file_path = os.path.join( @@ -190,14 +253,6 @@ def update_local_timestamps(self, remote_files, synced_files): logger.debug(f"Updated timestamp on local file {local_file_path}") def sync_remote_to_local(self, remote_file, remote_files_to_delete, synced_files): - """ - Synchronize a remote file to the local project (two-way sync). - - Args: - remote_file (dict): Dictionary representing the remote file. - remote_files_to_delete (set): Set of remote file names to be considered for deletion. - synced_files (set): Set of file names that have been synchronized. 
- """ local_file_path = os.path.join(self.local_path, remote_file["file_name"]) if os.path.exists(local_file_path): self.update_existing_local_file( @@ -211,15 +266,6 @@ def sync_remote_to_local(self, remote_file, remote_files_to_delete, synced_files def update_existing_local_file( self, local_file_path, remote_file, remote_files_to_delete, synced_files ): - """ - Update an existing local file if the remote version is newer. - - Args: - local_file_path (str): Path to the local file. - remote_file (dict): Dictionary representing the remote file. - remote_files_to_delete (set): Set of remote file names to be considered for deletion. - synced_files (set): Set of file names that have been synchronized. - """ local_mtime = datetime.fromtimestamp( os.path.getmtime(local_file_path), tz=timezone.utc ) @@ -230,8 +276,9 @@ def update_existing_local_file( logger.debug( f"Updating local file {remote_file['file_name']} from remote..." ) + content = remote_file["content"] with open(local_file_path, "w", encoding="utf-8") as file: - file.write(remote_file["content"]) + file.write(content) synced_files.add(remote_file["file_name"]) if remote_file["file_name"] in remote_files_to_delete: remote_files_to_delete.remove(remote_file["file_name"]) @@ -239,36 +286,21 @@ def update_existing_local_file( def create_new_local_file( self, local_file_path, remote_file, remote_files_to_delete, synced_files ): - """ - Create a new local file from a remote file. - - Args: - local_file_path (str): Path to the new local file. - remote_file (dict): Dictionary representing the remote file. - remote_files_to_delete (set): Set of remote file names to be considered for deletion. - synced_files (set): Set of file names that have been synchronized. - """ logger.debug( f"Creating new local file {remote_file['file_name']} from remote..." 
) + content = remote_file["content"] with tqdm( total=1, desc=f"Creating {remote_file['file_name']}", leave=False ) as pbar: with open(local_file_path, "w", encoding="utf-8") as file: - file.write(remote_file["content"]) + file.write(content) pbar.update(1) synced_files.add(remote_file["file_name"]) if remote_file["file_name"] in remote_files_to_delete: remote_files_to_delete.remove(remote_file["file_name"]) def prune_remote_files(self, remote_files, remote_files_to_delete): - """ - Delete remote files that no longer exist locally. - - Args: - remote_files (list): List of dictionaries representing remote files. - remote_files_to_delete (set): Set of remote file names to be deleted. - """ if not self.config.get("prune_remote_files"): click.echo("Remote pruning is not enabled.") return @@ -278,13 +310,6 @@ def prune_remote_files(self, remote_files, remote_files_to_delete): @retry_on_403() def delete_remote_files(self, file_to_delete, remote_files): - """ - Delete a file from the remote project that no longer exists locally. - - Args: - file_to_delete (str): Name of the remote file to be deleted. - remote_files (list): List of dictionaries representing remote files. - """ logger.debug(f"Deleting {file_to_delete} from remote...") remote_file = next( rf for rf in remote_files if rf["file_name"] == file_to_delete