Skip to content

Commit

Permalink
MIME types too unreliable for use
Browse files Browse the repository at this point in the history
  • Loading branch information
jahwag authored Jul 20, 2024
1 parent 21d8ae2 commit 22f5813
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 58 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "claudesync"
version = "0.3.1"
version = "0.3.2"
authors = [
{name = "Jahziah Wagner", email = "[email protected]"},
]
Expand Down
2 changes: 0 additions & 2 deletions src/claudesync/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,3 @@ def max_filesize(config, size):
return
config.set("max_file_size", size)
click.echo(f"Maximum file size set to {size} bytes.")


10 changes: 6 additions & 4 deletions src/claudesync/cli/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def sync(config):
local_files = get_local_files(local_path)

# Track remote files to delete
remote_files_to_delete = set(rf['file_name'] for rf in remote_files)
remote_files_to_delete = set(rf["file_name"] for rf in remote_files)

for local_file, local_checksum in local_files.items():
remote_file = next(
Expand All @@ -69,7 +69,7 @@ def sync(config):
active_organization_id, active_project_id, remote_file["uuid"]
)
with open(
os.path.join(local_path, local_file), "r", encoding="utf-8"
os.path.join(local_path, local_file), "r", encoding="utf-8"
) as file:
content = file.read()
provider.upload_file(
Expand All @@ -80,7 +80,7 @@ def sync(config):
else:
click.echo(f"Uploading new file {local_file} to remote...")
with open(
os.path.join(local_path, local_file), "r", encoding="utf-8"
os.path.join(local_path, local_file), "r", encoding="utf-8"
) as file:
content = file.read()
provider.upload_file(
Expand All @@ -91,7 +91,9 @@ def sync(config):
# Delete remote files that no longer exist locally
for file_to_delete in remote_files_to_delete:
click.echo(f"Deleting {file_to_delete} from remote...")
remote_file = next(rf for rf in remote_files if rf["file_name"] == file_to_delete)
remote_file = next(
rf for rf in remote_files if rf["file_name"] == file_to_delete
)
provider.delete_file(
active_organization_id, active_project_id, remote_file["uuid"]
)
Expand Down
4 changes: 2 additions & 2 deletions src/claudesync/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def _load_config(self):
return {
"log_level": "INFO",
"upload_delay": 0.5,
"max_file_size": 32 * 1024 # Default 32 KB
"max_file_size": 32 * 1024, # Default 32 KB
}
with open(self.config_file, "r") as f:
config = json.load(f)
Expand All @@ -35,4 +35,4 @@ def get(self, key, default=None):

def set(self, key, value):
self.config[key] = value
self._save_config()
self._save_config()
108 changes: 61 additions & 47 deletions src/claudesync/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import hashlib
import mimetypes
from functools import wraps

import click
Expand All @@ -15,64 +14,79 @@

config_manager = ConfigManager()


def calculate_checksum(content):
normalized_content = content.replace("\r\n", "\n").replace("\r", "\n").strip()
return hashlib.md5(normalized_content.encode("utf-8")).hexdigest()


def load_gitignore(base_path):
patterns = []
current_dir = base_path
while True:
gitignore_path = os.path.join(current_dir, ".gitignore")
if os.path.exists(gitignore_path):
with open(gitignore_path, "r") as f:
patterns.extend(f.read().splitlines())

if os.path.exists(os.path.join(current_dir, ".git")):
break # Stop if we've reached the root of the Git repository

parent_dir = os.path.dirname(current_dir)
if parent_dir == current_dir or parent_dir == base_path:
break # Stop if we've reached the filesystem root or the base watched directory
current_dir = parent_dir

return pathspec.PathSpec.from_lines("gitwildmatch", patterns) if patterns else None

def should_ignore(gitignore, local_path):
# Check file type
mime_type, _ = mimetypes.guess_type(local_path)
if mime_type and not mime_type.startswith("text/"):
return True
# Check if .git dir
if ".git" in local_path.split(os.sep):
return True
# Check if temporary editor file
if local_path.endswith("~"):
return True
# Check if too big
max_file_size = config_manager.get("max_file_size", 32 * 1024) # Default to 32 KB if not set
if os.path.getsize(local_path) > max_file_size:
return True
# Check .gitignore
return gitignore.match_file(local_path) if gitignore else False
gitignore_path = os.path.join(base_path, ".gitignore")
if os.path.exists(gitignore_path):
with open(gitignore_path, "r") as f:
return pathspec.PathSpec.from_lines("gitwildmatch", f)
return None


def is_text_file(file_path, sample_size=8192):
try:
with open(file_path, "rb") as file:
return b"\x00" not in file.read(sample_size)
except IOError:
return False


def calculate_checksum(content):
return hashlib.md5(content.encode("utf-8")).hexdigest()


def get_local_files(local_path):
gitignore = load_gitignore(local_path)
files = {}
for root, _, filenames in os.walk(local_path):

# List of directories to exclude
exclude_dirs = {".git", ".svn", ".hg", ".bzr", "_darcs", "CVS"}

for root, dirs, filenames in os.walk(local_path):
# Remove excluded directories
dirs[:] = [d for d in dirs if d not in exclude_dirs]

rel_root = os.path.relpath(root, local_path)
if rel_root == ".":
rel_root = ""

for filename in filenames:
file_path = os.path.join(root, filename)
if not should_ignore(gitignore, file_path):
rel_path = os.path.relpath(file_path, local_path)
try:
with open(file_path, "r", encoding="utf-8") as file:
content = file.read()
files[rel_path] = calculate_checksum(content)
except Exception as e:
logger.error(f"Error reading file {file_path}: {str(e)}")
continue
rel_path = os.path.join(rel_root, filename)
full_path = os.path.join(root, filename)

# Skip files larger than the configured max_file_size (default 32 KB)
max_file_size = config_manager.get("max_file_size", 32 * 1024)
if os.path.getsize(full_path) > max_file_size:
continue

# Skip temporary editor files
if filename.endswith("~"):
continue

# Use gitignore rules if available
if gitignore and gitignore.match_file(rel_path):
continue

# Check if it's a text file
if not is_text_file(full_path):
continue

try:
with open(full_path, "r", encoding="utf-8") as file:
content = file.read()
files[rel_path] = calculate_checksum(content)
except UnicodeDecodeError:
# If UTF-8 decoding fails, it's likely not a text file we can handle
logger.debug(f"Unable to read {full_path} as UTF-8 text. Skipping.")
continue
except Exception as e:
logger.error(f"Error reading file {full_path}: {str(e)}")

return files


Expand Down
6 changes: 4 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ def test_get_local_files(self):
f.write("Content of file3")

# Create VCS metadata directories (.git, .svn, etc.), each holding a .gitignore file
with open(os.path.join(tmpdir, ".git"), "w") as f:
f.write("*.log\n")
for vcs in {".git", ".svn", ".hg", ".bzr", "_darcs", "CVS"}:
os.makedirs(os.path.join(tmpdir, vcs), exist_ok=True)
with open(os.path.join(tmpdir, vcs, ".gitignore"), "w") as f:
f.write("*.log\n")

# Create a test~ file
with open(os.path.join(tmpdir, "test~"), "w") as f:
Expand Down

0 comments on commit 22f5813

Please sign in to comment.