Commit 40e7ec6

add paths to csv summary
jkshenton committed Oct 30, 2024
1 parent 8380973 commit 40e7ec6
Showing 1 changed file with 41 additions and 9 deletions.
50 changes: 41 additions & 9 deletions .github/scripts/collect_traffic.py
@@ -1,4 +1,3 @@
# .github/scripts/collect_traffic.py
import argparse
import os
import json
@@ -7,14 +6,8 @@
from datetime import datetime, timedelta
from pathlib import Path
import logging

import time


class GitHubTrafficCollector:
    def __init__(self, repo):
        self.token = os.environ['GH_TOKEN']
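The constructor pulls the API token from the GH_TOKEN environment variable. A minimal sketch of how the rest of __init__ presumably wires it into request headers (nothing beyond self.token is shown in this excerpt, so the attribute names below are assumptions):

        # Hypothetical continuation of __init__; the header scheme follows
        # the public GitHub REST documentation.
        self.repo = repo
        self.headers = {
            'Authorization': f'Bearer {self.token}',
            'Accept': 'application/vnd.github+json',
        }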
@@ -73,14 +66,18 @@ def collect_metrics(self):
elif metric_name == "referrers":
referrer_metrics = self._process_referrers(data)
daily_data.update(referrer_metrics)
elif metric_name == "paths":
path_metrics = self._process_paths(data)
daily_data.update(path_metrics)
except Exception as e:
logging.error(f"Error collecting {metric_name} for {self.repo}: {e}")
if metric_name in ["views", "clones"]:
daily_data[f"{metric_name}_count"] = 0
daily_data[f"{metric_name}_uniques"] = 0
elif metric_name == "referrers":
daily_data.update(self._process_referrers([]))

elif metric_name == "paths":
daily_data.update(self._process_paths([]))

self._update_summary(daily_data)
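For reference, the traffic API's popular-content endpoints return a JSON array whose entries carry count and uniques fields; that list is what data holds when metric_name is "paths" or "referrers". A hand-made illustration (values invented):

    # Hypothetical shape of data for the "paths" metric, per the GitHub REST
    # traffic API (GET /repos/{owner}/{repo}/traffic/popular/paths):
    data = [
        {"path": "/README.md", "title": "README", "count": 42, "uniques": 17},
        {"path": "/docs/guide.md", "title": "Guide", "count": 30, "uniques": 12},
    ]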

@@ -106,7 +103,7 @@ def _update_summary(self, daily_data):
        df = pd.concat([df, new_row], ignore_index=True)
        df.to_csv(summary_file, index=False)
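These two lines end a read-append-write cycle. A minimal standalone sketch of that pattern, assuming summary_file is a pathlib.Path and daily_data is the flat dict built above (names outside this excerpt are assumptions):

    import pandas as pd

    def update_summary(summary_file, daily_data):
        # Append one row per run; create the file on first use.
        new_row = pd.DataFrame([daily_data])
        if summary_file.exists():
            df = pd.read_csv(summary_file)
            df = pd.concat([df, new_row], ignore_index=True)
        else:
            df = new_row
        df.to_csv(summary_file, index=False)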


    def _process_referrers(self, referrers_data):
        """Process referrers data to extract summary metrics."""
        if not referrers_data:
@@ -132,6 +129,41 @@ def _process_referrers(self, referrers_data):
            'total_referrer_uniques': sum(r.get('uniques', 0) for r in referrers_data),
            'distinct_referrers': len(referrers_data)
        }

    def _process_paths(self, paths_data):
        """Process paths data to extract summary metrics."""
        if not paths_data:
            return {
                'top_path': 'none',
                'top_path_count': 0,
                'top_path_uniques': 0,
                'total_path_count': 0,
                'total_path_uniques': 0,
                'distinct_paths': 0,
                'readme_views': 0,
                'readme_uniques': 0
            }

        # Sort paths by count, breaking ties on unique visitors
        sorted_paths = sorted(paths_data, key=lambda x: (x.get('count', 0), x.get('uniques', 0)), reverse=True)
        top_path = sorted_paths[0] if sorted_paths else {'path': 'none', 'count': 0, 'uniques': 0}

        # Calculate README metrics (case-insensitive, so both /README.md and /readme.md match)
        readme_paths = [p for p in paths_data if p.get('path', '').lower() == '/readme.md']
        readme_stats = readme_paths[0] if readme_paths else {'count': 0, 'uniques': 0}

        return {
            'top_path': top_path.get('path', 'none'),
            'top_path_count': top_path.get('count', 0),
            'top_path_uniques': top_path.get('uniques', 0),
            'total_path_count': sum(p.get('count', 0) for p in paths_data),
            'total_path_uniques': sum(p.get('uniques', 0) for p in paths_data),
            'distinct_paths': len(paths_data),
            'readme_views': readme_stats.get('count', 0),
            'readme_uniques': readme_stats.get('uniques', 0)
        }
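A quick sanity check of what the new method produces, using a two-entry payload (numbers invented; collector stands for any GitHubTrafficCollector instance):

    sample = [
        {"path": "/README.md", "title": "README", "count": 42, "uniques": 17},
        {"path": "/docs/guide.md", "title": "Guide", "count": 30, "uniques": 12},
    ]
    # collector._process_paths(sample) returns:
    # {'top_path': '/README.md', 'top_path_count': 42, 'top_path_uniques': 17,
    #  'total_path_count': 72, 'total_path_uniques': 29, 'distinct_paths': 2,
    #  'readme_views': 42, 'readme_uniques': 17}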

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Collect GitHub traffic metrics.')
