forked from josephpconley/awesome-scala
-
Notifications
You must be signed in to change notification settings - Fork 0
/
metadata.py
executable file
·195 lines (171 loc) · 6.25 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# by Erik Osheim
#
# Reads README.md, and writes a README.md.new. If the format of
# README.md changes, this script may need modifications.
#
# Currently it rewrites each section, doing the following:
# 1. alphabetizing
# 2. querying GitHub for stars and days since active
# 3. formatting the link title to show this info
# 4. bolding projects with lots of stars
#
# Once README.md has the stars/days info in the links, the
# repo_regex will need slight modification.
#
# In order to use GH authentication, create a file in this directory
# called .access-token, whose contents are: "$user:$token" where $user
# is your github username, and $token is a Personal Access Token.
import base64
import datetime
import json
import os.path
import random
import re
import shutil
import sys
import urllib2
# Regexes used when "parsing" README.md. Each one is matched against a
# complete line, including its trailing newline.

# A blank line (optionally containing only spaces).
empty_regex = re.compile(r"^ *\n$")

# A "## Title" section heading; group 1 is the title.
section_regex = re.compile(r"^## (.+)\n$")

# A "* [name](link) - description" bullet. The groups are, in order:
# name, stars, days, link, description. The surrounding "**" bold markers,
# the " ★ stars ⧗ days" suffix inside the link title, and the trailing
# description are all optional, so stars/days/description may be None.
repo_regex = re.compile(r"^\* (?:\*\*)?\[?([^*★]+[^ ★])(?: ★ ([^ ]+) ⧗ ([^ *]+))?\]\((.+?)\)(?:\*\*)?(?: (?:-|—|–) (.+))?\n$")

# A top-level "# Title" heading.
end_regex = re.compile(r"^# .+\n$")

# A link to the root of a GitHub repository; groups are (owner, name).
# The dots are escaped so only the real host matches, and neither group may
# contain "/" so deeper links (e.g. ".../repo/tree/master") are not mistaken
# for a repository and turned into a bogus API query.
github_regex = re.compile(r"^https://github\.com/([^/]+)/([^/]+?)/?$")

# some paths
readme_path = 'README.md'
temp_path = 'README.md.new'

# these will be updated if .access-token exists.
user = None
token = None

# use fake to avoid hitting github API
fake = True

# whether to query all projects, or just those lacking scores/days.
full_update = False

# right now, as a naive UTC timestamp. GitHub reports "updated_at" in UTC
# (the trailing "Z"), so local time here would skew the computed day count.
now = datetime.datetime.utcnow()
def query(owner, name):
    """Ask GitHub for a project's star count and days since last activity.

    In fake mode (module-level ``fake`` is true) no request is made and a
    random ``(stars, days)`` pair is returned instead.

    Otherwise the GitHub API is queried for ``owner/name``. If the
    module-level ``user`` and ``token`` are both set, they are sent as HTTP
    Basic credentials.

    Returns a ``(stars, days)`` tuple of ints, or ``(None, None)`` if the
    request failed or the response could not be interpreted. ``days`` is
    measured from the module-level ``now`` and is never negative.
    """
    if fake:
        print(' {0}/{1}: ok'.format(owner, name))
        return (random.randint(1, 1000), random.randint(1, 300))

    req = urllib2.Request('https://api.github.com/repos/{0}/{1}'.format(owner, name))
    if user is not None and token is not None:
        # b64encode never inserts newlines, so no post-processing is needed.
        b64 = base64.b64encode('{0}:{1}'.format(user, token))
        req.add_header("Authorization", "Basic {0}".format(b64))

    try:
        u = urllib2.urlopen(req)
        try:
            j = json.load(u)
        finally:
            # Always release the connection, even if the body is not JSON.
            u.close()
        t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
        days = max(int((now - t).days), 0)
        stars = int(j['stargazers_count'])
    except (urllib2.URLError, ValueError, KeyError):
        # URLError covers HTTPError (its subclass) as well as network-level
        # failures; ValueError/KeyError cover a malformed or unexpected
        # payload. One bad project must not abort the whole run.
        print(' {0}/{1}: FAILED'.format(owner, name))
        return (None, None)

    print(' {0}/{1}: ok'.format(owner, name))
    return (stars, days)
def output_repo(outf, name, stars, days, link, rdesc, min_popular_stars=500):
    """Write one project bullet line to ``outf``.

    Parameters:
      outf   -- writable file-like object.
      name   -- project name shown as the link text.
      stars  -- star count (int or numeric string), or None if unknown.
      days   -- days since last activity, or None if unknown.
      link   -- URL the bullet links to.
      rdesc  -- project description, or None/empty if the entry has none.
      min_popular_stars -- projects with at least this many stars are
                written in bold.

    The link title is "name ★ stars ⧗ days" when both figures are known,
    and just the name otherwise. The " - description" suffix is omitted
    when there is no description, so the line never contains "None".
    """
    if stars is None or days is None:
        title = name
    else:
        title = '%s ★ %s ⧗ %s' % (name, stars, days)

    entry = '[{0}]({1})'.format(title, link)
    if stars is not None and int(stars) >= min_popular_stars:
        entry = '**{0}**'.format(entry)
    if rdesc:
        entry = '{0} - {1}'.format(entry, rdesc)
    outf.write('* {0}\n'.format(entry))
def flush_section(outf, section, sdesc, repos):
    """Write one README section (heading, description, sorted entries).

    Parameters:
      outf    -- writable file-like object.
      section -- the section heading line, written as-is.
      sdesc   -- optional section description line, or None.
      repos   -- list of (name, stars, days, link, rdesc) tuples; it is
                 sorted in place, case-insensitively by name.

    An entry is looked up via query() when the module-level ``full_update``
    is true, or when it lacks stars or days. Only links that match
    ``github_regex`` are looked up. If the lookup fails, the figures
    already present on the entry are kept rather than discarded, so a
    transient API failure does not erase known data.
    """
    print(' ' + section.strip())
    outf.write(section)
    outf.write('\n')
    if sdesc:
        outf.write(sdesc)
        outf.write('\n')

    repos.sort(key=lambda t: t[0].lower())
    for name, stars, days, link, rdesc in repos:
        if full_update or stars is None or days is None:
            m = github_regex.match(link)
            if m:
                new_stars, new_days = query(m.group(1), m.group(2))
                # query() signals failure with (None, None); keep whatever
                # the README already had in that case.
                if new_stars is not None and new_days is not None:
                    stars, days = new_stars, new_days
            else:
                print(' {0}: not a repo'.format(link))
        output_repo(outf, name, stars, days, link, rdesc)
    outf.write('\n')
def run():
    """Rewrite README.md with sorted, annotated project entries.

    Steps:
      1. Load GitHub credentials from ``.access-token`` ("user:token") into
         the module-level ``user`` and ``token``, if that file exists.
      2. Read ``readme_path`` and write the result to ``temp_path``.
         Lines before the first "## " heading and from the first "# "
         heading after it onwards are copied unchanged; the sections in
         between are re-emitted through flush_section().
      3. Move ``temp_path`` over ``readme_path``.

    Raises Exception("cannot parse ...") if a line inside a section is
    neither blank, a heading, a project bullet, nor the first free-text
    line (the section description). In that case README.md is left
    untouched.
    """
    global user, token

    if full_update:
        print('querying for all entries')
    else:
        print('querying for new entries only')

    if fake:
        print('running in fake mode -- no GH queries will be made')

    if os.path.exists('.access-token'):
        with open('.access-token') as tokf:
            # Split on the first colon only, in case the token contains one.
            user, token = tokf.read().strip().split(':', 1)
        # Deliberately do not echo the token: it is a secret and stdout
        # often ends up in logs or shared terminals.
        print('using Personal Access Token for {0}'.format(user))
    else:
        print('no Personal Access Token found in .access-token')

    with open(readme_path, 'r') as inf:
        lines = list(inf)
    print('read {0}'.format(readme_path))

    started = False
    finished = False
    section = None
    sdesc = None
    repos = []
    total_repos = 0

    print('writing {0}'.format(temp_path))
    with open(temp_path, 'w') as outf:
        for line in lines:
            if finished:
                # Past the project sections: copy verbatim.
                outf.write(line)
            elif started:
                if end_regex.match(line):
                    total_repos += len(repos)
                    flush_section(outf, section, sdesc, repos)
                    outf.write(line)
                    finished = True
                elif empty_regex.match(line):
                    # Blank lines are regenerated by flush_section().
                    continue
                elif section_regex.match(line):
                    total_repos += len(repos)
                    flush_section(outf, section, sdesc, repos)
                    section = line
                    sdesc = None
                    repos = []
                else:
                    m = repo_regex.match(line)
                    if m:
                        # (name, stars, days, link, rdesc)
                        repos.append(m.groups())
                    elif sdesc is None:
                        sdesc = line
                    else:
                        raise Exception("cannot parse {0}".format(line))
            else:
                if section_regex.match(line):
                    section = line
                    started = True
                else:
                    outf.write(line)

        # If the file ended while still inside a section (no closing "# "
        # heading), emit that section too instead of silently dropping it.
        if started and not finished:
            total_repos += len(repos)
            flush_section(outf, section, sdesc, repos)

    print('wrote {0} repos to {1}'.format(total_repos, temp_path))
    print('moving {0} to {1}'.format(temp_path, readme_path))
    shutil.move(temp_path, readme_path)
if __name__ == "__main__":
    # Script entry point: read the command-line flags, store them in the
    # module-level `fake` and `full_update` settings, then do the rewrite.
    from optparse import OptionParser

    cli = OptionParser()
    cli.add_option(
        "-f", "--fake", dest="fake", action="store_true", default=False,
        help="don't query github, use fake data")
    cli.add_option(
        "-u", "--update", dest="update", action="store_true", default=False,
        help="update all entries to newest data")

    # Positional arguments are accepted but ignored.
    options, _ = cli.parse_args()
    fake, full_update = options.fake, options.update

    run()