-
Notifications
You must be signed in to change notification settings - Fork 0
/
womeninreddrafts.py
70 lines (60 loc) · 2.39 KB
/
womeninreddrafts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
Parts from https://en.wikipedia.org/wiki/User:Ritchie333/afcbios.py, licensed CC-BY-SA-3.0
'''
import re
from botbase import *
# Pool of draft titles; the driver code extends it with every search result.
titles = []
# On-wiki page whose text is replaced with the generated report.
page_to_update = "Wikipedia:WikiProject Women in Red/Drafts"

# Markup stripped from page text before looking for a sentence: <ref>...</ref>
# spans, {{templates}}, HTML comments, ''' bold markers and ---- rules.
# Written as raw strings so that backslashes reach the regex engine untouched
# instead of being parsed as (invalid) Python string escapes. The quantifiers
# are greedy and "." does not cross newlines, so each alternative removes from
# its first opener to its last closer on the same line.
reMarker = re.compile(r"<ref.*/ref>|{{.*}}|<!--.*-->|'''|----")
# Parenthesised part of a title, e.g. the "(scientist)" disambiguator.
reTitle = re.compile(r"\(.*\)")

# Level-2 section names used in the report.
header_new = "New Additions"
header_old = "Existing Pages"

# Report text; starts with the transcluded header subpage and is appended to.
wikitext = "{{/Header}}\n"
# Templates filled in with str.format():
#   wikitext_header_2 -> section name
#   wikitext_header_3 -> first and last entry number of a group
#   wikitext_entry    -> title, first sentence, timestamp, edit summary
wikitext_header_2 = "== {} ==\n"
wikitext_header_3 = "=== {} - {} ===\n"
wikitext_entry = "* [[{}]]\n::<small><nowiki>{}</nowiki></small>\n:::<small><nowiki>{} - {}</nowiki></small>\n"

# Search expression template: category name, then a quoted phrase.
search_query = 'incategory:"{}" "{}"'
# Every category is combined with every keyword to form one search each.
categories = [
    "AfC submissions declined as a non-notable biography",
    "AfC submissions declined as a non-notable academic topic",
]
keywords = ["she was", "she is", "her book", "her work"]
def run_search(category, keyword):
    """Return the titles of pages matching one category/keyword search.

    The search expression is built from ``search_query`` and sent through a
    ``ListGenerator`` for the "search" list, restricted to namespace 118
    (presumably the Draft: namespace -- confirm against the target wiki).
    ``p`` and ``site`` are not defined in this file's visible code; they are
    presumably supplied by the ``botbase`` star import.

    Args:
        category: Category name placed inside ``incategory:"..."``.
        keyword: Phrase that is searched for as a quoted string.

    Returns:
        A list with the ``"title"`` value of every search result.
    """
    expression = search_query.format(category, keyword)
    results = p.data.api.ListGenerator(
        "search",
        srnamespace=118,
        srsearch=expression,
        srprop="",  # no extra per-result properties are requested
        site=site,
    )
    found = []
    for hit in results:
        found.append(hit["title"])
    return found
def generate_entries(titles, header):
    """Build the wikitext of one report section.

    For every title the page is loaded, and an entry is emitted that shows
    the page link, the first sentence mentioning the page's name (empty if
    none is found), and the date and edit summary of the latest revision.

    Args:
        titles: Iterable of full page titles. The first six characters of
            each title are dropped before the name is searched for in the
            page text (presumably the "Draft:" prefix -- confirm with caller).
        header: Text of the section's level-2 heading.

    Returns:
        The section as a single string: the level-2 heading, then the
        entries in iteration order, with a level-3 range heading inserted
        before every group of 50 entries.
    """
    # Collect the pieces and join once instead of growing a string in a loop.
    parts = [wikitext_header_2.format(header)]
    for num, title in enumerate(titles):
        if num % 50 == 0:
            parts.append(wikitext_header_3.format(num + 1, num + 50))

        page = p.Page(site, title)
        latest = page.latest_revision
        # Keep only the leading YYYY-MM-DD part of the stringified timestamp.
        timestamp = str(latest["timestamp"])[0:10]
        editsummary = latest["comment"]

        short_text = reMarker.sub('', page.text)
        # Removing a trailing "(disambiguator)" leaves a dangling space that
        # would otherwise have to appear literally in the page text.
        short_title = reTitle.sub('', title[6:]).strip()

        # Leftmost occurrence of the name that is followed by a period on
        # the same line.
        found = re.search(re.escape(short_title) + r'.*\.', short_text)
        if found is None:
            firstsentence = ""
        else:
            # Cut at the first period *after* the name, so that names which
            # themselves contain periods ("J. K. ...") are not truncated.
            remainder = found.group()[len(short_title):]
            firstsentence = short_title + remainder.partition('.')[0]

        parts.append(
            wikitext_entry.format(title, firstsentence, timestamp, editsummary)
        )
    return "".join(parts)
# Run every category/keyword combination and pool the results; a page can
# match several searches, so duplicates are collapsed with a set.
for category in categories:
    for keyword in keywords:
        titles += run_search(category, keyword)
titles = set(titles)

# Titles found by the previous run, stored "|"-separated. A missing file
# (first run) means nothing has been reported yet rather than an error.
# The encoding is pinned because page titles are frequently non-ASCII and
# the platform default is not guaranteed to handle them.
try:
    with open('last_titles.txt', 'r', encoding='utf-8') as last_titles_file:
        last_titles = set(last_titles_file.read().split("|"))
except FileNotFoundError:
    last_titles = set()

new_titles = titles - last_titles
old_titles = titles & last_titles

# Sets iterate in arbitrary order; sorting keeps the report stable between
# runs so that the page diff only shows real changes.
wikitext += (
    generate_entries(sorted(new_titles), header_new)
    + generate_entries(sorted(old_titles), header_old)
)
page = p.Page(site, page_to_update)
page.text = wikitext
page.savewithshutoff(summary = 'Update "Women in Red drafts" report', minor = False)

# Record the current titles only after the save call has returned: if saving
# raises, the previous state is kept and the same titles are listed as new
# again on the next run instead of being silently treated as already seen.
with open('last_titles.txt', 'w', encoding='utf-8') as last_titles_file:
    last_titles_file.write("|".join(sorted(titles)))