-
Notifications
You must be signed in to change notification settings - Fork 0
/
ms2018.py
174 lines (118 loc) · 5.06 KB
/
ms2018.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python
# coding: utf-8
# - 2018 issue is still open. Resolve issues there first.
# - ~Add column {'magnitude'}.~
# - ~Several candidates appear as running with similar names (e.g. JEFFERY HARNESS and JEFFREY HARNESS). Investigate such instances, and if they correspond to the same candidate name and are not writeins, unify the candidate names.~
# - ~Unify the format of the names for judicial court races (e.g. CHANCERY COURT vs. JUSTICE COURT JUDGE).~
# - Redo the district column so it follows the current rules.
# - ~Investigate office name COAHOMA COAHOMA COUNTY JUDGE and decide whether COAHOMA should appear once or twice.~
# - Investigate candidate MERLE MCANALLY, who appears in some rows as running with party INDEPENDENT and others as NONPARTISAN. Decide whether the parties are correct, and if not, fix them.
# In[37]:
import pandas as pd
import numpy as np
import os
import csv
df = pd.read_csv('2018-ms-precinct-autoadapted.csv')
df = df.drop_duplicates()
df = df.fillna("")
# In[38]:
df['magnitude'] = 1
# In[39]:
df['party_detailed'] = df['party_detailed'].replace({'""': ''})
def get_party_simplified(x):
if x in ["",'DEMOCRAT','REPUBLICAN',"LIBERTARIAN",'NONPARTISAN']:
return x
else:
return "OTHER"
df['party_simplified'] = df['party_detailed'].apply(get_party_simplified)
# In[40]:
df['candidate'] = df['candidate'].replace({'CARNELLA FONDREN': 'CARNELIA FONDREN',
'D NEILL HARRIS SR': 'D NEIL HARRIS SR',
'GERALD W CHATMAN SR': 'GERALD W CHATHAM SR',
'JEFFREY HARNESS': 'JEFFERY HARNESS',
'PAULA DRUNGHOLE ELLIS': 'PAULA DRUNGOLE ELLIS',
'ELIZABETHBETSYPERSON': 'ELIZABETH "BETSY" PERSON',
'CARLOYN STEELE JOHNSON':'CAROLYN STEELE JOHNSON',
'FOR THE BOND ISSUE':'YES',
'AGAINST THE BOND ISSUE':'NO'})
# In[41]:
# wrong party listed for this candidate
df.loc[df['candidate'].str.contains("JEFFERY HARNESS"), 'party_detailed'] = 'DEMOCRAT'
df.loc[df['candidate'].str.contains("JEFFERY HARNESS"), 'party_simplified'] = 'DEMOCRAT'
# In[42]:
df['office'] = df['office'].replace({'COUNTY COURT JUDGE': 'COUNTY COURT',
'COAHOMA COUNTY JUDGE': 'COAHOMA COAHOMA COUNTY',
'COUNTY YOUTH COURT JUDGE': 'COUNTY YOUTH COURT',
'JUSTICE COURT JUDGE': 'JUSTICE COURT',
'VOTE FOR ONE': 'BOND ISSUE'
})
# In[43]:
df.loc[df['candidate'].str.contains("MERLE MCANALLY"), 'party_detailed'] = 'NONPARTISAN'
df.loc[df['candidate'].str.contains("MERLE MCANALLY"), 'party_simplified'] = 'NONPARTISAN'
# In[44]:
def cleanDistrict(x):
if "DIST" in x:
if len(x) > 5:
if x[0] == '0':
return '0' + x[5:].replace(" , ", ", ")
if ',' not in x:
return x.split(' ')[-1].zfill(3)
else:
return '00' + x[5:].replace(" , ", ", ")
else:
if len(x) == 1:
return '00' + x
if len(x) == 2:
return '0' + x
if x == '""':
return ''
else:
return x
df['district'] = df['district'].apply(cleanDistrict)
# In[45]:
def cleanDistrict2(x):
if ',' in x:
index = x.index(',')
if index == 4:
return x[1:]
else:
return x
else:
return x
df['district'] = df['district'].apply(cleanDistrict2)
#DC
df.loc[df['office']=='CIRCUIT COURT','dataverse'] = 'STATE'
df = df.replace([True,False], ['TRUE','FALSE'])
df.loc[df['office']=='STATE HOUSE','special'] = 'TRUE'
# Final step: Remove all trailing white space and put columns in correct order.
df=df.applymap(lambda x: x.strip() if type(x)==str else x)
df=df[["precinct", "office", "party_detailed", "party_simplified", "mode", "votes", "county_name", "county_fips", "jurisdiction_name",
"jurisdiction_fips", "candidate", "district", "dataverse", "year", "stage", "state", "special", "writein", "state_po",
"state_fips", "state_cen", "state_ic", "date", "readme_check", "magnitude"]]
df.to_csv('2018-ms-precinct-general-updated.csv', index=False,quoting=csv.QUOTE_NONNUMERIC)
# In[48]:
# for x in df['stage'].unique():
# print(x)
# ### weird candidate names
#
# FOR THE BOND ISSUE
# AGAINST THE BOND ISSUE
#
# CARNELIA FONDREN
# CARNELLA FONDREN
#
# D NEIL HARRIS SR
# D NEILL HARRIS SR
#
# GERALD W CHATHAM SR
# GERALD W CHATMAN SR # may need to change district from DIST 2 , PLACE 4 to DIST 17 , PLACE 4
#
# JEFFERY HARNESS
# JEFFREY HARNESS
#
# PAULA DRUNGHOLE ELLIS
# PAULA DRUNGOLE ELLIS
#
# ELIZABETHBETSYPERSON
# In[47]:
# get_ipython().system('jupyter nbconvert --to script mississippi_2021_updates.ipynb')