-
Notifications
You must be signed in to change notification settings - Fork 0
/
election2012.py
85 lines (65 loc) · 2.77 KB
/
election2012.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
"""
@author: Eric Smith
Created 2014-03-03
Reads in 2012 US election results, from http://www.theguardian.com/news/datablog/2012/nov/07/us-2012-election-county-results-download#data
Suffixes at the end of variable names:
A: numpy array
B: boolean
D: dictionary
DF: pandas DataFrame
L: list
S: string
SR: pandas Series
T: tuple
Underscores indicate chaining: for instance, "fooT_T" is a tuple of tuples
"""
import numpy as np
import os
import pandas as pd
import config
# `reload` is a builtin only on Python 2; on Python 3 it must be imported from
# importlib. Fall back to that so the module can be imported on either version.
try:
    reload
except NameError:
    from importlib import reload
# Re-execute the config module (presumably so that edits made to it during an
# interactive session are picked up -- confirm with the author).
reload(config)
def _coerce_numeric_columns(dataDF):
    """
    Return a copy of dataDF in which each non-numeric column holding at least
    one number-like value is converted to a numeric dtype (values that cannot
    be parsed become NaN). Columns with no number-like values at all (e.g.
    party names) are left untouched.

    This stands in for DataFrame.convert_objects(convert_numeric=True), which
    was removed from pandas in version 1.0.
    """
    convertedDF = dataDF.copy()
    nonNumericColumnsL = list(
        dataDF.select_dtypes(exclude=[np.number]).columns)
    for columnS in nonNumericColumnsL:
        numericSR = pd.to_numeric(convertedDF[columnS], errors='coerce')
        # Only replace the column if something actually parsed as a number;
        # otherwise pure-text columns would be wiped out to all-NaN
        if numericSR.notnull().any():
            convertedDF[columnS] = numericSR
    return convertedDF


def main():
    """
    Read the 2012 US election results file and total the votes by FIPS code.

    Returns a DataFrame indexed by 'FIPS' with the columns
    'Election2012Total', 'Election2012Dem' and 'Election2012Rep'.
    """

    # Read in 2012 election data
    filePathS = os.path.join(config.rawDataPathS, 'election_statistics',
                             'US_elect_county__2012.csv')
    fullDF = pd.read_csv(filePathS, low_memory=False)
    fullDF = _coerce_numeric_columns(fullDF)

    # Remove entries that correspond to the voting records of the entire
    # state: these are the rows whose FIPS code is 0. Rows with a missing
    # FIPS code are dropped as well (the groupby below would discard them
    # anyway).
    fipsSR = fullDF.loc[:, 'FIPS Code']
    validRowsB_SR = fipsSR.notnull() & (fipsSR != 0)
    # Take an explicit copy so that the column assignments below modify an
    # independent frame rather than a slice of fullDF
    countyDF = fullDF.loc[validRowsB_SR, :].copy()

    # Extract the Democratic and Republican vote counts for each row
    countyDF['numDemVotes'] = extract_votes_all_rows(countyDF, 'Dem')
    countyDF['numGOPVotes'] = extract_votes_all_rows(countyDF, 'GOP')

    # Keep only the fields needed downstream: FIPS Code, TOTAL VOTES CAST,
    # numDemVotes, numGOPVotes
    desiredColumnsL = ['FIPS Code', 'TOTAL VOTES CAST',
                       'numDemVotes', 'numGOPVotes']
    partialDF = countyDF.reindex(columns=desiredColumnsL)
    partialDF.columns = ['FIPS', 'Election2012Total',
                         'Election2012Dem', 'Election2012Rep']

    # Sum all entries with the same FIPS code (since the New England states
    # report vote totals by municipality instead of by county)
    finalDF = partialDF.groupby('FIPS').sum()

    return finalDF
def extract_votes_all_rows(countyDF, partyS):
    """
    For every row of countyDF, find the vote count belonging to the party
    given in partyS.

    Each row lists its candidates in paired columns: ('Party', 'Votes'),
    ('Party.1', 'Votes.1'), ('Party.2', 'Votes.2'), and so on. The pairs are
    scanned in that order and, for each row, the votes from the first pair
    whose party equals partyS are taken.

    Parameters:
        countyDF: DataFrame containing the paired party/votes columns; the
            votes columns are expected to be numeric
        partyS: party label to look for, e.g. 'Dem' or 'GOP'

    Returns:
        A float Series with the same index as countyDF. Rows in which the
        party never appears are NaN.

    Raises:
        KeyError if countyDF has no 'Party' or 'Votes' column.
    """

    numRows = countyDF.shape[0]
    partyVotesSR = pd.Series(np.nan, index=countyDF.index, dtype='float64')
    # Tracks which rows have already been matched, independently of the vote
    # value itself (which could legitimately be NaN in the raw data)
    isFilledA = np.zeros(numRows, dtype=bool)

    iParty = 0
    while True:
        suffixS = '' if iParty == 0 else '.' + str(iParty)
        partyColumnS = 'Party' + suffixS
        votesColumnS = 'Votes' + suffixS

        # Stop once the numbered column pairs run out, instead of raising a
        # KeyError when some row never lists the party. The first pair is
        # always read so that a frame with the wrong schema still fails
        # loudly.
        if iParty > 0 and (partyColumnS not in countyDF.columns or
                           votesColumnS not in countyDF.columns):
            break

        isCorrectPartyA = (countyDF[partyColumnS] == partyS).values
        # Only fill rows not matched by an earlier pair: first match wins
        isNewMatchA = isCorrectPartyA & ~isFilledA
        if isNewMatchA.any():
            # Positional assignment avoids any index-alignment surprises
            partyVotesSR.iloc[isNewMatchA] = \
                countyDF[votesColumnS].values[isNewMatchA]
            isFilledA |= isNewMatchA

        # Every row has its value: no need to look at further columns
        if isFilledA.all():
            break
        iParty += 1

    return partyVotesSR