Assignment 1 Part 1: Graph Class
# Adjacency list for the demo graph: every key is a vertex and its list holds
# the vertices that key names as neighbours.
graph_elements = {
    "Apple": ["Banana", "Cherry"],
    "Banana": ["Apple", "Durian"],
    "Cherry": ["Apple", "Durian"],
    "Durian": ["Orange"],
    "Orange": ["Durian"],
}
class graph:
    """Undirected graph backed by an adjacency mapping.

    ``value`` maps each vertex to an iterable of its neighbouring vertices.
    Vertices must be hashable because edges are represented as sets.
    """

    def __init__(self, value=None):
        # A graph created without data is empty rather than wrapping ``None``,
        # so the accessors below work on it instead of raising.
        self.value = {} if value is None else value

    def getVertices(self):
        """Return the vertices (the mapping's keys) as a list."""
        return list(self.value)

    def getEdges(self):
        """Return the edge list; thin alias for :meth:`edges`."""
        return self.edges()

    def edges(self):
        """Return every undirected edge exactly once, as a list of sets.

        Each edge is the set ``{vertex, neighbour}``. Edges appear in the
        order they are first met while walking the adjacency mapping, and an
        edge listed from both endpoints is reported only once.
        """
        edgelist = []
        # Hashable twins of the edges already emitted, for O(1) duplicate
        # checks; the returned list still holds plain sets.
        seen = set()
        for vertex, neighbours in self.value.items():
            for neighbour in neighbours:
                key = frozenset((vertex, neighbour))
                if key not in seen:
                    seen.add(key)
                    edgelist.append({vertex, neighbour})
        return edgelist
# Build the demo graph, then show its vertices on one line and its edges on
# the next.
g = graph(graph_elements)
print(g.getVertices(), g.getEdges(), sep="\n")
['Apple', 'Banana', 'Cherry', 'Durian', 'Orange'] [{'Apple', 'Banana'}, {'Cherry', 'Apple'}, {'Durian', 'Banana'}, {'Cherry', 'Durian'}, {'Orange', 'Durian'}]
Depth First Traversal of a graph
def depth_first_search(graph, start, visited=None):
    """Return the set of vertices reachable from ``start`` (depth first).

    Args:
        graph: Mapping from each vertex to an iterable of its neighbours.
            Any iterable works (set, list, tuple), not only sets.
        start: Vertex to start from; must be a key of ``graph``.
        visited: Set of vertices already visited. It is updated in place and
            returned; a fresh set is created when omitted.

    Returns:
        The ``visited`` set, containing ``start`` and everything reachable
        from it.

    Raises:
        KeyError: If a reached vertex has no entry in ``graph``.
    """
    if visited is None:
        visited = set()
    visited.add(start)
    for neighbour in graph[start]:
        # Checked on every iteration: the recursion into an earlier
        # neighbour may already have reached this one.
        if neighbour not in visited:
            depth_first_search(graph, neighbour, visited)
    return visited
# Adjacency sets for the depth-first demo; the traversal starts at 'Banana'.
graph = {
    'Apple': {'Banana', 'Cherry'},
    'Banana': {'Apple', 'Durian', 'Orange'},
    'Cherry': {'Apple'},
    'Durian': {'Banana', 'Orange'},
    'Orange': {'Cherry', 'Durian'},
}
depth_first_search(graph, 'Banana')
{'Apple', 'Banana', 'Cherry', 'Durian', 'Orange'}
Breadth-First Traversal of a Graph
import collections
def breadth_first_search(graph, root):
    """Print the vertices reachable from ``root`` in breadth-first order.

    ``graph`` maps each vertex to an iterable of its neighbours. Every
    visited vertex is written to standard output followed by a single space,
    all on one line. Nothing is returned.
    """
    seen = {root}
    pending = collections.deque((root,))
    while pending:
        current = pending.popleft()
        print(str(current), end=" ")
        for adjacent in graph[current]:
            if adjacent in seen:
                continue
            # Mark on enqueue so a vertex is never queued twice.
            seen.add(adjacent)
            pending.append(adjacent)
if __name__ == '__main__':
    # Small directed example graph; the traversal below starts at vertex 1.
    graph = {
        0: [1, 2],
        1: [0, 2],
        2: [3],
        3: [1, 2],
    }
    breadth_first_search(graph, 1)
1 0 2 3
Assignment 1 Part 2: Election Data Search
import pandas as pd
from google.colab import drive
drive.mount('/data/')
data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'
!ls '/data/My Drive/Colab Notebooks/FEC dataset'
Drive already mounted at /data/; to attempt to forcibly remount, call drive.mount("/data/", force_remount=True). ccl20.zip cm_header_file.csv pas220.zip ccl_header_file.csv cn20.zip pas2_header_file.csv cm20.zip cn_header_file.csv
Search the 'cn20.zip' archive (candidate master file)
from zipfile import ZipFile
# Column names come from a separate header file and are passed via `names`.
header = pd.read_csv(data_dir + '/cn_header_file.csv')
# `archive` (not `zip`) keeps the builtin zip() usable afterwards, and the
# member handle is closed as soon as the frame has been read.
with ZipFile(data_dir + '/cn20.zip') as archive, archive.open('cn.txt') as cn_file:
    candidates = pd.read_csv(
        cn_file,
        sep='|',
        names=header.columns,
        # Keep ZIP codes as text: parsed as floats they lose leading zeros
        # (e.g. a Massachusetts ZIP shows up as 2459.0).
        dtype={'CAND_ZIP': str},
    )
candidates.head()
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | H0AK00105 | LAMB, THOMAS | NNE | 2020 | AK | H | 0.0 | C | N | C00607515 | 1861 W LAKE LUCILLE DR | NaN | WASILLA | AK | 99654.0 |
1 | H0AK00113 | TUGATUK, RAY SEAN | DEM | 2020 | AK | H | 0.0 | C | N | NaN | PO BOX 172 | NaN | MANAKOTAK | AK | 99628.0 |
2 | H0AK01046 | CATALANO, THOMAS | OTH | 2020 | AK | H | 0.0 | NaN | N | NaN | 188 WEST NORTHERN LIGHTS BOULEVARD | NaN | ANCHORAGE | AK | 99503.0 |
3 | H0AL01055 | CARL, JERRY LEE, JR | REP | 2020 | AL | H | 1.0 | O | C | C00697789 | PO BOX 852138 | NaN | MOBILE | AL | 36685.0 |
4 | H0AL01063 | LAMBERT, DOUGLAS WESTLEY III | REP | 2020 | AL | H | 1.0 | O | C | C00701557 | 7194 STILLWATER BLVD | NaN | SPANISH FORT | AL | 36527.0 |
# Plain substring search on the candidate name (so 'WALKERLIEB' and names with
# WALKER as a given name match too). na=False treats missing names as
# non-matches instead of breaking the boolean mask; regex=False keeps the
# needle literal.
candidates[candidates['CAND_NAME'].str.contains('WALKER', na=False, regex=False)].head()
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
97 | H0AZ09054 | WALKER, JASON S | REP | 2020 | AZ | H | 9.0 | C | N | NaN | 4815 E. THOMAS RD. | W224 | PHOENIX | AZ | 85018.0 |
848 | H0IL15103 | WALKER, ALEX | REP | 2020 | IL | H | 15.0 | O | N | C00717967 | 2606 CHARLESTON AVE | NaN | MATTOON | IL | 61938.0 |
1009 | H0MA04168 | GROSSMAN, REBECCA WALKER | DEM | 2020 | MA | H | 4.0 | C | C | C00720482 | PO BOX 590686 | NaN | NEWTON CENTER | MA | 2459.0 |
1391 | H0NC08247 | WALKER, RODNEY L | DEM | 2020 | NC | H | 8.0 | C | N | C00707877 | PO BOX 712 | NaN | SOUTHERN PINES | NC | 28388.0 |
1574 | H0NV03108 | WALKERLIEB, ZACHARY | REP | 2020 | NV | H | 3.0 | C | C | C00703025 | 5887 GLORY HEIGHTS DR | NaN | LAS VEGAS | NV | 89135.0 |
# Candidates whose election year is 2020 and whose office state is Florida.
tg = candidates.loc[
    candidates['CAND_ELECTION_YR'].eq(2020)
    & candidates['CAND_OFFICE_ST'].eq('FL')
]
tg.head()
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
437 | H0FL01112 | ROSAS, EMILY | REP | 2020 | FL | H | 1.0 | C | N | C00664300 | 6610 JENNIFER DRIVE | NaN | TEMPLE TERRACE | FL | 33617.0 |
438 | H0FL01120 | ROMAGNANO, CHASE ANDERSON ANDY REV. | REP | 2020 | FL | H | 1.0 | C | N | C00719351 | P.O. BOX 9328 | NaN | PENSACOLA | FL | 32513.0 |
439 | H0FL01138 | HOWARD, CHERYL | DEM | 2020 | FL | H | 1.0 | C | N | C00735977 | 605 CROWN COVE | NaN | PENSACOLA | FL | 32502.0 |
440 | H0FL01146 | MERK, GREGORY CHARLES | REP | 2020 | FL | H | 1.0 | C | C | C00740340 | 4661 CALLE VENTOSO | NaN | PENSACOLA | FL | 32514.0 |
441 | H0FL01153 | ORAM, ALBERT | NPA | 2020 | FL | H | 1.0 | C | C | C00746636 | 825 BAYSHORE DR APT 804 | NaN | PENSACOLA | FL | 32507.0 |
# Number of rows that passed the 2020 / FL filter.
len(tg)
242
# Literal substring search on the candidate name; na=False treats missing
# names as non-matches and regex=False keeps the needle literal.
candidates[candidates['CAND_NAME'].str.contains('TRUMP, DONALD', na=False, regex=False)]
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6403 | P80001571 | TRUMP, DONALD J. | REP | 2020 | US | P | 0.0 | I | C | C00580100 | 725 FIFTH AVENUE | NaN | NEW YORK | NY | 10022.0 |
Search the 'pas220.zip' archive (committee-to-candidate transactions)
# Column names come from a separate header file and are passed via `names`.
header = pd.read_csv(data_dir + '/pas2_header_file.csv')
# `archive` (not `zip`) keeps the builtin zip() usable afterwards, and the
# member handle is closed as soon as the frame has been read.
with ZipFile(data_dir + '/pas220.zip') as archive, archive.open('itpas2.txt') as pas2_file:
    spending = pd.read_csv(
        pas2_file,
        sep='|',
        names=header.columns,
        dtype={
            # Columns 10-12 are the ones pandas reports as mixed-type
            # (DtypeWarning); reading them as text makes the types explicit.
            'ZIP_CODE': str,
            'EMPLOYER': str,
            'OCCUPATION': str,
            # Dates parsed as floats lose their leading zero
            # (e.g. 1082019.0), so keep the raw text.
            'TRANSACTION_DT': str,
        },
    )
spending.head()
/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,11,12) have mixed types.Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
CMTE_ID | AMNDT_IND | RPT_TP | TRANSACTION_PGI | IMAGE_NUM | TRANSACTION_TP | ENTITY_TP | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | OTHER_ID | CAND_ID | TRAN_ID | FILE_NUM | MEMO_CD | MEMO_TEXT | SUB_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00567180 | T | TER | P2020 | 201901099143774199 | 24K | PAC | TED YOHO FOR CONGRESS | GAINESVILLE | FL | 32608 | NaN | NaN | 1082019.0 | 1880 | C00494583 | H2FL06109 | SB23.4447 | 1303604 | NaN | NaN | 4021920191640423011 |
1 | C00104885 | A | TER | G2020 | 201901289144031511 | 24K | CCM | TEAM GRAHAM INC | COLUMBIA | SC | 29202 | NaN | NaN | 12202018.0 | 3000 | C00458828 | H4SC03087 | SB23.17757 | 1307636 | NaN | NaN | 4022220191643444985 |
2 | C00104885 | A | TER | P2022 | 201901289144031512 | 24K | CCM | TIM SCOTT FOR SENATE | CHARLESTON | SC | 29407 | NaN | NaN | 12202018.0 | 1000 | C00540302 | H0SC01279 | SB23.17756 | 1307636 | NaN | NaN | 4022220191643444987 |
3 | C00104885 | A | TER | P2020 | 201901289144031511 | 24K | CCM | FRIENDS OF JIM CLYBURN | COLUMBIA | SC | 29211 | NaN | NaN | 1072019.0 | 470 | C00255562 | H2SC02042 | SB23.17755 | 1307636 | NaN | NaN | 4022220191643444981 |
4 | C00688408 | T | TER | G2018 | 201901319144305867 | 24E | ORG | JACKSON ADVOCATE | JACKSON | MS | 39207 | NaN | NaN | 12122018.0 | 1000 | S8MS00287 | S8MS00287 | SE.4233 | 1310906 | NaN | NaN | 4022420191643632157 |
# Rows whose candidate id starts with 'H' and whose STATE column is 'FL'.
# na=False makes rows without a candidate id count as non-matches (this is
# what the former `== True` comparison was emulating).
# NOTE(review): STATE is a column of the transaction record, not of the
# candidate table - confirm it is the state this filter is meant to test.
house_spending_FL = spending[
    spending['CAND_ID'].str.startswith('H', na=False) & (spending['STATE'] == 'FL')
]
# Keep only the columns used by the later join and total.
df_house = house_spending_FL[['CMTE_ID', 'NAME', 'STATE', 'TRANSACTION_AMT', 'CAND_ID']]
df_house.head()
CMTE_ID | NAME | STATE | TRANSACTION_AMT | CAND_ID | |
---|---|---|---|---|---|
0 | C00567180 | TED YOHO FOR CONGRESS | FL | 1880 | H2FL06109 |
154 | C00027342 | HASTINGS FOR CONGRESS | FL | 5000 | H2FL23021 |
195 | C00003855 | DONNA SHALALA FOR CONGRESS | FL | 1000 | H8FL27193 |
256 | C00339655 | VERN BUCHANAN FOR CONGRESS | FL | 5000 | H6FL13148 |
421 | C00035204 | BILIRAKIS FOR CONGRESS | FL | 1000 | H6FL09070 |
Search the 'cm20.zip' archive (committee master file)
# Column names come from a separate header file and are passed via `names`.
header = pd.read_csv(data_dir + '/cm_header_file.csv')
# `archive` (not `zip`) keeps the builtin zip() usable afterwards, and the
# member handle is closed as soon as the frame has been read.
with ZipFile(data_dir + '/cm20.zip') as archive, archive.open('cm.txt') as cm_file:
    df = pd.read_csv(
        cm_file,
        sep='|',
        names=header.columns,
        # ZIP codes are identifiers, not numbers: keep them as text so a
        # leading zero cannot be dropped by numeric parsing.
        dtype={'CMTE_ZIP': str},
    )
df.head()
CMTE_ID | CMTE_NM | TRES_NM | CMTE_ST1 | CMTE_ST2 | CMTE_CITY | CMTE_ST | CMTE_ZIP | CMTE_DSGN | CMTE_TP | CMTE_PTY_AFFILIATION | CMTE_FILING_FREQ | ORG_TP | CONNECTED_ORG_NM | CAND_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00000059 | HALLMARK CARDS PAC | SARAH MOE | 2501 MCGEE | MD #500 | KANSAS CITY | MO | 64108 | U | Q | UNK | M | C | NaN | NaN |
1 | C00000422 | AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ... | WALKER, KEVIN MR. | 25 MASSACHUSETTS AVE, NW | SUITE 600 | WASHINGTON | DC | 200017400 | B | Q | NaN | M | M | DELAWARE MEDICAL PAC | NaN |
2 | C00000489 | D R I V E POLITICAL FUND CHAPTER 886 | JERRY SIMS JR | 3528 W RENO | NaN | OKLAHOMA CITY | OK | 73107 | U | N | NaN | Q | L | NaN | NaN |
3 | C00000547 | KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE | JERRY SLAUGHTER | 623 SW 10TH AVE | NaN | TOPEKA | KS | 666121627 | U | Q | UNK | Q | M | KANSAS MEDICAL SOCIETY | NaN |
4 | C00000638 | INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC... | ACHENBACH, GRANT MR. | 322 CANAL WALK, CANAL LEVEL | NaN | INDIANAPOLIS | IN | 46202 | U | Q | NaN | T | M | NaN | NaN |
# Committee id -> committee name lookup table, used by the later join.
df_name = df.loc[:, ['CMTE_ID', 'CMTE_NM']]
df_name.head()
CMTE_ID | CMTE_NM | |
---|---|---|
0 | C00000059 | HALLMARK CARDS PAC |
1 | C00000422 | AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ... |
2 | C00000489 | D R I V E POLITICAL FUND CHAPTER 886 |
3 | C00000547 | KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE |
4 | C00000638 | INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC... |
# Committees whose connected organisation name contains 'SOCIETY'; rows
# without a connected organisation are treated as non-matches.
Society_Organization = df.loc[
    df['CONNECTED_ORG_NM'].str.contains('SOCIETY', na=False)
]
Society_Organization.head()
CMTE_ID | CMTE_NM | TRES_NM | CMTE_ST1 | CMTE_ST2 | CMTE_CITY | CMTE_ST | CMTE_ZIP | CMTE_DSGN | CMTE_TP | CMTE_PTY_AFFILIATION | CMTE_FILING_FREQ | ORG_TP | CONNECTED_ORG_NM | CAND_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | C00000547 | KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE | JERRY SLAUGHTER | 623 SW 10TH AVE | NaN | TOPEKA | KS | 666121627 | U | Q | UNK | Q | M | KANSAS MEDICAL SOCIETY | NaN |
11 | C00001180 | MICHIGAN DOCTORS POLITICAL ACTION COMMITTEE - ... | GHOSE, AMIT | P.O. BOX 769 | NaN | EAST LANSING | MI | 48826 | U | Q | NNE | Q | T | MICHIGAN STATE MEDICAL SOCIETY | NaN |
37 | C00003152 | NORTH CAROLINA MEDICAL SOCIETY FEDERAL POLITIC... | HAYES, E. REBECCA | PO BOX 25834 | 222 N. PERSON STREET | RALEIGH | NC | 27611 | U | Q | NaN | Q | M | NORTH CAROLINA MEDICAL SOCIETY | NaN |
51 | C00003970 | IOWA MEDICAL SOCIETY POLITICAL ACTION COMMITTEE | DOOLEY, JOHN B | 515 E LOCUST STREET | SUITE 400 | DES MOINES | IA | 50309 | U | Q | NaN | Q | M | IOWA MEDICAL SOCIETY | NaN |
136 | C00012211 | NORTH CAROLINA DENTAL SOCIETY PAC | BROWN, EVELYN M. DR. | 1600 EVANS ROAD | NaN | CARY | NC | 27513 | U | Q | NaN | Q | M | NORTH CAROLINA DENTAL SOCIETY | NaN |
# Number of committees matched by the 'SOCIETY' filter.
len(Society_Organization)
28
Search the 'ccl20.zip' archive (candidate-committee linkage file)
# Column names come from a separate header file and are passed via `names`.
header = pd.read_csv(data_dir + '/ccl_header_file.csv')
# `archive` (not `zip`) keeps the builtin zip() usable afterwards, and the
# member handle is closed as soon as the frame has been read.
with ZipFile(data_dir + '/ccl20.zip') as archive, archive.open('ccl.txt') as ccl_file:
    linkage = pd.read_csv(ccl_file, sep='|', names=header.columns)
linkage.head()
CAND_ID | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_ID | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|
0 | C00713602 | 2019 | 2020 | C00712851 | O | U | 228963 |
1 | H0AK00105 | 2020 | 2020 | C00607515 | H | P | 229250 |
2 | H0AL01055 | 2020 | 2020 | C00697789 | H | P | 226125 |
3 | H0AL01063 | 2020 | 2020 | C00701557 | H | P | 227053 |
4 | H0AL01071 | 2020 | 2020 | C00701409 | H | P | 227054 |
# Place the candidate, linkage and committee tables side by side.
# NOTE(review): axis=1 concatenation aligns the three tables on their row
# index, not on CAND_ID / CMTE_ID, so each result row pairs unrelated records
# (the preview puts candidate LAMB next to HALLMARK CARDS PAC) and column
# names such as CAND_ID and CMTE_ID occur more than once. Confirm this is
# intended; the key-based joins are the pd.merge calls further down.
df1 = pd.concat([candidates, linkage, df], axis=1, sort=False).reset_index()
df1.head()
index | CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | CAND_ID | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_ID | CMTE_TP | CMTE_DSGN | LINKAGE_ID | CMTE_ID | CMTE_NM | TRES_NM | CMTE_ST1 | CMTE_ST2 | CMTE_CITY | CMTE_ST | CMTE_ZIP | CMTE_DSGN | CMTE_TP | CMTE_PTY_AFFILIATION | CMTE_FILING_FREQ | ORG_TP | CONNECTED_ORG_NM | CAND_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | H0AK00105 | LAMB, THOMAS | NNE | 2020.0 | AK | H | 0.0 | C | N | C00607515 | 1861 W LAKE LUCILLE DR | NaN | WASILLA | AK | 99654.0 | C00713602 | 2019.0 | 2020.0 | C00712851 | O | U | 228963.0 | C00000059 | HALLMARK CARDS PAC | SARAH MOE | 2501 MCGEE | MD #500 | KANSAS CITY | MO | 64108 | U | Q | UNK | M | C | NaN | NaN |
1 | 1 | H0AK00113 | TUGATUK, RAY SEAN | DEM | 2020.0 | AK | H | 0.0 | C | N | NaN | PO BOX 172 | NaN | MANAKOTAK | AK | 99628.0 | H0AK00105 | 2020.0 | 2020.0 | C00607515 | H | P | 229250.0 | C00000422 | AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ... | WALKER, KEVIN MR. | 25 MASSACHUSETTS AVE, NW | SUITE 600 | WASHINGTON | DC | 200017400 | B | Q | NaN | M | M | DELAWARE MEDICAL PAC | NaN |
2 | 2 | H0AK01046 | CATALANO, THOMAS | OTH | 2020.0 | AK | H | 0.0 | NaN | N | NaN | 188 WEST NORTHERN LIGHTS BOULEVARD | NaN | ANCHORAGE | AK | 99503.0 | H0AL01055 | 2020.0 | 2020.0 | C00697789 | H | P | 226125.0 | C00000489 | D R I V E POLITICAL FUND CHAPTER 886 | JERRY SIMS JR | 3528 W RENO | NaN | OKLAHOMA CITY | OK | 73107 | U | N | NaN | Q | L | NaN | NaN |
3 | 3 | H0AL01055 | CARL, JERRY LEE, JR | REP | 2020.0 | AL | H | 1.0 | O | C | C00697789 | PO BOX 852138 | NaN | MOBILE | AL | 36685.0 | H0AL01063 | 2020.0 | 2020.0 | C00701557 | H | P | 227053.0 | C00000547 | KANSAS MEDICAL SOCIETY POLITICAL ACTION COMMITTEE | JERRY SLAUGHTER | 623 SW 10TH AVE | NaN | TOPEKA | KS | 666121627 | U | Q | UNK | Q | M | KANSAS MEDICAL SOCIETY | NaN |
4 | 4 | H0AL01063 | LAMBERT, DOUGLAS WESTLEY III | REP | 2020.0 | AL | H | 1.0 | O | C | C00701557 | 7194 STILLWATER BLVD | NaN | SPANISH FORT | AL | 36527.0 | H0AL01071 | 2020.0 | 2020.0 | C00701409 | H | P | 227054.0 | C00000638 | INDIANA STATE MEDICAL ASSOCIATION POLITICAL AC... | ACHENBACH, GRANT MR. | 322 CANAL WALK, CANAL LEVEL | NaN | INDIANAPOLIS | IN | 46202 | U | Q | NaN | T | M | NaN | NaN |
# Join every candidate to its committee links on CAND_ID. Both tables carry
# CAND_ELECTION_YR, so the result holds it twice with _x / _y suffixes.
df2 = candidates.merge(linkage, on='CAND_ID')
df2.head()
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR_x | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | CAND_ELECTION_YR_y | FEC_ELECTION_YR | CMTE_ID | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | H0AK00105 | LAMB, THOMAS | NNE | 2020 | AK | H | 0.0 | C | N | C00607515 | 1861 W LAKE LUCILLE DR | NaN | WASILLA | AK | 99654.0 | 2020 | 2020 | C00607515 | H | P | 229250 |
1 | H0AL01055 | CARL, JERRY LEE, JR | REP | 2020 | AL | H | 1.0 | O | C | C00697789 | PO BOX 852138 | NaN | MOBILE | AL | 36685.0 | 2020 | 2020 | C00697789 | H | P | 226125 |
2 | H0AL01063 | LAMBERT, DOUGLAS WESTLEY III | REP | 2020 | AL | H | 1.0 | O | C | C00701557 | 7194 STILLWATER BLVD | NaN | SPANISH FORT | AL | 36527.0 | 2020 | 2020 | C00701557 | H | P | 227053 |
3 | H0AL01071 | PRINGLE, CHRISTOPHER PAUL | REP | 2020 | AL | H | 1.0 | O | C | C00701409 | 202 GOVERNMENT STREET | SUITE 220 | MOBILE | AL | 36602.0 | 2020 | 2020 | C00701409 | H | P | 227054 |
4 | H0AL01089 | HIGHTOWER, BILL | REP | 2020 | AL | H | 1.0 | O | C | C00703066 | PO BOX 91038 | NaN | MOBILE | AL | 36691.0 | 2020 | 2020 | C00703066 | H | P | 227266 |
# Attach the committee name to each candidate/committee link via CMTE_ID.
df_merge = df2.merge(df_name, on='CMTE_ID')
# Narrow to the columns of interest (no sorting happens here despite the name).
df_sort = df_merge.loc[:, ['CAND_ID', 'CAND_NAME', 'CAND_ST', 'CMTE_NM']]
# NOTE(review): this filters on CAND_ST, whereas the earlier 2020/FL candidate
# filter used CAND_OFFICE_ST; the preview includes an H0AL... candidate id.
# Confirm which state column is intended.
df_sort.loc[df_sort['CAND_ST'].eq('FL')].head(10)
CAND_ID | CAND_NAME | CAND_ST | CMTE_NM | |
---|---|---|---|---|
7 | H0AL01113 | ROMAGNANO, CHASE ANDERSON ANDY REVEREN | FL | ANDY FOR NORTHWEST FLORIDA |
8 | H0FL01120 | ROMAGNANO, CHASE ANDERSON ANDY REV. | FL | ANDY FOR NORTHWEST FLORIDA |
14 | H0FL02110 | SOUTHERLAND, WILLIAM STEVE II | FL | FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST... |
381 | H0FL01120 | ROMAGNANO, CHASE ANDERSON ANDY REV. | FL | ANDY ROMAGNANO FOR NORTHWEST FLORIDA |
382 | H0FL01138 | HOWARD, CHERYL | FL | CHERYL HOWARD FOR U S HOUSE OF REPRESENTATIVES |
383 | H0FL01146 | MERK, GREGORY CHARLES | FL | GREG MERK CAMPAIGN |
384 | H0FL01153 | ORAM, ALBERT | FL | ORAM FOR CONGRESS |
386 | H0FL02193 | ANDERSON, WILLIE JEFFERSON JR | FL | ANDERSON FOR US CONGRESS |
387 | H0FL02219 | THRIPP, KRISTY | FL | KRISTY THRIPP FOR CONGRESS |
388 | H0FL03118 | BLACK, DEAN A | FL | DEAN BLACK FOR CONGRESS |
# Join the candidate/committee-name rows to the FL house transactions on
# CAND_ID.
# NOTE(review): CAND_ID repeats on both sides (one row per linked committee
# on the left, one per transaction on the right), so a transaction appears
# once per matching left row - the preview repeats the same 750 payment for
# several CMTE_NM values. Confirm this fan-out is wanted.
df_sort_house = df_sort.merge(df_house, on='CAND_ID')
df_sort_house.head()
CAND_ID | CAND_NAME | CAND_ST | CMTE_NM | CMTE_ID | NAME | STATE | TRANSACTION_AMT | |
---|---|---|---|---|---|---|---|---|
0 | H0AL02087 | ROBY, MARTHA | NaN | MARTHA ROBY FOR CONGRESS | C00522458 | PINPOINT WEBSOLUTIONS | FL | 750 |
1 | H0AL02087 | ROBY, MARTHA | NaN | FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST... | C00522458 | PINPOINT WEBSOLUTIONS | FL | 750 |
2 | H0GA08099 | SCOTT, JAMES AUSTIN | GA | FRESHMAN AGRICULTURAL REPUBLICAN MEMBERS TRUST... | C00522458 | PINPOINT WEBSOLUTIONS | FL | 750 |
3 | H0GA08099 | SCOTT, JAMES AUSTIN | GA | AUSTIN SCOTT FOR CONGRESS INC | C00522458 | PINPOINT WEBSOLUTIONS | FL | 750 |
4 | H0GA08099 | SCOTT, JAMES AUSTIN | GA | AUSTIN SCOTT VICTORY FUND | C00522458 | PINPOINT WEBSOLUTIONS | FL | 750 |
# Total of TRANSACTION_AMT over every row of the joined table.
# NOTE(review): the joined table's preview shows the same transaction repeated
# for several committee names of one candidate, so this total presumably
# counts such transactions more than once - verify before reporting it.
df_sort_house['TRANSACTION_AMT'].sum()
17474528