from google.colab import drive
drive.mount('/data/')
data_dir = '/data/My Drive/Colab Notebooks/FEC dataset'
!ls '/data/My Drive/Colab Notebooks/FEC dataset'
!pip install matplotlib
Mounted at /data/ ccl20.zip cm_header_file.csv indiv_header_file.csv ccl_header_file.csv cn20.zip pas220.zip CD_trends.xlsx cn_header_file.csv pas2_header_file.csv cm20.zip indiv20.zip Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.7) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0) Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.5) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.2.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.1->matplotlib) (1.15.0)
import zipfile
import pandas as pd

# The data file has no header row; column names come from a separate CSV.
header = pd.read_csv(data_dir + '/indiv_header_file.csv')

# Columns 10, 16, 18 and 19 hold identifiers / free text. Left to type
# inference they come back with mixed types (pandas emits a DtypeWarning) and
# ZIP codes are rendered as floats such as 1.0128e+08, so read them as strings.
text_columns = {
    'ZIP_CODE': str,
    'TRAN_ID': str,
    'MEMO_CD': str,
    'MEMO_TEXT': str,
}

# Context managers close both the archive and the member handle once the
# frame is loaded. The archive is not bound to `zip`, which would shadow the
# builtin. (indiv_archive.namelist() lists the members of the archive.)
with zipfile.ZipFile(data_dir + '/indiv20.zip') as indiv_archive:
    with indiv_archive.open('by_date/itcont_2020_20200630_20300630.txt') as member:
        data = pd.read_csv(
            member,
            sep='|',
            names=header.columns,
            dtype=text_columns,
        )
data.head()
/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (10,16,18,19) have mixed types.Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
CMTE_ID | AMNDT_IND | RPT_TP | TRANSACTION_PGI | IMAGE_NUM | TRANSACTION_TP | ENTITY_TP | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | OTHER_ID | TRAN_ID | FILE_NUM | MEMO_CD | MEMO_TEXT | SUB_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00363317 | A | YE | P2020 | 202004199219743280 | 15E | IND | LITTLE, WILLIAM | NEW YORK | NY | 1.0128e+08 | NOT EMPLOYED | NOT EMPLOYED | 12162020 | 500 | NaN | 4017159 | 1402014 | NaN | * EARMARKED CONTRIBUTION: SEE BELOW | 4042120201737536230 |
1 | C00723122 | A | YE | P2020 | 202007159244979799 | 15E | IND | STOWE, BARBARA | RESTON | VA | 2.01942e+08 | NOT EMPLOYED | NOT EMPLOYED | 12282020 | 100 | C00193433 | 4753483 | 1423440 | NaN | * EARMARKED CONTRIBUTION: SEE BELOW | 4072620201794577716 |
2 | C00290825 | A | YE | P2020 | 202004159216892816 | 15E | IND | MEHIEL, KAREN | NEW YORK | NY | 1.01281e+08 | KAMPACK, INC. | EXECUTIVE | 12182020 | 2800 | C00401224 | 3965375 | 1398991 | NaN | * EARMARKED CONTRIBUTION: SEE BELOW | 4050620201741858091 |
3 | C00363317 | A | M12 | P2020 | 202004199219742982 | 15E | IND | LITTLE, WILLIAM | NEW YORK | NY | 1.0128e+08 | NOT EMPLOYED | NOT EMPLOYED | 10302020 | 500 | C00401224 | 4017173 | 1401993 | NaN | * EARMARKED CONTRIBUTION: SEE BELOW | 4042120201737536220 |
4 | C00589309 | A | YE | P | 202002209187171385 | 15E | IND | DAVIDSON, GREG | REDONDO BEACH | CA | 9.02782e+08 | NORTHROP GRUMMAN | AEROSPACE MANAGER | 12312020 | 100 | C00401224 | VVBX0QHNGR6 | 1385228 | NaN | * EARMARKED CONTRIBUTION: SEE BELOW | 4022920201700018835 |
# Show the largest single transaction amount in the loaded contributions.
print(data.loc[:, 'TRANSACTION_AMT'].max())
10000000
# Order contributions from largest to smallest amount and preview the top rows.
sort_amt = data.sort_values('TRANSACTION_AMT', ascending=False)
sort_amt.head()
CMTE_ID | AMNDT_IND | RPT_TP | TRANSACTION_PGI | IMAGE_NUM | TRANSACTION_TP | ENTITY_TP | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | OTHER_ID | TRAN_ID | FILE_NUM | MEMO_CD | MEMO_TEXT | SUB_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
990582 | C00571703 | N | M8 | P | 202008209266851913 | 10 | IND | MELLON, TIMOTHY | SARATOGA | WY | 823311500 | SELF-EMPLOYED | INVESTMENTS | 7092020 | 10000000 | NaN | SA11A.15446 | 1434706 | NaN | NaN | 4090120201833903380 |
990568 | C00571703 | N | M8 | P | 202008209266851908 | 10 | IND | SCHWARZMAN, STEPHEN A. | NEW YORK | NY | 101543302 | BLACKSTONE | CHAIRMAN & CEO | 7012020 | 10000000 | NaN | SA11A.15411 | 1434706 | NaN | NaN | 4090120201833903366 |
469388 | C00637512 | N | M8 | P | 202008209266413693 | 10 | ORG | AMERICA FIRST POLICIES, INC. | ARLINGTON | VA | 22202 | NaN | NaN | 7202020 | 10000000 | NaN | SA11AI.165580 | 1434640 | NaN | NaN | 4082920201831236982 |
1151552 | C00484642 | N | M7 | P | 202007209260164631 | 10 | ORG | MAJORITY FORWARD | WASHINGTON | DC | 200055998 | NaN | NaN | 6302020 | 8000000 | NaN | 1973314 | 1427419 | NaN | NaN | 4072920201808862242 |
1351670 | C00747246 | N | Q2 | P | 202007159245095555 | 15 | ORG | SIXTEEN THIRTY FUND | WASHINGTON | DC | 200362605 | NaN | NaN | 6302020 | 5700000 | NaN | 12295463 | 1423930 | NaN | NaN | 4071720201791015689 |
# Keep only the contribution columns used by the later cells.
df = pd.DataFrame(data, columns=['CMTE_ID', 'NAME', 'CITY', 'STATE', 'ZIP_CODE', 'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT'])
from zipfile import ZipFile
import pandas as pd

# The candidate file has no header row; column names come from a separate CSV.
header = pd.read_csv(data_dir + '/cn_header_file.csv')
# Read inside the `with` blocks so the archive and the member handle are both
# closed afterwards. The archive is not bound to `zip`, which would shadow
# the builtin.
with ZipFile(data_dir + '/cn20.zip') as cn_archive:
    with cn_archive.open('cn.txt') as member:
        candidates = pd.read_csv(member, sep='|', names=header.columns)
candidates.head()
CAND_ID | CAND_NAME | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | CAND_OFFICE_ST | CAND_OFFICE | CAND_OFFICE_DISTRICT | CAND_ICI | CAND_STATUS | CAND_PCC | CAND_ST1 | CAND_ST2 | CAND_CITY | CAND_ST | CAND_ZIP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | H0AK00105 | LAMB, THOMAS | NNE | 2020 | AK | H | 0.0 | C | N | C00607515 | 1861 W LAKE LUCILLE DR | NaN | WASILLA | AK | 99654.0 |
1 | H0AK00113 | TUGATUK, RAY SEAN | DEM | 2020 | AK | H | 0.0 | C | N | NaN | PO BOX 172 | NaN | MANAKOTAK | AK | 99628.0 |
2 | H0AK01046 | CATALANO, THOMAS | OTH | 2020 | AK | H | 0.0 | NaN | N | NaN | 188 WEST NORTHERN LIGHTS BOULEVARD | NaN | ANCHORAGE | AK | 99503.0 |
3 | H0AL01055 | CARL, JERRY LEE, JR | REP | 2020 | AL | H | 1.0 | O | C | C00697789 | PO BOX 852138 | NaN | MOBILE | AL | 36685.0 |
4 | H0AL01063 | LAMBERT, DOUGLAS WESTLEY III | REP | 2020 | AL | H | 1.0 | O | C | C00701557 | 7194 STILLWATER BLVD | NaN | SPANISH FORT | AL | 36527.0 |
# Only the candidate id and party affiliation are needed for the join below.
candidates_final = pd.DataFrame(candidates, columns=['CAND_ID', 'CAND_PTY_AFFILIATION'])

# The linkage file has no header row; column names come from a separate CSV.
header = pd.read_csv(data_dir + '/ccl_header_file.csv')
# Read inside the `with` blocks so the archive and the member handle are both
# closed afterwards. The archive is not bound to `zip`, which would shadow
# the builtin. (ccl_archive.namelist() lists the members of the archive.)
with ZipFile(data_dir + '/ccl20.zip') as ccl_archive:
    with ccl_archive.open('ccl.txt') as member:
        linkage = pd.read_csv(member, sep='|', names=header.columns)
linkage.head()
CAND_ID | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_ID | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|
0 | C00713602 | 2019 | 2020 | C00712851 | O | U | 228963 |
1 | H0AK00105 | 2020 | 2020 | C00607515 | H | P | 229250 |
2 | H0AL01055 | 2020 | 2020 | C00697789 | H | P | 226125 |
3 | H0AL01063 | 2020 | 2020 | C00701557 | H | P | 227053 |
4 | H0AL01071 | 2020 | 2020 | C00701409 | H | P | 227054 |
# Inner-join party affiliation onto the candidate/committee linkage rows,
# matching on the candidate id.
df_merge = candidates_final.merge(linkage, on='CAND_ID')
df_merge.head()
CAND_ID | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_ID | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|---|
0 | H0AK00105 | NNE | 2020 | 2020 | C00607515 | H | P | 229250 |
1 | H0AL01055 | REP | 2020 | 2020 | C00697789 | H | P | 226125 |
2 | H0AL01063 | REP | 2020 | 2020 | C00701557 | H | P | 227053 |
3 | H0AL01071 | REP | 2020 | 2020 | C00701409 | H | P | 227054 |
4 | H0AL01089 | REP | 2020 | 2020 | C00703066 | H | P | 227266 |
# Remove, in place, every row that lacks an employer or an occupation.
sort_amt.dropna(
    axis='index',
    how='any',
    subset=["EMPLOYER", "OCCUPATION"],
    inplace=True,
)
sort_amt.head()
CMTE_ID | AMNDT_IND | RPT_TP | TRANSACTION_PGI | IMAGE_NUM | TRANSACTION_TP | ENTITY_TP | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | OTHER_ID | TRAN_ID | FILE_NUM | MEMO_CD | MEMO_TEXT | SUB_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
990582 | C00571703 | N | M8 | P | 202008209266851913 | 10 | IND | MELLON, TIMOTHY | SARATOGA | WY | 823311500 | SELF-EMPLOYED | INVESTMENTS | 7092020 | 10000000 | NaN | SA11A.15446 | 1434706 | NaN | NaN | 4090120201833903380 |
990568 | C00571703 | N | M8 | P | 202008209266851908 | 10 | IND | SCHWARZMAN, STEPHEN A. | NEW YORK | NY | 101543302 | BLACKSTONE | CHAIRMAN & CEO | 7012020 | 10000000 | NaN | SA11A.15411 | 1434706 | NaN | NaN | 4090120201833903366 |
988418 | C00547349 | N | M8 | P | 202008209266445875 | 10 | IND | STEYER, THOMAS F. | SAN FRANCISCO | CA | 941049007 | FAHR, LLC | FOUNDER | 7012020 | 3479294 | NaN | VNVNVHN8SQ0 | 1434668 | NaN | NaN | 4082920201831239483 |
1001457 | C00495028 | N | M8 | P | 202008209266639943 | 10 | IND | SIMONS, JAMES H. | NEW YORK | NY | 100107007 | EUCLIDEAN CAPITAL | PRESIDENT | 7152020 | 2500000 | NaN | VN8FNNJW723 | 1434687 | NaN | NON-CONTRIBUTION ACCOUNT | 4090220201833936065 |
860246 | C00620971 | N | M8 | P | 202008209266126372 | 10 | IND | STEYER, THOMAS | SAN FRANCISCO | CA | 9.41045e+08 | FAHR LLC | PHILANTHROPY AND ADVOCACY | 7242020 | 2500000 | NaN | VSH7WMSTV40 | 1434556 | NaN | NaN | 4090120201833903301 |
# Summarise the employers of contributors whose occupation is exactly 'EXECUTIVE'.
sort_amt.loc[sort_amt['OCCUPATION'] == 'EXECUTIVE', 'EMPLOYER'].describe()
count 6193 unique 2349 top SOUTHERN CA EDISON freq 215 Name: EMPLOYER, dtype: object
# Keep rows whose employer value has already appeared earlier in the frame
# (duplicated() leaves the first occurrence unmarked), excluding the
# 'NOT EMPLOYED' and 'RETIRED' placeholder values.
df_newdup = df[
    df['EMPLOYER'].duplicated()
    & ~df['EMPLOYER'].isin(['NOT EMPLOYED', 'RETIRED'])
]
df_newdup.head()
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | |
---|---|---|---|---|---|---|---|---|---|
7 | C00706333 | ALVAREZ, JACK | TRACY | CA | 95304 | ALVAREZ FARMS, INC. | PRESIDENT | 9302020 | 2300 |
8 | C00706333 | ALVAREZ, JACK | TRACY | CA | 95304 | ALVAREZ FARMS, INC. | PRESIDENT | 9302020 | 200 |
13 | C00431932 | COOPER, DAVID | NEW BRAUNFELS | TX | 78132 | OVINTIV SERVICES INC. | DRILLING COORDINATOR | 6302020 | 104 |
14 | C00431932 | CURRAN, KENT | LITTLETON | CO | 80127 | OVINTIV SERVICES INC. | SENIOR LAND NEGOTIATOR | 6302020 | 20 |
15 | C00431932 | DARLINGTON, BRUCE | SPRING | TX | 77379 | OVINTIV SERVICES INC. | SR. MANAGER, DRILLING & COMPL | 6302020 | 50 |
# Distinct employer values among the repeated-employer rows.
{*df_newdup['EMPLOYER']}
{'CONSULTANT', 'BANNER HEALTH', 'YOUR PART-TIME CONTROLLER LLC', 'WRIGHT COLLEGE', 'II-VI INC.', 'AREAS APPRAISERS INC', 'SAIONTZ & KIRK, P.A.', "UCSF BENIOFF CHILDREN'S HOSPITAL OAKLA", 'NICKELSPORN &LUNDIN PC', 'LOCUS IMPACT INVESTING', 'GRAVLEE HOMES INC.', 'CENTURY CONTRACTORS', 'ONPOINT MARKETING INC.', 'GEORGE FOX UNIVERSITY', 'TURN IT OVER CLEANING', 'COLLIERS', 'PRIDGEON AND CLAY, INC.', 'STERLING REALTORS', 'MAINE STATE CU', 'CIVIL LIBERTIES LIST', 'CALIFORNIA', 'AMERICAN CONCRETE', 'GARRISON PC', 'SCHOOL OF ART INSTITUTE OF CHICAGO', 'WASHINGTON STATE HOSPITAL ASSOCIATION', 'JFNNJ', 'NAVAIR SETA (HOFFMAN ENGINEERING)', 'PEACH & LILY', 'EATON SALES', 'MILWAUKEE NEPHROLOGI', 'ADELPHI TECHNOLOGY INC.', 'DEANE DANCE CENTER', 'ODESSA FENCE', 'NXP SEMICONDUCTOR', 'NATIONAL FLATBED LLC', 'T.T.DUNPHY', 'KB DEVELOPMENT', 'SM CONSULTANT', 'PNWRCC', 'STORCH AMINI PC', 'MONTOGOMERY COUNTY GOVERNMENT', 'NIKE, INC', 'CAPE ELECTRIC', 'GD MISSION SYSTEMS INC', 'AKIN GUMP ET AL', 'MINUTEMAN POWER SERVICES LLC', 'BOS DAIRY, LLC', 'SILICONES PLUS INC', 'COCA-COLA CONSOLIDATED, INC.', 'CENTERSTONE', 'A. 
LEE KIRK ATTORNEY AT LAW', 'LMR FREIGHT', 'FOLEY ABBOTT LLC', 'WIND RIVER TRANSPORT', 'WARNER BROTHERS', 'MOSES & SINGER', 'STACKBIT', 'ROWPAR PHARMACEUTICALS', 'ASURINT', 'GUBB & BARSHAY', 'TYLER TECHNOLOGIES', 'YALE UNIVERSITY', 'CUNY / ISLG', 'OSCARRENDA CONT', 'HAHN & HAHN LLP', 'SBT', 'A BETTER CHANCE FOR OUR', 'GARMIN INTERNATIONAL', 'CA-LOTTS CREDIT & CAR SALES', 'EXXONMOBIL PRODUCTION US', 'BOSTON SYMPHONY ORCHESTRA', 'DECADES OF WHEELS LLC', 'SPORTS ROCKET INC', 'CTVHCS', 'ENTERTAINMENT ONE', 'GIDEON INFORMATICS INC', 'CRAYOLA LLC', 'ENERBANK USA', 'MAXIM CRANE WORKS, LP', 'UMASS MEDICAL SCHOOL', 'GROSSMAN IRON ANS STEEL CO', 'US CONCRETE, INC.', 'MARTIN M RON ASSOCIATES', 'OAKTON COMMUNITY COLLEGE', 'SANTA CRUZ IHSS', 'CENTRAL PACIFIC BANK', 'ICON VALUATION', 'CURETON MIDSTREAM', 'FIS GROU0', 'A TUMBLING T RANCHES', 'THOMAS MEDIA GROUP LLC', 'XCEL ENERGY', 'GSSM', 'I.S. ENVIRONMENTAL PROTECTION AGENCY', 'REESE NURSING SERVICE 51', 'IMPLUS LLC', 'MOORE CAPITAL MANAGEMENT', 'DONKAGELE FARMSINC.', 'PPMM', 'TTA APPRAISAL', 'CENTENNIAL INSURANCE AGENCY', 'KISABETH FURNITURE', 'INSTA LUBE PH CORP', 'UNVERSITY OF ALABAMA BIRMINGHA', "ST. CATHERINE'S SCHOOL", 'TATOOSH SEAFOODS', 'EMERGENCY PHYSICIAN', 'MERRILL BANK OF AMERICA', 'ONEAL AND ASSOCIATES', 'METROPOLITAN TRANSPORTATION AUTHORITY', 'NATHAN LITTAUER HOSPITAL', 'RETIRRD', 'ROPER ST FRANCIS HEALTHCARE', 'JGNEIL', 'PD&C', 'CVE', 'PIRE', 'SELF EMPLOYED - WOMAN OWNED SMALL BUSI', 'CUSHEES INC.', 'BEALLS', 'VALLEY PHYSICIANS ALLIANCE', 'FRIENDSHIP HOUSE', 'PATERSON CITY', 'INFO TECH, INC', 'ROSENDIN ELECTRIC', 'MCDERMOTT', 'GCCMHC', 'LOCKHART WORK PROGRAM FACILITY', 'FOOD LION, LLC', 'NTESS, LLC', 'ETRN - WAYNESBURG', 'UPPER IOWA UNIVERSITY', "HOM SOTHEBY'S", 'BAC LOCAL 01 MN', 'MURRAY IND', 'ARVEST BANK', 'GRIFFIN ELECTRIC.INC.', 'LAND TITLE', 'SAN PASQUAL BAND OF MISSION INDIANS', 'BLOOMER BIOTECH', 'GEORGIA-PACIFIC WOOD PRODUCTS LLC', "ST. 
ANN'S WAREHOUSE", 'COLTON JOINT UNIFIED', 'WINGATE WEST SPRINGFIELD', 'INSIGHTSQUARED', 'WASATCH DISTRIBUTING CO', 'LOGISTICS HEALTH INC', 'HOMES ARE US INC', 'MANPOWER', 'LOUISIANA ORTHOPEDIC SPECIALISTS', 'BHATE CONSTRUCTION', 'CORNUCOPIA CRUISE LI', 'WAKE FOREST', 'SALT RIVER PROJECT', 'ADVANCE FIRE SYSTEMS INC', 'THE WINDWARD SCHOOL', 'LIBERTY BANK', 'FAITH BAPTIST CHURCH', 'MORRIS AUTOMOTIVE MACHINE', 'SACTO. PUB. LIBRARY JOINT POWERS AUTH.', 'TRAILWEST BANK', 'A-1 AFFORDABLE SIGN CO.', 'TUMAC LUMBER CO', 'PINECONE APARTMENTS', 'APR SOULTIONS', 'VBCPS', 'QUORA, INC.', 'KOCH BUSINESS SOLUTIONS, LP', 'DRIGGERS SCHULTZ & HERBST', 'SVB FINANCIAL GROUP', 'SERRA & GARRITY PC', 'BUSINESS PERFORMANCE INC.', 'RTI-HS', 'HIGHLAND EXCAVATION', 'AMOS WILKINSON, CRNA', 'COMMUNICATIONS DIRECTOR', 'THE LIGHT SOURCE INC', 'MULLALLY DEVELOPMENT', 'SILICON LABS', 'BERNDT CPA LLC', 'CAREY INTERNATIONAL', 'ANJALEONI ENTERPRISES INC', 'HAMPSHIRE', 'AMICA', 'LIVINGMIND PROJECT, INC.', 'NICOR', 'STAR BODY AND PAINT', 'TARANTINO AUTO BODY', 'FPSR', 'AUTOMATE ASSOCIATES', 'DEMOCRATIC NATIONAL CONVENTION COMMITT', 'HOME & OFFICE CABINETRY', 'NUCOR STEEL FLORIDA INC.', 'THE PROPERTY SHOP', 'HOPKINS SCHOOL', 'SCRUBS ETC', 'ROCHESTER COMMUNITY SCHOOL DIS', 'BHE RENEWABLES, LLC', 'COMSEWOGUE SD', 'ZOGENIX INC.', 'NATIONAL AQUARIUMN', 'KIESEL LAW LLP', 'UNITARIAN UNIVERSITY', 'POSEF', 'CHENHALL SERVICES', 'STILLWATER PUBLIC SCHOOLS', 'GARCIA MARBLE & TILE', 'HENDERSON ENGINEERING CO.', 'ALLIANZ OF AMERICA CORP', 'FERMAN BMW', 'BRISBANE SCHOOL DISTRICT', 'DAWSON HOLDINGS INC', 'U. S. DEPT OF VETERANS AFFAIRS', 'EARLES ARCHITECTS AND ASSOCIATES', 'BLUFF POINT ASSOCIATES', 'OVESCO', 'RYAN COYLE', 'AMERICAN ENTERPRISE INV. 
SRVCS', 'VISITING NURSE ASSOCIATION', 'SMG', 'ASHNU INTERNATIONAL INC', 'MOLDEX METRIC', 'ROSWELL PARK CANCER INSTITUTE INC', 'PECCAINC', 'COEUR ALASKA', 'MRA LABRATORIES', 'PETERBOROUGH PLAYERS', 'AMERESCO', 'SUNY DOWNSTATE', 'BCBS', 'S M STOLLER', 'REAL ESTATE DEV CO', 'BAPTIST HEALTH', 'JONATHAN D. SALK M.D.', 'ALPHAPORT', 'PRECISION AUTOMOTIVE PLASTICS', 'CITY OF RIALTO', 'UMIVERSITY PF DENVER', 'SAN JUAN COLLEGE', 'SPENCER STUART', 'CHICAGO AREA LECET', "WOMEN'S RESOURCE CENTER", 'BAKER PERKINS', 'BOE REAL ESTATE', 'L.A.BELL MOTOR LINES INC.', 'CAPGEMINI AMERICA', 'ORION ENGINEERING CONSTRUCTION', 'GOSHEN FAMILY PHYSICIANS', 'ORANGE VILLAGE', 'SO TEX EXTERM', 'AIR PRODUCTS', 'MEDICAL GROUP', 'BOSTON CAPITAL', 'FOX NEWS NETWORK LLC', 'LSPM', 'SUPERMICRO COMPUTER INC', 'REDD REALTY', 'CUMMINS INC.', 'CAREY PERKINS', 'RHAMILTON CONSULTING', 'UCS', 'SAINT MARYS COUNTY PUBLIC SCHOOLS', 'NYSOMS', 'CODESTREAM INC.', 'CONNER MKTG SALES', 'BURGERBUSTERS INC', 'NEUROCRINE', 'FIRST AMERICAN', 'DURDEN CONSTRUCTION', 'TRUCKERS INSURANCE ASSOCIATES, INC.', 'YOUNG SOMMER', 'BERNARDS APPRAISAL ASSCOCIATES', 'C.L. BARNHOUSE CO.', 'FIVES MACHINING SYSTEMS', 'RDO', 'NYEMASTER GOODE PC', 'UNVERSITY OF COLORADO BOULDER', 'JIM DOYLE & ASSOCIATES', 'POLING & CUTLER', 'URIST FINANCIAL AND RETIREMENT PLANNIN', 'COUNCIL FOR RESPONSIBLE NUTRIT', 'USD 289', 'ICANN', 'VAPOTHERM', 'SMITHFIELD FOODS', 'CROCKETT PROPERTIES', 'CELEBRATION CHURCH', 'COASTAL RESOURCES', 'PALM BEACH COUNTY FIRE RESCUE', 'TEK SYSTEMS', 'WABASH VALLEY POWER ASSN., INC.', 'KAIFER INS', 'CENTRA', 'PBS MENTAL HEALTH ASSOCIATES', 'FYZICAL', 'META HOUSING CORPORATION', 'FLATIRON WORKS', 'CENTER FOR ECONOMIC DEVELOPMENT LAW', 'OMAHA PUBLIC SCHOOL', 'CONSTELLATION', 'WESTERRA CREDIT UNION', 'BREYMAN PROPERTIES', 'XXX', 'HMHP', 'MARY KAY INC', 'THE STANDARD', 'U OF UTAH HEALTH HOSPITALS AND CLINICS', 'TAKEDA PHARMACEUTICALS U.S.A. 
INC.', 'MCDERMOTT WILL & EMERY', 'AYA HEALTHCARE', 'GRAMBLING STATE UNIVERSITY', 'DUKE CUSTOM FABRICATION', 'TETRATECH', 'DAI', 'AVIANDS', 'FIDES LLC', 'EDUCATION FIRST FCU', 'CEM', 'BHG RAND REALTY', 'COMPOSITE & CASTING SUPPLY INC', 'DESIGN VITTORPIA LLC', 'MAC ARTHUR FOUNDATIO', 'LA CANADA WEST', 'BARJAC INC', 'MORRIS DEV', 'BROOKS, WILBURN, & LOGAN CO', 'SALVATION ARMY AND', 'BRAUN & BRAUN', 'BUCHER CHRISTIAN', 'VERITIV CORP', 'NANSEMOND PRE-CAST', 'JORDAN SCHOOL DISTRICT', 'CENTERSTAGE PRODUCTIONS', 'BTCO, INC.', 'SALEM CLINIC', 'RBC WEALTH MANAGEMENT', 'EMMANUEL MEDICAL', 'COMMUNITY GROUP INC', 'FINANCIAL BROKERAGE', 'SWISHER INTERNATIONAL, INC.', 'OPSALESINC', 'EXELTECH CONSULTING INC', 'OHIO CONFERENCE OF COMMUNITY DEVELOPME', 'THE CHAPIN SCHOOL', 'PHILLIP SAN SEBASTIAN', 'STATE OF VERMONT', 'RICK HAMM CONSTRUCTION', 'TIMMONS SHEET METAL', 'TVHO', 'UNITED TEACHERS LOS ANGELES', 'ST JOSEPH', 'WORCESTER PUBLIC SCHOOLS', 'LORDS VALLEY SELF STORAGE', 'FPN', 'MOUNT SINAI HOSPITAL MANHATTAN', 'KIDS DEVELOPMENTAL THERAPY', 'VETERANS AFFAIRS', "MY FRIEND'S PLACE", 'PAINT WIZARDS INC.', 'EDG CONSULTING ENGINEERS', 'FINISH KARE PRODUCTS', 'E-DEVELOPMENT INTERNATIONAL', 'JAMES F STEARNS CO', 'NUMERIX', 'PARK NICOLLET CLINIC', 'TUSCOLA ISD', 'INDEPENDENT REPAIR', 'KUMIN INSURANCE GROUP', 'COGHLAN CROWSON LLP', 'PASSAGE TO INDIA', 'PAWNEE HEALTH AND WELLNESS', 'M L BERGER & CO.', 'HP PRODUCTIONS', 'STRIBLING', 'ROBSON COMMUNITES', 'BANKERS FINANCIAL CORP', 'PEGASYSTEMS', 'AZ STAGE SOUND LIGHTS', 'LAW OFFICE OF DALE WAGNER', 'BRAYTON PURCELL LLP', 'NATIVEENERGY', 'FULTON COUNTY', 'ENCORE', 'ROOFEX', 'GCEI', 'NEW YORK CITY POLICEPENSION FUND', 'AT&T CORP.', 'KIPP DC', 'PARKER REALTY & ASSOCIATES', 'AMA CONSULTING ENGINEERS', 'SCORP GROUP INC.', 'VILLAGE SUPERMARKETS DBA SHOPRITE', 'GREG COLEMAN LAW PC', 'SALESFORCE', 'RAPID CPAP LLC', 'ARTIST', "READ N' POST", 'MONIMEL CORP', 'ORANGE COUNTY COMMUNITY COLLEGE', 'C MYERS CORP', 'LIGHTNING ORCHARD', 'CUNNINGHAM JEWELERS', 
'FRANKLIN MUTUAL INSURANCE COMPANY', 'PCSD', 'DOCTOR', 'CDFW', "ST. DUNSTAN'S ANGLICAN CHURCH", 'ACME SUPERMARKET', 'MENARDS', 'CLAREMONT', 'LAWSON, DAVIS, PICKREN & SEYDEL', 'CHRISTIAN LEADERS INSTITUTE', "SJOERD'S PRO TOOLS", 'WHITE HILL CHURCH OF BRETHREN', 'BURNS MCDONNELL ENGINEERING COMPANY I', 'MATANKY', 'WOMBLE BOND DICKINSON (US) LLP', 'LUIMAN REAL ESTATE INC', 'HERZOG TECHNOLOGIES, INC.', 'PHILIPS HEALTH SYSTEMS', 'BENDER ENGINEERING', 'MEV', 'FOX VALLEY IMAGING', 'METROPOLITAN BAPTIST CHURCH', 'ROSEMOUNT CENTER', 'GREATER LAWRENCE TECH SCHOOL', 'RE/MAX REALTY ASSOCIATES-CHA', 'MORRISON FOERSTER', 'THE CARLYLE GROUP INC.', 'SENATOR LEW FREDERICK', 'HUNGRY PLANET INTELLIGENCE', 'MULLIGAN SECURITY COMPANY', 'SNC-LAVALIN', 'BSC', 'PRA', 'CLEAN WATER OF VA', 'ASA STAFFING', 'M/E ENGINEERING', 'SERVICE EMPLOYEES INTERNATIONAL UNION', 'PRECISIONEFFECT', 'SEAWORLD CALIFORNIA', 'AFSCME CA LOC 3299', 'WILDWOOD', 'GE PLASTICS', 'US TRANSPORTATION', 'MONTEFIORE MEDICAL CENTER', 'PCG', 'CTS', 'CEDAR FALLS COMM SCHOOLS', 'MERCANTILE BANK', 'THE POKEMON COMPANY INTERNATIONAL', 'FIFTH STREET RENAISSANCE', 'METROPOLITAN NASHVILLE BD OF ED', 'SPRINGETTSBURY TOWNSHIP', 'GETTYSBURG COLLEGE', 'SSES', 'CONTINENTAL AUTOMOTIVE', 'AMERICAN INSTITUTES FOR REASEARCH', 'DEER VALLEY RESORT', 'CARGILLE-SACHER LABS, INC.', 'JP MORGAN', 'CARDIOVASCULAR', 'PERFORMANCE SYSTEMS', 'KLD', 'FLORIDA', 'THE ARLINGTON SLEEP DISORDER CENTER', 'DE WINNE CONSTRUCTION', 'CBRE, INC', 'FISHER PHILLIPS', 'IC MANAGE', 'DELANEY CORPORATE SERVICES', 'HOMESTEAD INC', 'KERING', 'ONEOK FIELD SERVICES COMPANY', 'COWLES PARKWAY FORD, INC.', 'GIM CAPITAL MANAGEMENT', 'STANFORD MEDICAL GROUP', 'KILLIAN &DONOHUE', 'JENSEN TRAVELON', 'WMLM', 'MATTESON MARINE SEV', 'CRAFT COFFEE', 'INSTANT CARE', 'NOT IN WORKFORCE', 'HIGH-MARK SYSTEMS', 'TRINSEO LLC', 'HOYT ARCHITECTS', 'TIVERITY CONSULTING', 'LED SUPPLY', 'MELINDA MOTLAGH', 'CALIFORNIA STATE UNIVERSITY LA', 'UNC CHAPEL HILL', 'CMC CONSTRUCTION', 'G M NORTHRUP 
CORP', 'GROW MARKETING', 'SWISSRAY CUSTOMER CARE LLC', 'GREECE CENTRAL SCHOOL DISTRICT', 'BEVERLY-HANKS & ASSOCIATES', 'ASG REAL ESTATE CO.', 'BACK TO THE PAST', 'CHARLOTTE MECKLENBURG SCHOOLS', "CONNOLLY'S TOWING INC", 'UNIVERSITY OF PITTSBURGH SCHOOL OF MED', 'DOCTORS MAKING HOUSECALLS', 'MINITAB', 'HDR ARCHITECTURE INC.', 'NAR', 'THE MONEY STORE', 'LAMAR STATE COLLEGE - PORT ARTHUR', 'GGUSD', 'SHERATON UNIVERSAL HOTEL', 'STACY AND BAKER LAW', 'GJAC', 'LOBIS TECHNOLOGY CONSULTANTS LLC', 'ACCRUENT', 'CANCIO NADAL & RIVERA LLC', 'OLD VINE MANAGEMENT GROUP', 'NATIONAL PATIENT ADVOCATE FOUNDATION', 'GARNET VALLEY SCHOOL DISTRICT', 'GUARANTEE INS AGCY', 'TRINITY CONSULTANTS', 'COOK COUNTY OF IL', 'AONL', 'NOSSAMAN LLP', 'BREAD FOR THE WORLD', 'FNC', 'NORTH SHORE SENIOR CENTER', "HAY'S", 'SELF ORIGINAL ARTISTS NYC', 'POWER SUPPLY', 'WIDGEON MGT CORP', 'RADIAN GUARANTY INC.', 'JENISON PUBLIC SCHOOLS', 'A PITTSBURGH PLUMBER LLC', 'PENASQUITOS PET CLINIC', 'NEA FED. GOVT. AGENCY', 'MA LEAGUE OF CHCS', 'STATE FARM INS.', 'KANYEZI AFRICA SAFARI', 'UFCW LOCAL NO. 
328', 'ABLE ELECTRICAL SVC.', 'KAREN G BINDER', 'VALLEY EMERGENCY CARE', 'SUMMIT REHAB UPMC', 'THE FLORIDA AQUARIUM', 'BRUCE LEE', 'SOUND COMMUNITY SOLUTIONS', 'FOOD SCIENCES CORP.', 'JOHN MORRELL & COMPANY', 'UN ENVIRONMENT PROGRAMME', 'JJ MARQUIS ELECTRIC', 'COMMUNITIES ACTIVELY LIVING INDEPENDEN', 'USONIAN REALTY', 'ZUMIEZ', 'ROYAL FLEX CIRCUITS', 'COMMERCEHUB', 'GENESIS MEDICAL CENTR', "CHILDREN'S HOSPITAL BOSTON", 'INDATA CORPORATION', 'EPIC LLC', 'AUDERE PARTNERS', 'CLARK CONSTRUCTION', 'RJH SCIENTIFIC INC', 'TBWBHL', 'MUNGER TOLLES & OLSON', 'HERE', 'SAP NATIONAL SECURITY SER', 'FORTINET', 'CATHERINE WILCOX DDS', 'HEMCON MEDICAL TECHNOLOGIES INC', 'RAYA RADIOLOGY', 'BROWNSTEIN HYATT FARBER SCHRECK', 'BLRG', 'BASD', 'PARIS BRIDGE ACADEMY', 'HOME FURNITURE', 'JDS&A ADVISORS', 'NVI', 'DISNEY ANIMATION STUDIOS', 'TELLIGENT MASONRY LLC', 'REI', 'HOLLYWOOD CASINO', 'SAPPHIRE COMPUTERS INC.', 'SEABULK TANKERS, INC.', 'TAURIAINEN ENGINEERING', 'SIMPLYEZ HDM LLC', 'LAFAYETTE GENERAL HEALTH', 'WELLTOWER, INC.', 'KIRKLAND AND ELLIS', "CABELA'S INC.", 'VJSTURDIVANTINC', 'GARDEN CITY SCHOOLS DIST', 'SEARIVER MARITIME INC', 'MADISON FIRE DEPT.', 'POWERS MUSIC SCHOOL', 'LA MESA SPRING VALLEY SCHOOLS', 'BHHS REAL ESTATE', 'NEW ENGLAND GRANITE MARBLE', 'CAPROCK DAIRY', 'RACHIO', 'MCPHEE PLUMBING', 'TERRE HAUTE HEART CENTER', 'MORENO', 'SENTINELONE', 'BERRY PLASTICS', 'COSTAL CONNECTION', 'GLOBAL VILLAGE ACADEMY', 'MARK WINKLER', 'PENN STATE UNIVERSITY', 'COLUMBIA MUTUAL INSURANCE COMPANY', 'BOB BARKER', 'HATCHERY PLANNING', 'UMECO', 'COMMERCIAL DEVELOPER', 'TRUCK-TECH', 'NEDERLANDER ORGANIZATION', 'MURRAY & MURRAY', 'WJW ARCHITECTS', 'HOLMES MURPHY', 'PEOPLE READY', 'COLUMBUS STATE UNIVERSITY', 'CARAHSOFT', "FEDERAL GOV'T", 'REIW CONSULTING LLC', 'I&CO', 'CHURCH OF. 
HRIST', 'OCCUCARE INTERNATIONAL', 'BP AMERICA', 'TEMPEST CAPITAL LTD', 'WEST LAFAYETTE COM SCHOOL CORP', 'ALEXANDRIA REAL ESTATE', 'JHU/APPLIED PHYSICS LAB.', 'DESERT HOUSE OF PRAYER', 'UPDEGRAFF CLINIC', 'SHANTI POOLS LLC', 'MPL', 'GLENN MITCHELL INSURANCE', 'SWITCHBACK TRAVEL LLC', 'PUBLIC EDUCATION', 'SELFEMPLOMENT', 'BCD MEETINGS & EVENTS', 'COLUMBIA PRESBYTERIAN HOSPITAL', 'SANOFI PASTEUR', 'KOPPEL AND SCHER', 'APEX-STUDIO SUAREZ', 'DEPT OF THE AIR FORCE', 'DURANGO', 'IRAD SERVICES LLC', 'WINGATE AT WEST SPRINGFIELD', 'LWV-DENVER', 'DOSS REALTY GROUP', 'CAPSTAR ADVISORS', 'SCHOOL CITY OF HAMMOND', 'NORBORD', 'FAMILY HERITAGE', 'TRACTOR SUPPLY OMPANY', 'JAMESTOWN ASSOCIATES', 'PIEDMONT TRIAD ANESTHESIA, PA', 'LIONS SHARE FCU', 'LOCKARD, INC.', 'GREENFIELD POWER EQUIPMENT, INC.', 'LOCHEED MARTIN', 'NUCOR STEEL AUBURN, INC.', 'SLMC', 'HANES INC.', 'OHHP', 'LANCASTER GENERAL HE', 'TELEPHONICS SYSTEMS ENGINEERING GROUP', 'INTEGRA', 'RESMED', 'DISCOVERY INSTITUTE', 'STOCKHOLM UNIVERSITY', 'CENTURY 21 MEYER', 'JACKSONLEWIS(PARTNER)', 'US DOT', 'WOODS PRECISION PRODUCTS', 'ENGINEWORLD LLC', 'THE KIRLIN COMPANY', 'W.A. HYNES & CO.', 'MORRISON & FOERSTER, LLP', 'SYMMETROCM', 'AUBURN HOUSING AUTHORITY', 'CALPINE', 'TOTAL E&P USA', 'ECONOMIC POLICY INSTITUTE', 'NEVADA STATE MUSEUM', 'HUNTER COLLGE', 'CITY OF HOUSTON', 'COLORADO CARE ASSISTANCE', 'BEAUREGARD ELECTRIC CO-OP, INC.', 'DF LEVIN ASSOCIATES', 'SOCIAL CAPITAL GROUP LLC', 'MRS.', 'WIT CREEK PARTNERS', 'SHONDALAND', 'NETSAGE', 'BGR, INC.', 'VERIZON CORP', 'FRIENDS SCHOOL OF BALTIMORE', 'TAYLOR CORPORATION', 'KAMIN IND', 'PROVIDENCE ANESTHESIOLOGY ASSOCIATES,', 'TTUHSC', 'VERRILL DANA, LLP', 'EL CAMINO COLLEGE', 'METROMILE', 'ROPER AND ROPER', 'IGLER/PEARLMAN PA', 'PROQUEST', 'MIRAGE FINE FOODS, INC.', 'AMSTED INTERNATIONAL', 'SOUTHWEST FAMILY GUIDANCE CENTER', 'CITIZENS MEDICAL CENTER', 'FRESNO STATE', "ST. 
MARY'S UNIVERSITY", 'BLUE HERON WELLNESS', 'RINGCENTRAL', 'RUST COLLEGE', 'NEXTEER', 'VOL STATE CC', 'PEOPLES GROUP SELF-EMPLOYED', 'RIA', 'VIMAR', 'GREATSCAPES', 'DAILY JOURNAL', 'GOULD KILLIAN CPA GROUP', 'FREDRICK MANAGEMENT', 'STRONGHOLD', 'GENISIS HEALTHCARE', 'DEMOCRATIC INTELLIGENCE', 'STADIUM TOYOTA', 'LB CONSOLIDATED', 'THE STATE BANK OF FARIBAULT', 'U.S. AGENCY FOR INTERNATIONAL DEVELOPM', 'COOK COUNTY', 'SPARKS WILLSON PC', 'GDK CONSTRUCTION', 'US GOVT ACCOUNTABILITY OFFICE', 'CENTRA INC.', 'LAWWA', 'VERITE', 'MOLZEN CORBIN', '831 DON CUBERO AVE', 'IOWA TALENTED AND GIFTED ASSOCIATION', 'THE ROADRUNNER PRESS', 'ACME GLASS AND MIRROR', 'HABITAT AMERICA', 'POWERS LAW', 'EXPRESSO', 'CSU SAN MARCOS', 'BWXT', 'PREMIER RADIOLOGY', 'WA STATE NURSES ASSOCIATION', 'TOURISM ASSN.', 'EKLHEALTH LLC', 'RODAN+FIELDS', 'UFCW LOCAL NO. 876', 'FRIENDSHIP ACRES PARK INC', 'MOORE AND VAN ALLEN PLLC', 'SAGE V FOODS', 'DR. SUE CAREY PLLC', 'KELLY AUTOMOTIVE GROUP', 'EDX', 'AMHS', 'ESI TOTAL FUEL MANAGEMENT', 'PRIZE LOGIC', 'WINSTEAD PC', 'MEDSTAR GEORGETOWN UNIVERSITY HOSPITAL', 'COMCAST CORPORATION', 'MOSES & SINGER LLP', 'SANDHILLS COMMUNITY COLLEGE', 'MILLIKEN', 'VA DCR', 'GWATNEY CHEVROLET', 'ORTHOPEDIC SPINE THERAPY', 'BERING STRAITS NATIVE CORPORATION', 'SIKORSKY', 'GREVE FOUNDATION', 'SOLIC', 'LUKE', 'CH ROBINSON', 'UCDAVIS CANCER CENTER', 'JAMS INC.', 'MCCOOL FARM AND CATTLE', 'VASSAR ELECTRIC INC', 'NWP', 'COTRONICS CORPORATION', 'MOVEMENT FOR LIFE', 'GILBERT CONSTRUCTION', 'MOUNT VERNON CITY SD', 'CAS', 'NSWCLA', 'CATHOLIC DIOCESE OF ROCKFORD', 'NAP ENGINEERS', 'DIRECT MARKETING CONCEPTS, INC.', 'FMCSA', 'SCIENTIAE LLC', 'MODA HEALTH', 'FLORIDA HIGH SCHOOL ATHLETIC ASSOC.', 'SUITECX', 'EVANS LAW FIRM, INC.', 'COMMUNITY LEGAL AID SERVICES', 'TONI SHERMAN INTERIORS LLC', 'AGS CONSTRUCTION', 'CCRMC', 'MOLINA HEALTHCARE OF FL', 'REPEAT CONSULTANTS', 'EWL INC.', 'WILMERHALE', 'TOWNSEND REAL ESTATE', 'CENTINEL FINANCIAL GROUP', 'AZARA LLC', 'GEORGETOWN UNIVERSITY LAW 
CENTER', 'CROSSROADS ANESTHESIAP', 'AMPLITY HEALTH', 'IMAGE ONE CORP', 'TRADELINK LLC', 'GIBBON PUBLIC SCHOOLS', 'MERCY FAMILY CENTER', 'SILVERSAND SERVICES', 'CITY OF CHESAPEAKE', 'HOWMET AEROSPACE INC.', 'SOUTHERN TRUCK AND EQUIPMENT', 'UNIVERSITY OF TEXAS MEDICAL BRANCH AT', 'KORDICH CONSTRUCTION', 'ALTSHULER BERZON LLP', 'SUNTRUST ROBINSON HUMPHREY INC.', 'AHRENS COMPANIES', 'HAL SYSTEMS CORP', 'PACIFIC RIM CAPITAL, INC.', 'APF', 'PREMIER ASSET MGMT., INC.', 'TEAMSTERS LOCAL UNION 191', 'ADLER GIERSCH', 'SGF', 'MICHIGAN STATE UNIVERSITY', 'ALLIED BARTON', 'RAINFOCUS', 'D.U.E BRANDS', 'WHEATON COLLEGE NORTON MA', 'GBP CONTRACTING', 'MOORE PUBLIC SCHOOLS', 'AIR TRANSPORT ASSOCIATION, INC', 'SSCI', 'THE RUSSELL GROU UNITED LLC', 'SONOMA COUNTY REGIONAL PARKS FOUNDATIO', 'FTLF', 'DOOR 2 DOOR INCOME INC', 'PROFESSIONAL LOSS ADJUSTERS INC.', 'AMERICAN IRON & ALLOYS', 'LASHLY & BAER P.C.', 'UNIVERSITY OF CALIFORNIA, LA', 'PEARL PROPERTIES', 'MID MICH INS', 'BURROW JAN', 'PATRICIA FLORES', 'WARNER BROTHERS TELEVISION', 'MASSACHUSETTS MUTUAL LIFE INSURANCE CO', 'AMERICAN FEDERATION OF TEACHERS', "ST.PETER'S EPISCOPAL CHURCH", 'RANDOLPH-BROOKS FCU', 'UNIVERSITY OF MASS', 'EQT CORP.', 'HAMILTON CITY SD', 'NAPER ENTERPRISES', 'NEW MEXICO ORTHOPAEDICS', 'BAYVIEW LOAN SERVICING', 'PICASSO TILE', 'TERRY ROBERTS CONSULTING INC', 'CONFLUENCE DISTRIBUTION INC.', 'HBSPECIALTY FOODS', 'AVONWORTH', 'ASSOCIATED UNIVERSIT', 'FAUSTOLLEAN', 'AVMED', 'EJME', 'SUPERIOR AIR GROUND AMBLANCE', 'UBER(RIDESHARE OPERATOR)', 'MAGIC TOUCH PAINTING', 'CITY OF PHOENIX', 'GRANDVIEW RADIOLOGY', 'LUNDEBERG SCHOOL OF SEAMANSHIP', 'SCHEEN&SMITH PSC', 'LIBERAIL KENWORK', 'DUKE ENERGY OHIO, INC.', 'CITY OF HUNTINGTON WOODS', 'SPIRIT PHARMACEUTICALS LLC', 'WILEY,WILSON, INC.', 'MOUNT SINAI WEST', 'THE METHODIST HOSPITAL', 'PRIMERA ENGINEERS', 'TOUR-SARKISSIAN LAW OFFICES LLP', 'SIBCY CLINE', 'C.G. 
REIN DEVELOPMENT CO.', 'LAKE TRUCKING CO.', 'POPE, HARDWICKE', 'AEROSPACE CORPORATION', 'INNOVATIVE THERAPY CONCEPTS INC.', 'ASSOC RADIOLOSISTS', 'RADIANT REFINING', 'CAMPO SANTO PRODUCTIONS LLC', 'KANSAS CITY BALLET', 'NATIONALITIES SERVICE CENTER', 'AIRSWIFT', 'NEW HARVEST MINISTRIES INC.', 'EASTCHESTER FIRE DISTRICT', 'THERMOSEAL', 'ADVANTEDGE', 'NC DEPT. OF PUBLIC SAFETY', 'ACCUSTAR', 'EXECUTIVE ENERGY MANAGEMENT, LLC', 'GPG', 'IMEX MEDIA', 'NTP', 'SP MANAGEMENT', 'BROWN CAPITAL MANAGEMENT', 'CIGNA DENTAL HEALTH, INC.', 'CSI COMPANIES', 'OHIO EQUITIES INC.', 'THE RUBY BRINK', 'MVWSD', 'HEALTH CARE SERVICE CORP', 'GREAT PLAINS TECHNOLOGY CENTER', 'NEW TEACHER CENTER', 'ANYTIME PLUMBING INC', 'CALVO ENTERPRISES', 'ARCHDIOCESE OF NEWARK', 'UNIVERSITY OF DELAWARE THEATRE DEPARTM', 'GREEN HASSON JANKS', 'OAKLEIGH LTD.', 'UNIVERSITY OF BRISTOL', 'POLSINELLI', 'CHRISTIAN WORSHIP CENTER', 'BILL BRAVO AUTOMOTIVE PORTRAITS', 'JOHN DEERE FINANCIAL', 'CONDOMINIUM MGMT SVCS', 'SALTCHUK', 'JUST FOR SHOW INC.', 'OXFORD UNIVERSITY PRESS', 'CHARLES J GARRISON', 'LAWRENCE MEMORIALS HOSPITAL', 'JACKSON HEALTHCARE', 'SIERRA PACIFIC', 'NEW SOUTH RIVER BAPTIST ASSO', 'UNIVERSITY OF MAINE', 'ALPHA ELECTRIC CO', 'KEYIMPACT', 'IL. DEPT OF HUMAN SERVICES', 'PANJIVA', 'FACIAL PLASTIC SURGERY ASSOCIATES', 'GREEN MOUNTAIN TREATMENT CENTER', 'CINTERRA GROUP', 'NIWCC', 'SOLTAGE LLC', 'PEPSI COLA', 'RLA NATIONAL REHABILITATION CENTER', 'CARE HAWAII', 'IVAN & DAUGUSTINIS', 'ALLIANCE RADIOLOGY', 'UNIV. OF CALIFORNIA', 'PARKVIEW COMMUNITY HOSPITAL', 'SPORTS LEICHT RESTORATIONS INC', 'NONE RETIRED', 'RENVYLE PARTNERS', 'PORT APARTMENTS', 'ECD', 'MO. 
DMH DD', 'DECAHEALTH', 'NESS INC', 'NJ DEPARTMENT OF HEALTH', 'AV INC.', 'ALLIED UNIVERSAL SECURITY SERVICES', 'ONI RISK PARTNERS', 'GROVEPORT MADISON', 'CAMBREX CHARLES CITY INC.', 'GILROY UNIFIED SCHOOL DISTRICT', 'MJUSD', 'ILCJA&TP', '4J ENERGY LLC', 'HIGHLINE MEDICAL CENTERE', 'SHIELD RESTRATINTS', '8 MILE FARM', 'R DIXON SPEAS ASSOCIATES, INC.', 'RED HOT AHIR', 'UNITY', 'EAST BATON ROUGE PARISH SCHOOL', 'GP', 'CHOATE HALL & STEWART', 'THE PATRIOT FINANCIAL GROUP, LLC', 'ANALYSIS GROUP', 'MORRIS JAMES LLP', 'MORRIS TEAM REALTY, LLC', 'MAYER BROWN LLP', "LABORERS' LOCAL 225", 'FREEDOM MOBILITY', 'DOCTORS FOR EMERGENCY SERVICES', 'CUSTOM VAULT CORP', 'XDSI', 'YCSD', 'REHAB WITHOUT WALLS', ...}
# Number of distinct employer values among the repeated-employer rows.
len({*df_newdup['EMPLOYER']})
65420
# df_newdup was produced by boolean indexing on df, so mutating it in place
# raises SettingWithCopyWarning. Rebind the name to the filtered result
# instead of using inplace=True.
df_newdup = df_newdup.dropna(subset=["EMPLOYER"])
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy """Entry point for launching an IPython kernel.
# Select every contribution whose EMPLOYER text contains
# 'AEROSPACE CORPORATION'. This is a substring match, so it also picks up
# other employers whose name merely contains the phrase
# (e.g. 'KAMAN AEROSPACE CORPORATION').
df_aero = df_newdup.loc[
    df_newdup['EMPLOYER'].str.contains('AEROSPACE CORPORATION')
]
df_aero
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | |
---|---|---|---|---|---|---|---|---|---|
136824 | C00703975 | DAVIS, LORRIE | LOS ANGELES | CA | 900561529 | THE AEROSPACE CORPORATION | SENIOR PROJECT ENGINEER | 7112020 | 20 |
150573 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7242020 | 200 |
150574 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7292020 | 200 |
191693 | C00703975 | YOUNG, KAROLYN | REDONDO BEACH | CA | 9.02771e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7252020 | 250 |
201669 | C00703975 | STUTTERHEIM, KENNETH B. | PASADENA | MD | 2.11223e+08 | THE AEROSPACE CORPORATION | ENGINEERING SPECIALIST | 7162020 | 250 |
246960 | C00703975 | JAGER, AMY | INDIAN HARBOUR BEACH | FL | 329373526 | THE AEROSPACE CORPORATION | ENGINEER | 7272020 | 15 |
404391 | C00193433 | SIMPSON, MARK M. MR. | LONG BEACH | CA | 90808 | THE AEROSPACE CORPORATION | ENGINEER | 7292020 | 200 |
493316 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7232020 | 10 |
541906 | C00000935 | ALVAREZ, MANUEL | SAN PEDRO | CA | 9.07311e+08 | AEROSPACE CORPORATION | ENGINEER | 7232020 | 35 |
547077 | C00000935 | GUNAY, DEVIN | LOS ANGELES | CA | 9.00347e+08 | THE AEROSPACE CORPORATION | SOFTWARE ENGINEER | 7132020 | 40 |
585671 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7042020 | 25 |
625961 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 8.09114e+08 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 7302020 | 100 |
627176 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7262020 | 10 |
920917 | C00010603 | MERRILL, ALBERT W | VENICE | CA | 90291 | THE AEROSPACE CORPORATION | ENGINEER | 7262020 | 25 |
922676 | C00010603 | ESLINGER, SUELLEN | REDONDO BEACH | CA | 9.02782e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7262020 | 300 |
934612 | C00010603 | MERRILL, ALBERT W | VENICE | CA | 90291 | THE AEROSPACE CORPORATION | ENGINEER | 7312020 | 25 |
936453 | C00010603 | MERRILL, ALBERT W | VENICE | CA | 90291 | THE AEROSPACE CORPORATION | ENGINEER | 7312020 | 25 |
938548 | C00010603 | MERRILL, ALBERT W | VENICE | CA | 90291 | THE AEROSPACE CORPORATION | ENGINEER | 7262020 | 12 |
946916 | C00010603 | BYERS, MARK | SAN DIEGO | CA | 9.21096e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7122020 | 65 |
947929 | C00010603 | FRICKS, KATHRYN | GREENBELT | MD | 2.07704e+08 | AEROSPACE CORPORATION | ENGINEER | 7312020 | 500 |
992812 | C00484642 | ALVAREZ, MANUEL | SAN PEDRO | CA | 907311416 | AEROSPACE CORPORATION | ENGINEER | 7242020 | 50 |
1014099 | C00484642 | ALVAREZ, MANUEL | SAN PEDRO | CA | 907311416 | AEROSPACE CORPORATION | ENGINEER | 7152020 | 75 |
1107728 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 |
1109284 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 |
1157904 | C00003418 | BAUER, SPENCER J. MR. | EL SEGUNDO | CA | 902453728 | AEROSPACE CORPORATION | DIRECTOR | 6302020 | 50 |
1213364 | C00696526 | HOLLANDER, SIDNEY | GLENDALE | AZ | 853180038 | AEROSPACE CORPORATION | ENGINEER | 7092020 | 250 |
1254043 | C00401224 | WHITE, RUSSELL | FAIRFAX | VA | 220305208 | THE AEROSPACE CORPORATION | SCIENTIST | 6302020 | 100 |
1363622 | C00126847 | SMITH, DARLENE | CHARLESTOWN | RI | 02813 | KAMAN AEROSPACE CORPORATION | VP GM AIR VEHICLES | 6302020 | 100 |
1460070 | C00694323 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 |
1507410 | C00694323 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 |
# Join the Aerospace Corporation contributions to df_merge on the committee id
# (inner join by default, so contributions to committees that are absent from
# df_merge are dropped).
# The left frame is df_aero, the filter result built just above; the previous
# name `df_bob` is not defined by any cell in this section and would fail on a
# clean top-to-bottom run.
# NOTE(review): df_merge is built outside this section — presumably the
# committee-to-candidate linkage table; verify.
df_aero_merge = pd.merge(df_aero, df_merge, on='CMTE_ID')
df_aero_merge
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | DAVIS, LORRIE | LOS ANGELES | CA | 900561529 | THE AEROSPACE CORPORATION | SENIOR PROJECT ENGINEER | 7112020 | 20 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
1 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7242020 | 200 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
2 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7292020 | 200 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
3 | C00703975 | YOUNG, KAROLYN | REDONDO BEACH | CA | 9.02771e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7252020 | 250 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
4 | C00703975 | STUTTERHEIM, KENNETH B. | PASADENA | MD | 2.11223e+08 | THE AEROSPACE CORPORATION | ENGINEERING SPECIALIST | 7162020 | 250 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
5 | C00703975 | JAGER, AMY | INDIAN HARBOUR BEACH | FL | 329373526 | THE AEROSPACE CORPORATION | ENGINEER | 7272020 | 15 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
6 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7232020 | 10 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
7 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7042020 | 25 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
8 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 8.09114e+08 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 7302020 | 100 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
9 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7262020 | 10 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
10 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
11 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 2020 | 2020 | Y | U | 232064 |
12 | C00696526 | HOLLANDER, SIDNEY | GLENDALE | AZ | 853180038 | AEROSPACE CORPORATION | ENGINEER | 7092020 | 250 | S0AZ00350 | DEM | 2020 | 2020 | S | P | 225862 |
# Hand-entered congressional district for each row of df_aero_merge, in row
# order. The list is positional: it must have exactly one entry per row and
# has to be revised whenever the upstream filter or merge changes.
CD = ['CA-37', 'CA-47', 'CA-47', 'CA-33', 'MD-03', 'FL-08', 'VA-05', 'VA-05', 'CO-05', 'VA-05', 'CO-05', 'CO-05', 'AZ-07']

# Attach the district labels and discard the election-year / committee-type
# columns that are not used afterwards.
df_aero_merge = df_aero_merge.assign(CD=CD).drop(
    columns=['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN']
)

# Keep only the rows whose district appears in the `trends` table and bring in
# its columns.
# NOTE(review): `trends` is loaded by a cell further down in this notebook;
# unless it is also loaded earlier, that cell must be run before this one.
df_aero_final = df_aero_merge.merge(trends, on='CD', how='inner')
df_aero_final
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | Party | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | DAVIS, LORRIE | LOS ANGELES | CA | 900561529 | THE AEROSPACE CORPORATION | SENIOR PROJECT ENGINEER | 7112020 | 20 | P80000722 | DEM | 227491 | CA-37 | (D) |
1 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7242020 | 200 | P80000722 | DEM | 227491 | CA-47 | (D) |
2 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7292020 | 200 | P80000722 | DEM | 227491 | CA-47 | (D) |
3 | C00703975 | YOUNG, KAROLYN | REDONDO BEACH | CA | 9.02771e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7252020 | 250 | P80000722 | DEM | 227491 | CA-33 | (D) |
4 | C00703975 | STUTTERHEIM, KENNETH B. | PASADENA | MD | 2.11223e+08 | THE AEROSPACE CORPORATION | ENGINEERING SPECIALIST | 7162020 | 250 | P80000722 | DEM | 227491 | MD-03 | (D) |
5 | C00703975 | JAGER, AMY | INDIAN HARBOUR BEACH | FL | 329373526 | THE AEROSPACE CORPORATION | ENGINEER | 7272020 | 15 | P80000722 | DEM | 227491 | FL-08 | (R) |
6 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7232020 | 10 | H0NY27090 | REP | 232064 | VA-05 | (R) |
7 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7042020 | 25 | H0NY27090 | REP | 232064 | VA-05 | (R) |
8 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7262020 | 10 | H0NY27090 | REP | 232064 | VA-05 | (R) |
9 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 8.09114e+08 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 7302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) |
10 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) |
11 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) |
12 | C00696526 | HOLLANDER, SIDNEY | GLENDALE | AZ | 853180038 | AEROSPACE CORPORATION | ENGINEER | 7092020 | 250 | S0AZ00350 | DEM | 225862 | AZ-07 | (D) |
# INDEX flags the recipient candidate's party: 1 when CAND_PTY_AFFILIATION is
# exactly 'DEM', 0 for anything else.
df_aero_final['INDEX'] = (
    df_aero_final['CAND_PTY_AFFILIATION'] == 'DEM'
).astype('int64')

# INDEX_BOSS is the same constant (1) on every row.
df_aero_final['INDEX_BOSS'] = 1
df_aero_final
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | Party | INDEX | INDEX_BOSS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | DAVIS, LORRIE | LOS ANGELES | CA | 900561529 | THE AEROSPACE CORPORATION | SENIOR PROJECT ENGINEER | 7112020 | 20 | P80000722 | DEM | 227491 | CA-37 | (D) | 1 | 1 |
1 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7242020 | 200 | P80000722 | DEM | 227491 | CA-47 | (D) | 1 | 1 |
2 | C00703975 | SIMPSON, MARK M | LONG BEACH | CA | 908083812 | THE AEROSPACE CORPORATION | ENGINEER | 7292020 | 200 | P80000722 | DEM | 227491 | CA-47 | (D) | 1 | 1 |
3 | C00703975 | YOUNG, KAROLYN | REDONDO BEACH | CA | 9.02771e+08 | THE AEROSPACE CORPORATION | ENGINEER | 7252020 | 250 | P80000722 | DEM | 227491 | CA-33 | (D) | 1 | 1 |
4 | C00703975 | STUTTERHEIM, KENNETH B. | PASADENA | MD | 2.11223e+08 | THE AEROSPACE CORPORATION | ENGINEERING SPECIALIST | 7162020 | 250 | P80000722 | DEM | 227491 | MD-03 | (D) | 1 | 1 |
5 | C00703975 | JAGER, AMY | INDIAN HARBOUR BEACH | FL | 329373526 | THE AEROSPACE CORPORATION | ENGINEER | 7272020 | 15 | P80000722 | DEM | 227491 | FL-08 | (R) | 1 | 1 |
6 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7232020 | 10 | H0NY27090 | REP | 232064 | VA-05 | (R) | 0 | 1 |
7 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7042020 | 25 | H0NY27090 | REP | 232064 | VA-05 | (R) | 0 | 1 |
8 | C00075820 | FARAGO, ZOLTAN L. MR. | BROAD RUN | VA | 2.01372e+08 | THE AEROSPACE CORPORATION | PROJECT ENGINEER | 7262020 | 10 | H0NY27090 | REP | 232064 | VA-05 | (R) | 0 | 1 |
9 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 8.09114e+08 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 7302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) | 0 | 1 |
10 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) | 0 | 1 |
11 | C00075820 | CINLEMIS, MICHELLE | COLORADO SPRINGS | CO | 809113801 | THE AEROSPACE CORPORATION | SENIOR PROJECT LEADER | 6302020 | 100 | H0NY27090 | REP | 232064 | CO-05 | (R) | 0 | 1 |
12 | C00696526 | HOLLANDER, SIDNEY | GLENDALE | AZ | 853180038 | AEROSPACE CORPORATION | ENGINEER | 7092020 | 250 | S0AZ00350 | DEM | 225862 | AZ-07 | (D) | 1 | 1 |
# Min-max scale the two indicator columns and fit a linear regression of the
# second (INDEX_BOSS) on the first (INDEX).
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

subset2 = df_aero_final[['INDEX', 'INDEX_BOSS']]

# Learn the per-column min/max and rescale in a single step.
scaler1 = MinMaxScaler()
inner_join_scaled = scaler1.fit_transform(subset2)

# Keep both variables as single-column 2-D arrays.
x = inner_join_scaled[:, 0:1]  # scaled INDEX
y = inner_join_scaled[:, 1:2]  # scaled INDEX_BOSS

# NOTE(review): INDEX_BOSS is assigned the constant 1 on every row where it is
# created, so the target has no variance and this fit cannot reveal any
# relationship — confirm which column was meant to be the target.
linear_regressor = LinearRegression()
linear_regressor.fit(x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# Select every contribution whose EMPLOYER text contains 'AT&T'
# (substring match).
df4 = df_newdup.loc[df_newdup['EMPLOYER'].str.contains('AT&T')]
df4
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | |
---|---|---|---|---|---|---|---|---|---|
13062 | C00703975 | FAVARA, RICHARD | FREEHOLD | NJ | 7.72843e+07 | AT&T | SALES | 7082020 | 25 |
13063 | C00703975 | FAVARA, RICHARD | FREEHOLD | NJ | 7.72843e+07 | AT&T | SALES | 7142020 | 17 |
13064 | C00703975 | FAVARA, RICHARD | FREEHOLD | NJ | 7.72843e+07 | AT&T | SALES | 7192020 | 17 |
13622 | C00703975 | EMERSON, TERRY | DALLAS | TX | 7.52242e+08 | AT&T | PROJECT MANAGER | 7012020 | 5 |
13623 | C00703975 | EMERSON, TERRY | DALLAS | TX | 7.52242e+08 | AT&T | PROJECT MANAGER | 7162020 | 21 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1583204 | C00694323 | HERNANDEZ, JOE | SAN BRUNO | CA | 940661112 | AT&T | SPLICING TECHNICIAN | 6302020 | 10 |
1587874 | C00694323 | HERNANDEZ, JOE | SAN BRUNO | CA | 940661112 | AT&T | SPLICING TECHNICIAN | 6302020 | 20 |
1595694 | C00694323 | HERNANDEZ, JOE | SAN BRUNO | CA | 940661112 | AT&T | SPLICING TECHNICIAN | 6302020 | 10 |
1600117 | C00694323 | HERNANDEZ, JOE | SAN BRUNO | CA | 940661112 | AT&T | SPLICING TECHNICIAN | 6302020 | 25 |
1603559 | C00694323 | ORTIZ, LISA | RIVERSIDE | CA | 925035708 | AT&T | PM | 6302020 | 35 |
4226 rows × 9 columns
# Join the AT&T contributions to df_merge on the committee id (inner join by
# default) and preview the last few rows of the result.
df5 = df4.merge(df_merge, on='CMTE_ID')
df5.tail()
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
243 | C00711549 | COLLINS, RICK | LAKEWOOD | WA | 98498 | AT&T | SALES CONSULTANT | 6302020 | 100 | S0KY00339 | DEM | 2020 | 2020 | S | P | 228669 |
244 | C00711549 | NURSE, CHRIS | ROCKVILLE | MD | 20850 | AT&T | MANAGER | 6302020 | 500 | S0KY00339 | DEM | 2020 | 2020 | S | P | 228669 |
245 | C00666040 | HERNANDEZ, JOE | SAN BRUNO | CA | 940661112 | AT&T | SPLICING TECHNICIAN | 6302020 | 10 | S8AZ00221 | REP | 2020 | 2020 | S | P | 224208 |
246 | C00736876 | BENTON, WANDETTA | DULUTH | GA | 300978117 | AT&T | NETWORK TECH | 6302020 | 25 | S0GA00559 | DEM | 2020 | 2020 | S | P | 231982 |
247 | C00736876 | BENTON, WANDETTA | DULUTH | GA | 300978117 | AT&T | NETWORK TECH | 6302020 | 25 | S0GA00559 | DEM | 2020 | 2020 | S | P | 231982 |
# Select every contribution whose EMPLOYER text contains 'BIOGEN'. This is a
# substring match, so spelling variants ('BIOGEN INC', 'BIOGEN IDEC') are
# included, as are other employers that merely contain the text
# (e.g. 'PMC BIOGENIX INC.').
df_biogen = df_newdup.loc[df_newdup['EMPLOYER'].str.contains('BIOGEN')]
df_biogen
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | |
---|---|---|---|---|---|---|---|---|---|
71874 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 |
125267 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 |
125819 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 |
125820 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 |
128132 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1576084 | C00694323 | CHECKAN, RICHARD | FUQUAY VARINA | NC | 275267624 | BIOGEN | ENGINEERING | 6302020 | 5 |
1582011 | C00694323 | CHECKAN, RICHARD | FUQUAY VARINA | NC | 275267624 | BIOGEN | ENGINEERING | 6302020 | 1 |
1591754 | C00694323 | CHECKAN, RICHARD | FUQUAY VARINA | NC | 275267624 | BIOGEN | ENGINEERING | 6302020 | 1 |
1600172 | C00694323 | CHECKAN, RICHARD | FUQUAY VARINA | NC | 275267624 | BIOGEN | ENGINEERING | 6302020 | 1 |
1603569 | C00694323 | CHECKAN, RICHARD | FUQUAY VARINA | NC | 275267624 | BIOGEN | ENGINEERING | 6302020 | 1 |
98 rows × 9 columns
# Join the Biogen contributions to df_merge on the committee id (inner join by
# default), adding the candidate/committee columns to each matching row.
df6 = df_biogen.merge(df_merge, on='CMTE_ID')
df6
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | CAND_ELECTION_YR | FEC_ELECTION_YR | CMTE_TP | CMTE_DSGN | LINKAGE_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
1 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
2 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
3 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
4 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
5 | C00703975 | DILLEY, ANNE | ARLINGTON | MA | 2.47648e+07 | BIOGEN | EPIDEMIOLOGIST | 7052020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
6 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7302020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
7 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7182020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
8 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7252020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
9 | C00703975 | VANDER STOEP, STEPHEN | BOSTON | MA | 2.12925e+07 | BIOGEN | ATTORNEY | 7172020 | 100 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
10 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 7172020 | 20 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
11 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7042020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
12 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7112020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
13 | C00703975 | LYKINS, JIM | DUPONT | WA | 9.83277e+08 | BIOGEN | SALES | 7282020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
14 | C00703975 | MALDONADO, REBECCA | SAN ANTONIO | TX | 782491598 | BIOGEN | SALES | 7032020 | 150 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
15 | C00703975 | HOWE, MICHAEL | CANTON | MA | 2.02116e+07 | BIOGEN | ATTORNEY | 7162020 | 100 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
16 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 6302020 | 50 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
17 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 6302020 | 15 | P80000722 | DEM | 2020 | 2020 | P | P | 227491 |
18 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 2131 | BIOGEN | REGULATORY MEDICAL WRITER | 8052020 | 100 | H0MA08045 | DEM | 2020 | 2020 | H | P | 230605 |
19 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 02131 | BIOGEN | REGULATORY MEDICAL WRITER | 6302020 | 100 | H0MA08045 | DEM | 2020 | 2020 | H | P | 230605 |
20 | C00745687 | GRIFFITH, LISA | CAMBRIDGE | MA | 021394369 | BIOGEN | MARKETING | 7132020 | 500 | H0MA04267 | DEM | 2020 | 2020 | H | P | 233009 |
21 | C00196774 | LOVEDAY, KENNETH S. | BROOKLINE | MA | 024465827 | BIOGEN | BIOLOGIST | 8042020 | 250 | S4MA00028 | DEM | 2020 | 2020 | S | P | 222822 |
22 | C00666149 | LOVEDAY, KENNETH S | BROOKLINE | MA | 024465827 | BIOGEN INC | DIRECTOR | 6302020 | 500 | H8NM02248 | DEM | 2020 | 2020 | H | P | 223821 |
23 | C00500843 | FLANNELLY-KING, SHANE | SOMERVILLE | MA | 2.14421e+07 | BIOGEN IDEC | BUSINESS ANALYST | 6302020 | 250 | S2MA00170 | DEM | 2024 | 2020 | S | P | 222817 |
24 | C00649376 | NEWLAND, BART G. | BELMONT | MA | 2.4784e+07 | BIOGEN INC | ATTORNEY | 6302020 | 100 | H8GA07201 | DEM | 2020 | 2020 | H | P | 224868 |
25 | C00701599 | SEGAL, KATE | BATTLE CREEK | MI | 4.90159e+08 | BIOGEN | GOVERNMENT AFFAIRS | 6302020 | 500 | H0MI06152 | DEM | 2020 | 2020 | H | P | 227095 |
# Transaction dates of the Biogen rows whose OCCUPATION contains 'DIRECTOR'.
df6.loc[df6['OCCUPATION'].str.contains('DIRECTOR'), 'TRANSACTION_DT']
22 6302020 Name: TRANSACTION_DT, dtype: int64
# Transaction dates of the Biogen rows whose OCCUPATION contains 'VP'.
df6.loc[df6['OCCUPATION'].str.contains('VP'), 'TRANSACTION_DT']
6 7302020 7 7182020 8 7252020 11 7042020 12 7112020 16 6302020 Name: TRANSACTION_DT, dtype: int64
# Hand-entered congressional district for each row of df6, in row order. The
# list is positional: it must have exactly one entry per row and has to be
# revised whenever the upstream filter or merge changes.
CD = ['MA-05', 'MA-03', 'MA-03', 'MA-03', 'MA-03', 'MA-05', 'MA-05', 'MA-05', 'MA-05', 'MA-07', 'TN-09', 'MA-05', 'MA-05', 'WA-10', 'TX-20', 'MA-08', 'MA-05', 'TN-09', 'MA-07', 'MA-07', 'MA-05', 'MA-04', 'MA-04', 'MA-07', 'MA-05', 'MI-03']
df6['CD'] = CD

# df7 is df6 without the election-year / committee-type columns.
df7 = df6.drop(
    ['CAND_ELECTION_YR', 'FEC_ELECTION_YR', 'CMTE_TP', 'CMTE_DSGN'],
    axis='columns',
)
df7
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 | P80000722 | DEM | 227491 | MA-05 |
1 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 | P80000722 | DEM | 227491 | MA-03 |
2 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 | P80000722 | DEM | 227491 | MA-03 |
3 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 | P80000722 | DEM | 227491 | MA-03 |
4 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 | P80000722 | DEM | 227491 | MA-03 |
5 | C00703975 | DILLEY, ANNE | ARLINGTON | MA | 2.47648e+07 | BIOGEN | EPIDEMIOLOGIST | 7052020 | 50 | P80000722 | DEM | 227491 | MA-05 |
6 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7302020 | 50 | P80000722 | DEM | 227491 | MA-05 |
7 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7182020 | 50 | P80000722 | DEM | 227491 | MA-05 |
8 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7252020 | 50 | P80000722 | DEM | 227491 | MA-05 |
9 | C00703975 | VANDER STOEP, STEPHEN | BOSTON | MA | 2.12925e+07 | BIOGEN | ATTORNEY | 7172020 | 100 | P80000722 | DEM | 227491 | MA-07 |
10 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 7172020 | 20 | P80000722 | DEM | 227491 | TN-09 |
11 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7042020 | 50 | P80000722 | DEM | 227491 | MA-05 |
12 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7112020 | 50 | P80000722 | DEM | 227491 | MA-05 |
13 | C00703975 | LYKINS, JIM | DUPONT | WA | 9.83277e+08 | BIOGEN | SALES | 7282020 | 50 | P80000722 | DEM | 227491 | WA-10 |
14 | C00703975 | MALDONADO, REBECCA | SAN ANTONIO | TX | 782491598 | BIOGEN | SALES | 7032020 | 150 | P80000722 | DEM | 227491 | TX-20 |
15 | C00703975 | HOWE, MICHAEL | CANTON | MA | 2.02116e+07 | BIOGEN | ATTORNEY | 7162020 | 100 | P80000722 | DEM | 227491 | MA-08 |
16 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 6302020 | 50 | P80000722 | DEM | 227491 | MA-05 |
17 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 6302020 | 15 | P80000722 | DEM | 227491 | TN-09 |
18 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 2131 | BIOGEN | REGULATORY MEDICAL WRITER | 8052020 | 100 | H0MA08045 | DEM | 230605 | MA-07 |
19 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 02131 | BIOGEN | REGULATORY MEDICAL WRITER | 6302020 | 100 | H0MA08045 | DEM | 230605 | MA-07 |
20 | C00745687 | GRIFFITH, LISA | CAMBRIDGE | MA | 021394369 | BIOGEN | MARKETING | 7132020 | 500 | H0MA04267 | DEM | 233009 | MA-05 |
21 | C00196774 | LOVEDAY, KENNETH S. | BROOKLINE | MA | 024465827 | BIOGEN | BIOLOGIST | 8042020 | 250 | S4MA00028 | DEM | 222822 | MA-04 |
22 | C00666149 | LOVEDAY, KENNETH S | BROOKLINE | MA | 024465827 | BIOGEN INC | DIRECTOR | 6302020 | 500 | H8NM02248 | DEM | 223821 | MA-04 |
23 | C00500843 | FLANNELLY-KING, SHANE | SOMERVILLE | MA | 2.14421e+07 | BIOGEN IDEC | BUSINESS ANALYST | 6302020 | 250 | S2MA00170 | DEM | 222817 | MA-07 |
24 | C00649376 | NEWLAND, BART G. | BELMONT | MA | 2.4784e+07 | BIOGEN INC | ATTORNEY | 6302020 | 100 | H8GA07201 | DEM | 224868 | MA-05 |
25 | C00701599 | SEGAL, KATE | BATTLE CREEK | MI | 4.90159e+08 | BIOGEN | GOVERNMENT AFFAIRS | 6302020 | 500 | H0MI06152 | DEM | 227095 | MI-03 |
# Load the per-district table (CD_trends.xlsx in the data directory) and
# preview its first rows.
trends = pd.read_excel(f"{data_dir}/CD_trends.xlsx")
trends.head()
CD | Party | |
---|---|---|
0 | AK-AL | (R) |
1 | AL-01 | (R) |
2 | AL-02 | (R) |
3 | AL-03 | (R) |
4 | AL-04 | (R) |
# Inner-join the Biogen contributions to the district table on 'CD': only rows
# whose district label appears in `trends` are kept, and the columns of
# `trends` are appended to them.
inner_join = df7.merge(trends, how='inner', on='CD')
inner_join
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | Party | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 | P80000722 | DEM | 227491 | MA-05 | (D) |
1 | C00703975 | DILLEY, ANNE | ARLINGTON | MA | 2.47648e+07 | BIOGEN | EPIDEMIOLOGIST | 7052020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
2 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
3 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7182020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
4 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7252020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
5 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7042020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
6 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7112020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
7 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 6302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) |
8 | C00745687 | GRIFFITH, LISA | CAMBRIDGE | MA | 021394369 | BIOGEN | MARKETING | 7132020 | 500 | H0MA04267 | DEM | 233009 | MA-05 | (D) |
9 | C00649376 | NEWLAND, BART G. | BELMONT | MA | 2.4784e+07 | BIOGEN INC | ATTORNEY | 6302020 | 100 | H8GA07201 | DEM | 224868 | MA-05 | (D) |
10 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) |
11 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) |
12 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) |
13 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) |
14 | C00703975 | VANDER STOEP, STEPHEN | BOSTON | MA | 2.12925e+07 | BIOGEN | ATTORNEY | 7172020 | 100 | P80000722 | DEM | 227491 | MA-07 | (D) |
15 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 2131 | BIOGEN | REGULATORY MEDICAL WRITER | 8052020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) |
16 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 02131 | BIOGEN | REGULATORY MEDICAL WRITER | 6302020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) |
17 | C00500843 | FLANNELLY-KING, SHANE | SOMERVILLE | MA | 2.14421e+07 | BIOGEN IDEC | BUSINESS ANALYST | 6302020 | 250 | S2MA00170 | DEM | 222817 | MA-07 | (D) |
18 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 7172020 | 20 | P80000722 | DEM | 227491 | TN-09 | (D) |
19 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 6302020 | 15 | P80000722 | DEM | 227491 | TN-09 | (D) |
20 | C00703975 | LYKINS, JIM | DUPONT | WA | 9.83277e+08 | BIOGEN | SALES | 7282020 | 50 | P80000722 | DEM | 227491 | WA-10 | (D) |
21 | C00703975 | MALDONADO, REBECCA | SAN ANTONIO | TX | 782491598 | BIOGEN | SALES | 7032020 | 150 | P80000722 | DEM | 227491 | TX-20 | (D) |
22 | C00703975 | HOWE, MICHAEL | CANTON | MA | 2.02116e+07 | BIOGEN | ATTORNEY | 7162020 | 100 | P80000722 | DEM | 227491 | MA-08 | (D) |
23 | C00196774 | LOVEDAY, KENNETH S. | BROOKLINE | MA | 024465827 | BIOGEN | BIOLOGIST | 8042020 | 250 | S4MA00028 | DEM | 222822 | MA-04 | (D) |
24 | C00666149 | LOVEDAY, KENNETH S | BROOKLINE | MA | 024465827 | BIOGEN INC | DIRECTOR | 6302020 | 500 | H8NM02248 | DEM | 223821 | MA-04 | (D) |
25 | C00701599 | SEGAL, KATE | BATTLE CREEK | MI | 4.90159e+08 | BIOGEN | GOVERNMENT AFFAIRS | 6302020 | 500 | H0MI06152 | DEM | 227095 | MI-03 | (L) |
# INDEX flags the recipient candidate's party: 1 when CAND_PTY_AFFILIATION is
# exactly 'DEM', 0 for anything else.
inner_join['INDEX'] = (
    inner_join['CAND_PTY_AFFILIATION'] == 'DEM'
).astype('int64')
inner_join
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | Party | INDEX | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
1 | C00703975 | DILLEY, ANNE | ARLINGTON | MA | 2.47648e+07 | BIOGEN | EPIDEMIOLOGIST | 7052020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
2 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
3 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7182020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
4 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7252020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
5 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7042020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
6 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7112020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
7 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 6302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 |
8 | C00745687 | GRIFFITH, LISA | CAMBRIDGE | MA | 021394369 | BIOGEN | MARKETING | 7132020 | 500 | H0MA04267 | DEM | 233009 | MA-05 | (D) | 1 |
9 | C00649376 | NEWLAND, BART G. | BELMONT | MA | 2.4784e+07 | BIOGEN INC | ATTORNEY | 6302020 | 100 | H8GA07201 | DEM | 224868 | MA-05 | (D) | 1 |
10 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 |
11 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 |
12 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 |
13 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 |
14 | C00703975 | VANDER STOEP, STEPHEN | BOSTON | MA | 2.12925e+07 | BIOGEN | ATTORNEY | 7172020 | 100 | P80000722 | DEM | 227491 | MA-07 | (D) | 1 |
15 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 2131 | BIOGEN | REGULATORY MEDICAL WRITER | 8052020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) | 1 |
16 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 02131 | BIOGEN | REGULATORY MEDICAL WRITER | 6302020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) | 1 |
17 | C00500843 | FLANNELLY-KING, SHANE | SOMERVILLE | MA | 2.14421e+07 | BIOGEN IDEC | BUSINESS ANALYST | 6302020 | 250 | S2MA00170 | DEM | 222817 | MA-07 | (D) | 1 |
18 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 7172020 | 20 | P80000722 | DEM | 227491 | TN-09 | (D) | 1 |
19 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 6302020 | 15 | P80000722 | DEM | 227491 | TN-09 | (D) | 1 |
20 | C00703975 | LYKINS, JIM | DUPONT | WA | 9.83277e+08 | BIOGEN | SALES | 7282020 | 50 | P80000722 | DEM | 227491 | WA-10 | (D) | 1 |
21 | C00703975 | MALDONADO, REBECCA | SAN ANTONIO | TX | 782491598 | BIOGEN | SALES | 7032020 | 150 | P80000722 | DEM | 227491 | TX-20 | (D) | 1 |
22 | C00703975 | HOWE, MICHAEL | CANTON | MA | 2.02116e+07 | BIOGEN | ATTORNEY | 7162020 | 100 | P80000722 | DEM | 227491 | MA-08 | (D) | 1 |
23 | C00196774 | LOVEDAY, KENNETH S. | BROOKLINE | MA | 024465827 | BIOGEN | BIOLOGIST | 8042020 | 250 | S4MA00028 | DEM | 222822 | MA-04 | (D) | 1 |
24 | C00666149 | LOVEDAY, KENNETH S | BROOKLINE | MA | 024465827 | BIOGEN INC | DIRECTOR | 6302020 | 500 | H8NM02248 | DEM | 223821 | MA-04 | (D) | 1 |
25 | C00701599 | SEGAL, KATE | BATTLE CREEK | MI | 4.90159e+08 | BIOGEN | GOVERNMENT AFFAIRS | 6302020 | 500 | H0MI06152 | DEM | 227095 | MI-03 | (L) | 1 |
# Add an indicator column in which every row gets the same value, 1.
# NOTE(review): a column holding a single constant carries no variance;
# presumably this is a placeholder for an employer-level party flag —
# confirm the intended source of this value.
inner_join['INDEX_BOSS'] = 1
# Bare expression: renders the updated frame as the notebook cell output.
inner_join
CMTE_ID | NAME | CITY | STATE | ZIP_CODE | EMPLOYER | OCCUPATION | TRANSACTION_DT | TRANSACTION_AMT | CAND_ID | CAND_PTY_AFFILIATION | LINKAGE_ID | CD | Party | INDEX | INDEX_BOSS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00703975 | MARX, ISAAC | ARLINGTON | MA | 2.47438e+07 | BIOGEN | CHEMIST | 7182020 | 250 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
1 | C00703975 | DILLEY, ANNE | ARLINGTON | MA | 2.47648e+07 | BIOGEN | EPIDEMIOLOGIST | 7052020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
2 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
3 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7182020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
4 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 24931439 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7252020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
5 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7042020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
6 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 7112020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
7 | C00703975 | SMIRNAKIS, KAREN | WESTON | MA | 2.49314e+07 | BIOGEN | VP HEAD OF GLOBAL MEDICAL SAFETY | 6302020 | 50 | P80000722 | DEM | 227491 | MA-05 | (D) | 1 | 1 |
8 | C00745687 | GRIFFITH, LISA | CAMBRIDGE | MA | 021394369 | BIOGEN | MARKETING | 7132020 | 500 | H0MA04267 | DEM | 233009 | MA-05 | (D) | 1 | 1 |
9 | C00649376 | NEWLAND, BART G. | BELMONT | MA | 2.4784e+07 | BIOGEN INC | ATTORNEY | 6302020 | 100 | H8GA07201 | DEM | 224868 | MA-05 | (D) | 1 | 1 |
10 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7292020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 | 1 |
11 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7152020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 | 1 |
12 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7222020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 | 1 |
13 | C00703975 | EDMONDSON, FRAZOR | MARLBOROUGH | MA | 1.75267e+07 | BIOGEN | ATTORNEY | 7082020 | 50 | P80000722 | DEM | 227491 | MA-03 | (D) | 1 | 1 |
14 | C00703975 | VANDER STOEP, STEPHEN | BOSTON | MA | 2.12925e+07 | BIOGEN | ATTORNEY | 7172020 | 100 | P80000722 | DEM | 227491 | MA-07 | (D) | 1 | 1 |
15 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 2131 | BIOGEN | REGULATORY MEDICAL WRITER | 8052020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) | 1 | 1 |
16 | C00727149 | GATES, CYNTHIA | ROSLINDALE | MA | 02131 | BIOGEN | REGULATORY MEDICAL WRITER | 6302020 | 100 | H0MA08045 | DEM | 230605 | MA-07 | (D) | 1 | 1 |
17 | C00500843 | FLANNELLY-KING, SHANE | SOMERVILLE | MA | 2.14421e+07 | BIOGEN IDEC | BUSINESS ANALYST | 6302020 | 250 | S2MA00170 | DEM | 222817 | MA-07 | (D) | 1 | 1 |
18 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 7172020 | 20 | P80000722 | DEM | 227491 | TN-09 | (D) | 1 | 1 |
19 | C00703975 | THOMAS, DONNA | MEMPHIS | TN | 3.81155e+08 | PMC BIOGENIX INC. | CUSTOMER SERVICE | 6302020 | 15 | P80000722 | DEM | 227491 | TN-09 | (D) | 1 | 1 |
20 | C00703975 | LYKINS, JIM | DUPONT | WA | 9.83277e+08 | BIOGEN | SALES | 7282020 | 50 | P80000722 | DEM | 227491 | WA-10 | (D) | 1 | 1 |
21 | C00703975 | MALDONADO, REBECCA | SAN ANTONIO | TX | 782491598 | BIOGEN | SALES | 7032020 | 150 | P80000722 | DEM | 227491 | TX-20 | (D) | 1 | 1 |
22 | C00703975 | HOWE, MICHAEL | CANTON | MA | 2.02116e+07 | BIOGEN | ATTORNEY | 7162020 | 100 | P80000722 | DEM | 227491 | MA-08 | (D) | 1 | 1 |
23 | C00196774 | LOVEDAY, KENNETH S. | BROOKLINE | MA | 024465827 | BIOGEN | BIOLOGIST | 8042020 | 250 | S4MA00028 | DEM | 222822 | MA-04 | (D) | 1 | 1 |
24 | C00666149 | LOVEDAY, KENNETH S | BROOKLINE | MA | 024465827 | BIOGEN INC | DIRECTOR | 6302020 | 500 | H8NM02248 | DEM | 223821 | MA-04 | (D) | 1 | 1 |
25 | C00701599 | SEGAL, KATE | BATTLE CREEK | MI | 4.90159e+08 | BIOGEN | GOVERNMENT AFFAIRS | 6302020 | 500 | H0MI06152 | DEM | 227095 | MI-03 | (L) | 1 | 1 |
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

# Keep only the two indicator columns used by the regression below.
subset2 = inner_join[['INDEX', 'INDEX_BOSS']]

# Min-max scale both columns; fit_transform learns the per-column range and
# applies it to the same data in a single call.
scaler1 = MinMaxScaler()
inner_join_scaled = scaler1.fit_transform(subset2)

# Width-1 column slices keep the 2-D (n_samples, 1) shape the estimator
# expects: column 0 is INDEX (predictor), column 1 is INDEX_BOSS (target).
x = inner_join_scaled[:, 0:1]
y = inner_join_scaled[:, 1:2]

# NOTE(review): INDEX_BOSS was assigned the constant 1 earlier in the
# notebook, so the target has zero variance here — confirm this regression
# is set up as intended.
linear_regressor = LinearRegression()
# Last expression of the cell: fits the model and displays the estimator.
linear_regressor.fit(x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)