import pandas as pd # pandas is a library for data manipulation and analysis
pd.options.mode.chained_assignment = None # This will turn off an annoying warning
import plotly # plotly is a library for interactive data visualization
import plotly.graph_objs as go # this let's me make graphs with plotly more easily
plotly.offline.init_notebook_mode() # this let's the graphs appear offline in the jupyter notebook
# Load the arrest data into a DataFrame.
df = pd.read_csv("pgh_arrest_data.csv")
# Report how many rows and columns the data has, then preview the first five rows.
print(df.shape)
df.head(n=5)
(21311, 17)
_id | PK | CCR | AGE | GENDER | RACE | ARRESTTIME | ARRESTLOCATION | OFFENSES | INCIDENTLOCATION | INCIDENTNEIGHBORHOOD | INCIDENTZONE | INCIDENTTRACT | COUNCIL_DISTRICT | PUBLIC_WORKS_DIVISION | X | Y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5403 | 1981357 | 17024448 | 26.0 | M | W | 2017-02-09T01:04:00 | 20 Block Bailey AV Pittsburgh, PA 15211 | 5505 Public Drunkenness / 5503(a)(4) DISORDERL... | 20 Block Bailey AV Pittsburgh, PA 15211 | Mount Washington | 3 | 1807.0 | 3.0 | 5.0 | -80.004461 | 40.427049 |
1 | 5404 | 1981358 | 17021346 | 31.0 | F | W | 2017-02-04T01:30:00 | N CHARLES ST & PERRYSVILLE AV Pittsburgh, PA 1... | 3925 Receiving Stolen Property. / 1543 Driving... | N CHARLES ST & PERRYSVILLE AV Pittsburgh, PA 1... | Perry South | 1 | 2615.0 | 6.0 | 1.0 | -80.008598 | 40.471006 |
2 | 5405 | 1981359 | 17021351 | 22.0 | M | B | 2017-02-04T01:51:00 | 2200 Block Somers DR Pittsburgh, PA 15219 | 13(a)(16) Possession of Controlled Substance /... | 2200 Block Somers DR Pittsburgh, PA 15219 | Bedford Dwellings | 2 | 509.0 | 6.0 | 3.0 | -79.975687 | 40.449422 |
3 | 10102 | 1987251 | 17120778 | 33.0 | F | B | 2017-06-29T20:04:00 | 1400 Block 5th AV Pittsburgh, PA 15219 | 13(a)(16) Possession of Controlled Substance /... | 1400 Block 5th AV Pittsburgh, PA 15219 | Bluff | 2 | 103.0 | 6.0 | 3.0 | -79.986310 | 40.438370 |
4 | 10103 | 1987255 | 16164667 | 25.0 | M | B | 2017-07-02T02:00:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 3921 Theft by Unlawful Taking or Disposition. | 20 Block Stanwix ST Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -80.005178 | 40.438667 |
# Keep only rows with a plausible age: strictly between 0 and 120. A missing
# (NaN) age fails both comparisons, so those rows are removed as well.
ages = df['AGE']
hasPlausibleAge = (ages > 0) & (ages < 120)
df = df[hasPlausibleAge]
# Compare with the previous number of rows to see how many cases were removed.
len(df)
21104
# Order the rows by the date/time of arrest to see how far back the data goes.
df.sort_values('ARRESTTIME')
_id | PK | CCR | AGE | GENDER | RACE | ARRESTTIME | ARRESTLOCATION | OFFENSES | INCIDENTLOCATION | INCIDENTNEIGHBORHOOD | INCIDENTZONE | INCIDENTTRACT | COUNCIL_DISTRICT | PUBLIC_WORKS_DIVISION | X | Y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11894 | 17346 | 1996555 | 5247781 | 10.0 | M | B | 2005-12-21T09:00:00 | 40 Block COWAN ST PITTSBURGH, PA 15211 | 912 Possession of Weapon on School Property. | 40 Block COWAN ST PITTSBURGH, PA 15211 | NaN | 3 | NaN | 2.0 | 5.0 | -80.010184 | 40.427336 |
175 | 16946 | 1995957 | 6216056 | 29.0 | M | B | 2006-11-02T09:00:00 | 900 Block second AV Pittsburgh, PA 15219 | 3921 Theft by Unlawful Taking or Disposition. ... | 900 Block CRESSWELL ST PITTSBURGH, PA | NaN | 3 | NaN | 3.0 | 3.0 | -79.976204 | 40.408768 |
13644 | 9504 | 1986575 | 7034314 | 21.0 | F | B | 2007-02-21T13:30:00 | 100 Block Rhine PL Pittsburgh, PA 15212 | 2701 Simple Assault. | 100 Block RHINE PL PITTSBURGH, PA 15212 | Spring Hill-City View | 1 | 2620.0 | 1.0 | 1.0 | -79.994099 | 40.466786 |
571 | 10289 | 1987517 | 7102272 | 40.0 | M | B | 2007-05-22T16:42:00 | Tioga ST & Rosedale ST Pittsburgh, PA 15208 | 13(a)(16) Possession of Controlled Substance /... | TIOGA ST & ROSEDALE ST PITTSBURGH, PA 15208 | Homewood South | 5 | 1303.0 | 9.0 | 2.0 | -79.886833 | 40.450037 |
2663 | 7666 | 1984153 | 8234583 | 14.0 | F | B | 2008-10-04T12:25:00 | 200 Block Greenfield AV Pittsburgh, PA 15207 | 2701 Simple Assault. | 200 Block GREENFIELD AV PITTSBURGH, PA | Greenfield | 4 | 1516.0 | 5.0 | 3.0 | -79.949808 | 40.425719 |
6060 | 8981 | 1985946 | 11003677 | 16.0 | M | B | 2011-01-07T13:00:00 | 3300 Block CENTRE AV PITTSBURGH, PA 15219 | 2701(a)(1) Simple Assault - Intent., Know., Re... | 500 Block N Highland AV Pittsburgh, PA 15206 | East Liberty | 5 | 1113.0 | 8.0 | 2.0 | -79.924961 | 40.453011 |
17190 | 19721 | 1999542 | 11115122 | 22.0 | M | W | 2011-06-23T02:30:00 | E Carson ST & S 22nd ST Pittsburgh, PA 15203 | 4903 False Swearing. / 5505 Public Drunkenness... | E Carson ST & S 22nd ST Pittsburgh, PA 15203 | South Side Flats | 3 | 1702.0 | 3.0 | 3.0 | -79.974639 | 40.428413 |
7695 | 21261 | 2001455 | 12192736 | 17.0 | M | B | 2012-10-03T13:48:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 2702 Aggravated Assault. | 200 Block S Negley AV Pittsburgh, PA 15206 | East Liberty | 5 | 1115.0 | NaN | NaN | 0.000000 | 0.000000 |
16996 | 20991 | 2001135 | 13004163 | 17.0 | M | W | 2013-02-27T01:21:00 | 800 Block E Warrington AV Pittsburgh, PA 15210 | 2711 Domestic Violence / 2701(a)(1) Simple Ass... | 300 Block Merrimac ST Pittsburgh, PA 15211 | Mount Washington | 3 | 1903.0 | 2.0 | 5.0 | -80.013919 | 40.431633 |
11973 | 20297 | 2000316 | 14025127 | 24.0 | M | B | 2014-02-09T01:22:00 | 2200 Block E Carson ST Pittsburgh, PA 15203 | 2702 Aggravated Assault. / 5104 Resisting Arre... | 2200 Block E Carson ST Pittsburgh, PA 15203 | South Side Flats | 3 | 1609.0 | 3.0 | 3.0 | -79.974454 | 40.428227 |
9640 | 17847 | 1997180 | 14086284 | 27.0 | F | W | 2014-05-10T04:05:00 | 4000 Block Cabinet WY Pittsburgh, PA 15224 | 2701 Simple Assault. / 5503(a)(3) DISORDERLY C... | 4000 Block Cabinet WY Pittsburgh, PA 15224 | Bloomfield | 5 | 903.0 | 7.0 | 2.0 | -79.957555 | 40.463864 |
5883 | 12186 | 1989823 | 14204457 | 23.0 | M | W | 2014-10-13T03:47:00 | Copperfield AV & Mt Joseph ST Pittsburgh, PA 1... | 2706 Terroristic Threats. / 5104 Resisting Arr... | Copperfield AV & Mt Joseph ST Pittsburgh, PA 1... | Carrick | 3 | 2901.0 | 4.0 | 3.0 | -79.989725 | 40.395400 |
18311 | 4620 | 1980392 | 14227728 | 20.0 | F | B | 2014-11-16T03:42:00 | 3200 Block Middletown RD Pittsburgh, PA 15204 | 903 Criminal Conspiracy. / 2702 Aggravated Ass... | 3200 Block Middletown RD Pittsburgh, PA 15204 | Windgap | 6 | 2807.0 | 2.0 | 5.0 | -80.066741 | 40.450950 |
18312 | 4621 | 1980393 | 14227728 | 30.0 | M | B | 2014-11-16T03:42:00 | 3200 Block Middletown RD Pittsburgh, PA 15204 | 903 Criminal Conspiracy. / 2702 Aggravated Ass... | 3200 Block Middletown RD Pittsburgh, PA 15204 | Windgap | 6 | 2807.0 | 2.0 | 5.0 | -80.066741 | 40.450950 |
2762 | 4619 | 1980391 | 14227728 | 27.0 | F | B | 2014-11-16T03:42:00 | 3200 Block Middletown RD Pittsburgh, PA 15204 | 903 Criminal Conspiracy. / 2702 Aggravated Ass... | 3200 Block Middletown RD Pittsburgh, PA 15204 | Windgap | 6 | 2807.0 | 2.0 | 5.0 | -80.066741 | 40.450950 |
11576 | 18991 | 1998615 | 14179431 | 33.0 | F | B | 2015-02-19T14:00:00 | 200 Block Allegheny River BL Pittsburgh, PA 15147 | 2709(a)(1) Harassment by Physical Contact, or ... | 5300 Block Broad ST Pittsburgh, PA 15224 | Garfield | 5 | 1017.0 | 9.0 | 2.0 | -79.938203 | 40.465812 |
10251 | 633 | 1975435 | 15095591 | 25.0 | M | B | 2015-05-29T07:55:00 | Gladys AV & Crane AV Pittsburgh, PA 15216 | 3111 Obedience to Traffic-Control Devices. / 3... | Gladys AV & Crane AV Pittsburgh, PA 15216 | Beechview | 6 | 1916.0 | 4.0 | 5.0 | -80.024245 | 40.416917 |
11802 | 15467 | 1994019 | 15120242 | 16.0 | M | B | 2015-07-02T19:19:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 2701 Simple Assault. | 400 Block N Taylor AV Pittsburgh, PA 15212 | Central North Side | 1 | 2206.0 | 1.0 | 1.0 | -80.011298 | 40.456933 |
2622 | 3706 | 1979277 | 15204846 | 22.0 | M | W | 2015-11-04T19:45:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 6310.1 Selling Liquor to Minors / 601.04 (c) (... | 3800 Block Bates ST Pittsburgh, PA 15213 | Central Oakland | 4 | 406.0 | 3.0 | 3.0 | -79.953347 | 40.438601 |
2680 | 3707 | 1979278 | 15204846 | 21.0 | M | W | 2015-11-04T19:45:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 6310.1 Selling Liquor to Minors / 601.04 (c) (... | 3800 Block Bates ST Pittsburgh, PA 15213 | Central Oakland | 4 | 406.0 | 3.0 | 3.0 | -79.953347 | 40.438601 |
14592 | 4672 | 1980444 | 16024425 | 24.0 | F | B | 2016-02-12T21:00:00 | 600 Block First AV Pittsburgh, PA 15219 | 2701(a)(1) Simple Assault - Intent., Know., Re... | 1100 Block McKinney LANE Pittsburgh, PA 15205 | Ridgemont | 6 | 2016.0 | 2.0 | 5.0 | -80.035034 | 40.423529 |
17650 | 632 | 1975385 | 16029218 | 31.0 | M | W | 2016-02-17T15:25:00 | 800 Block Concord ST Pittsburgh, PA 15212 | 13(a)(16) Possession of Controlled Substance /... | 800 Block Concord ST Pittsburgh, PA 15212 | East Allegheny | 1 | 2304.0 | 1.0 | 1.0 | -79.997924 | 40.457564 |
5272 | 3338 | 1978790 | 16004552 | 31.0 | F | O | 2016-03-03T09:45:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 600 Block Liberty AV Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -80.001620 | 40.441773 |
20536 | 1245 | 1976029 | 16060056 | 22.0 | M | W | 2016-04-04T00:45:00 | S 15th ST & E Carson ST Pittsburgh, PA 15203 | 2702 Aggravated Assault. / 2706 Terroristic Th... | S 15th ST & E Carson ST Pittsburgh, PA 15203 | South Side Flats | 3 | 1702.0 | 3.0 | 3.0 | -79.983425 | 40.428815 |
12247 | 4666 | 1980438 | 16062908 | 36.0 | M | W | 2016-04-08T02:34:00 | Boyd ST & Diamond ST Pittsburgh, PA 15219 | 13(a)(32) Paraphernalia - Use or Possession | Boyd ST & Diamond ST Pittsburgh, PA 15219 | Bluff | 2 | 103.0 | 6.0 | 6.0 | -79.993293 | 40.438656 |
18572 | 4664 | 1980436 | 16054146 | 60.0 | M | W | 2016-04-10T09:00:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 5104 Resisting Arrest or Other Law Enforcement... | Forbes AV & Armstrong TUN Pittsburgh, PA 15219 | Bluff | 2 | 103.0 | 6.0 | 6.0 | -79.992052 | 40.437971 |
18585 | 4665 | 1980437 | 16058996 | 54.0 | M | B | 2016-04-12T01:30:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 2705 Recklessy Endangering Another Person. / 2... | 1600 Block Forbes AV Pittsburgh, PA 15219 | Bluff | 2 | 103.0 | 6.0 | 3.0 | -79.983870 | 40.437377 |
15079 | 4667 | 1980439 | 16061851 | 20.0 | M | B | 2016-04-12T19:55:00 | 600 Block First AV Pittsburgh, PA 15219 | 3924 Theft of Property Lost, Mislaid or Delive... | 400 Block Wood ST Pittsburgh, PA 15219 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -80.001223 | 40.439375 |
12248 | 4670 | 1980442 | 16066821 | 31.0 | M | W | 2016-04-13T20:00:00 | 1000 Block Fort Duquesne BL Pittsburgh, PA 15222 | 2702 Aggravated Assault. / 5104 Resisting Arre... | 1000 Block Fort Duquesne BL Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -79.995245 | 40.445511 |
14454 | 4668 | 1980440 | 16066821 | 29.0 | M | W | 2016-04-13T20:00:00 | 1000 Block Fort Duquesne BL Pittsburgh, PA 15222 | 2702 Aggravated Assault. / 5104 Resisting Arre... | 1000 Block Fort Duquesne BL Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -79.995245 | 40.445511 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
10606 | 21074 | 2001239 | 17249529 | 46.0 | M | B | 2018-06-12T07:03:00 | 1600 Block Penn AV Pittsburgh, PA 15222 | 5101 Obstructing Admin. of Law / 5105 Hinderi... | 1600 Block Penn AV Pittsburgh, PA 15222 | Strip District | 2 | 203.0 | 7.0 | 6.0 | -79.987472 | 40.448835 |
15643 | 21077 | 2001242 | 18035414 | 49.0 | M | W | 2018-06-12T07:30:00 | 400 Block JANICE DR Pittsburgh, PA 15235 | 3502 Burglary. / 3921 Theft by Unlawful Taking... | 1700 Block Jancey ST Pittsburgh, PA 15206 | Morningside | 5 | 1014.0 | 7.0 | 2.0 | -79.927590 | 40.484790 |
17208 | 21081 | 2001246 | 18076507 | 49.0 | M | W | 2018-06-12T07:30:00 | 400 Block JANICE DR Pittsburgh, PA 15235 | 3502 Burglary. / 3921 Theft by Unlawful Taking... | 1700 Block Jancey ST Pittsburgh, PA 15206 | Morningside | 5 | 1014.0 | 7.0 | 2.0 | -79.927590 | 40.484790 |
15677 | 21085 | 2001250 | 18085255 | 49.0 | M | W | 2018-06-12T07:30:00 | 400 Block JANICE DR Pittsburgh, PA 15235 | 3921 Theft by Unlawful Taking or Disposition. ... | 1500 Block Hawthorne ST Pittsburgh, PA 15201 | Stanton Heights | 5 | 1018.0 | 7.0 | 2.0 | -79.934272 | 40.484916 |
7000 | 21080 | 2001245 | 18047226 | 21.0 | M | B | 2018-06-12T10:20:00 | 600 Block 1ST AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 200 Block Fifth AV Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -80.002077 | 40.441236 |
1983 | 21082 | 2001247 | 18078665 | 34.0 | F | W | 2018-06-12T11:10:00 | 1400 Block prospect RD Pittsburgh, PA 15227 | 3921 Theft by Unlawful Taking or Disposition. ... | 100 Block N Bellefield AV Pittsburgh, PA 15213 | North Oakland | 4 | 404.0 | 8.0 | 3.0 | -79.951835 | 40.447152 |
15942 | 21160 | 2001341 | 18106111 | 66.0 | M | B | 2018-06-12T11:15:00 | 900 Block 2ND AV Pittsburgh, PA 15219 | 13(a)(16) Possession of Controlled Substance /... | 1500 Block Monterey ST Pittsburgh, PA 15212 | Central North Side | 1 | 2503.0 | 6.0 | 1.0 | -80.013412 | 40.457302 |
15734 | 21099 | 2001264 | 18101188 | 50.0 | F | B | 2018-06-12T11:45:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 2701 Simple Assault. | 4800 Block Friendship AV Pittsburgh, PA 15224 | Bloomfield | 5 | 809.0 | 7.0 | 2.0 | -79.946263 | 40.462132 |
15720 | 21095 | 2001260 | 18099412 | 38.0 | M | B | 2018-06-12T14:05:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 3925 Receiving Stolen Property. / 6105(a)(1) P... | 60 Block E Amanda AV Pittsburgh, PA 15210 | Knoxville | 3 | 3001.0 | 3.0 | 5.0 | -79.988121 | 40.419383 |
18349 | 21073 | 2001238 | 17144182 | 37.0 | M | W | 2018-06-12T16:45:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 2706 Terroristic Threats. / 2718(a)(1) Strangu... | 300 Block Aidyl AV Pittsburgh, PA 15226 | Brookline | 6 | 1919.0 | 4.0 | 5.0 | -80.027560 | 40.395325 |
17026 | 21076 | 2001241 | 18023748 | 27.0 | M | W | 2018-06-12T17:52:00 | Olympia ST & Piermont ST Pittsburgh, PA 15211 | 3921(a) Theft by Unlawful Taking or Dispositio... | 5700 Block Beacon ST Pittsburgh, PA 15217 | Squirrel Hill South | 4 | 1413.0 | 5.0 | 3.0 | -79.924713 | 40.434526 |
4561 | 21075 | 2001240 | 18016037 | 27.0 | M | W | 2018-06-12T17:52:00 | Olympia ST & Piermont ST Pittsburgh, PA 15211 | 2706 Terroristic Threats. | 5700 Block Beacon ST Pittsburgh, PA 15217 | Squirrel Hill South | 4 | 1413.0 | 5.0 | 3.0 | -79.924713 | 40.434526 |
7062 | 21098 | 2001263 | 18100310 | 54.0 | M | B | 2018-06-12T19:30:00 | Forbes AV & Stanwinx ST Pittsburgh, PA 15222 | 3701 Robbery. | 500 Block Francis ST Pittsburgh, PA 15219 | Middle Hill | 2 | 501.0 | 6.0 | 3.0 | -79.967494 | 40.447140 |
17044 | 21097 | 2001262 | 18100059 | 54.0 | M | B | 2018-06-12T19:30:00 | Forbes AV & Stanwix ST Pittsburgh, PA 15222 | 2701 Simple Assault. / 3701 Robbery. | 4000 Block Brighton RD Pittsburgh, PA 15212 | Brighton Heights | 1 | 2701.0 | 1.0 | 1.0 | -80.040852 | 40.487917 |
17027 | 21078 | 2001243 | 18044785 | 42.0 | F | W | 2018-06-12T20:11:00 | E Elizabeth ST & Gertrude ST Pittsburgh, PA 15207 | 9015 Failure To Appear/Arrest on Attachment Order | 2nd AV & Mansion ST Pittsburgh, PA 15207 | Hazelwood | 4 | 1515.0 | 5.0 | 3.0 | -79.942313 | 40.403720 |
17250 | 21087 | 2001252 | 18090363 | 23.0 | M | B | 2018-06-12T22:47:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 3400 Block Gass AV Pittsburgh, PA 15212 | Brighton Heights | 1 | 2703.0 | 1.0 | 1.0 | -80.034672 | 40.477074 |
15702 | 21086 | 2001251 | 18086148 | 25.0 | F | W | 2018-06-13T02:06:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 5104 Resisting Arrest or Other Law Enforcement... | 4000 Block Butler ST Pittsburgh, PA 15201 | Central Lawrenceville | 2 | 902.0 | 7.0 | 2.0 | -79.961681 | 40.468701 |
2466 | 21111 | 2001276 | 18102216 | 28.0 | M | B | 2018-06-13T08:25:00 | 600 Block 1ST AV Pittsburgh, PA 15219 | 2706 Terroristic Threats. / 2709(a) Harassment. | 300 Block Beltzhoover AV Pittsburgh, PA 15210 | Beltzhoover | 3 | 1809.0 | 3.0 | 5.0 | -79.997515 | 40.419880 |
15645 | 21083 | 2001248 | 18080966 | 30.0 | M | W | 2018-06-13T09:00:00 | Zone 2 | 3127 Indecent Exposure. | Zone 2 | NaN | 2 | NaN | NaN | NaN | NaN | NaN |
15759 | 21106 | 2001271 | 18046664 | 22.0 | M | B | 2018-06-13T11:45:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 1543 Driving While Operating Privilege is Susp... | 1300 Block Silverton AV Pittsburgh, PA 15206 | Lincoln-Lemington-Belmar | 5 | 1202.0 | NaN | NaN | 0.000000 | 0.000000 |
10656 | 21107 | 2001272 | 18058660 | 40.0 | M | U | 2018-06-13T13:10:00 | 600 Block 1ST AV Pittsburgh, PA 15219 | 3926(a)(1) Theft of Services; Cable, Gas, Taxi... | 500 Block N Craig ST Pittsburgh, PA 15213 | North Oakland | 4 | 507.0 | 6.0 | 3.0 | -79.954405 | 40.456098 |
15764 | 21105 | 2001270 | 17234757 | 20.0 | M | B | 2018-06-13T16:40:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 100 Block N Negley AV Pittsburgh, PA 15206 | Garfield | 5 | 1114.0 | 7.0 | 2.0 | -79.924731 | 40.474284 |
15649 | 21084 | 2001249 | 18083529 | 18.0 | M | B | 2018-06-13T19:10:00 | 600 Block First AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 100 Block Rhine PL Pittsburgh, PA 15212 | Spring Hill-City View | 1 | 2620.0 | 1.0 | 1.0 | -79.992042 | 40.465360 |
15719 | 21092 | 2001257 | 18095330 | 25.0 | M | B | 2018-06-13T19:29:00 | 700 Block Grand AV Wilkinsburgh, PA 15221 | 13(a)(32) Paraphernalia - Use or Possession / ... | 1400 Block Washington BL Pittsburgh, PA 15206 | Highland Park | 5 | 1106.0 | 9.0 | 2.0 | -79.908608 | 40.470381 |
15714 | 21091 | 2001256 | 18095330 | 56.0 | M | B | 2018-06-13T19:29:00 | Bennett ST & Tokay ST Pittsburgh, PA 15221 | 6105(a)(1) Persons not to possess, use, manufa... | 1400 Block Washington BL Pittsburgh, PA 15206 | Highland Park | 5 | 1106.0 | 9.0 | 2.0 | -79.908608 | 40.470381 |
7379 | 21177 | 2001358 | 18106641 | 31.0 | M | B | 2018-06-13T19:56:00 | 600 Block 1st AV Pittsburgh, PA 15219 | 2705 Recklessy Endangering Another Person. / 1... | 300 Block 5th AV Pittsburgh, PA 15222 | Central Business District | 2 | 201.0 | 6.0 | 6.0 | -80.000290 | 40.440526 |
2403 | 21104 | 2001269 | 17166975 | 38.0 | F | W | 2018-06-13T22:20:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 200 Block S Winebiddle ST Pittsburgh, PA 15224 | Bloomfield | 5 | 809.0 | 7.0 | 2.0 | -79.943252 | 40.464531 |
15765 | 21108 | 2001273 | 18065533 | 24.0 | F | B | 2018-06-13T23:00:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 2701 Simple Assault. / 3502 Burglary. / 3921 T... | 7400 Block Monticello ST Pittsburgh, PA 15208 | Homewood North | 5 | 1302.0 | 9.0 | 2.0 | -79.891016 | 40.458322 |
10657 | 21109 | 2001274 | 18074479 | 60.0 | M | B | 2018-06-14T02:19:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 9015 Failure To Appear/Arrest on Attachment Order | 1400 Block Adams ST Pittsburgh, PA 15233 | Manchester | 1 | 2107.0 | 6.0 | 1.0 | -80.027999 | 40.458723 |
15776 | 21110 | 2001275 | 18074777 | 46.0 | F | B | 2018-06-14T02:50:00 | 900 Block 2nd AV Pittsburgh, PA 15219 | 2701 Simple Assault. / 2706 Terroristic Threats. | 2500 Block Brighton RD Pittsburgh, PA 15212 | Marshall-Shadeland | 1 | 2704.0 | 6.0 | 1.0 | -80.024794 | 40.467289 |
21104 rows × 17 columns
# Derive integer Year and Month columns from the ARRESTTIME strings
# (formatted like '2017-02-09T01:04:00').
arrestTimes = df['ARRESTTIME']
df['Year'] = [int(stamp[:4]) for stamp in arrestTimes]
df['Month'] = [int(stamp.split('-')[1]) for stamp in arrestTimes]
# Count arrests per year to get a sense of when the data starts being
# comprehensive; anything before then is assumed not to be reliable.
df['Year'].value_counts()
2017 12022 2018 4975 2016 4087 2014 6 2015 5 2011 2 2007 2 2013 1 2012 1 2008 1 2006 1 2005 1 Name: Year, dtype: int64
# Arrests per month within 2016, in calendar order, to see when in 2016 the
# data starts being comprehensive.
df.loc[df['Year'] == 2016, 'Month'].value_counts().sort_index()
2 2 3 1 4 11 7 13 8 638 9 854 10 883 11 861 12 824 Name: Month, dtype: int64
# Keep arrests from August 2016 to the present.
is2017OrLater = df['Year'] >= 2017
isLate2016 = (df['Year'] == 2016) & (df['Month'] >= 8)
df = df[is2017OrLater | isLate2016]
# A dataset for minors only (age under 18).
dfMinor = df[df['AGE'] < 18]
# Row counts: all arrests kept, and arrests of minors.
len(df), len(dfMinor)
(21057, 1568)
# Add a column saying whether each arrest is of a minor ('<18') or not ('18+').
df['isMinor'] = ['<18' if age < 18 else '18+' for age in df['AGE']]
# Number of arrests in each of the two groups.
pieData = df['isMinor'].value_counts()
pieData
18+ 19489 <18 1568 Name: isMinor, dtype: int64
# Pie chart of arrests of minors vs. adults.
labels = pieData.index
values = pieData
# Build each slice's text ("<percent><br><count>") from pieData itself, so the
# annotations cannot drift out of sync with the data being plotted.
totalArrests = values.sum()
sliceText = ['{:.1%}<br>{}'.format(float(count) / totalArrests, count)
             for count in values]
# Tie each colour to its label rather than to its position in pieData.
sliceColors = {'18+': 'rgb(169,169,169)', '<18': 'Gold'}
trace = go.Pie(labels=labels,
               values=values,
               marker=dict(colors=[sliceColors[label] for label in labels]),
               hoverinfo='label',
               textinfo='text',
               text=sliceText,
               textfont=dict(size=14, color='black'))
# go.Data is deprecated; a plain list of traces is the supported form.
data = [trace]
layout = go.Layout(
    hovermode='closest',
    title='Arrests in Pittsburgh by Age, August 2016-Present',
    legend=dict(font=dict(size=15)))
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
# Top ten offense entries for youth. An entry can list several offenses joined
# by ' / ', so the multiple offenses still need to be separated out.
dfMinor['OFFENSES'].value_counts().iloc[:10]
13(a)(31) Marijuana: Possession Small Amount 149 2701 Simple Assault. 59 2702 Aggravated Assault. 56 2702 Aggravated Assault. / 5503(a)(4) DISORDERLY CONDUCT - HAZARDOUS/PHYS. OFF 51 912 Possession of Weapon on School Property. 44 2702 Aggravated Assault. / 5503 Disorderly Conduct. 33 903 Criminal Conspiracy. / 3701 Robbery. 32 3925 Receiving Stolen Property. 32 2701 Simple Assault. / 5503 Disorderly Conduct. 29 2701(a)(1) Simple Assault - Intent., Know., Reckless.Cause Bod. Injury 28 Name: OFFENSES, dtype: int64
# Tally every individual offense across the arrests of minors. Entries are split
# on ' / ' (with the spaces) so that a name containing a bare slash, such as
# 'HAZARDOUS/PHYS. OFF', is not broken apart.
offenseCounts = {}
for entry in dfMinor['OFFENSES'].values:
    if isinstance(entry, str):  # non-string entries are the NaNs; skip them
        for offense in entry.split(' / '):
            # start a new offense at 1, otherwise increase its count by one
            offenseCounts[offense] = offenseCounts.get(offense, 0) + 1
offenseCountsSer = pd.Series(offenseCounts)  # offense name -> number of occurrences
# Now see the top 20 offenses, separated out.
offenseCountsSer.sort_values(ascending=False).head(20)
2702 Aggravated Assault. 331 903 Criminal Conspiracy. 237 13(a)(31) Marijuana: Possession Small Amount 213 5503(a)(4) DISORDERLY CONDUCT - HAZARDOUS/PHYS. OFF 201 2701 Simple Assault. 185 3925 Receiving Stolen Property. 149 5503 Disorderly Conduct. 134 3701 Robbery. 119 2701(a)(1) Simple Assault - Intent., Know., Reckless.Cause Bod. Injury 118 6110.1 Possesion of Firearm by Minor 113 2705 Recklessy Endangering Another Person. 102 5503(a)(1) DISORDERLY CONDUCT - ENGAGE IN FIGHTING 94 13(a)(16) Possession of Controlled Substance 90 6106 Firearms not to be Carried without a License. 72 912 Possession of Weapon on School Property. 66 13(a)(30) Possession w/ Intent to Del. Controlled Substance 60 2706 Terroristic Threats. 58 5104 Resisting Arrest or Other Law Enforcement. 58 3921 Theft by Unlawful Taking or Disposition. 53 3304 Criminal Mischief. 51 dtype: int64
# The ten most frequent offenses, ordered from least to most frequent.
topYouthOffenses = offenseCountsSer.sort_values().iloc[-10:]
topYouthOffenses
6110.1 Possesion of Firearm by Minor 113 2701(a)(1) Simple Assault - Intent., Know., Reckless.Cause Bod. Injury 118 3701 Robbery. 119 5503 Disorderly Conduct. 134 3925 Receiving Stolen Property. 149 2701 Simple Assault. 185 5503(a)(4) DISORDERLY CONDUCT - HAZARDOUS/PHYS. OFF 201 13(a)(31) Marijuana: Possession Small Amount 213 903 Criminal Conspiracy. 237 2702 Aggravated Assault. 331 dtype: int64
# Drop the leading number/letter code: keep everything after the first space.
topYouthOffenses.index = [label.split(' ', 1)[1] for label in topYouthOffenses.index]
# Shorten some of the longer names; every other name just loses its periods.
dictt = {
    'DISORDERLY CONDUCT - HAZARDOUS/PHYS. OFF': 'Disorderly Conduct - Hazardous/Phys. Off',
    'Simple Assault - Intent., Know., Reckless.Cause Bod. Injury': 'Simple Assault - Type A1',
}
topYouthOffenses.index = [dictt.get(label, label.replace('.', ''))
                          for label in topYouthOffenses.index]
topYouthOffenses.index
Index(['Possesion of Firearm by Minor', 'Simple Assault - Type A1', 'Robbery', 'Disorderly Conduct', 'Receiving Stolen Property', 'Simple Assault', 'Disorderly Conduct - Hazardous/Phys. Off', 'Marijuana: Possession Small Amount', 'Criminal Conspiracy', 'Aggravated Assault'], dtype='object')
# Make a horizontal bar graph showing the top offenses, one bar per offense.
trace = go.Bar(x=topYouthOffenses,
               y=topYouthOffenses.index,
               # `name` labels the trace as a whole, so it must be a single
               # string rather than the full index of offense names.
               name='Arrests',
               orientation='h',
               marker=dict(color='rgb(255,102,102)'),
               text=topYouthOffenses,
               textposition='auto',
               textfont=dict(
                   size=14,
                   color='white'))
# go.Data is deprecated; a plain list of traces is the supported form.
data = [trace]
layout = go.Layout(
    hovermode='closest',
    xaxis={'title': 'Arrests'},
    title='Top Alleged Offenses by Minors Arrested in Pittsburgh, August 2016-Present',
    # wide left margin leaves room for the long offense names on the y axis
    margin={'l': 300, 'r': 10, 'b': 50, 't': 50, 'pad': 4})
# CAPTION: A Type A1 Simple Assault refers to intentionally, knowingly, or recklessly causing bodily injury.
fig = go.Figure(data=data, layout=layout)
# MAKE A NOTE THAT SOME MIGHT BE ARRESTED FOR MULTIPLE OFFENSES
plotly.offline.iplot(fig)
# Number of arrests of minors, followed by the breakdown by race code.
minorRaces = dfMinor['RACE']
print(len(minorRaces))
minorRaces.value_counts()
1568
B 1311 W 178 O 43 H 15 U 14 A 7 Name: RACE, dtype: int64
# Number of arrests of minors for each combination of race and gender code.
dfMinor.groupby(by=["RACE", "GENDER"]).size()
RACE GENDER A F 2 M 5 B F 385 M 921 U 5 H F 3 M 12 O F 14 M 29 U F 2 M 11 U 1 W F 44 M 134 dtype: int64
# Count arrests of male minors by race code, in the display order
# Black, White, Hispanic, Asian, Unknown, Other.
# Pick the counts by label instead of by position in the count-sorted
# value_counts() result: positions shift whenever counts change or tie, and a
# race code with no arrests would displace every later one. Codes that do not
# occur are reported as 0. (This also avoids Series.append, which newer pandas
# releases no longer provide.)
raceOrder = ['B', 'W', 'H', 'A', 'U', 'O']
maleArrests = (dfMinor[dfMinor.GENDER == 'M']['RACE']
               .value_counts()
               .reindex(raceOrder, fill_value=0))
maleArrests
B 921 W 134 H 12 A 5 U 11 O 29 Name: RACE, dtype: int64
# Count arrests of female minors by race code, in the display order
# Black, White, Hispanic, Asian, Unknown, Other.
# Pick the counts by label instead of by position in the count-sorted
# value_counts() result: when two codes tie (as 'A' and 'U' do here) their
# positions are arbitrary, so positional slicing can swap them, and a race code
# with no arrests would displace every later one. Codes that do not occur are
# reported as 0. (This also avoids Series.append, which newer pandas releases
# no longer provide.)
raceOrder = ['B', 'W', 'H', 'A', 'U', 'O']
femaleArrests = (dfMinor[dfMinor.GENDER == 'F']['RACE']
                 .value_counts()
                 .reindex(raceOrder, fill_value=0))
femaleArrests
B 385 W 44 H 3 U 2 A 2 O 14 Name: RACE, dtype: int64
# Race breakdown for the minors whose gender is recorded as unknown ('U').
isGenderUnknown = dfMinor['GENDER'] == 'U'
GenderUnknownArrests = dfMinor.loc[isGenderUnknown, 'RACE'].value_counts()
GenderUnknownArrests
B 5 U 1 Name: RACE, dtype: int64
# Grouped bar chart of arrests of minors by race, with one trace per gender.
races = ['Black','White','Hispanic','Asian','Unknown','Other']
# Race codes listed in the same order as `races`, so each bar can be labelled
# from the code it actually holds rather than from its position in the series.
raceNames = dict(zip(['B', 'W', 'H', 'A', 'U', 'O'], races))


def _genderBar(counts, label, color):
    """Return the bar trace for one gender's arrest counts.

    counts is a Series of arrest counts indexed by race code, label is the
    trace's legend name, and color is the bar colour.
    """
    return go.Bar(x=[raceNames.get(code, code) for code in counts.index],
                  y=counts,
                  name=label,
                  marker=dict(color=color),
                  text=counts,
                  textposition='auto',
                  textfont=dict(
                      size=12,
                      color='black'))


trace1 = _genderBar(maleArrests, "Male", 'rgb(102,178,255)')
trace2 = _genderBar(femaleArrests, "Female", 'rgb(255,178,102)')
# GOING TO EXCLUDE 6 CASES WHERE GENDER IS UNKNOWN (MAYBE MAKE A NOTE OF THIS IN ARTICLE OR IN FOOTNOTE TO GRAPH)
# trace3 = go.Bar(x=GenderUnknownArrests.index,
# y=GenderUnknownArrests,
# name="Gender Unknown")
# go.Data is deprecated; a plain list of traces is the supported form.
data = [trace1, trace2]
layout = go.Layout(
    hovermode='closest',
    xaxis={'title': 'Race/Gender'},
    yaxis={'title': 'Arrests'},
    title='Race and Gender of Minors Arrested in Pittsburgh, August 2016-Present',
    legend=dict(font=dict(size=14))
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
# Lookup tables from the single-letter codes to readable names.
raceDict = {
    'A': 'Asian',
    'B': 'Black',
    'O': 'Other',
    'U': 'Unknown',
    'W': 'White',
    'H': 'Hispanic',
}
genderDict = {
    'M': 'Male',
    'F': 'Female',
    'U': 'Unknown',
}
# Spell out the race and gender codes in new columns.
dfMinor['Race_Full'] = [raceDict[code] for code in dfMinor['RACE']]
dfMinor['Gender_Full'] = [genderDict[code] for code in dfMinor['GENDER']]
# ARRESTTIME looks like '<date>T<time>'; store the two halves separately.
dfMinor['Date'] = [stamp.split('T')[0] for stamp in dfMinor['ARRESTTIME']]
dfMinor['Time'] = [stamp.split('T')[1] for stamp in dfMinor['ARRESTTIME']]
# Convert each age to a whole number.
dfMinor['AGE'] = [int(age) for age in dfMinor['AGE']]
dfMinor.head()
_id | PK | CCR | AGE | GENDER | RACE | ARRESTTIME | ARRESTLOCATION | OFFENSES | INCIDENTLOCATION | ... | COUNCIL_DISTRICT | PUBLIC_WORKS_DIVISION | X | Y | Year | Month | Race_Full | Gender_Full | Date | Time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
38 | 3547 | 1979079 | 16150139 | 17 | M | B | 2016-12-20T13:40:00 | 900 Block 2ND AV Pittsburgh, PA 15219 | 903 Criminal Conspiracy. / 3701 Robbery. | 100 Block N St Clair ST Pittsburgh, PA 15206 | ... | 9.0 | 2.0 | -79.927465 | 40.464938 | 2016 | 12 | Black | Male | 2016-12-20 | 13:40:00 |
44 | 8402 | 1985164 | 17076778 | 12 | M | B | 2017-05-02T11:45:00 | 600 Block 1ST AV PITTSBURGH, PA 15219 | 2705 Recklessy Endangering Another Person. / 2... | 600 Block FILBERT ST PITTSBURGH, PA 15232 | ... | 8.0 | 2.0 | -79.934186 | 40.453219 | 2017 | 5 | Black | Male | 2017-05-02 | 11:45:00 |
55 | 11794 | 1989328 | 17151293 | 14 | F | B | 2017-08-10T21:00:00 | 700 Block Chautauqua ST Pittsburgh, PA 15214 | 2701 Simple Assault. | 700 Block Chautauqua CT Pittsburgh, PA 15214 | ... | 6.0 | 1.0 | -80.017851 | 40.463887 | 2017 | 8 | Black | Female | 2017-08-10 | 21:00:00 |
76 | 15544 | 1994111 | 17207824 | 17 | F | B | 2017-12-05T09:00:00 | 600 Block 1ST AV PITTSBURGH, PA 15219 | 2701 Simple Assault. / 5503(a)(4) DISORDERLY C... | 1100 Block N MURTLAND ST PITTSBURGH, PA 15208 | ... | 9.0 | 2.0 | -79.900142 | 40.460411 | 2017 | 12 | Black | Female | 2017-12-05 | 09:00:00 |
117 | 6809 | 1983081 | 17033703 | 12 | F | B | 2017-03-13T14:15:00 | 600 Block 1ST AV PITTSBURGH, PA 15219 | 13(a)(31) Marijuana: Possession Small Amount | 3500 Block FLEMING AV PITTSBURGH, PA 15212 | ... | 1.0 | 1.0 | -80.038475 | 40.478588 | 2017 | 3 | Black | Female | 2017-03-13 | 14:15:00 |
5 rows × 23 columns
#dfMinor.to_csv("MinorData.csv",index=False)