%%capture
# Locate pip's programmatic entry point: try the top-level `pip.main` first
# and fall back to `pip._internal.main` for pip releases that do not expose it
# at the package top level.
try:
    from pip import main as pipmain
except ImportError:
    # Only a failed import should trigger the fallback. A bare `except:` would
    # also swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    from pip._internal import main as pipmain

# Third-party packages this notebook installs before importing them.
packages = ['pandas', 'psycopg2-binary', 'spacy']
pipmain(['install'] + packages)
%%capture
from nltk.tokenize import casual_tokenize
import nltk
# nltk.download('averaged_perceptron_tagger')  # uncomment to download this resource on first run
import pandas as pd
import psycopg2
import spacy
from spacy.matcher import PhraseMatcher
import re
from collections import Counter
%%capture
!python -m spacy download en_core_web_lg
import spacy
# Load the large English spaCy pipeline requested by the download command above.
nlp = spacy.load('en_core_web_lg')
from sklearn.feature_extraction.text import CountVectorizer
import re
import nltk
# nltk.download('stopwords')  # uncomment to download the stopword corpus on first run
from nltk.corpus import stopwords
# NLTK's English stopword list, held as a set (deduplicated, fast membership tests).
stop_words = set(stopwords.words("english"))
Note: you need a file containing the AWS credentials to access the database.
Here, the credentials are stored in a file called aws.py
in the following format:
host = 'endpoint_url_string'
port = port_number
user = 'user_string'
password = 'password_string'
database = 'db_name_string'
from aws import host as ahost, port as aport, user as auser, password as apassword, database as adatabase
# Open the database connection using the credentials imported from aws.py,
# then create a cursor on that connection.
connection = psycopg2.connect(
    host=ahost,
    port=aport,
    user=auser,
    password=apassword,
    dbname=adatabase,
)
cursor = connection.cursor()
def read_try(sql):
    """Run ``sql`` on the module-level ``connection`` and return a DataFrame.

    Args:
        sql: Query text passed to ``pd.read_sql``.

    Returns:
        The query result when it contains at least one row. Otherwise a
        brand-new empty ``pd.DataFrame``: both when the result is empty and
        when any exception is raised (the error is printed, not re-raised).
    """
    try:
        frame = pd.read_sql(sql, con=connection)
        if not frame.empty:
            return frame
    except Exception as err:
        # Best-effort read: report the failure and fall through to the
        # empty-frame return instead of propagating the exception.
        print("READ ERROR", err)
    return pd.DataFrame()
# Fetch every row of the document_tags_bak table.
def read_document_tags_table_from_db():
    """Return all rows and columns of ``document_tags_bak`` via ``read_try``."""
    return read_try('SELECT * FROM document_tags_bak')
# Read the document_tags_bak table into a DataFrame.
document_tags = read_document_tags_table_from_db()
# Bare expression as the last line of the cell so the notebook renders the table.
document_tags
allegation_id | title | document_text | url | text_bad | incident_date | allegation_category | allegation_name | organization | nudity_penetration | sexual_harassment_remarks | sexual_humiliation_extortion_or_sex_work | tasers | trespass | racial_slurs | planting_drugs_guns | neglect_of_duty | refuse_medical_assistance | irrational_aggressive_unstable | searching_arresting_minors | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 315627 | CRID 315627 CPB | None | https://assets.documentcloud.org/documents/577... | None | 2006-09-20 | Operation/Personnel Violations | Misuse Of Department Equipment / Supplies | CPB | None | None | None | None | None | None | None | None | None | None | None |
1 | 315524 | CRID 315524 CR | SUMMARY REPORT DIGEST- COMPLAINT REGISTER INVE... | https://assets.documentcloud.org/documents/440... | True | 2006-08-24 | Operation/Personnel Violations | Neglect Of Duty | CR | None | None | None | None | None | None | None | None | None | None | None |
2 | 315452 | CRID 315452 CPB | None | https://assets.documentcloud.org/documents/577... | None | 2006-08-31 | Operation/Personnel Violations | Neglect Of Duty | CPB | None | None | None | None | None | None | None | None | None | None | None |
3 | 315343 | CRID 315343 CR | COMMAND CHANNEL REVIEW A Sasso Emp 14982 NW 1 ... | https://assets.documentcloud.org/documents/457... | None | 2006-07-21 | False Arrest | Illegal Arrest / False Arrest | CR | False | False | False | False | False | False | False | False | False | False | False |
4 | 315047 | CRID 315047 CPB | None | https://assets.documentcloud.org/documents/577... | None | 2006-06-11 | Traffic | Improper Processing / Reporting / Procedures | CPB | None | None | None | None | None | None | None | None | None | None | None |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1678 | 1000436 | CRID 1000436 CR | REPORT DIGEST LOG NO. TYPE DATE OF REPORT CHI... | https://assets.documentcloud.org/documents/476... | None | 2006-04-21 | Operation/Personnel Violations | Eeo Investigations | CR | None | None | None | None | None | None | None | None | None | None | None |
1679 | 1000417 | CRID 1000417 CR | COMMAND CHANNEL REVIEW COMPLAINT REGISTER INVE... | https://assets.documentcloud.org/documents/457... | None | 2006-10-06 | False Arrest | Illegal Arrest / False Arrest | CR | None | None | None | None | None | None | None | None | None | None | None |
1680 | 1000301 | CRID 1000301 CR | 7/18/2018 View Incident Print Logout Help ... | https://assets.documentcloud.org/documents/476... | None | 2006-10-07 | Operation/Personnel Violations | Neglect Of Duty | CR | None | None | None | None | None | None | None | None | None | None | None |
1681 | 1000227 | CRID 1000227 CR (Discovery) | 1000227 COMMAND CHANNEL REVIEW-SUSTAINED CASE ... | https://assets.documentcloud.org/documents/522... | None | 2006-05-22 | Operation/Personnel Violations | Misuse Of Department Equipment / Supplies | CR | None | None | None | None | None | None | None | None | None | None | None |
1682 | 1000201 | CRID 1000201 CR | 7/27/2018 View Incident Print Logout Help ... | https://assets.documentcloud.org/documents/476... | None | 2006-10-03 | Use Of Force | Unnecessary Physical Contact / On Duty - No In... | CR | None | None | None | None | None | None | None | None | None | None | None |
1683 rows × 20 columns
Document tags we are looking for:
(List from Piazza)
# Pull out one sample row (index label 3): print its text and keep its URL.
test_index = 3
text1 = document_tags.at[test_index, 'document_text']
print(text1)
pdf1 = document_tags.at[test_index, 'url']
COMMAND CHANNEL REVIEW A Sasso Emp 14982 NW 1 32007 DATE INITIATED COMPLAINT REGISTER INVESTIGATION No.2 CHICAGO POLICE DEPARTMENT 315343 12 MARCH 200? INVESTIGATOR SECTION T0: UNIT: Commanding Of?cer Unit IZI-GIS PLEASE SUBMIT THIS COMMAND CHANNEL REVIEW FORM THROUGH CHANNELS WITH THE CASE FILE TO: INTERNAL AFFAIRS DIVISION OFFICE OF PROFESSIONAL STANDARDS DATE RECEIVED BY UNIT I CONCUR [a RECOMMEND OPTIONS BE GRANTED NOT RECOMMEND OPTIONS NOT BE GRANTED REVIEWED BY TITLE A UNIT - DATE FORWARDED BY UNIT 0 ?Wrdy a 7 DATE RECEIVED BY UNIT I CONCUR I DO NOT 14/4457: 70%? RECOMMEND OPTIONS BE GRANTED CI RECOMMEND OPTIONS NOT BE GRANTED REVIEWED BYZ UNIT 27/ Maj/6;? 4/ Dem/{Z/ 54? DATE FORWARDED BY UNIT DATE RECEIWY UNIT [3 I CONCUR RECOMMEND OPTIONS NOT BE GRANTED I DO NOT RECOMMEND OPTIONS SE GRANTED REVIEWED BY I TITLE UNIT I I DATE FORWARDED BY UNIT DATE RECEIVED BY UNIT I CONCUR RECOMMEND OPTIONS NOT BE GRANTED I DO NOT RECOMMEND OPTIONS BE GRANTED REVIEWED BY TITLE UNIT DATE FORWARDED BY UNIT DATE RECEIVED BY UNIT I CONCUR [3 RECOMMEND OPTIONS NOT BE GRANTED CI I DO NOT El RECOMMEND OPTIONS SE GRANTED REVIEWED BY . TITLE UNIT . DATE FORWARDED BY UNIT A SEPARATE REPORT PROPOSING ALTERNATE FINDINGS AN ALTERNATE RECOMMENDATION IS REQUIRED WHEN DO NOT HAS BEEN CHECKED. C.R.NO. 315343 INTERNAL AFFAIRS DIVISION [2 March 2007 General Investigation Section TO: Commanding Of?cer General Investigations Section Internal Affairs Division FROM: Police Agent Sharon star I I 129 General Investigations Section Internal Affairs Division SUBJECT: Approval of Complaint Register Investigation C.R. 315343. The attached Complaint Register Investigation has been completed and IS submitted for your approval. Police Agent Sharon Salustro, 129 General Investigation Section Internal Affairs Division Team Sergeant: Josi Maraff' no #2563 ?mam-u 2493 Commanding Of?c?? 
General Investigation Section Internal Affairs Division SUM MARY REPORT DIGEST- COMPLAINT REGISTER INVESTIGATION NO.: CHICAGO POLICE DEPARTMENT RBdaCted 315343 ame- Elnp 110.: S. p0,) To be used In ail cases that are to be classified in SUSTAINED cases where the Disciplinary Rec as either EXONERATED. UNFOUNDED, NOT SUSTAINED or emmendatlon does not exceed FIVE (5) DAYS SUSPENSION. SUBMIT ORIGINAL AND 3 COPIES IF ASSIGNED TO SAME UNIT AS ACCUSED. SUBMIT ORIGINAL AND 4 COPIES IF NOT ASSIGNED TO SAME UNIT AS ACCUSED. TO: ATTENTION SUPERINTENDENT OF POLICE ADMINISTRATOR IN CHARGE, OFFICE OF PROFESSIONAL STANDARDS ASSISTANT DEPUTY SUPERINTENDENT. INTERNAL AFFAIRS DIVISION NAME Sharon SALUSTRO RANK P.A. STAR NO. 11129 SOCIAL SEC. NO. EMPLOYEE NO. UNIT ASSIGN. 121 ADDRESS OF INCIDEN- DATE OF INCI 21 DENT -TIME Jui 06 1500 LOCATION 17 BEAT OF i DENT I 1 1 NAME 1- Edward MAY RAN P.O. STAR NO. 1 6474 SOCIAL SEC. NO. EMPLOYEE NO. UNIT ASSIGN. 011 2 Brian TOWN P.O. 3599 Oil SEXJRACE 0.0.3. 1. DATE OF APPOINTMENT 26 April 1976 ON DUTY DUTY STATUS (TIME OF COND. SWORN OFF DUTY CIVILIAN 01 ACCUSED 2. MW 19 June 2000 0N DUTY swonn El OFF DUTY Oi ESTEDI NDICTE IF APPLICABLE - 1. CHARGES COURT BRANCH DISPOSITION 3: DATE NAME CITY STATE TELEPHONE sexmace .0 .BJAGE 0ND. NAME CITY STATE TELEPHONE PHYS. eonn SEXIRACE cool-z} NAME CITY STATE TELEPHONE SEXIRACE D.0.BJAGE cools} a VICTIMS OOHPLAINANTS ALLEGATIONS return to the complainant. SEE ATTACHED SHEET FOR ADDITIONAL ACCUSED. COMPLAINANTS. VICTIMS. WITNESSES. On 06 September 2006 at 1508 hours the complainant? this complaint with Civilian Diane WOLFE Emp#44661. The complainant?alleges on 21 July 2006 at 1500 hours at accused without justi?cation. 
The complainant further alleges that the accus that during the arrest procedures the accused took his State Identi?cation Ca telephoned the Of?ce of Professional Standards and registered _he was searched three times by the cd planted drugs and falsely arrested the complainant and rd and $60.00 U.S.C., which the failed to inventory or I.A.D. LOCATION I.A.D. PHYSICAL CONDITION 01 No Visible Iniury - Apparently Normai 01 11 Public Transportation Veh {Facility - 02 VI - 3: Sm?gh?i?tt?; 12 mammary as time? ?i 13 Air ort 04 Police Building 14 Fugue Pro erty Other 04 Injured, Not Hoopitaiized - Under in?uence as Locku Facility 15 Other PrivaePremlse 05 iniured. Hospitalized 06 Police eintenance Facility 16 Ex resswaylinterstate System 06 Injured. Hospitalized - Under in?uence 3; 8310 Agtrliimotli've Pettind Facility 17 5&1 i 07 Injured. Refused Medical Aid er co roper 8 aerway. . ar 5 re (13% Splicet (?ommunicatione System 19 Private Residence 33 g?zfadgaizefused Medical Aid Under In?uence 00 10 Deceased?Undm?um IF CPD MEMBER. LIST RANK. STAR. SOCIAL SECURITY. EMPLOYEE NOS. IN ADDRESS BOX. PAXIBELL IN TELEPHONE BOX. C-R- ?9 315343 Brie?y summarize the investigation describing your oliorts to prove or disprove the allr-rgaliorda). indicate whether witnesses or evidence Support or do not support the in sustained cases ONLY. copies of the accused member's Summary of Previous Disciplinary Actions and Record of Previous Compliment- ary History will be included an attachments. Upon receipt ot?this investigation the RM sent a certi?ed letter to complainant? asking that he contact her regarding this complaint. The certi?ed letter which was sent to _at the address listed on the Initiation report and the Complaint Against Department Member form was returned to RM by the United States Postal Service with a printed label "Return to Sender - Unknown reason Unable to Forward. 
'i?he RM also made two attempts to tclephonically contact _at his home number that was listed on the Complaint form and at the alternate number that was also listed on the Complaint form. IUA was not able to make contact at the home number but did reach a voice mail at the alternate number at which time she identi?ed herself and left a detailed message for?asking that he contact her but all attempts met with negative results. Due to the fact that all attempts made to contact and interview this complainant met with negative results the undersigned did a search of department records and was able to locate the complainant's arrest report and two inventories. The ?rst Inventory #10788402 was for suspect Herein and the second inventory (?0788403 was the Complainant's State identi?cation Card and $60.00 U.S.C., as well as the documentation verifying that the money and identification card were returned to the complainant after his court proceedings. SUMMARY WA was able to identify the accused by the department records but since the undersigned was not able to interview this complainant nor obtain a Signed Sworn Af?davit the accused were identi?ed but not noti?ed nor served under this investigation. Based on a careful review oiall the documents gathered for this investigation this investigator has determined that the preponderance does not support the allegations made against the accused Department members. 'I?heret?ore, the following recommendation is being made. INVESTIGATIVE REPORTS- PHYSICAL EVIDENCE NUMBER or ATTACHMENTS SUPPORTING SUPPORTING ACCUSED SUBMITTED THIS LIST ATTACHMENT NUMBERS: LIST ATTACHMENT NUMBERS: LIST ATTACHMENT NUMBERS: ATTACHMENTS 0 2,3,4,5,6,&7 0 9 Summarize the findings and recommendations. Rule violations will be cited by number only. One overall recommendation for Disciplinary Action will be made by the investigator. The recommendation will be for ALL sustained ?ndings; recommendations will NOT be made for each sustained allegation. 
- Example: 1. Violation noted, no disciplinary action warranted. 2. That the accused member be reprimanded. 3. That the accused member be suspended for days (not to exceed 5 days). to 9 ALLEGATIONS: UNFOUNDED RECOMMENDATIONS: NO DISCIPLINARY CASE TO BE FILED. DATE DATE COMPLETED ELAPSED TIME COMPLAINT WAS DATE OF THIS (TOTAL TIME. ECEWED FOR 03 Sep 06 EPORT) 12 Mar 07 EXPRESSED tN 134 investigator will initiate the Command Channel Review 3% form by completing the investigator?s Section. Q6 Slam-MM [19% 1F NECESSARY. USE AN x11" SHEET 0F WHITE PAPER TO CONTINUE ANY ITEM. Redacted Name: SAL UST R0, S. Emp no.: 2444 . Date: SUMMARY REPORT DIGEST C.R. 315343 ADDITIONAL ACCUSED: #3 Thomas CARE . Of?cei', star 18795, emp#- Unit 011 District, WW, date of date of appointment 05 June 1995, dUtY status On duty, Sworn #4 . Daniel BORA, Police Of?cer, star 19830, emp#- Unit 011 District, date of birth date of appointment 06 May 1996, duty status On duty, Sworn Code . SUMMARY REPORT DIGEST Attachments; 1. EVIDENCE Complaint Against Department Member 1A. Complaint Register Con?ict Certi?cation 2. 3. Unsigned Sworn Af?davit Copy of Certi?ed letter sent to complainant, And related documents Report by Police Agent Sharon SALUSTRO, 11129, RE: Complainant, refusal to cooperate with this . Investigation Copy of Arrest Report - Copy of Inventory No. 10788402 Copy of Inventory No. 10788403 Copy of Criminal History Arrestee - COMPLAINT AGAINST DEPARTMENT MEMBER INITIAL COMPLAINT CATEGORY C.R.NO. CHICAGO POLICE DEPARTMENT 03D 315343 TO COMMANDING OFFICER UNIT UNIT. MANNER COMPLAINT RECEIVED Internal Affairs Division - 018 121 - GIS *3 BELLE PAXEILETTER PERSON RECEIVED FROM COMPLAINANT RANK STARIEMPL. NO UNIT NO. DAY DFWK. DATE TIME WOLFE, Diane - 113 WED 06 sap 06 15:03 REGISTERED WITH OPS. BT-NAME RANK STARIEM 0 UNIT NO. DAYOFWK DATE TIME WOLFE. Diane VA 113 1AME ANK STAR UNIT NO. SOCIAL SECURITY NO. EIONEOFF LOCATION OF INCIDENT DAY OF DATE TIME TOTAL ACCUSED TOTAL COMPL. TOTAL WIT. 
FRI 21 JUL 06 15:00 3 NAME (STREET. APT. NO.. CITY, STATE) ZIP CODE 3 50524 HOME PHONE NO. CONTACT AT TIME PHONE NO. 3 - CHECKONE ADDITIONAL NAME ADDRESS (STREET, APT. NO. CITY. STATE) PHONE No, J-.. ACCUSED, (TOMPLAINANTS (C). VICTIMS (V), WITNESSES (W). USE ANOTHER IF CPID MEMBER. LIST RANK, STARIEMPLOYEE, SOCIAL SECURITY NOS. IN ADDRESS. PAXIBELL IN PHONE NO. BOX. NARRATIVE OF The complainant alleges he was searched three times without justi?cation at the ?rst location as he took out the garbage. The complainant alleges at the second location he was searched again, drugs were mysteriously found and he was falsely arrested. The complainant further alleges his State Identi?cation Card, house keys, wallet containing miscellaneous papers and $60.00 U.S.C. were taken and not inventoried or returned. The complainant states the case was dismissed on 10 AUG 06. WVES . SIGNED RAN STARIEMFENU. UNIT NO, SUUIAL OELURITY NU. I. .3 Pat IZIG 2. DATE ASSIGNED TIME I I OF WK. DATE ASSIGNED - TIME 2. DAY OF WK. Complaint Register Investigation Conflict Certification Chicago Police Department I hereby certify that to the best of my knowledge, neither I, nor my spouse or domestic partner, my parent, my sibling or my child (hereinafter my household or immediate family), has a personal, professional or ?nancial relationship with the victim(s), complainant(s), accused department member(s), witness department member(s), or civilian witness(es). I further certify that to the best of my knowledge, the resolution of the matter under investigation will not positively or negatively affect my financial interests or the ?nancial interest of any member of my household or immediate family. 
- I acknowledge that I must disclose to the Assistant Deputy Superintendent, Internal Affairs Division or the Chief Administrator, Of?ce of Professional Standards, in writing, the acquisition of any ?nancial interest or the development or the discovery of any personal interest that would directly affect my ability to conduct an impartial objective investigation and render unbiased decisions concerning the matter under investigation. I acknowledge that I must disclose to the Assistant Deputy Superintendent, Internal Affairs Division or the Chief Administrator, Of?ce of Professional Standards, in writing, the discovery that a member of my household or immediate family has a personal, professional or ?nancial relationship with the victim(s), complainant(s), accused department member(s), witness department member(s), or civilian witness(es) or if a member of my household or immediate family will be positively or negatively affected by the resolution of the matter under investigation. f? 5 Signature Signa P. A. Sharon Salustro, 11 129 Sergeant Joseph Maraf?no, #2563 Investigator 3:1:52 Date Date Attachment 1 A CPD-44.201 (Rev. 1/04) 3 1 3 4 3 SWORN AFFIDAVIT FOR COMPLAINT REGISTER INVESTIGATION CHICAGO POLICE DEPARTMENT STATE OF ILLINOIS 1 CC COUNTY OF COOK Location of Incident Date Time 21 July 2006 3:00 PM. (1500 hours) I, - hereby state as follows: Under penalties as provided by law pursuant to Section 1-109 of the Illinois Code of Civil Procedure,_i certify that the allegations set forth in my complaint are true and correct, except as to any matters therein stated to be on information and belief and as to such matters, certify as aforementioned that verily believe the same to be true. RA. Sharon Salustro, 11129 Print Witness's Name Complainant's Signature Witness's Signature Date Date CPD-44.126 (Rev. 6105) Attachment 2 315343' ?at? .u 3.. . . Department of Police City of Chicago 3510 South Michigan Avenue Chicago. Illinois 60653 Date 9/3/06 Re: C. R. No. 
3 5343 A complaint against a Department member, registered under the above Complaint Register (C.R.) Number, is currently under investigation by the Chicago Police Department Name: P. A. SHARON SALUSTRO INTERNAL AFFAIRS DIVISION Address: 3510 s. MICHIGAN AVENUE Telephone: 312-745-6310 Hours Available: 7:00 a.munti12:45 pm. Mon thru Fri. Sincerely, 5-.- 3 Harlow-1k r; 4 P.A. s. SALUSTRO CPD-44.223 (Rev. 1104) Emergency: EM .1 Non-Em ergency: (Within City limits) 3-1 ~1 Non-Emergency: (Outside City til-- mi a) A- . - I i City of Chicago Department of Police 3510 South Michigan Avenue 1 Chicago, Illinois 60658 llEU 14?399? logazaji?" OIQDJDE Egtgz 5 LU, 590:0 3?31 hEd UHTT 5001 i INTERNAL AFFAIRS DIVISION 13 February 2007 General Investigation Section C.R. 315343 TO: Commanding Of?cer General Investigation Section Internal Affairs Division FROM: Police Agent Sharon Salustro, #11129 General Investigation Section Internal Affairs Division SUBJECT: Report by Police Agent Sharon Salustro, star 11129, RE: Complainant, refusal to cooperate with this Investigation ALLEGATIONS: The com lainant alleges on 21 July 2006 at 1500 hours at he was searched three times by the accused without justi?cation. The complainant further alleges that the accused planted drugs and falsely arrested the complainant and that during the arrest procedures the accused took his State Identi?cation card, and $60.00 U.S.C. which the accused failed to inventory or return to complainant. Upon receipt of this investigation the undersigned sent a certi?ed letter to complainant asking that he contact the undersigned regarding this complaint. On 23 October 2006 the letter was returned to by the United States Postal Service with a label marked ?Return to sender Unclaimed- Unable to forwar I On 27 October 2006 the undersigned attempted to telephonically contacted complainant ?at the home number listed on the Complaint Against Department Member form with negative results. 
On 27 October 2006 an attempt was also made to reach at the alternate phone number listed and met with negative results. On 22 November 2006 the undersigned attempted to telephonically contacted complainan_ at the home number _listed on the Complaint Against Department Member form with neiative results. then tried to reach?at the alternate phone number listed and the call went into voice mail at which time identified herself and left a detailed message asking_ to contact her but did not respond to the message . SN General Investigation Section Team S?geant: Josepl? Maraf?no #2563 .LALAUJ. Page 1 Of 7 Print Logout i ?wres Report Set-trend This is the Orginai Report CB No. 016603614 IR No. 944104 RD No. HM49044-5 Arrest Date 21-JUL-2006 17:40 District of . .. Initial Approval DISTRICT 011 Holding 011 MALE LOCKUP Status IDENT. CLEARED POD Related? NO Arrestee Armed With Offender information gshet Last Name First Name Middle Name I Name Suf?x Nickname SSN Alias Last Name Alias First Name No SID N0 Drivers License No. Issuing State Resisted Arrest? NO TRR Completed? NO NO DCFS Ward? NO View All Sang Affiliation (Gang Arrest Card) Gang Name Faction Name Unlisted Gang Narne Rank Role Known Hangout Self Admitted Offender DeScription Birth Place Birth Date Age 34 we; Sex MALE Race ASIANIPACIFIC ESLANDER Color esowm 3/9/2007 Hair Color BLACK Hair Style SHORT Complexion DARK Height - Weight - Occupation NONE - Employer! School Q?ender Scar Marks . Name Type Scars ocation Photographed ?m to . row(s) 1 of1 Offender__ldentifications no data found erree I Offender. Address . Page 2 of 7 Type Location Address County Beet Phone CHA Properly Arrest Ailey 1112 Residence 2 row(s) 1 - 2 of 2 Non Offender Information Role Name Sex Race Birth A A rox?7 Deceased? Hos italized?? Hos ital Treated and Comments Date 9 pp :3 Released? ?dim and NO no no no Complaman=. Emergency Contact REFLSED AJQ NO NO no row(s) 1 - 2 of 2 tie.e.._0tfender Addresses . .. . Role Name Type Street No. 
Street Name $3393 SI gig Beat Phone No. 3/9/2007 . Page 3 0f7 Emergency Comte-oi REFUSED Residence Non Offender Contact Numbers NO I I I Non Offender lnjuriesW No Records Found A5151 Eelony 1_ Detective Information No Records Found Offense .. . . Domestic Amended/Amended to Line Line IUCR Statute lnchoate Code Violence Victim No 1 720 Possession - Less Than 15 4F Offense As No - Heroin Cited row(s) 1 - 1 of1 Recovered Nameties .. .. . Type Approx. Weight/Quantity Units Estimated Street Value Suspect Controrled Substance 1 GRAMS $50.00 row(s) 1 - 1 of 1 Atxssiee Questiennaitsm. Question Response 1. Presentty Taking Medication? 2. {if FemalejAre You Pregnant? - No 3, First Time Ever Been Arrested? No 4. Attempted SuicidefSerious Harm? No 5? Serious Medical Or Meets! Problems? N0 6~ Are You Receiving Treatment? No 33 8045 954533 1 3/9/2007 a . U11 LAILVA LlnuLxUlJ row(s) 1 - 6 of 6 Vissal Check 0'1 .Arrestee .. .. Question Response 1. Is There Obv?ous Pain Or Injury? No 2. ls There Obvious Signs Of Infection? No 3. Under The In?uence Of Alcohoerrugs? No 4? Signs Of AlcoholfDrug Withdrawal? No 5. Appears To Be Despondent? No 6. Appears To Be irrational? No 7. Carrying Medication? No row(s) 1 - 7 of? Lockuo Keeper Remarks Lockup Keeper Response Referred To Placed in one person cell? NO Placed under close observation? NO ?531131113 intormetion . No Records Found. Associated Incidents No Records Found Associated Arrests No Record Found. Associated Cases . No Record Found. Dispersal Events Dispersal Event No. Arrest Event No. 33 1 8045 954533 Page 4 of 7 . .31 u-rsw-um '5 r1: .- 3/9f2007 1 a ?gunman,? Page 5 of 7 Arrestee Vehicle information Year Model Make Type Style 1 Top Color Bottom Color 2: 1 Vin No Inventory No 1demote License information License No License State! Expiry Date Used as Weapons? NO Disposition Vehicle impounded? NO Towing Agency Tow Report No. Transport information 5 Assigned to Vehicle Transport Beat Transport Tirne 113-4 21-JUL-2006 127:4? 
intro ed Employees ?Information Employee Role Last Name First Name Ernployee No Star No. Agency Name? Beat tat Arresting Officer MAY Edward 164,74 1182 2nd Arresting Of?cer TOWN Brian 3599 1182 Ass sh . ngArresting Or?cer CAREY Thomas ?48795 1183 A351 ing A rresti Officer BORA Daniel ?59830 4183 Ade:t?ng Of? BORA Danie! 19830 Desk Reviewed By SPAGNOLA Michael 2008 Lo ookup Keeper JOHNSON Hubert 18235 Searched By JOHNSON Hubert 1c235 Fl. nger printed By STEWART Neison 4125 row(s) ?l 9 of9 3/9/2007 Vuxuu 111a Page 6 of 7 spp?roratiaiormatiw Name . Employee No. Star No. Probable Cause Approved By: SPAGNOLA. MICHAEL 200:5 . Final Approved By: smouom MICHAEL 2008 IntervietrLoss No Record Found. . No Record Found. Fingerprints Taken Palmprints Taken? Time Printed 19:10 Time Photographed 21-JUL-2000 19ri? Time Fed Cell No. Ct .Phone Number Called Time Called Received In Lockup 210? i9:09 Court. Information . .. .. . . . Court Sergeant? NO Released from Holding Facility. 08:15 Desired Court Date Desired Branch Call 44-2 Room No. Address lntial Court Date 22-JUL-2006 00:00 Initial Court Branch Initial Room No. Address no data found Brand Information Bond Date Bond Type Bond Amount Bond Receipt No. 3/9/2007 ??15 JENT NO. OF TO ARREST: PM) ON ABOVE ON NUMEROUS OCCASIONS TO RECEIVE use TENDER SMALL OFFENDER RETRIEVED OBJECTTET FROM PLASTIC BAG IN HES SOCK. OFFENDER DETAENED 8: RIOS RECOVERED PLASTIC BAG [5393556) TEN FGELS EACH CONTAINING SUSPECT HEROIN. OFFENDER ARRESTED ED a TRANSPORTED TO 011 1315mm: REVEALED $80.00 use. 1000 FT. 0E 403*?- Wafcch Commander Comments No Record Found. CHECK CLEAR NO. "157G?"?"v a Page 7 of 7 User: Al: Mamie: 13315 SRevfsi-on ?1 1193 .. ngh?a reserved 3/9/2007 View. Inventory tide ti?epertn?eni Home ., ei'ratck Creeleftittrt Inventory Inventory No: 10788402 inventory: 011 Page 1 of2 eTreck Homet Logoutl Heip nventory Collection}? 
Incident No: RD HM490445 Date Recovered: Unitof Statue: RECEIVED Recovery Search Warrant No Date Recovered Unit of Inventory 0'11 DISTRICT 11 Incident Number RD Hmeems Comments 2024 item list . Package Parnent No. ocation Item No Item Type Quantity . PLASTIC Y97621 to/ 1493143 DRUG 18MB TIN FOIL PACKETS EACH 1493144 Egg? ?Ci 5 WITH POWDER SUSPECT HEROIN Court Date 15 AUG- 2008 Court Branch 44-2 State Charges ?20 ELCS Inchoate Action Hold For investigation Investigating Of?cer MAY. EDWARD Star No.16474 UnitO'It DISTRICT Initial Destination 17? FORENSIC SERVICES SECTION Transport Mode POLICE MAIL {Courier} RecoveredFrortt (non-CPD) Recovered/Seized From Arrested Last Name_ First Name- MI. Sex Racet] Recovered At Street No- City CHICAGO State IL ZIP aNarcotic Type Status Recerveo HEROINIWHITE RECEIVED Street Dir- Street Name? Apartment No. "It 15H 5 View Inventory z. u; L. Phone No. Beat 1 1 '12 Owner (non? LastName?Fifst Name_ A M. I- Street No- Street Dir- Street Name ?Apartment No. City State IL - ZIP Phone No. Found By: (CPD Employee) CPD Employee MAY, EDWARD Involved Officers . 1st Ofrcer CAREY THOMAS Star No 113795 Unit 011 11 2nd Of?cer BORA, DANIEL Star No.19830 Unit 011 A DISTRICT .3 i? User 13131111193 Copyright-1712002QAII rightmesnwed. Module; 1010!} Sl?iwihaon 1 11353 '21 thanhriq 3/14/2007 ,View' Inventory Page 1 of 2 .. nit-ant eTrack Home] Logouti Help 1. ??tnventory Loiiection??; Home E) e't'rack >1Craeteit?idittrweIttory inventory No: 10788103 Incident No.: RD HM498445 Date Recovered: Unitof Irwentory:011 Status: TURNOVER View Attachments Recovery Search Warrant No Date Recovered Unit of Inventory 011 DISTRECT tt Incident Number RD HM490445 Comments IUCR 2024 Package No. Location Item No. 
Quantity Descriptlon [Signal Value - Status _Unpackaged 1493145 133210125 TURNOVER Classification .Court Date 2006 Court Branch 44-2 Currency Disposition NARCOTICS SEIZURE TO BE DEPOSITED State Charges T20 ILCS Inchoate I 1 Action Hold For investigation Investigating Of?cer MAY. EDWARD Star No.16474 UnitOtt DISTRICT 11 Initial Destination 16? sees Transport Mode POLICE MAIL. (Courier) Recevered From. Recovered/Seized From Arrested Last Name -irst Name- MI. Sex Race Recovered At Street No- Street Dir- Street Nam_ Apartment No. I City CHIL AGO State IL ZIP Phone No. Beat1134 Owner (now-CPD) LastName_FirstName- I M..I we ?Task I I: I (M .n4nn if- 1 321.5 .View? Inventory . Page 2 of 2 Street No - Street Dir -Street Name_ Apartment No. City CHICAGO State IL ZIP Phone No. Found By (CPD Employee) . CPD Employee MAY. EDWARD Involved Officers . 1st Of?cer CAREY, THOMAS Star No. 18795 Unit 011 .. 11 2nd Of?cer BORA. DANIEL Star No. 19830 Unit 011 11 :Llser: I Cb?rl?ghl at}: 2602. All?ghl? Module: 10100 ISRevisaon: 1.1855 "ha? :i h?h'fl?l??o hint nu . Page 1 0f I "saw ?3.91553 . . . .. .. mama.? 3 gears. 7?mug 5.. gag ?Kim? ifl?r v.3 ?fmg??L armimm?mg .. hmu?w? .nwm? mu??a. Qw?umm? $313.53. .. . 5.2. .. i . . all: .. . (Etna? I . .11. . nil. . . . I . ..1 51xi. 1.5.. . .. .. .ulI . 11.1.. . . . 5 .1. Jan-11.3.! {15.1.5.1} ?x .W gamma-HO 1.31 20. W123. ?27.31.32.13. . .1, . 0270210 ggwp?zm5.. 3.78 27. a: .W . . 1:3.33.611.1.31137183 .- . . .. . .. . - . . 1 . 1.1.1.11. . why-n1?. 111111. 1.1.. {7.1.1 1 .1 2.113 224%.. 7077.01.53.11.- mm .7.. 77.91.1117. . ??gtW??1?C1Ari-Wham3.7.71 ?u .27 111. .. .. girl . 71.1,? 7 . (47.1.70131? 41.1.7! 61W .1 1W1 .. 3..an3.1171. .7. mmndoz 32.1.-. 1.1.7.4.. Wmibg .-..-. .. -..-1.11.1.1 1. .1. . .1.-1.111.111-11111.753.107.15w W10 mm ?mnnumW WOW-31.1. ??dmm. 11.9%. 1.71111.- Wham-.713 . 1_ . 1 11.1315. 11 . 11.111.11.111 .- . - 111W11-1..1 . .1.-1.111-111-1111-.. 11.0.11?. 33.. .033. zmvoi?? . .u 7131?.? 01.71.?? 
mum-.717 Hub :18 35.93810 371.07.101.73. mau?n udl. I nm?hamn? .. . .-11..-.-.-11: 7.1.1.: 171.176 .3113 TYNE. We .. 1 .1.-310.013111lulalw7l-1 ?Mu-Mat. 131.733.117.17 . .71.. W. mama-1.57 3117.17.77: M71. (.Im?sq . 1.71 ?n . .17 1.5.7.0 .. 11.. .1 71.211.11.112 ..5.w7457722 01.1.1.8? 1 91.11..? 20. :23. . ..1-7. F. . 1. 1 .77.. 1 -. .17 311.17%..mLf11 12.11.?. 77.7.1..II: 11 Ull- . . . .. 1. 1:1.1 1. 1 .7.. .1111. T579801. W10 whuj?m 17.1.1. 577.724.9377. ?2 [711,111 111.11. lid 7c ?Gm . 333133.; 1- .. 1.1.0 mm 91111177319. 9130.71.37. 7 1171.140 .1.11.11-30mm.m 2.7.777m .. . . 1.51.11.12.1? 7.01 .W mam-.7. 1- 1.1.1.11. .. . . g?xabu?aqr . . 1m. 77.1112197203nmxm 29.11173. 1.1.1.320 . . 7 $0225.77 .m-w.7mzo.1 7 .. . .. 11-111.. 1 . . ni..i1.11.1.i.sl7I1l-11 .1 lilii . - onu?1ammng?m 1.7793711 . . . .W . .. .. .51. .1 7.1 .mm raw Elmmnirz? Mil-mmwtim JILL . .. .. E3 ?1111 HIAUFE 3 ntmu?tilia- im nil?? ?allRIGHT. mZ?? 0.9.302 Ozzmm? ilvi m. an. wry?. O??womn? @QZb??mm 002m.w?a?102 "3 58 un?myw rw?mzmm 2.0 . Umy m; lfw..-.w L. r. .. . Gizma? 0.330 If}! It.) . .5. Ub.mm?w .93; 53.3 ugnm?swd 1 1.. . . .. n?a?mgm?ummn $362 5m 25 $.33 20. . r: ZEEL. a hm . .. 95% 12% $053?.can. . Grim m. mm?bm?m l. ?n H. .111. .. v. . s. I ..IJ 1 my ?3x420 ?cmz?amw L. a. may.? 2.9. ?Farx?m Quagm?.? 10 9. 5. 3x gonad 5,3393.? :gi?bizrd 55113055.}33223533 .3353er 35105.?v?chigan Avrnuc 5 From Unit?- 1 96 1.133112: 95.131333331332310 ?1.1116 111352111035?1511333333331 [11?81'3'i11311. . 33133333333: $530130 R15 3313133313333- 1113349031311: (3'33 3311:3'21201363133: a?u?uni' 13631 C30 wag: Mad the. (T :30 1?33] 33.3. Dcpa?nm?i' 1351-- 3301113101: of 3'1: 31:33: luztcr' 3.3; 33:. advise. wt: 313333 the. maria},- mixed timiCI 133333133331: 33:113-333ch 33333331133333. um: {Etmn} 33:: obtained by 3333335333135 {1133 1:33:03 333-1113: {.1-1icz3go 130133;:- 1'3 viticncc 1333.3. 
"Ed 1130:3333}; 8352:3933 located at ?5 130mm Amhcmecn 3113313011113 0132513011331 23103331433 31330333313 316w 33:13:331?33333: 33333131332315 12) M36 fi?fG?} 73153 3.33 insum 21111231333 pro-{3321333 '13 3:033:33} ,1 mtum?i you will he requirf'd 33333113333. ., 53:33: nud'ul (Huber .1ppr333333.:33. 313336; Identitzcatzma. - -- . 533.333 31:13:: 33333-33. 311E (Edi-E 353135. 3. (3:13.33: 1131". 151:3. "3:31'331'33'3'31 1.13:1: 31333333.}; 331111-101 31:3: 1331}; 3111:333- 11.35 35333:. '13' 1" 5. 3 -. vamp?- . . 136333.51}: 13.1113155' 5533:5039 Sc Gang-1333'. 25320113333 111131{331333333}. 5 333334.3331333331334? 5'5 N3333-153333-iy3-m3: 1113313335: 1 313' I13ml "ii?3-1353?: Chicago Police Department 15:29 by PCOJ393 for IR 944104 Page 1 of 9 CHICAGO POLICE DEPARTMENT 3510 South Michigan Avenue/Chicago. Illinois a a a: 60653 Identification Section grail: a? CRIMINAL HISTORY REPORT CPD-319030 (REV. W04) CONVICTED FELON 0N FAROLE SID 29460180 FBI 377470MA1 IDOC 876702 Current Arrest Information: 9931 34 years Place of Sign; HAIR BLK SSN if; - Mam COMPLEXION Drivers Lic. State; Seats. Marisa &Tattogs; Key Historical Identifiers: Alias or AKA used Qate?Used Dates of Birth Used Social Security Numbers Used OS-MAY-2001 30-AUG-1997 18-DEC-1996 30-MAR-1996 . 02-MAR-1996 27-OCT-1995 20-AUG-1995 26-JAN-1995 21 DEC-1993 O4-MAY-1993 19-SEP-1990 Not Available Not Available Not Available Not Available Not Available? Not Available Criminal Justice Summary: Total arrests: 24 Felony, 6 Misdemeanor) Total convictions: 6 IDOC EVENT EVENT: ON Chicago Police Department 15:29 by PCOJ393 for IR 944104 Page 2 of 9 PAROLE institution Name: Parole Date: 30-DEC-05 Case Number: 01CR2011602 Discharge Date: 30-DEC-2008 ARREST ArrestNamez? Arrest Date: 21-JUL-2006 Holding Factlity: CPD 011 MALE Dale of Birth: Arrest Address: DON or CB: 016603614 Residence: Of?cer: MAY Of?cer Badgeiit: rresting Agency: CPD II II IAIrreIsIt IIJnIthloIat? 
4 720 ILCS Pcs- Possession - Less Than 15 - Heroin OFFENSE AS CITED .COURT CHARGESIDISPOSITION Statute Charge Class Case# 720-5701402-c PCS - POSSESSION - LESS M. as112327201? iDisposii?ion: FINDING - NO PROBABLE CAUSE - DISMISSED Disposition ESenience: Sentence Date: i ARREST Arrest Name: Arrest Date: Holding Facility: CPD - DISTRICT 011 MALE Date of Birth: Arrest Address: or CE: 016543111 Residence: Of?cer: BECKMAN Of?cer Badge#: 16837 Arresting Agency: CPD .St?t?ii? . . ?1.1.1 1 "3 ?.1915 ?2mm OFFENSE AS .1 3299:9123? WSW. .9 9.1.: . OFFENSE A3 10-8- 515 Soliciting Unlawfut Business OFFENSE AS CITED ECOURT istatute Charge Class Case# 720-570i402-C PCS .. POSSESSION - LESS 06112037201: Eorsposrtron: NOLLE PROSEQUI Disposition Date: 15-JUN-2006 ESentence: Sentence Date: 5 gDiSposition: NOLLE PROSEQUI Disposition ESenrence: Sentence Date. ARREST Arrest Name: Arrest Date: 18-JUL-2001 Holding Facility: CPD - DISTRICT 011 MALE Date of Birth: Arrest Address: DCN or CE: 014844672 Residence: Officer: KELLER Of?cer Badge#: [11 '3 3 720 ILCS 5. 0i24-1. 1-A Uuw- -Weapon- Felon. PossessiUse Firearm CHARGESIDISPOSITION E-Statute Char Class Case# f2 1)le hffh?ffr?l'lrl'a Inlanr?n". .1 L1- 1.1 1n rt 11.; Hui-oi Dru-nth; .H .. n. .. Chicago POIicc Department 15:29 by PCOJ393 for IR ii 944104 Page 3 of9 .. ARMED ROBBERY ARMED WIFIR 0 2601CR20116 EDisposition: SENTENCEDIILLINOIS DEPARTMENT OF CORRECTIONS Disposition Date: 24-MAR-2003 CONVICTED g'Sentenca: 000 010 YEARS 00 MONTHS 000 DAYS Sentence Date: 24-MAR-2003 ECOURT CHARGESIDISPOSITION Statute gene 9% eggs Possruss WEAPONIF 2001cn2011s? gDisposition: STRICKEN FROM DOCKET WITH LEAVE TO REINSTATE Disposition Date: 24-MAR-2003 Estanrence: NO SENTENCE one YEARS no MONTHS 000 DAYS Sentence Date: 5 ARREST Arrest Name Arrest Date: 16-MAY-2001 Hotding Facilit CPD - DISTRICT 011 MALE Date of Birth: Arrest Address: DCN or CB: 014788299 Residence: Of?cer: SMITH Of?cer Badgeit: 354 Arresting Agency: CPD . . . .. 
10-8-515 Soliciting Unlawful Business CHARGESIDISPOSITION Statute Charge Class Case# 10-13-515 SOLICITING UNLAWFUL BUSINESS 014235413 SAN. BOND FORFEITURE - NON SUIT Disposition Date: 13-JUN-2001 ESentence: Sentence Date: ARREST . Arrest Narhe: Arrest Date: US-MAY-2001 Holding CPD - DISTRICT 011 MALE Date of Birth: Arrest Address: DCN or CB: 014778544 Residence: Of?cer: WARE Of?cer Badge?: rres mg gency: ?5 FREDDIE . . . .. .. .. .. . . . .. ..-. . 10-8-515 Soliciting Unlawful Business gcounr Statute Charge Ciass Case? 10-8-515 SOLICITINO UNLAWFUL BUSINESS - 014233765 EDisposition: NOLLE Disposition Date: 29-MAY-2001 ESentence.? Sentence Date: ARREST Arrest Name: Arrest Date: Holding Facility: CPD - DISTRICT 011 MALE Dale of Birth: Arrest Address: Of?cer: Of?cer Badge?: Arresting Agency: CPD ?at 10-8-515 Soliciting Uniawful Business ECOURT CHARGESIDISPOSITION .9 .1 - I I I an-iininm'n Chicago Police Department 15:29 by PCOJ393 For IR 944104 Page 4 of9 i ELF-0010 Qh?lg? Ctass Case# 5 10-8-515 SOLICITING UNLAWFUL BUSINESS tit-1194804 . NON-SUIT Disposition Date: 05-JUN-2001 ESentence: Sentence Date: 5 ARREST ArrestName: Arrest Date: worse-2001 Holding Facility: cpo - CENTRAL MALE Date of Birth: Arrest Address: DCN or CB: 014709108 - Residence: Of?cer: WALLER Of?cer Badge?l: rres mg gency: CPD .-..A 720 ILCS Criminal Trespass - Remain On Land ARREST Arrest Name Arrest Date: 05-JAN-2000 Hoiding CPD - DISTRICT 013 Date of Birth: Arrest Address DCN or CE: 014376409 Residence: Of?cer: SZESZOL Of?cer Badge#: 6183 Arresting Agency: CPD 4 720 ILCS 570.0!402 Possession Of A Controiied Substance icOuRT CHARGESIDISPOSITION ?tatute Charge Class Case# i PCS Possess CONTROLLED SUBSTANCE . FINDING - no PROBABLE CAUSE - Disposition Date: 31 ESentence: - Sentence Date: ILL DEPT OF CORRECTION PAROLE Arrest Name: DCNiArrestii: . Local Arresting Of?cer. 
Ident: Agency: ?at Information: FROM DANWLLE CORRECTIONAL CENTER ON 24-Nov-1999 FOR - ARREST Arrest Name: Arrest Date: Date of Birth: Arrest Address: DCN or CB: 011063735 Residence: 18-MAY-1998 Holding Of?cer: Of?cer Badgeii: Arresting Agency: ti 1 PCS Possession Of Other Co
class PDF(object):
    """Embed a PDF in notebook output.

    Provides the rich-display hooks ``_repr_html_`` (an ``<iframe>``) and
    ``_repr_latex_`` (an ``includegraphics`` command).

    Args:
        pdf: URL or path of the PDF to embed.
        size: ``(width, height)`` of the iframe; defaults to ``(200, 200)``.
    """

    def __init__(self, pdf, size=(200, 200)):
        self.pdf = pdf
        self.size = size

    def _repr_html_(self):
        """Return an ``<iframe>`` element pointing at the PDF."""
        # Local import keeps this cell self-contained.
        from html import escape

        # Quote and escape every attribute value: an unquoted ``src`` is cut
        # short by a space and broken by quotes or ``>`` inside the URL.
        return '<iframe src="{0}" width="{1}" height="{2}"></iframe>'.format(
            escape(str(self.pdf), quote=True),
            escape(str(self.size[0]), quote=True),
            escape(str(self.size[1]), quote=True),
        )

    def _repr_latex_(self):
        """Return the LaTeX command that includes the PDF at full text width."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)
# Print pdf1 (the output below shows it is the document's URL).
print(pdf1)
# Build the embeddable wrapper with a 1000x450 frame.
# NOTE(review): presumably the last expression of its notebook cell, so the
# notebook renders it through PDF._repr_html_ - confirm the cell boundary.
PDF(pdf1,size=(1000,450))
https://assets.documentcloud.org/documents/4575428/CAR-CR315343-Redacted.pdf
As we can see by comparing the text with the PDF (retrieved from the URL), the extracted text version of the PDF is extremely noisy.
def get_tokens(text):
    """Tokenize *text* with the loaded spaCy pipeline.

    Returns the verbatim text (``orth_``) of every token that is neither
    punctuation nor whitespace, in document order.
    """
    kept = []
    for tok in nlp(text):
        # Skip punctuation and whitespace tokens.
        if tok.is_punct or tok.is_space:
            continue
        kept.append(tok.orth_)
    return kept
def split_by_punc(text):
    """Split *text* on commas and slashes into individual phrases.

    Each phrase is stripped of surrounding whitespace. Empty fragments
    (from a trailing, leading or doubled delimiter) are dropped, so callers
    never receive an empty phrase to turn into a matcher pattern.

    Args:
        text: A delimiter-separated string such as ``"Taser, Baton/gun"``.

    Returns:
        List of non-empty phrases in their original order.
    """
    pieces = (piece.strip() for piece in re.split(r'[,/]', text))
    return [piece for piece in pieces if piece]
def split_by_space_punc(text):
    """Split *text* into words and drop stop words.

    The text is split on runs of non-word characters (whitespace and
    punctuation alike). Empty fragments, which appear when the text starts
    or ends with such a run, are discarded, as is every word that the spaCy
    vocabulary flags as a stop word.

    Args:
        text: Raw text to split.

    Returns:
        List of the remaining words in their original order.
    """
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    words = re.split(r'\W+', text)
    # The pieces contain word characters only, so no strip() is needed.
    return [w for w in words if w and not nlp.vocab[w].is_stop]
# Phrase matcher keyed on the LOWER token attribute, i.e. matching ignores case
# (the match output further down shows 'ROBBERY' matching the phrase 'robbery').
matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
# Allegation categories. Each entry lists the phrases to be matched for that
# category, separated by ',' or '/'; split_by_punc breaks them apart.
terminology_list = ["Nudity, Penetration",
"sexual harassment, Sexual remarks",
"Sexual humiliation, Sexual extortion, Prostitution/sex work",
"Taser, Baton, physical touch, gun",
"Trespass/robbery, Racial slurs, xenophobic remarks",
"Undocumented status, ICE",
"Planting drug, planting guns",
"Neglect of duty, failure to serve",
"Refusing to provide medical assistance",
"Irrational, Aggressive, Unstable",
"searching minors/patting down minors/arresting minors"]
# Preview: show every category next to the individual phrases it breaks
# into (split on ',' and '/', not on spaces).
for t in terminology_list:
    print(t, ": ", split_by_punc(t))
Nudity, Penetration : ['Nudity', 'Penetration'] sexual harassment, Sexual remarks : ['sexual harassment', 'Sexual remarks'] Sexual humiliation, Sexual extortion, Prostitution/sex work : ['Sexual humiliation', 'Sexual extortion', 'Prostitution', 'sex work'] Taser, Baton, physical touch, gun : ['Taser', 'Baton', 'physical touch', 'gun'] Trespass/robbery, Racial slurs, xenophobic remarks : ['Trespass', 'robbery', 'Racial slurs', 'xenophobic remarks'] Undocumented status, ICE : ['Undocumented status', 'ICE'] Planting drug, planting guns : ['Planting drug', 'planting guns'] Neglect of duty, failure to serve : ['Neglect of duty', 'failure to serve'] Refusing to provide medical assistance : ['Refusing to provide medical assistance'] Irrational, Aggressive, Unstable : ['Irrational', 'Aggressive', 'Unstable'] searching minors/patting down minors/arresting minors : ['searching minors', 'patting down minors', 'arresting minors']
# Register one matcher rule per category: the rule key is the full category
# string and its patterns are the tokenized phrases of that category.
for t in terminology_list:
    terms = split_by_punc(t)
    # make_doc only tokenizes, which is all a phrase pattern needs.
    patterns = list(map(nlp.make_doc, terms))
    matcher.add(t, None, *patterns)
testtext = "Taser, Baton, physical touch, gun"

# Run the full pipeline over the document text and look for category phrases.
doc = nlp(text1)
matches = matcher(doc)

# Report each hit as "category -> matched words".
for match_id, start, end in matches:
    # The match id is a hash; the string store maps it back to the rule key.
    string_id = nlp.vocab.strings[match_id]
    # Token span covered by this match.
    span = doc[start:end]
    # Verbose variant that also prints the token indexes:
    #print("Category: {}, Indexes: {} to {}, Word: {}".format(string_id, start, end, span.text))
    print("Category: {} Word: {}".format(string_id, span.text))
Category: Irrational, Aggressive, Unstable Word: irrational Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: ROBBERY Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: Trespass
# Tokenize the document text; get_tokens drops punctuation and whitespace tokens.
tokens_text1 = get_tokens(text1)
# Count the tokens and list them from most to least frequent.
Counter(tokens_text1).most_common()
[('the', 97), ('1', 75), ('No', 70), ('of', 64), ('Date', 54), ('I', 48), ('to', 47), ('NO', 37), ('or', 35), ('3', 34), ('Name', 33), ('Arrest', 33), ('and', 31), ('a', 29), ('Of?cer', 28), ('5', 27), ('UNIT', 26), ('2', 26), ('DATE', 22), ('at', 22), ('that', 21), ('CPD', 20), ('OF', 19), ('complainant', 19), ('011', 18), ('was', 18), ('by', 18), ('7', 15), ('Police', 15), ('Page', 15), ('TO', 14), ('NOT', 14), ('accused', 14), ('Department', 14), ('my', 14), ('Address', 14), ('Street', 14), ('BY', 13), ('The', 13), ('i', 13), ('State', 13), ('$', 13), ('11', 13), ('.1', 13), ('Birth', 13), ('A', 12), ('Investigation', 12), ('Section', 12), ('be', 12), ('as', 12), ('with', 12), ('investigation', 12), ('Inventory', 12), ('Complaint', 11), ('no', 11), ('4', 11), ('this', 11), ('on', 11), ('Residence', 11), ('Chicago', 11), ('DISTRICT', 11), ('Found', 11), ('Disposition', 11), ('RECOMMEND', 10), ('OPTIONS', 10), ('BE', 10), ('GRANTED', 10), ('for', 10), ('not', 10), ('06', 10), ('he', 10), ('10', 10), ('ARREST', 10), ('315343', 9), ('Unit', 9), ('General', 9), ('Internal', 9), ('Affairs', 9), ('Division', 9), ('TIME', 9), ('2006', 9), ('Facility', 9), ('9', 9), ('will', 9), ('number', 9), ('Holding', 9), ('MALE', 9), ('Recovered', 9), ('Agency', 9), ('Sentence', 9), ('POLICE', 8), ('RECEIVED', 8), ('Sharon', 8), ('ACCUSED', 8), ('NAME', 8), ('On', 8), ('City', 8), ('Type', 8), ('row(s', 8), ('Star', 8), ('Arresting', 8), ('Court', 8), ('Case', 8), ('COMPLAINT', 7), ('CHICAGO', 7), ('DEPARTMENT', 7), ('21', 7), ('MAY', 7), ('Not', 7), ('8', 7), ('33', 7), ('member', 7), ('contact', 7), ('listed', 7), ('made', 7), ('under', 7), ('Available', 7), ('Of', 7), ('Information', 7), ('720', 7), ('By', 7), ('515', 7), ('12', 6), ('FROM', 6), ('in', 6), ('IN', 6), ('STATE', 6), ('alleges', 6), ('13', 6), ("'s", 6), ('Record', 6), ('form', 6), ('undersigned', 6), ('department', 6), ('Report', 6), ('Time', 6), ('CB', 6), ('Offender', 6), ('3/9/2007', 6), ('m', 6), ('Phone', 6), 
('Statute', 6), ('6', 6), ('Employee', 6), ('mm', 6), ('CHARGESIDISPOSITION', 6), ('DCN', 6), ('INTERNAL', 5), ('AFFAIRS', 5), ('DIVISION', 5), ('CONCUR', 5), ('REVIEWED', 5), ('FORWARDED', 5), ('0', 5), ('DO', 5), ('REPORT', 5), ('Agent', 5), ('Register', 5), ('Salustro', 5), ('IF', 5), ('AS', 5), ('RANK', 5), ('11129', 5), ('SOCIAL', 5), ('ON', 5), ('DUTY', 5), ('CITY', 5), ('complaint', 5), ('his', 5), ('which', 5), ('inventory', 5), ('Under', 5), ('15', 5), ('LIST', 5), ('an', 5), ('letter', 5), ('but', 5), ('results', 5), ('BORA', 5), ('Copy', 5), ('ZIP', 5), ('Incident', 5), ('information', 5), ('Number', 5), ('Non', 5), ('IR', 5), ('944104', 5), ('RD', 5), ('Initial', 5), ('Last', 5), ('First', 5), ('View', 5), ('Transport', 5), ('ILCS', 5), ('Charge', 5), ('Class', 5), ('Badge', 5), ('ESentence', 5), ('Commanding', 4), ('WITH', 4), ('TITLE', 4), ('star', 4), ('C.R.', 4), ('has', 4), ('u', 4), ('To', 4), ('ASSIGNED', 4), ('SALUSTRO', 4), ('STAR', 4), ('ADDRESS', 4), ('1500', 4), ('TELEPHONE', 4), ('FOR', 4), ('hours', 4), ('further', 4), ('searched', 4), ('60.00', 4), ('U.S.C.', 4), ('sent', 4), ('Against', 4), ('Member', 4), ('returned', 4), ('home', 4), ('alternate', 4), ('able', 4), ('negative', 4), ('Complainant', 4), ('were', 4), ('Sworn', 4), ('TOTAL', 4), ('date', 4), ('PHONE', 4), ('certify', 4), ('household', 4), ('immediate', 4), ('family', 4), ('nancial', 4), ('member(s', 4), ('any', 4), ('Illinois', 4), ('3510', 4), ('Emergency', 4), ('Status', 4), ('License', 4), ('Role', 4), ('Sex', 4), ('Color', 4), ('Comments', 4), ('Beat', 4), ('Records', 4), ('Possession', 4), ('Branch', 4), ('Bond', 4), ('20', 4), ('EDWARD', 4), ('From', 4), ('IL', 4), ('Apartment', 4), ('.W', 4), ('15:29', 4), ('PCOJ393', 4), ('OFFENSE', 4), ('PCS', 4), ('Soliciting', 4), ('Business', 4), ('REGISTER', 3), ('INVESTIGATION', 3), ('SUBMIT', 3), ('THIS', 3), ('129', 3), ('Sergeant', 3), ('2563', 3), ('cases', 3), ('Disciplinary', 3), ('DAYS', 3), ('EMPLOYEE', 3), ('LOCATION', 
3), ('17', 3), ('1-', 3), ('01', 3), ('19', 3), ('VICTIMS', 3), ('ALLEGATIONS', 3), ('ADDITIONAL', 3), ('Diane', 3), ('WOLFE', 3), ('July', 3), ('without', 3), ('justi?cation', 3), ('arrest', 3), ('took', 3), ('Identi?cation', 3), ('Of?ce', 3), ('Professional', 3), ('Standards', 3), ('three', 3), ('times', 3), ('drugs', 3), ('falsely', 3), ('arrested', 3), ('time', 3), ('Other', 3), ('Injured', 3), ('Medical', 3), ('MEMBER', 3), ('SECURITY', 3), ('BOX', 3), ('sustained', 3), ('RM', 3), ('certi?ed', 3), ('asking', 3), ('her', 3), ('also', 3), ('attempts', 3), ('did', 3), ('message', 3), ('all', 3), ('met', 3), ('10788402', 3), ('Card', 3), ('SUMMARY', 3), ('nor', 3), ('investigator', 3), ('recommendation', 3), ('is', 3), ('ATTACHMENT', 3), ('NUMBERS', 3), ('Action', 3), ('District', 3), ('duty', 3), ('19830', 3), ('Code', 3), ('Criminal', 3), ('Arrestee', 3), ('DAY', 3), ('113', 3), ('found', 3), ('personal', 3), ('matter', 3), ('interest', 3), ('if', 3), ('Signature', 3), ('Rev.', 3), ('Location', 3), ('South', 3), ('Michigan', 3), ('Avenue', 3), ('s.', 3), ('October', 3), ('27', 3), ('21-JUL-2006', 3), ('Alias', 3), ('Gang', 3), ('Race', 3), ('Quantity', 3), ('Response', 3), ('Arrested', 3), ('Keeper', 3), ('Associated', 3), ('Vehicle', 3), ('Used', 3), ('Officer', 3), ('2nd', 3), ('CAREY', 3), ('2008', 3), ('of9', 3), ('44', 3), ('OFFENDER', 3), ('PLASTIC', 3), ('Home', 3), ('Item', 3), ('For', 3), ('No-', 3), ('Dir-', 3), ('L.', 3), ('E', 3), ('.7', 3), ('111', 3), ('II', 3), ('CE', 3), ('ECOURT', 3), ('NOLLE', 3), ('24-MAR-2003', 3), ('000', 3), ('UNLAWFUL', 3), ('BUSINESS', 3), ('COMMAND', 2), ('CHANNEL', 2), ('REVIEW', 2), ('Emp', 2), ('SECTION', 2), ('GIS', 2), ('CASE', 2), ('OFFICE', 2), ('PROFESSIONAL', 2), ('STANDARDS', 2), ('CI', 2), ('SE', 2), ('El', 2), ('ALTERNATE', 2), ('AN', 2), ('IS', 2), ('C.R.NO', 2), ('2007', 2), ('Investigations', 2), ('SUBJECT', 2), ('Approval', 2), ('your', 2), ('Team', 2), ('S.', 2), ('used', 2), ('In', 2), ('are', 2), 
('SUSTAINED', 2), ('UNFOUNDED', 2), ('does', 2), ('exceed', 2), ('ORIGINAL', 2), ('AND', 2), ('COPIES', 2), ('SAME', 2), ('SUPERINTENDENT', 2), ('P.A.', 2), ('SEC', 2), ('ASSIGN', 2), ('121', 2), ('DENT', 2), ('Edward', 2), ('RAN', 2), ('P.O.', 2), ('Brian', 2), ('TOWN', 2), ('3599', 2), ('SWORN', 2), ('OFF', 2), ('June', 2), ('0N', 2), ('SEXIRACE', 2), ('return', 2), ('SHEET', 2), ('WITNESSES', 2), ('during', 2), ('procedures', 2), ('registered', 2), ('planted', 2), ('failed', 2), ('I.A.D.', 2), ('PHYSICAL', 2), ('04', 2), ('in?uence', 2), ('05', 2), ('Hospitalized', 2), ('16', 2), ('System', 2), ('07', 2), ('Aid', 2), ('In?uence', 2), ('00', 2), ('NOS', 2), ('PAXIBELL', 2), ('C', 2), ('support', 2), ('Summary', 2), ('Previous', 2), ('History', 2), ('Upon', 2), ('receipt', 2), ('regarding', 2), ('report', 2), ('United', 2), ('States', 2), ('Postal', 2), ('Service', 2), ('printed', 2), ('label', 2), ('Return', 2), ('Unable', 2), ('two', 2), ('reach', 2), ('voice', 2), ('mail', 2), ('identi?ed', 2), ('herself', 2), ('left', 2), ('detailed', 2), ('interview', 2), ('records', 2), ('rst', 2), ('second', 2), ('card', 2), ('Af?davit', 2), ('documents', 2), ('allegations', 2), ('against', 2), ('EVIDENCE', 2), ('ATTACHMENTS', 2), ('SUPPORTING', 2), ('recommendations', 2), ('That', 2), ('days', 2), ('USE', 2), ('DIGEST', 2), ('Thomas', 2), ('18795', 2), ('emp#-', 2), ('appointment', 2), ('status', 2), ('Daniel', 2), ('Attachments', 2), ('RE', 2), ('refusal', 2), ('cooperate', 2), ('STREET', 2), ('APT', 2), ('location', 2), ('.3', 2), ('WK', 2), ('hereby', 2), ('best', 2), ('knowledge', 2), ('professional', 2), ('relationship', 2), ('victim(s', 2), ('complainant(s', 2), ('witness', 2), ('civilian', 2), ('witness(es', 2), ('resolution', 2), ('positively', 2), ('negatively', 2), ('affect', 2), ('acknowledge', 2), ('must', 2), ('disclose', 2), ('Assistant', 2), ('Deputy', 2), ('Superintendent', 2), ('Chief', 2), ('Administrator', 2), ('writing', 2), ('discovery', 2), ('P.', 2), 
('A.', 2), ('Maraf?no', 2), ('Attachment', 2), ('PM', 2), ('true', 2), ('matters', 2), ('Print', 2), ('Witness', 2), ('.u', 2), ('60653', 2), ('r', 2), ('23', 2), ('attempted', 2), ('telephonically', 2), ('contacted', 2), ('phone', 2), ('016603614', 2), ('SSN', 2), ('SID', 2), ('N0', 2), ('Drivers', 2), ('Place', 2), ('34', 2), ('we', 2), ('Hair', 2), ('Style', 2), ('Weight', 2), ('ocation', 2), ('Photographed', 2), ('of1', 2), ('data', 2), ('Hos', 2), ('Released', 2), ('Contact', 2), ('oi', 2), ('Numbers', 2), ('Offense', 2), ('Amended', 2), ('Line', 2), ('IUCR', 2), ('Less', 2), ('Than', 2), ('Heroin', 2), ('Value', 2), ('Substance', 2), ('Question', 2), ('Medication', 2), ('You', 2), ('Or', 2), ('8045', 2), ('954533', 2), ('There', 2), ('Signs', 2), ('Appears', 2), ('Be', 2), ('Lockup', 2), ('Placed', 2), ('one', 2), ('Dispersal', 2), ('Event', 2), ('um', 2), ('1182', 2), ('SPAGNOLA', 2), ('JOHNSON', 2), ('Hubert', 2), ('Approved', 2), ('MICHAEL', 2), ('Taken', 2), ('Called', 2), ('Desired', 2), ('Room', 2), ('use', 2), ('BAG', 2), ('EACH', 2), ('SUSPECT', 2), ('HEROIN', 2), ('User', 2), ('HM490445', 2), ('Unitof', 2), ('Recovery', 2), ('Search', 2), ('Warrant', 2), ('2024', 2), ('Package', 2), ('18', 2), ('Charges', 2), ('Inchoate', 2), ('Hold', 2), ('Investigating', 2), ('No.16474', 2), ('Destination', 2), ('Mode', 2), ('MAIL', 2), ('Courier', 2), ('non', 2), ('Seized', 2), ('Name-', 2), ('MI', 2), ('At', 2), ('It', 2), ('Owner', 2), ('M.', 2), ('Involved', 2), ('Officers', 2), ('1st', 2), ('THOMAS', 2), ('DANIEL', 2), ('Module', 2), ('TURNOVER', 2), ('M', 2), ('nil', 2), ('1.1.1.11', 2), ('1.1', 2), ('7.1.1', 2), ('W10', 2), ('n', 2), ('.17', 2), ('m.', 2), ('CONVICTED', 2), ('IDOC', 2), ('MAY-2001', 2), ('Total', 2), ('EVENT', 2), ('PAROLE', 2), ('Dale', 2), ('CITED', 2), ('POSSESSION', 2), ('LESS', 2), ('FINDING', 2), ('PROBABLE', 2), ('CAUSE', 2), ('.9', 2), ('PROSEQUI', 2), ('ARMED', 2), ('EDisposition', 2), ('YEARS', 2), ('MONTHS', 2), ('Unlawful', 2), 
('SOLICITING', 2), ('NON', 2), ('SUIT', 2), ('rres', 2), ('mg', 2), ('gency', 2), ('Sasso', 1), ('14982', 1), ('NW', 1), ('32007', 1), ('INITIATED', 1), ('No.2', 1), ('MARCH', 1), ('200', 1), ('INVESTIGATOR', 1), ('T0', 1), ('IZI', 1), ('PLEASE', 1), ('FORM', 1), ('THROUGH', 1), ('CHANNELS', 1), ('THE', 1), ('FILE', 1), ('Wrdy', 1), ('14/4457', 1), ('70', 1), ('BYZ', 1), ('27/', 1), ('Maj/6', 1), ('4/', 1), ('Dem/{Z/', 1), ('54', 1), ('RECEIWY', 1), ('SEPARATE', 1), ('PROPOSING', 1), ('FINDINGS', 1), ('RECOMMENDATION', 1), ('REQUIRED', 1), ('WHEN', 1), ('HAS', 1), ('BEEN', 1), ('CHECKED', 1), ('March', 1), ('attached', 1), ('been', 1), ('completed', 1), ('submitted', 1), ('approval', 1), ('Josi', 1), ('Maraff', 1), ('mam', 1), ('2493', 1), ('Of?c', 1), ('SUM', 1), ('MARY', 1), ('DIGEST-', 1), ('RBdaCted', 1), ('ame-', 1), ('Elnp', 1), ('110', 1), ('p0', 1), ('ail', 1), ('classified', 1), ('where', 1), ('Rec', 1), ('either', 1), ('EXONERATED', 1), ('emmendatlon', 1), ('FIVE', 1), ('SUSPENSION', 1), ('ATTENTION', 1), ('ADMINISTRATOR', 1), ('CHARGE', 1), ('ASSISTANT', 1), ('DEPUTY', 1), ('INCIDEN-', 1), ('INCI', 1), ('-TIME', 1), ('Jui', 1), ('BEAT', 1), ('6474', 1), ('Oil', 1), ('SEXJRACE', 1), ('0.0.3', 1), ('APPOINTMENT', 1), ('26', 1), ('April', 1), ('1976', 1), ('STATUS', 1), ('COND', 1), ('CIVILIAN', 1), ('MW', 1), ('2000', 1), ('swonn', 1), ('Oi', 1), ('ESTEDI', 1), ('NDICTE', 1), ('APPLICABLE', 1), ('CHARGES', 1), ('COURT', 1), ('BRANCH', 1), ('DISPOSITION', 1), ('sexmace', 1), ('.0', 1), ('.BJAGE', 1), ('0ND', 1), ('PHYS', 1), ('eonn', 1), ('cool', 1), ('z', 1), ('D.0.BJAGE', 1), ('cools', 1), ('OOHPLAINANTS', 1), ('SEE', 1), ('ATTACHED', 1), ('COMPLAINANTS', 1), ('September', 1), ('1508', 1), ('Civilian', 1), ('Emp#44661', 1), ('complainant?alleges', 1), ('accus', 1), ('Ca', 1), ('telephoned', 1), ('cd', 1), ('rd', 1), ('CONDITION', 1), ('Visible', 1), ('Iniury', 1), ('Apparently', 1), ('Normai', 1), ('Public', 1), ('Transportation', 1), ('Veh', 1), ('02', 
1), ('VI', 1), ('Sm?gh?i?tt', 1), ('mammary', 1), ('Air', 1), ('ort', 1), ('Building', 1), ('14', 1), ('Fugue', 1), ('Pro', 1), ('erty', 1), ('Hoopitaiized', 1), ('Locku', 1), ('PrivaePremlse', 1), ('iniured', 1), ('eintenance', 1), ('Ex', 1), ('resswaylinterstate', 1), ('8310', 1), ("Agtrliimotli've", 1), ('Pettind', 1), ('5&1', 1), ('Refused', 1), ('er', 1), ('co', 1), ('roper', 1), ('aerway', 1), ('ar', 1), ('re', 1), ('Splicet', 1), ('ommunicatione', 1), ('Private', 1), ('g?zfadgaizefused', 1), ('Deceased?Undm?um', 1), ('R-', 1), ('Brie?y', 1), ('summarize', 1), ('describing', 1), ('oliorts', 1), ('prove', 1), ('disprove', 1), ('allr', 1), ('rgaliorda', 1), ('indicate', 1), ('whether', 1), ('witnesses', 1), ('evidence', 1), ('Support', 1), ('do', 1), ('ONLY', 1), ('copies', 1), ('Actions', 1), ('Compliment-', 1), ('ary', 1), ('included', 1), ('attachments', 1), ('ot?this', 1), ('address', 1), ('Initiation', 1), ('Sender', 1), ('Unknown', 1), ('reason', 1), ('Forward', 1), ('i?he', 1), ('tclephonically', 1), ('IUA', 1), ('make', 1), ('she', 1), ('for?asking', 1), ('Due', 1), ('fact', 1), ('search', 1), ('locate', 1), ('inventories', 1), ('suspect', 1), ('Herein', 1), ('0788403', 1), ('identi?cation', 1), ('well', 1), ('documentation', 1), ('verifying', 1), ('money', 1), ('identification', 1), ('after', 1), ('court', 1), ('proceedings', 1), ('WA', 1), ('identify', 1), ('since', 1), ('obtain', 1), ('Signed', 1), ('noti?ed', 1), ('served', 1), ('Based', 1), ('careful', 1), ('review', 1), ('oiall', 1), ('gathered', 1), ('determined', 1), ('preponderance', 1), ('members', 1), ('I?heret?ore', 1), ('following', 1), ('being', 1), ('INVESTIGATIVE', 1), ('REPORTS-', 1), ('NUMBER', 1), ('SUBMITTED', 1), ('2,3,4,5,6,&7', 1), ('Summarize', 1), ('findings', 1), ('Rule', 1), ('violations', 1), ('cited', 1), ('only', 1), ('One', 1), ('overall', 1), ('ALL', 1), ('ndings', 1), ('each', 1), ('allegation', 1), ('Example', 1), ('Violation', 1), ('noted', 1), ('disciplinary', 1), 
('action', 1), ('warranted', 1), ('reprimanded', 1), ('suspended', 1), ('RECOMMENDATIONS', 1), ('DISCIPLINARY', 1), ('FILED', 1), ('COMPLETED', 1), ('ELAPSED', 1), ('WAS', 1), ('ECEWED', 1), ('03', 1), ('Sep', 1), ('EPORT', 1), ('Mar', 1), ('EXPRESSED', 1), ('tN', 1), ('134', 1), ('initiate', 1), ('Command', 1), ('Channel', 1), ('Review', 1), ('completing', 1), ('investigator?s', 1), ('Q6', 1), ('Slam', 1), ('MM', 1), ('1F', 1), ('NECESSARY', 1), ('x11', 1), ('0F', 1), ('WHITE', 1), ('PAPER', 1), ('CONTINUE', 1), ('ANY', 1), ('ITEM', 1), ('Redacted', 1), ('SAL', 1), ('UST', 1), ('R0', 1), ('2444', 1), ('CARE', 1), ('Of?cei', 1), ('WW', 1), ('1995', 1), ('dUtY', 1), ('birth', 1), ('May', 1), ('1996', 1), ('1A.', 1), ('Con?ict', 1), ('Certi?cation', 1), ('Unsigned', 1), ('Certi?ed', 1), ('And', 1), ('related', 1), ('10788403', 1), ('AGAINST', 1), ('INITIAL', 1), ('CATEGORY', 1), ('03D', 1), ...]
As we can see, we get a lot of useless results, filled with stop words. Let's try using a count vectorizer to filter out stop words and ignore words that are too frequent. - cv=CountVectorizer(max_df=0.9,stop_words=stop_words, max_features=10000, ngram_range=(1,3)) - max_df — when building the vocabulary, ignore terms whose document frequency is strictly higher than the given threshold (corpus-specific stop words). This keeps words that are relevant to the context and drops commonly used ones. - max_features — caps the number of columns in the matrix (the vocabulary size). - ngram_range — we want to look at single words, two-word combinations (bi-grams) and three-word combinations (tri-grams).
Resource: Automated Keyword Extraction from Articles using NLP
# Bag-of-words vectorizer: removes the NLTK English stop words, ignores terms whose
# document frequency exceeds 0.9, keeps at most 10000 features, and asks for 1- to 3-grams.
cv=CountVectorizer(max_df=0.9,stop_words=stop_words, max_features=10000, ngram_range=(1,3))
# NOTE(review): split_by_space_punc returns single words, so every "document" given to
# fit_transform is one word; bi-/tri-grams cannot form from that input - confirm this is intended.
X=cv.fit_transform(split_by_space_punc(text1))
# Peek at the first ten terms of the learned vocabulary.
list(cv.vocabulary_.keys())[:10]
['command', 'channel', 'review', 'sasso', 'emp', '14982', 'nw', '32007', 'date', 'initiated']
Well... it seems like there is just too much noise in the data. Let's see if we can find the part of the text that describes only the allegation.
# Second case-insensitive matcher, used to locate the markers that surround
# the allegation narrative in the document.
matcher2 = PhraseMatcher(nlp.vocab, attr='LOWER')

# Marker phrases to look for.
terminology_list2 = ["Narrative of", "signed"]
patterns2 = list(map(nlp.make_doc, terminology_list2))
matcher2.add("narrative", None, *patterns2)

# Print the token indexes of every marker found in the document.
matches2 = matcher2(doc)
for match_id, start, end in matches2:
    # The match id is a hash; the string store maps it back to the rule key.
    string_id = nlp.vocab.strings[match_id]
    # Token span covered by this match.
    span = doc[start:end]
    print("Indexes: {} to {}, Word: {}".format(start, end, span.text))
    #print("Category: {} Word: {}".format(string_id, span.text))
Indexes: 1206 to 1207, Word: Signed Indexes: 1842 to 1844, Word: NARRATIVE OF Indexes: 1931 to 1932, Word: SIGNED
# Slice out the tokens between the "NARRATIVE OF" match (ends at token 1844) and the
# last "SIGNED" match (starts at token 1931), as printed by the marker search.
# NOTE(review): these offsets are hard-coded for this one document.
text_narrative = doc[1844:1931]
# Display the extracted span.
text_narrative
The complainant alleges he was searched three times without justi?cation at the ?rst location as he took out the garbage. The complainant alleges at the second location he was searched again, drugs were mysteriously found and he was falsely arrested. The complainant further alleges his State Identi?cation Card, house keys, wallet containing miscellaneous papers and $60.00 U.S.C. were taken and not inventoried or returned. The complainant states the case was dismissed on 10 AUG 06. WVES .
Looks like we need to use some lemmatization. Let's lemmatize the docs and the phrases that we are using to match.
# Replace every token of the narrative span with its lemma and join the lemmas
# into one space-separated string.
doc2 = ' '.join([token.lemma_ for token in text_narrative])
# Display the lemmatized narrative.
doc2
' the complainant allege -PRON- be search three time without justi?cation at the ? rst location as -PRON- take out the garbage . the complainant allege at the second location -PRON- be search again , drug be mysteriously find and -PRON- be falsely arrest . the complainant further allege -PRON- State identi?cation card , house key , wallet contain miscellaneous paper and $ 60.00 U.S.C. be take and not inventorie or return . the complainant state the case be dismiss on 10 AUG 06 . WVES .'
# Same lemmatization applied to the whole document. This rebinds doc2, which
# holds a plain string of lemmas rather than a spaCy Doc.
doc2 = ' '.join([token.lemma_ for token in doc])
# Display the lemmatized document.
doc2
'COMMAND channel review a Sasso Emp 14982 NW 1 32007 date initiate COMPLAINT REGISTER investigation no.2 CHICAGO POLICE DEPARTMENT 315343 12 MARCH 200 ? investigator SECTION T0 : unit : command Of?cer Unit IZI - GIS please submit this command channel review FORM through channel with the case file to : INTERNAL AFFAIRS DIVISION OFFICE of PROFESSIONAL STANDARDS date receive by unit -PRON- concur [ a recommend option be grant not recommend option not be grant reviewed by TITLE a unit - date forward by unit 0 ? Wrdy a 7 date receive by unit -PRON- concur -PRON- do not 14/4457 : 70 % ? recommend option be grant CI recommend option not be GRANTED REVIEWED BYZ unit 27/ Maj/6 ; ? 4/ Dem/{Z/ 54 ? date forward by unit date RECEIWY unit [ 3 -PRON- concur recommend option not be grant -PRON- do not recommend option SE GRANTED reviewed by I title unit -PRON- -PRON- date forward by unit date receive by unit -PRON- concur recommend option not be grant -PRON- do not recommend option be grant reviewed by TITLE unit date forward by unit date receive by unit -PRON- concur [ 3 recommend option not be grant CI -PRON- do not El recommend OPTIONS SE GRANTED reviewed BY . title unit . date forward by unit a separate report propose alternate finding an alternate recommendation be required when do not have be check . c.r.no . 315343 internal AFFAIRS division [ 2 March 2007 General Investigation Section to : command Of?cer General Investigations Section Internal Affairs Division from : police Agent Sharon star -PRON- -PRON- 129 General Investigations Section Internal Affairs Division SUBJECT : approval of Complaint Register Investigation C.R. 315343 . the attached Complaint Register Investigation have be complete and be submit for -PRON- approval . Police Agent Sharon Salustro , 129 General Investigation Section Internal Affairs Division Team Sergeant : Josi Maraff \' no # 2563 ? mam - u 2493 Commanding Of?c ? ? 
General Investigation Section Internal Affairs Division SUM MARY REPORT DIGEST- COMPLAINT REGISTER investigation no . : CHICAGO POLICE DEPARTMENT rbdacte 315343 ame- Elnp 110 . : S. p0 , ) to be use in ail case that be to be classify in sustained case where the Disciplinary Rec as either exonerate . unfounded , not sustained or emmendatlon do not exceed five ( 5 ) days suspension . submit original and 3 copy if assigned to same unit as accuse . submit original and 4 copy if not assigned to same unit as accuse . to : attention superintendent of police administrator in charge , office of PROFESSIONAL STANDARDS ASSISTANT DEPUTY superintendent . internal AFFAIRS division name Sharon SALUSTRO RANK P.A. star no . 11129 SOCIAL SEC . no . employee NO . unit assign . 121 address of inciden- date of INCI 21 DENT -TIME Jui 06 1500 location 17 beat of i DENT I 1 1 name 1- Edward MAY run P.O. STAR no . 1 6474 SOCIAL SEC . no . employee NO . unit ASSIGN . 011 2 Brian TOWN P.O. 3599 Oil SEXJRACE 0.0.3 . 1 . date of APPOINTMENT 26 April 1976 on DUTY duty status ( time of COND . swear off duty civilian 01 accuse 2 . MW 19 June 2000 0N duty swonn El OFF duty oi estedi NDICTE if APPLICABLE - 1 . charge COURT BRANCH disposition 3 : date name city state telephone sexmace .0 .BJAGE 0nd . name city state TELEPHONE PHYS . eonn SEXIRACE cool - z } name city state telephone SEXIRACE d.0.bjage cool } a victim OOHPLAINANTS allegations return to the complainant . see attached sheet for additional accuse . complainant . victim . witness . on 06 September 2006 at 1508 hour the complainant ? this complaint with Civilian Diane WOLFE Emp#44661 . the complainant?allege on 21 July 2006 at 1500 hour at accuse without justi?cation . 
the complainant further allege that the accus that during the arrest procedure the accuse take -PRON- state identi?cation Ca telephone the of?ce of Professional Standards and register _ -PRON- be search three time by the cd plant drug and falsely arrest the complainant and rd and $ 60.00 U.S.C. , which the fail to inventory or I.A.D. LOCATION I.A.D. PHYSICAL condition 01 no visible Iniury - apparently Normai 01 11 Public Transportation Veh { Facility - 02 VI - 3 : Sm?gh?i?tt ? ; 12 mammary as time ? ? i 13 Air ort 04 Police Building 14 Fugue Pro erty other 04 injure , not Hoopitaiized - Under in?uence as Locku Facility 15 other PrivaePremlse 05 iniure . hospitalize 06 police eintenance Facility 16 Ex resswaylinterstate system 06 injure . hospitalize - Under in?uence 3 ; 8310 Agtrliimotli\'ve Pettind Facility 17 5&1 i 07 injure . refuse Medical Aid er co roper 8 aerway . . ar 5 re ( 13 % Splicet ( ? ommunicatione System 19 Private Residence 33 g?zfadgaizefuse Medical Aid under In?uence 00 10 deceased?undm?um if CPD member . LIST RANK . STAR . SOCIAL SECURITY . employee NOS . in ADDRESS BOX . PAXIBELL in TELEPHONE BOX . C - R- ? 9 315343 Brie?y summarize the investigation describe -PRON- oliort to prove or disprove the allr - rgaliorda ) . indicate whether witness or evidence support or do not support the in sustained case only . copy of the accuse member \'s Summary of Previous Disciplinary Actions and Record of Previous Compliment- ary history will be include an attachment . upon receipt ot?this investigation the RM send a certi?ed letter to complainant ? ask that -PRON- contact -PRON- regard this complaint . the certi?ed letter which be send to _ at the address list on the Initiation report and the Complaint against Department Member form be return to RM by the United States Postal Service with a print label " return to sender - unknown reason unable to Forward . 
\' i?he RM also make two attempt to tclephonically contact _ at -PRON- home number that be list on the complaint form and at the alternate number that be also list on the complaint form . IUA be not able to make contact at the home number but do reach a voice mail at the alternate number at which time -PRON- identi?e -PRON- and leave a detailed message for?aske that -PRON- contact -PRON- but all attempt meet with negative result . due to the fact that all attempt make to contact and interview this complainant meet with negative result the undersigned do a search of department record and be able to locate the complainant \'s arrest report and two inventory . the ? rst inventory # 10788402 be for suspect Herein and the second inventory ( ? 0788403 be the Complainant \'s State identi?cation Card and $ 60.00 U.S.C. , as well as the documentation verify that the money and identification card be return to the complainant after -PRON- court proceeding . SUMMARY WA be able to identify the accuse by the department record but since the undersigned be not able to interview this complainant nor obtain a Signed Sworn af?davit the accuse be identi?ed but not noti?ed nor serve under this investigation . base on a careful review oiall the document gather for this investigation this investigator have determine that the preponderance do not support the allegation make against the accuse Department member . \' I?heret?ore , the follow recommendation be be make . investigative reports- physical evidence number or attachment support support accused submit this list attachment number : LIST ATTACHMENT number : LIST ATTACHMENT number : attachment 0 2,3,4,5,6,&7 0 9 summarize the finding and recommendation . rule violation will be cite by number only . one overall recommendation for Disciplinary Action will be make by the investigator . the recommendation will be for all sustain ? nding ; recommendation will not be make for each sustain allegation . - example : 1 . 
violation note , no disciplinary action warrant . 2 . that the accuse member be reprimand . 3 . that the accuse member be suspend for day ( not to exceed 5 day ) . to 9 allegation : unfounded recommendation : no disciplinary case to be file . date date complete ELAPSED time COMPLAINT be date of this ( total time . ECEWED for 03 Sep 06 EPORT ) 12 Mar 07 express tN 134 investigator will initiate the Command Channel Review 3 % form by complete the investigator?s section . Q6 Slam - MM [ 19 % 1F NECESSARY . use an x11 " sheet 0F white PAPER to continue any item . redact name : SAL UST R0 , S. Emp no . : 2444 . date : SUMMARY REPORT DIGEST C.R. 315343 ADDITIONAL accuse : # 3 Thomas CARE . of?cei \' , star 18795 , emp#- Unit 011 District , WW , date of date of appointment 05 June 1995 , dUtY status on duty , swear # 4 . Daniel BORA , Police Of?cer , star 19830 , emp#- Unit 011 district , date of birth date of appointment 06 May 1996 , duty status on duty , Sworn Code . SUMMARY REPORT DIGEST Attachments ; 1 . evidence complaint against Department Member 1a. complaint Register con?ict certi?cation 2 . 3 . Unsigned Sworn af?davit copy of certi?ed letter send to complainant , and related document report by Police Agent Sharon SALUSTRO , 11129 , RE : complainant , refusal to cooperate with this . investigation Copy of Arrest Report - Copy of inventory no . 10788402 copy of inventory no . 10788403 copy of criminal History Arrestee - COMPLAINT against DEPARTMENT member INITIAL complaint category c.r.no . CHICAGO POLICE DEPARTMENT 03d 315343 to COMMANDING OFFICER unit unit . manner complaint receive Internal Affairs Division - 018 121 - GIS * 3 BELLE PAXEILETTER person receive from complainant RANK STARIEMPL . no unit NO . DAY DFWK . date TIME WOLFE , Diane - 113 WED 06 sap 06 15:03 register with OPS . bt - name rank stariem 0 unit no . DAYOFWK date TIME WOLFE . Diane VA 113 1AME ANK star unit NO . SOCIAL SECURITY NO . 
EIONEOFF LOCATION of incident day of date time total ACCUSED total COMPL . total WIT . FRI 21 JUL 06 15:00 3 name ( street . apt . no .. city , state ) zip code 3 50524 home phone no . CONTACT at time phone no . 3 - CHECKONE ADDITIONAL name ADDRESS ( street , APT . no . city . state ) phone no , j- .. accuse , ( TOMPLAINANTS ( c ) . victim ( v ) , witness ( w ) . use another if CPID member . LIST RANK , STARIEMPLOYEE , SOCIAL SECURITY NOS . in address . PAXIBELL in phone no . BOX . narrative of the complainant allege -PRON- be search three time without justi?cation at the ? rst location as -PRON- take out the garbage . the complainant allege at the second location -PRON- be search again , drug be mysteriously find and -PRON- be falsely arrest . the complainant further allege -PRON- State identi?cation card , house key , wallet contain miscellaneous paper and $ 60.00 U.S.C. be take and not inventorie or return . the complainant state the case be dismiss on 10 AUG 06 . WVES . SIGNED RAN STARIEMFENU . unit no , SUUIAL OELURITY NU . I. .3 Pat IZIG 2 . date ASSIGNED time -PRON- -PRON- of WK . date ASSIGNED - time 2 . day of WK . complaint Register investigation Conflict Certification Chicago Police Department -PRON- hereby certify that to the good of -PRON- knowledge , neither -PRON- , nor -PRON- spouse or domestic partner , -PRON- parent , -PRON- sibling or -PRON- child ( hereinafter -PRON- household or immediate family ) , have a personal , professional or ? nancial relationship with the victim(s ) , complainant(s ) , accuse department member(s ) , witness department member(s ) , or civilian witness(es ) . -PRON- further certify that to the good of -PRON- knowledge , the resolution of the matter under investigation will not positively or negatively affect -PRON- financial interest or the ? nancial interest of any member of -PRON- household or immediate family . 
- -PRON- acknowledge that -PRON- must disclose to the Assistant Deputy Superintendent , Internal Affairs Division or the Chief Administrator , of?ce of Professional Standards , in writing , the acquisition of any ? nancial interest or the development or the discovery of any personal interest that would directly affect -PRON- ability to conduct an impartial objective investigation and render unbiased decision concern the matter under investigation . -PRON- acknowledge that -PRON- must disclose to the Assistant Deputy Superintendent , Internal Affairs Division or the Chief Administrator , of?ce of Professional Standards , in writing , the discovery that a member of -PRON- household or immediate family have a personal , professional or ? nancial relationship with the victim(s ) , complainant(s ) , accuse department member(s ) , witness department member(s ) , or civilian witness(es ) or if a member of -PRON- household or immediate family will be positively or negatively affect by the resolution of the matter under investigation . f ? 5 Signature Signa P. a. Sharon Salustro , 11 129 Sergeant Joseph Maraf?no , # 2563 Investigator 3:1:52 date date attachment 1 a CPD-44.201 ( Rev. 1/04 ) 3 1 3 4 3 SWORN AFFIDAVIT for COMPLAINT REGISTER investigation CHICAGO POLICE DEPARTMENT STATE of ILLINOIS 1 CC COUNTY of COOK location of incident date Time 21 July 2006 3:00 pm . ( 1500 hour ) -PRON- , - hereby state as follow : under penalty as provide by law pursuant to section 1 - 109 of the Illinois Code of Civil procedure,_i certify that the allegation set forth in -PRON- complaint be true and correct , except as to any matter therein state to be on information and belief and as to such matter , certify as aforementioned that verily believe the same to be true . RA . Sharon Salustro , 11129 Print Witness \'s Name Complainant \'s Signature Witness \'s signature date date CPD-44.126 ( Rev. 6105 ) attachment 2 315343 \' ? at ? .u 3 .. . . 
Department of Police City of Chicago 3510 South Michigan Avenue Chicago . Illinois 60653 date 9/3/06 re : C. R. No . 3 5343 a complaint against a Department member , register under the above Complaint Register ( C.R. ) Number , be currently under investigation by the Chicago Police Department name : P. a. SHARON SALUSTRO INTERNAL AFFAIRS DIVISION Address : 3510 s. MICHIGAN AVENUE Telephone : 312 - 745 - 6310 hour available : 7:00 a.munti12:45 pm . Mon thru Fri . sincerely , 5-.- 3 Harlow-1k r ; 4 p.a. s. SALUSTRO CPD-44.223 ( Rev. 1104 ) emergency : EM .1 non - em ergency : ( within City limit ) 3 - 1 ~1 Non - emergency : ( Outside City til-- mi a ) a- . - -PRON- i City of Chicago Department of Police 3510 South Michigan Avenue 1 Chicago , Illinois 60658 llEU 14?399 ? logazaji ? " OIQDJDE Egtgz 5 LU , 590:0 3?31 hEd UHTT 5001 i internal AFFAIRS DIVISION 13 February 2007 General Investigation Section C.R. 315343 to : command Of?cer General Investigation Section Internal Affairs Division from : police Agent Sharon Salustro , # 11129 General Investigation Section Internal Affairs Division SUBJECT : report by Police Agent Sharon Salustro , star 11129 , RE : complainant , refusal to cooperate with this Investigation allegation : the com lainant allege on 21 July 2006 at 1500 hour at -PRON- be search three time by the accuse without justi?cation . the complainant further allege that the accuse plant drug and falsely arrest the complainant and that during the arrest procedure the accuse take -PRON- state identi?cation card , and $ 60.00 U.S.C. which the accuse fail to inventory or return to complainant . upon receipt of this investigation the undersigned send a certi?ed letter to complainant ask that -PRON- contact the undersigned regard this complaint . on 23 October 2006 the letter be return to by the United States Postal Service with a label mark ? 
return to sender Unclaimed- unable to forwar -PRON- on 27 October 2006 the undersigned attempt to telephonically contact complainant ? at the home number list on the complaint against Department Member form with negative result . on 27 October 2006 an attempt be also make to reach at the alternate phone number list and meet with negative result . on 22 November 2006 the undersigned attempt to telephonically contact complainan _ at the home number _ list on the complaint against Department Member form with neiative result . then try to reach?at the alternate phone number list and the call go into voice mail at which time identify -PRON- and leave a detailed message ask _ to contact -PRON- but do not respond to the message . SN General Investigation Section Team S?geant : Josepl ? Maraf?no # 2563 .LALAUJ . page 1 of 7 print Logout i ? wres Report set - trend this be the Orginai Report CB no . 016603614 IR No . 944104 RD no . hm49044 - 5 arrest date 21-jul-2006 17:40 District of . .. Initial Approval DISTRICT 011 Holding 011 male lockup Status IDENT . clear pod related ? no Arrestee Armed with offender information gshet Last Name First name middle name -PRON- name Suf?x Nickname SSN Alias last name Alias First Name No SID N0 Drivers License no . issue state resist arrest ? no TRR complete ? no NO DCFS Ward ? no view all Sang Affiliation ( Gang Arrest Card ) Gang name Faction Name Unlisted Gang Narne Rank Role Known Hangout self admit Offender DeScription Birth Place Birth date Age 34 -PRON- ; sex male race ASIANIPACIFIC ESLANDER color esowm 3/9/2007 hair color black hair style short Complexion DARK Height - Weight - occupation none - employer ! school q?ender Scar mark . name type Scars ocation photograph ? m to . row(s ) 1 of1 Offender__ldentifications no datum find erree I Offender . address . 
page 2 of 7 type Location Address County Beet Phone CHA properly arrest Ailey 1112 Residence 2 row(s ) 1 - 2 of 2 Non offender information Role Name Sex Race Birth a a rox?7 deceased ? ho italized ? ? ho ital treat and comment date 9 pp :3 release ? ? dim and no no no no Complaman=. Emergency Contact REFLSED AJQ no no no row(s ) 1 - 2 of 2 tie.e .. _0tfender address . .. . Role Name Type Street No . street name $ 3393 SI gig Beat phone no . 3/9/2007 . page 3 0f7 Emergency Comte - oi refuse Residence Non Offender Contact number no -PRON- -PRON- -PRON- non Offender lnjuriesW no Records find A5151 Eelony 1 _ detective Information no Records find offense .. . . Domestic amend / amend to Line line IUCR Statute lnchoate Code violence Victim no 1 720 possession - Less than 15 4f offense as no - heroin cite row(s ) 1 - 1 of1 Recovered Nameties .. .. . Type Approx . weight / Quantity Units Estimated Street Value Suspect Controrled substance 1 gram $ 50.00 row(s ) 1 - 1 of 1 Atxssiee Questiennaitsm . question response 1 . Presentty Taking Medication ? 2 . { if FemalejAre -PRON- pregnant ? - no 3 , First Time ever be arrest ? no 4 . attempt SuicidefSerious Harm ? no 5 ? serious Medical or meet ! problem ? n0 6~ be -PRON- receive treatment ? no 33 8045 954533 1 3/9/2007 a . U11 LAILVA LlnuLxUlJ row(s ) 1 - 6 of 6 Vissal Check 0\'1 .Arrestee .. .. Question response 1 . be there Obv?ous Pain or injury ? no 2 . ls there obvious sign of infection ? no 3 . under the in?uence of Alcohoerrugs ? no 4 ? sign of AlcoholfDrug Withdrawal ? no 5 . appear to be despondent ? no 6 . appear to be irrational ? no 7 . carry Medication ? no row(s ) 1 - 7 of ? Lockuo Keeper remark Lockup Keeper response refer to place in one person cell ? no place under close observation ? no ? 531131113 intormetion . no Records find . Associated Incidents No Records find Associated Arrests no record find . Associated case . no Record find . dispersal event Dispersal event no . arrest event No . 
33 1 8045 954533 page 4 of 7 . .31 u - rsw - um \' 5 r1 : .- 3/9f2007 1 a ? gunman , ? page 5 of 7 Arrestee Vehicle information Year Model Make type style 1 Top Color bottom color 2 : 1 Vin no inventory no 1demote license information license no license state ! expiry date use as weapon ? no disposition vehicle impound ? no Towing Agency Tow Report no . transport information 5 assign to Vehicle Transport Beat Transport Tirne 113 - 4 21-jul-2006 127:4 ? intro ed Employees ? information employee Role last name first name Ernployee no Star No . agency name ? Beat tat arrest Officer MAY Edward 164,74 1182 2nd arrest Of?cer town Brian 3599 1182 ass sh . ngArresting Or?cer CAREY Thomas ? 48795 1183 A351 e a rresti Officer BORA Daniel ? 59830 4183 Ade : t?ng of ? BORA Danie ! 19830 Desk review by SPAGNOLA Michael 2008 Lo ookup Keeper JOHNSON Hubert 18235 search by JOHNSON Hubert 1c235 Fl . nger print by STEWART Neison 4125 row(s ) ? l 9 of9 3/9/2007 Vuxuu 111a page 6 of 7 spp?roratiaiormatiw name . employee no . Star no . probable Cause approve by : SPAGNOLA . MICHAEL 200:5 . Final Approved by : smouom MICHAEL 2008 IntervietrLoss no Record find . . no Record find . fingerprint take Palmprints take ? Time Printed 19:10 time photograph 21-jul-2000 19ri ? Time Fed Cell No . Ct .Phone Number call Time Called receive in Lockup 210 ? i9:09 Court . information . .. .. . . . Court Sergeant ? no release from Holding Facility . 08:15 Desired Court Date Desired Branch Call 44 - 2 room no . address lntial Court date 22-jul-2006 00:00 Initial Court Branch Initial Room no . address no datum find Brand Information Bond date bond type Bond Amount bond Receipt no . 3/9/2007 ? ? 15 JENT NO . of to arrest : pm ) on above on numerous occasion to receive use tender small offender retrieve OBJECTTET from plastic bag in HES SOCK . offender DETAENED 8 : RIOS recover plastic bag [ 5393556 ) ten FGELS each contain suspect heroin . offender arrest ED a transport to 011 1315 mm : reveal $ 80.00 use . 
1000 ft . 0e 403*?- Wafcch Commander comment no record find . check CLEAR no . " 157g?"?"v a page 7 of 7 user : Al : Mamie : 13315 SRevfsi - on ? 1 1193 .. ngh?a reserve 3/9/2007 view . inventory tide ti?epertn?eni home . , ei\'ratck Creeleftittrt inventory inventory no : 10788402 inventory : 011 page 1 of2 eTreck Homet Logoutl Heip nventory Collection } ? incident no : RD hm490445 date recover : Unitof Statue : receive Recovery Search Warrant no date recover unit of inventory 0\'11 district 11 Incident Number RD Hmeems comment 2024 item list . package Parnent No . ocation item no item Type Quantity . plastic y97621 to/ 1493143 drug 18 mb tin foil packet each 1493144 egg ? ? Ci 5 with powder SUSPECT HEROIN Court date 15 AUG- 2008 Court Branch 44 - 2 State charge ? 20 elc inchoate action Hold for investigation investigate Of?cer MAY . EDWARD Star No.16474 UnitO\'It DISTRICT initial Destination 17 ? forensic service section Transport Mode police mail { Courier } RecoveredFrortt ( non - cpd ) recover / seize from arrest last name _ First Name- MI . Sex Racet ] recover at Street No- City CHICAGO State IL zip aNarcotic type Status Recerveo HEROINIWHITE receive Street Dir- Street name ? apartment no . " -PRON- 15h 5 view inventory z. u ; L. Phone no . beat 1 1 \' 12 owner ( non ? LastName?Fifst name _ A M. I- Street No- Street Dir- Street name ? apartment no . City State IL - ZIP phone no . find by : ( CPD Employee ) CPD Employee MAY , EDWARD Involved Officers . 1st Ofrcer CAREY THOMAS Star No 113795 Unit 011 11 2nd of?cer BORA , DANIEL Star No.19830 Unit 011 a district .3 i ? user 13131111193 Copyright-1712002QAII rightmesnwe . module ; 1010 ! } sl?iwihaon 1 11353 \' 21 thanhriq 3/14/2007 , View \' inventory page 1 of 2 .. nit - ant eTrack Home ] Logouti Help 1 . ? ? tnventory Loiiection ? ? ; home e ) e\'t\'rack > 1craeteit?idittrweittory inventory no : 10788103 Incident no . 
: RD hm498445 date recover : Unitof irwentory:011 status : turnover view attachment Recovery Search warrant no date recover unit of inventory 011 DISTRECT tt Incident Number RD HM490445 comment IUCR 2024 package no . location item no . quantity Descriptlon [ Signal Value - status _ unpackage 1493145 133210125 turnover classification .Court date 2006 Court Branch 44 - 2 Currency Disposition NARCOTICS seizure to be deposit State Charges T20 ilc Inchoate i 1 Action Hold for investigation investigate Of?cer MAY . EDWARD Star No.16474 UnitOtt DISTRICT 11 Initial Destination 16 ? see Transport Mode police mail . ( courier ) recevere from . recover / seize from arrest last name -irst Name- MI . sex race recover at Street No- Street Dir- Street Nam _ Apartment no . -PRON- City CHIL AGO State IL zip phone no . Beat1134 owner ( now - CPD ) lastname_firstname- i m .. -PRON- -PRON- ? task -PRON- -PRON- : -PRON- ( m .n4nn if- 1 321.5 .View ? inventory . page 2 of 2 Street No - street Dir -street name _ Apartment no . City CHICAGO State IL zip phone no . find by ( CPD Employee ) . CPD Employee MAY . EDWARD Involved Officers . 1st of?cer CAREY , THOMAS Star No . 18795 Unit 011 .. 11 2nd of?cer BORA . DANIEL Star No . 19830 Unit 011 11 : Llser : -PRON- Cb?rl?ghl at } : 2602 . All?ghl ? module : 10100 isrevisaon : 1.1855 " ha ? : i h?h\'fl?l??o hint nu . page 1 0f -PRON- " see ? 3.91553 . . . .. .. mama . ? 3 gear . 7?mug 5 .. gag ? Kim ? ifl?r v.3 ? fmg??l armimm?mg .. hmu?w ? .nwm ? mu??a . qw?umm ? $ 313.53 . .. . 5.2 . .. i . . all : .. . ( Etna ? -PRON- . .11 . . nil . . . . -PRON- . .. 1 51xi . 1.5 .. . .. .. .ulI . 11.1 .. . . . 5 .1 . Jan-11.3 . ! { 15.1.5.1 } ? x .W gamma - HO 1.31 20 . W123 . ? 27.31.32.13 . . .1 , . 0270210 ggwp?zm5 .. 3.78 27 . a : .W . . 1:3.33.611.1.31137183 .- . . .. . .. . - . . 1 . 1.1.1.11 . . why - n1 ? . 111111 . 1.1 .. { 7.1.1 1 .1 2.113 224 % .. 7077.01.53.11.- mm .7 .. 77.91.1117 . . ? ? gtW??1?C1Ari - Wham3.7.71 ? u .27 111 . .. .. girl . 
71.1 , ? 7 . ( 47.1.70131 ? 41.1.7 ! 61w .1 1w1 .. 3 .. an3.1171 . .7 . mmndoz 32.1.- . 1.1.7.4 .. Wmibg .- .. - . .. - .. -1.11.1.1 1 . .1 . . .1.-1.111.111 - 11111.753.107.15w W10 mm ? mnnumW WOW-31.1 . ? ? dmm . 11.9 % . 1.71111.- Wham-.713 . 1 _ . 1 11.1315 . 11 . 11.111.11.111 .- . - 111w11 - 1 .. 1 . .1.-1.111 - 111 - 1111- .. 11.0.11 ? . 33 .. .033 . zmvoi ? ? . .u 7131 ? . ? 01.71 . ? ? mum-.717 hub : 18 35.93810 371.07.101.73 . mau?n udl . -PRON- nm?hamn ? .. . .-11 .. -.-.-11 : 7.1.1 . : 171.176 .3113 TYNE . -PRON- .. 1 .1.-310.013111lulalw7l-1 ? Mu - Mat . 131.733.117.17 . .71 .. W. mama-1.57 3117.17.77 : M71 . ( .Im?sq . 1.71 ? n . .17 1.5.7.0 .. 11 .. .1 71.211.11.112 .. 5.w7457722 01.1.1.8 ? 1 91.11 .. ? 20 . : 23 . . .. 1 - 7 . F. . 1 . 1 .77 .. 1 - . .17 311.17% .. mLf11 12.11 . ? . 77.7.1 .. II : 11 Ull- . . . .. 1 . 1:1.1 1 . 1 .7 .. .1111 . t579801 . W10 whuj?m 17.1.1 . 577.724.9377 . ? 2 [ 711,111 111.11 . lid 7c ? Gm . 333133 . ; 1- .. 1.1.0 mm 91111177319 . 9130.71.37 . 7 1171.140 .1.11.11 - 30mm.m 2.7.777 m .. . . 1.51.11.12.1 ? 7.01 .W mam-.7 . 1- 1.1.1.11 . .. . . g?xabu?aqr . . 1 m . 77.1112197203nmxm 29.11173 . 1.1.1.320 . . 7 $ 0225.77 .m - w.7mzo.1 7 .. . .. 11 - 111 .. 1 . . ni .. i1.11.1.i.sl7I1l-11 .1 lilii . - onu?1ammng?m 1.7793711 . . . .W . .. .. .51 . .1 7.1 .mm raw Elmmnirz ? Mil - mmwtim JILL . .. .. e3 ? 1111 HIAUFE 3 ntmu?tilia- -PRON- be nil ? ? ? allright . mz ? ? 0.9.302 Ozzmm ? ilvi m. an . wry ? . o??womn ? @QZb??mm 002m.w?a?102 " 3 58 un?myw rw?mzmm 2.0 . Umy m ; lfw .. -.w L. r. .. . Gizma ? 0.330 if } ! -PRON- . ) . .5 . Ub.mm?w .93 ; 53.3 ugnm?swd 1 1 .. . . .. n?a?mgm?ummn $ 362 5 m 25 $ .33 20 . . r : ZEEL . a hm . .. 95 % 12 % $ 053?.can . . grim m. mm?bm?m l. ? n H. .111 . .. v. . s. I .. IJ 1 -PRON- ? 3x420 ? cmz?amw L. a. may . ? 2.9 . ? Farx?m Quagm ? . ? 10 9 . 5 . 3x gonad 5,3393 . ? : gi?bizrd 55113055.}33223533 .3353er 35105.?v?chigan Avrnuc 5 from Unit?- 1 96 1.133112 : 95.131333331332310 ? 
1.1116 111352111035?1511333333331 [ 11?81\'3\'i11311 . . 33133333333 : $ 530130 R15 3313133313333- 1113349031311 : ( 3\'33 3311:3\'21201363133 : a?u?uni \' 13631 C30 wag : mad the . ( t : 30 1?33 ] 33.3 . Dcpa?nm?i \' 1351 - - 3301113101 : of 3\'1 : 31:33 : luztcr \' 3.3 ; 33 : . advise . wt : 313333 the . maria},- mixed timiCI 133333133331 : 33:113 - 333ch 33333331133333 . um : { etmn } 33 : : obtain by 3333335333135 { 1133 1:33:03 333 - 1113 : { .1 - 1icz3go 130133;:- 1\'3 viticncc 1333.3 . " Ed 1130:3333 } ; 8352:3933 locate at ? 5 130 mm Amhcmecn 3113313011113 0132513011331 23103331433 31330333313 316w 33:13:331?33333 : 33333131332315 12 ) M36 fi?fg ? } 73153 3.33 insum 21111231333 pro-{3321333 \' 13 3:033:33 } , 1 mtum?i -PRON- will -PRON- requirf\'d 33333113333 . . , 53:33 : nud\'ul ( Huber .1ppr333333.:33 . 313336 ; Identitzcatzma . - -- . 533.333 31:13 : : 33333 - 33 . 311e ( Edi - E 353135 . 3 . ( 3:13.33 : 1131 " . 151:3 . " 3:31\'331\'33\'3\'31 1.13:1 : 31333333 . } ; 331111 - 101 31:3 : 1331 } ; 3111:333- 11.35 35333 : . \' 13 \' 1 " 5 . 3 - . vamp?- . . 136333.51 } : 13.1113155 \' 5533:5039 Sc Gang-1333 \' . 25320113333 111131{331333333 } . 5 333334.3331333331334 ? 5\'5 n3333 - 153333-iy3-m3 : 1113313335 : 1 313 \' I13ml " ii?3 - 1353 ? : Chicago Police Department 15:29 by PCOJ393 for IR 944104 page 1 of 9 CHICAGO POLICE department 3510 South Michigan Avenue / Chicago . Illinois a a a : 60653 Identification Section grail : a ? criminal history report CPD-319030 ( REV . W04 ) convict felon 0n FAROLE SID 29460180 FBI 377470ma1 IDOC 876702 Current Arrest information : 9931 34 year Place of Sign ; hair BLK SSN if ; - Mam COMPLEXION Drivers Lic . state ; Seats . Marisa & Tattogs ; key historical identifier : Alias or aka use Qate?Used Dates of Birth Used Social Security Numbers Used OS - MAY-2001 30-aug-1997 18-dec-1996 30-mar-1996 . 
02-mar-1996 27-oct-1995 20-aug-1995 26-jan-1995 21 dec-1993 o4-may-1993 19-sep-1990 not available not available not available not available not available ? not available Criminal Justice Summary : total arrest : 24 felony , 6 misdemeanor ) total conviction : 6 IDOC event event : on Chicago Police Department 15:29 by PCOJ393 for IR 944104 page 2 of 9 parole institution name : parole date : 30-dec-05 case number : 01cr2011602 discharge date : 30-dec-2008 arrest ArrestNamez ? arrest date : 21-jul-2006 Holding Factlity : CPD 011 male Dale of Birth : Arrest Address : DON or CB : 016603614 residence : of?cer : MAY of?cer Badgeiit : rresting Agency : CPD II II iairreisit iijnithloiat ? 4 720 ilc Pcs- possession - Less than 15 - heroin offense as cite .court CHARGESIDISPOSITION Statute Charge Class Case # 720 - 5701402-c PCS - possession - LESS m. as112327201 ? idisposii?ion : finding - no probable cause - dismiss disposition ESenience : sentence date : i arrest Arrest name : arrest date : Holding Facility : CPD - DISTRICT 011 male date of birth : Arrest Address : or CE : 016543111 residence : of?cer : BECKMAN Of?cer Badge # : 16837 Arresting Agency : CPD .St?t?ii ? . . ? 1.1.1 1 " 3 ? .1915 ? 2 mm offense as .1 3299:9123 ? WSW . .9 9.1 . : . offense A3 10 - 8- 515 solicit Unlawfut business offense as cite ECOURT istatute Charge Class Case # 720 - 570i402-c pc .. possession - LESS 06112037201 : Eorsposrtron : NOLLE PROSEQUI Disposition Date : 15-jun-2006 ESentence : sentence date : 5 gDiSposition : NOLLE PROSEQUI Disposition esenrence : sentence date . arrest Arrest name : arrest date : 18-jul-2001 Holding Facility : CPD - DISTRICT 011 male date of birth : Arrest Address : DCN or CE : 014844672 residence : officer : KELLER Of?cer Badge # : [ 11 \' 3 3 720 ilc 5 . 0i24 - 1 . 1-a Uuw- -Weapon- Felon . PossessiUse Firearm CHARGESIDISPOSITION E - Statute Char Class Case # f2 1)le hffh?ffr?l\'lrl\'a Inlanr?n " . .1 l1- 1.1 1n rt 11 . ; Hui - oi Dru - nth ; .H .. n. .. 
Chicago POIicc Department 15:29 by PCOJ393 for IR ii 944104 page 3 of9 .. ARMED robbery ARMED WIFIR 0 2601CR20116 edisposition : SENTENCEDIILLINOIS DEPARTMENT of CORRECTIONS Disposition date : 24-mar-2003 convict g\'Sentenca : 000 010 year 00 month 000 day sentence date : 24-mar-2003 ECOURT CHARGESIDISPOSITION statute gene 9 % egg Possruss WEAPONIF 2001cn2011s ? gDisposition : STRICKEN from DOCKET with leave to reinstate Disposition date : 24-mar-2003 Estanrence : no sentence one year no month 000 day sentence date : 5 arrest arrest name arrest date : 16-may-2001 Hotding Facilit CPD - DISTRICT 011 male date of birth : Arrest Address : DCN or CB : 014788299 residence : of?cer : SMITH Of?cer Badgeit : 354 Arresting Agency : CPD . . . .. 10 - 8 - 515 solicit Unlawful Business CHARGESIDISPOSITION Statute Charge Class Case # 10 - 13 - 515 solicit unlawful business 014235413 SAN . bond forfeiture - NON suit disposition date : 13-jun-2001 esentence : sentence date : arrest . arrest Narhe : arrest date : US - MAY-2001 Holding CPD - DISTRICT 011 male date of birth : Arrest Address : DCN or CB : 014778544 residence : of?cer : WARE Of?cer Badge ? : rres mg gency : ? 5 FREDDIE . . . .. .. .. .. . . . .. .. - . . 10 - 8 - 515 solicit Unlawful Business gcounr Statute Charge Ciass Case ? 10 - 8 - 515 SOLICITINO UNLAWFUL business - 014233765 edisposition : NOLLE Disposition date : 29-may-2001 ESentence . ? sentence date : arrest Arrest name : arrest date : Holding Facility : CPD - DISTRICT 011 male Dale of Birth : Arrest Address : of?cer : of?cer Badge ? : arrest Agency : CPD ? at 10 - 8 - 515 Soliciting Uniawful Business ECOURT CHARGESIDISPOSITION .9 .1 - -PRON- -PRON- -PRON- an - iininm\'n Chicago Police Department 15:29 by PCOJ393 for IR 944104 page 4 of9 i elf-0010 qh?lg ? Ctass case # 5 10 - 8 - 515 solicit unlawful business tit-1194804 . 
non - suit disposition date : 05-JUN-2001 ESentence : sentence date : 5 arrest ArrestName : arrest date : worse-2001 Holding Facility : cpo - CENTRAL male date of birth : Arrest Address : DCN or CB : 014709108 - residence : of?cer : WALLER Of?cer Badge?l : rres mg gency : CPD .- .. a 720 ilc Criminal Trespass - remain on land arrest arrest name arrest date : 05-jan-2000 Hoiding CPD - DISTRICT 013 date of birth : arrest Address DCN or CE : 014376409 residence : of?cer : SZESZOL Of?cer Badge # : 6183 Arresting Agency : CPD 4 720 ilc 570.0!402 possession of a Controiied Substance icOuRT CHARGESIDISPOSITION ? tatute Charge Class Case # i pc possess controlled substance . finding - no probable cause - disposition date : 31 ESentence : - sentence date : ILL DEPT of correction PAROLE Arrest name : DCNiArrestii : . local arrest Of?cer . ident : Agency : ? at information : from DANWLLE CORRECTIONAL CENTER on 24-nov-1999 for - arrest arrest name : arrest date : date of birth : Arrest Address : DCN or CB : 011063735 residence : 18-may-1998 Holding of?cer : of?cer Badgeii : arrest Agency : ti 1 PCS possession of other Co'
def split_by_punc_lemma(text):
    """Lemmatize *text* and split the result into phrases on ',' and '/'.

    Parameters
    ----------
    text : str or spacy.tokens.doc.Doc
        A raw string (parsed with the module-level ``nlp`` pipeline) or an
        already-parsed spaCy document, which is used as is.

    Returns
    -------
    list of str or None
        The lemmatized phrases with surrounding whitespace stripped, e.g.
        ``"searching minors/patting down minors/arresting minors"`` gives
        ``['search minor', 'pat down minor', 'arrest minor']``.
        ``None`` is returned for any other input type, including ``None``.
    """
    if text is None:
        # Missing value: nothing to lemmatize.
        return None
    if isinstance(text, str):
        doc = nlp(text)
    elif isinstance(text, spacy.tokens.doc.Doc):
        # The original spelled this check ``is_instance``, an undefined name,
        # so every non-string input raised NameError instead of reaching
        # this branch or the fallback below.
        doc = text
    else:
        return None
    # Lemmas are joined with single spaces first; the phrase split on the
    # delimiters happens on the lemmatized string.
    lemmatized = ' '.join(token.lemma_ for token in doc)
    return [phrase.strip() for phrase in re.split(r'[,/]', lemmatized)]
# Sanity check on one terminology entry: the '/'-separated phrases are
# split apart and each word is replaced by its lemma.
split_by_punc_lemma("searching minors/patting down minors/arresting minors")
['search minor', 'pat down minor', 'arrest minor']
# Phrase matcher that will hold the phrases to be matched; attr='LOWER'
# makes it compare on the lowercased token text.
matcher3 = PhraseMatcher(nlp.vocab, attr='LOWER')
# Preview how each terminology entry is split into lemmatized phrases.
# NOTE(review): `terminology_list` is not defined in this section -
# presumably a list of category strings built earlier; verify.
for t in terminology_list:
    print(t, ": ", split_by_punc_lemma(t))
Nudity, Penetration : ['nudity', 'penetration'] sexual harassment, Sexual remarks : ['sexual harassment', 'sexual remark'] Sexual humiliation, Sexual extortion, Prostitution/sex work : ['sexual humiliation', 'sexual extortion', 'prostitution', 'sex work'] Taser, Baton, physical touch, gun : ['Taser', 'Baton', 'physical touch', 'gun'] Trespass/robbery, Racial slurs, xenophobic remarks : ['trespass', 'robbery', 'racial slur', 'xenophobic remark'] Undocumented status, ICE : ['undocumented status', 'ice'] Planting drug, planting guns : ['plant drug', 'plant gun'] Neglect of duty, failure to serve : ['neglect of duty', 'failure to serve'] Refusing to provide medical assistance : ['refuse to provide medical assistance'] Irrational, Aggressive, Unstable : ['irrational', 'aggressive', 'unstable'] searching minors/patting down minors/arresting minors : ['search minor', 'pat down minor', 'arrest minor']
# Register every terminology entry as a match category: the entry string is
# the match key and its lemmatized phrases are the patterns.
for t in terminology_list:
    terms = split_by_punc_lemma(t)
    # Turn each phrase into a Doc so it can be used as a PhraseMatcher pattern.
    patterns = [nlp.make_doc(text) for text in terms]
    # NOTE(review): add(key, None, *patterns) looks like the spaCy v2 call
    # signature - confirm against the installed spaCy version.
    matcher3.add(t, None, *patterns)
# Run the matcher and print every hit with its category.
# NOTE(review): `doc` is not defined in this section - presumably a spaCy Doc
# of the document text built earlier; verify.
matches3 = matcher3(doc)
for match_id, start, end in matches3:
    # Map the numeric match id back to the category string it was added under.
    string_id = nlp.vocab.strings[match_id]
    span = doc[start:end] # The matched span
    #print("Category: {}, Indexes: {} to {}, Word: {}".format(string_id, start, end, span.text))
    print("Category: {} Word: {}".format(string_id, span.text))
Category: Irrational, Aggressive, Unstable Word: irrational Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: ROBBERY Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: Trespass
# Check what the spaCy lemmatizer does with the adverb "falsely":
# join the lemmas of the parsed tokens and display the result.
test = nlp("falsely")
doc2 = ' '.join([token.lemma_ for token in test])
doc2
'falsely'
NLTK provides several stemmers; two widely used ones are the Porter stemmer and the Snowball stemmer. The Porter stemmer is used below.
import nltk
from nltk.stem.porter import *
# Porter stemmer instance; split_by_space_stop_punc_stem reads it as a global.
stemmer = PorterStemmer()
# Stem the same word that was lemmatized above, for comparison.
stemmer.stem("falsely")
'fals'
def split_by_space_stop_punc_stem(text):
    """Tokenize *text* on non-word characters, drop stop words, stem the rest.

    Parameters
    ----------
    text : str
        Raw text. Any run of non-word characters (whitespace, punctuation)
        acts as a token separator.

    Returns
    -------
    list of str
        The stem of every token that is not flagged as a stop word in the
        module-level ``nlp`` vocabulary, in original order. Stemming uses the
        module-level ``stemmer``. An empty list is returned for empty text and
        for non-string input (e.g. a missing value).
    """
    if not isinstance(text, str):
        # Missing values (None / NaN) carry no tokens.
        return []
    # re.split yields '' when the text starts or ends with a separator; those
    # empty tokens are skipped so they do not turn into stray spaces once the
    # stems are joined back together.
    return [
        stemmer.stem(word)
        for word in re.split(r'\W+', text)
        if word and not nlp.vocab[word].is_stop
    ]
def get_stemmed_text(text):
    """Return *text* as a single space-separated string of stemmed tokens.

    Tokenizing, stop-word removal and stemming are delegated to
    ``split_by_space_stop_punc_stem``.
    """
    stemmed_tokens = split_by_space_stop_punc_stem(text)
    return " ".join(stemmed_tokens)
# Stem text1 (defined earlier in the notebook) with stop words removed.
text1_stemmed = get_stemmed_text(text1)
# Bare expression so the notebook displays the stemmed text.
text1_stemmed
'command channel review sasso emp 14982 NW 1 32007 date initi complaint regist investig 2 chicago polic depart 315343 12 march 200 investig section T0 unit command cer unit izi gi submit command channel review form channel case file intern affair divis offic profession standard date receiv unit concur recommend option grant recommend option grant review titl unit date forward unit 0 wrdi 7 date receiv unit concur 14 4457 70 recommend option grant CI recommend option grant review byz unit 27 maj 6 4 dem Z 54 date forward unit date receiwi unit 3 concur recommend option grant recommend option SE grant review titl unit date forward unit date receiv unit concur recommend option grant recommend option grant review titl unit date forward unit date receiv unit concur 3 recommend option grant CI El recommend option SE grant review titl unit date forward unit separ report propos altern find altern recommend requir check C R 315343 intern affair divis 2 march 2007 gener investig section command cer gener investig section intern affair divis polic agent sharon star 129 gener investig section intern affair divis subject approv complaint regist investig C R 315343 attach complaint regist investig complet submit approv polic agent sharon salustro 129 gener investig section intern affair divis team sergeant josi maraff 2563 mam u 2493 command c gener investig section intern affair divis sum mari report digest complaint regist investig chicago polic depart rbdact 315343 ame elnp 110 S p0 ail case classifi sustain case disciplinari rec exoner unfound sustain emmendatlon exceed 5 day suspens submit origin 3 copi assign unit accus submit origin 4 copi assign unit accus attent superintend polic administr charg offic profession standard assist deputi superintend intern affair divis sharon salustro rank P star 11129 social sec employe unit assign 121 address inciden date inci 21 dent time jui 06 1500 locat 17 beat dent 1 1 1 edward ran P O star 1 6474 social sec employe unit assign 011 
2 brian town P O 3599 oil sexjrac 0 0 3 1 date appoint 26 april 1976 duti duti statu time cond sworn duti civilian 01 accus 2 MW 19 june 2000 0N duti swonn El duti Oi estedi ndict applic 1 charg court branch disposit 3 date citi state telephon sexmac 0 bjage 0nd citi state telephon phi eonn sexirac cool z citi state telephon sexirac D 0 bjage cool victim oohplain alleg return complain attach sheet addit accus complain victim wit 06 septemb 2006 1508 hour complain complaint civilian dian wolf emp 44661 complain alleg 21 juli 2006 1500 hour accus justi cation complain alleg accu arrest procedur accus took state identi cation telephon ce profession standard regist _he search time cd plant drug fals arrest complain rd 60 00 U S C fail inventori D locat D physic condit 01 visibl iniuri appar normai 01 11 public transport veh facil 02 VI 3 Sm gh tt 12 mammari time 13 air ort 04 polic build 14 fugu pro erti 04 injur hoopitai uenc locku facil 15 privaepremls 05 iniur hospit 06 polic einten facil 16 Ex resswaylinterst system 06 injur hospit uenc 3 8310 agtrliimotli ve pettind facil 17 5 1 07 injur refus medic aid er co roper 8 aerway ar 5 13 splicet ommunication system 19 privat resid 33 g zfadgaizefus medic aid uenc 00 10 deceas undm um cpd member list rank star social secur employe no address box paxibel telephon box C R 9 315343 brie y summar investig describ oliort prove disprov allr rgaliorda indic wit evid support support sustain case copi accus member s summari previou disciplinari action record previou compliment ari histori includ attach receipt ot investig RM sent certi ed letter complain ask contact complaint certi ed letter sent _at address list initi report complaint depart member form return RM unit state postal servic print label return sender unknown reason unabl forward RM attempt tclephon contact _at home number list complaint form altern number list complaint form iua abl contact home number reach voic mail altern number time identi ed left detail messag 
ask contact attempt met neg result fact attempt contact interview complain met neg result undersign search depart record abl locat complain s arrest report inventori rst inventori 10788402 suspect second inventori 0788403 complain s state identi cation card 60 00 U S C document verifi money identif card return complain court proceed summari WA abl identifi accus depart record undersign abl interview complain obtain sign sworn Af davit accus identi ed noti ed serv investig base care review oiall document gather investig investig determin preponder support alleg accus depart member heret ore follow recommend investig report physic evid number attach support support accus submit list attach number list attach number list attach number attach 0 2 3 4 5 6 7 0 9 summar find recommend rule violat cite number overal recommend disciplinari action investig recommend sustain nding recommend sustain alleg exampl 1 violat note disciplinari action warrant 2 accus member reprimand 3 accus member suspend day exceed 5 day 9 alleg unfound recommend disciplinari case file date date complet elaps time complaint date total time ecew 03 sep 06 eport 12 mar 07 express tN 134 investig initi command channel review 3 form complet investig s section Q6 slam MM 19 1F necessari use x11 sheet 0F white paper continu item redact sal ust R0 S emp 2444 date summari report digest C R 315343 addit accus 3 thoma care cei star 18795 emp unit 011 district WW date date appoint 05 june 1995 duti statu duti sworn 4 daniel bora polic cer star 19830 emp unit 011 district date birth date appoint 06 1996 duti statu duti sworn code summari report digest attach 1 evid complaint depart member 1A complaint regist con ict certi cation 2 3 unsign sworn Af davit copi certi ed letter sent complain relat document report polic agent sharon salustro 11129 complain refus cooper investig copi arrest report copi inventori 10788402 copi inventori 10788403 copi crimin histori arreste complaint depart member initi complaint 
categori C R chicago polic depart 03d 315343 command offic unit unit manner complaint receiv intern affair divis 018 121 gi 3 bell paxeilett person receiv complain rank stariempl unit day dfwk date time wolf dian 113 wed 06 sap 06 15 03 regist op BT rank stariem 0 unit dayofwk date time wolf dian VA 113 1ame ank star unit social secur eioneoff locat incid day date time total accus total compl total wit fri 21 jul 06 15 00 3 street apt citi state zip code 3 50524 home phone contact time phone 3 checkon addit address street apt citi state phone J accus tomplain C victim V wit W use cpid member list rank stariemploye social secur no address paxibel phone box narr complain alleg search time justi cation rst locat took garbag complain alleg second locat search drug mysteri found fals arrest complain alleg state identi cation card hous key wallet contain miscellan paper 60 00 U S C taken inventori return complain state case dismiss 10 aug 06 wve sign ran stariemfenu unit suuial oelur NU 3 pat izig 2 date assign time WK date assign time 2 day WK complaint regist investig conflict certif chicago polic depart certifi best knowledg spous domest partner parent sibl child hereinaft household immedi famili person profession nancial relationship victim s complain s accus depart member s wit depart member s civilian wit es certifi best knowledg resolut matter investig posit neg affect financi interest nancial interest member household immedi famili acknowledg disclos assist deputi superintend intern affair divis chief administr ce profession standard write acquisit nancial interest develop discoveri person interest directli affect abil conduct imparti object investig render unbias decis concern matter investig acknowledg disclos assist deputi superintend intern affair divis chief administr ce profession standard write discoveri member household immedi famili person profession nancial relationship victim s complain s accus depart member s wit depart member s civilian wit es member 
household immedi famili posit neg affect resolut matter investig f 5 signatur signa P sharon salustro 11 129 sergeant joseph maraf 2563 investig 3 1 52 date date attach 1 cpd 44 201 rev 1 04 3 1 3 4 3 sworn affidavit complaint regist investig chicago polic depart state illinoi 1 CC counti cook locat incid date time 21 juli 2006 3 00 PM 1500 hour state follow penalti provid law pursuant section 1 109 illinoi code civil procedur _i certifi alleg set forth complaint true correct matter state inform belief matter certifi aforement verili believ true RA sharon salustro 11129 print wit s complain s signatur wit s signatur date date cpd 44 126 rev 6105 attach 2 315343 u 3 depart polic citi chicago 3510 south michigan avenu chicago illinoi 60653 date 9 3 06 C R 3 5343 complaint depart member regist complaint regist C R number current investig chicago polic depart P sharon salustro intern affair divis address 3510 s michigan avenu telephon 312 745 6310 hour avail 7 00 munti12 45 pm mon fri sincer 5 3 harlow 1k r 4 P s salustro cpd 44 223 rev 1104 emerg EM 1 non Em ergenc citi limit 3 1 1 non emerg outsid citi til mi citi chicago depart polic 3510 south michigan avenu 1 chicago illinoi 60658 lleu 14 399 logazaji oiqdjd egtgz 5 LU 590 0 3 31 hed uhtt 5001 intern affair divis 13 februari 2007 gener investig section C R 315343 command cer gener investig section intern affair divis polic agent sharon salustro 11129 gener investig section intern affair divis subject report polic agent sharon salustro star 11129 complain refus cooper investig alleg com lainant alleg 21 juli 2006 1500 hour search time accus justi cation complain alleg accus plant drug fals arrest complain arrest procedur accus took state identi cation card 60 00 U S C accus fail inventori return complain receipt investig undersign sent certi ed letter complain ask contact undersign complaint 23 octob 2006 letter return unit state postal servic label mark return sender unclaim unabl forwar 27 octob 2006 undersign 
attempt telephon contact complain home number list complaint depart member form neg result 27 octob 2006 attempt reach altern phone number list met neg result 22 novemb 2006 undersign attempt telephon contact complainan_ home number _list complaint depart member form neiativ result tri reach altern phone number list went voic mail time identifi left detail messag asking_ contact respond messag SN gener investig section team S geant josepl maraf 2563 lalauj page 1 7 print logout wre report set trend orginai report CB 016603614 IR 944104 RD hm49044 5 arrest date 21 jul 2006 17 40 district initi approv district 011 hold 011 male lockup statu ident clear pod relat arreste arm offend inform gshet middl suf x nicknam ssn alia alia sid N0 driver licens issu state resist arrest trr complet dcf ward view sang affili gang arrest card gang faction unlist gang narn rank role known hangout self admit offend descript birth place birth date age 34 sex male race asianipacif esland color esowm 3 9 2007 hair color black hair style short complexion dark height weight occup employ school Q ender scar mark type scar ocat photograph m row s 1 of1 offender__ldentif data found erre offend address page 2 7 type locat address counti beet phone cha properli arrest ailey 1112 resid 2 row s 1 2 2 non offend inform role sex race birth rox 7 deceas ho ital ho ital treat comment date 9 pp 3 releas dim complaman emerg contact refls ajq row s 1 2 2 tie e _0tfender address role type street street 3393 SI gig beat phone 3 9 2007 page 3 0f7 emerg comt oi refus resid non offend contact number non offend lnjuriesw record found a5151 eeloni 1_ detect inform record found offens domest amend amend line line iucr statut lnchoat code violenc victim 1 720 possess 15 4F offens heroin cite row s 1 1 of1 recov nameti type approx weight quantiti unit estim street valu suspect controrl substanc 1 gram 50 00 row s 1 1 1 atxssie questiennaitsm question respons 1 presentti take medic 2 femalejar pregnant 3 time 
arrest 4 attempt suicidefseri harm 5 medic meet problem N0 6 receiv treatment 33 8045 954533 1 3 9 2007 u11 lailva llnulxulj row s 1 6 6 vissal check 0 1 arreste question respons 1 obv ou pain injuri 2 ls obviou sign infect 3 uenc alcohoerrug 4 sign alcoholfdrug withdraw 5 appear despond 6 appear irrat 7 carri medic row s 1 7 lockuo keeper remark lockup keeper respons refer place person cell place close observ 531131113 intormet record found associ incid record found associ arrest record found associ case record found dispers event dispers event arrest event 33 1 8045 954533 page 4 7 31 u rsw um 5 r1 3 9f2007 1 gunman page 5 7 arreste vehicl inform year model type style 1 color color 2 1 vin inventori 1demot licens inform licens licens state expiri date weapon disposit vehicl impound tow agenc tow report transport inform 5 assign vehicl transport beat transport tirn 113 4 21 jul 2006 127 4 intro ed employe inform employe role ernploye star agenc beat tat arrest offic edward 164 74 1182 2nd arrest cer town brian 3599 1182 ass sh ngarrest cer carey thoma 48795 1183 a351 ing rresti offic bora daniel 59830 4183 ade t ng bora dani 19830 desk review spagnola michael 2008 Lo ookup keeper johnson hubert 18235 search johnson hubert 1c235 Fl nger print stewart neison 4125 row s l 9 of9 3 9 2007 vuxuu 111a page 6 7 spp roratiaiormatiw employe star probabl caus approv spagnola michael 200 5 final approv smouom michael 2008 intervietrloss record found record found fingerprint taken palmprint taken time print 19 10 time photograph 21 jul 2000 19ri time fed cell Ct phone number call time call receiv lockup 210 i9 09 court inform court sergeant releas hold facil 08 15 desir court date desir branch 44 2 room address lntial court date 22 jul 2006 00 00 initi court branch initi room address data found brand inform bond date bond type bond bond receipt 3 9 2007 15 jent arrest PM numer occas receiv use tender small offend retriev objecttet plastic bag he sock offend detaen 8 rio recov 
plastic bag 5393556 fgel contain suspect heroin offend arrest ED transport 011 1315mm reveal 80 00 use 1000 FT 0E 403 wafcch command comment record found check clear 157g v page 7 7 user Al mami 13315 srevfsi 1 1193 ngh reserv 3 9 2007 view inventori tide ti epertn eni home ei ratck creeleftittrt inventori inventori 10788402 inventori 011 page 1 of2 etreck homet logoutl heip nventori collect incid RD hm490445 date recov unitof statu receiv recoveri search warrant date recov unit inventori 0 11 district 11 incid number RD hmeem comment 2024 item list packag parnent ocat item item type quantiti plastic y97621 1493143 drug 18mb tin foil packet 1493144 egg Ci 5 powder suspect heroin court date 15 aug 2008 court branch 44 2 state charg 20 elc inchoat action hold investig investig cer edward star 16474 unito district initi destin 17 forens servic section transport mode polic mail courier recoveredfrortt non cpd recov seiz arrest name_ MI sex racet recov street citi chicago state IL zip anarcot type statu recerveo heroiniwhit receiv street dir street apart 15h 5 view inventori z u L phone beat 1 1 12 owner non lastnam fifst name_ M street street dir street apart citi state IL zip phone found cpd employe cpd employe edward involv offic 1st ofrcer carey thoma star 113795 unit 011 11 2nd cer bora daniel star 19830 unit 011 district 3 user 13131111193 copyright 1712002qaii rightmesnw modul 1010 Sl iwihaon 1 11353 21 thanhriq 3 14 2007 view inventori page 1 2 nit ant etrack home logouti help 1 tnventori loiiection home E e t rack 1craeteit idittrweittori inventori 10788103 incid RD hm498445 date recov unitof irwentori 011 statu turnov view attach recoveri search warrant date recov unit inventori 011 distrect tt incid number RD hm490445 comment iucr 2024 packag locat item quantiti descriptlon signal valu statu _unpackag 1493145 133210125 turnov classif court date 2006 court branch 44 2 currenc disposit narcot seizur deposit state charg t20 ilc inchoat 1 action hold investig 
investig cer edward star 16474 unitott district 11 initi destin 16 see transport mode polic mail courier recev recov seiz arrest irst MI sex race recov street street dir street nam_ apart citi chil ago state IL zip phone beat1134 owner cpd lastname_firstnam M task M n4nn 1 321 5 view inventori page 2 2 street street dir street name_ apart citi chicago state IL zip phone found cpd employe cpd employe edward involv offic 1st cer carey thoma star 18795 unit 011 11 2nd cer bora daniel star 19830 unit 011 11 llser Cb rl ghl 2602 ghl modul 10100 isrevisaon 1 1855 ha h h fl l o hint nu page 1 0f saw 3 91553 mama 3 gear 7 mug 5 gag kim ifl r v 3 fmg L armimm mg hmu w nwm mu Qw umm 313 53 5 2 etna 11 nil 1 51xi 1 5 uli 11 1 5 1 jan 11 3 15 1 5 1 x W gamma HO 1 31 20 w123 27 31 32 13 1 0270210 ggwp zm5 3 78 27 W 1 3 33 611 1 31137183 1 1 1 1 11 n1 111111 1 1 7 1 1 1 1 2 113 224 7077 01 53 11 mm 7 77 91 1117 gtw 1 c1ari wham3 7 71 u 27 111 girl 71 1 7 47 1 70131 41 1 7 61w 1 1w1 3 an3 1171 7 mmndoz 32 1 1 1 7 4 wmibg 1 11 1 1 1 1 1 1 111 111 11111 753 107 15w w10 mm mnnumw wow 31 1 dmm 11 9 1 71111 wham 713 1_ 1 11 1315 11 11 111 11 111 111w11 1 1 1 1 111 111 1111 11 0 11 33 033 zmvoi u 7131 01 71 mum 717 hub 18 35 93810 371 07 101 73 mau n udl nm hamn 11 11 7 1 1 171 176 3113 tyne 1 1 310 013111lulalw7l 1 Mu mat 131 733 117 17 71 W mama 1 57 3117 17 77 m71 Im sq 1 71 n 17 1 5 7 0 11 1 71 211 11 112 5 w7457722 01 1 1 8 1 91 11 20 23 1 7 F 1 1 77 1 17 311 17 mlf11 12 11 77 7 1 II 11 ull 1 1 1 1 1 1 7 1111 t579801 w10 whuj m 17 1 1 577 724 9377 2 711 111 111 11 lid 7c Gm 333133 1 1 1 0 mm 91111177319 9130 71 37 7 1171 140 1 11 11 30mm m 2 7 777m 1 51 11 12 1 7 01 W mam 7 1 1 1 1 11 g xabu aqr 1m 77 1112197203nmxm 29 11173 1 1 1 320 7 0225 77 m w 7mzo 1 7 11 111 1 ni i1 11 1 sl7i1l 11 1 lilii onu 1ammng m 1 7793711 W 51 1 7 1 mm raw elmmnirz mil mmwtim jill E3 1111 hiauf 3 ntmu tilia im nil allright mZ 0 9 302 ozzmm ilvi m wri O womn qzb mm 002m w 102 3 58 un myw rw mzmm 2 0 umi 
m lfw w L r gizma 0 330 5 Ub mm w 93 53 3 ugnm swd 1 1 n mgm ummn 362 5m 25 33 20 r zeel hm 95 12 053 grim m mm bm m l n H 111 v s IJ 1 3x420 cmz amw L 2 9 farx m quagm 10 9 5 3x gonad 5 3393 gi bizrd 55113055 33223533 3353er 35105 v chigan avrnuc 5 unit 1 96 1 133112 95 131333331332310 1 1116 111352111035 1511333333331 11 81 3 i11311 33133333333 530130 r15 3313133313333 1113349031311 3 33 3311 3 21201363133 u uni 13631 c30 wag mad T 30 1 33 33 3 dcpa nm 1351 3301113101 3 1 31 33 luztcr 3 3 33 advis wt 313333 maria mix timici 133333133331 33 113 333ch 33333331133333 um etmn 33 obtain 3333335333135 1133 1 33 03 333 1113 1 1icz3go 130133 1 3 viticncc 1333 3 Ed 1130 3333 8352 3933 locat 5 130mm amhcmecn 3113313011113 0132513011331 23103331433 31330333313 316w 33 13 331 33333 33333131332315 12 m36 fi fG 73153 3 33 insum 21111231333 pro 3321333 13 3 033 33 1 mtum requirf d 33333113333 53 33 nud ul huber 1ppr333333 33 313336 identitzcatzma 533 333 31 13 33333 33 311e edi E 353135 3 3 13 33 1131 151 3 3 31 331 33 3 31 1 13 1 31333333 331111 101 31 3 1331 3111 333 11 35 35333 13 1 5 3 vamp 136333 51 13 1113155 5533 5039 Sc gang 1333 25320113333 111131 331333333 5 333334 3331333331334 5 5 n3333 153333 iy3 m3 1113313335 1 313 i13ml ii 3 1353 chicago polic depart 15 29 pcoj393 IR 944104 page 1 9 chicago polic depart 3510 south michigan avenu chicago illinoi 60653 identif section grail crimin histori report cpd 319030 rev w04 convict felon 0N farol sid 29460180 fbi 377470ma1 idoc 876702 current arrest inform 9931 34 year place sign hair blk ssn mam complexion driver lic state seat marisa tattog key histor identifi alia aka qate date birth social secur number OS 2001 30 aug 1997 18 dec 1996 30 mar 1996 02 mar 1996 27 oct 1995 20 aug 1995 26 jan 1995 21 dec 1993 O4 1993 19 sep 1990 avail avail avail avail avail avail crimin justic summari total arrest 24 feloni 6 misdemeanor total convict 6 idoc event event chicago polic depart 15 29 pcoj393 IR 944104 page 2 9 parol institut 
parol date 30 dec 05 case number 01cr2011602 discharg date 30 dec 2008 arrest arrestnamez arrest date 21 jul 2006 hold factliti cpd 011 male dale birth arrest address don CB 016603614 resid cer cer badgeiit rrest agenc cpd II II iairreisit iijnithloiat 4 720 ilc pc possess 15 heroin offens cite court chargesidisposit statut charg class case 720 5701402 c pc possess M as112327201 idisposii ion find probabl caus dismiss disposit eseni sentenc date arrest arrest arrest date hold facil cpd district 011 male date birth arrest address CE 016543111 resid cer beckman cer badg 16837 arrest agenc cpd St t ii 1 1 1 1 3 1915 2mm offens 1 3299 9123 wsw 9 9 1 offens A3 10 8 515 solicit unlawfut busi offens cite ecourt istatut charg class case 720 570i402 C pc possess 06112037201 eorsposrtron noll prosequi disposit date 15 jun 2006 esent sentenc date 5 gdisposit noll prosequi disposit esenr sentenc date arrest arrest arrest date 18 jul 2001 hold facil cpd district 011 male date birth arrest address dcn CE 014844672 resid offic keller cer badg 11 3 3 720 ilc 5 0i24 1 1 uuw weapon felon possessius firearm chargesidisposit E statut char class case f2 1 le hffh ffr l lrl inlanr n 1 L1 1 1 1n rt 11 hui oi dru nth H n chicago poiicc depart 15 29 pcoj393 IR ii 944104 page 3 of9 arm robberi arm wifir 0 2601cr20116 edisposit sentencediillinoi depart correct disposit date 24 mar 2003 convict g sentenca 000 010 year 00 month 000 day sentenc date 24 mar 2003 ecourt chargesidisposit statut gene 9 egg possruss weaponif 2001cn2011 gdisposit stricken docket leav reinstat disposit date 24 mar 2003 estanr sentenc year month 000 day sentenc date 5 arrest arrest arrest date 16 2001 hotd facilit cpd district 011 male date birth arrest address dcn CB 014788299 resid cer smith cer badgeit 354 arrest agenc cpd 10 8 515 solicit unlaw busi chargesidisposit statut charg class case 10 13 515 solicit unlaw busi 014235413 san bond forfeitur non suit disposit date 13 jun 2001 esent sentenc date arrest arrest 
narh arrest date 2001 hold cpd district 011 male date birth arrest address dcn CB 014778544 resid cer ware cer badg rre mg genci 5 freddi 10 8 515 solicit unlaw busi gcounr statut charg ciass case 10 8 515 solicitino unlaw busi 014233765 edisposit noll disposit date 29 2001 esent sentenc date arrest arrest arrest date hold facil cpd district 011 male dale birth arrest address cer cer badg arrest agenc cpd 10 8 515 solicit uniaw busi ecourt chargesidisposit 9 1 iininm n chicago polic depart 15 29 pcoj393 IR 944104 page 4 of9 elf 0010 Qh lg ctass case 5 10 8 515 solicit unlaw busi tit 1194804 non suit disposit date 05 jun 2001 esent sentenc date 5 arrest arrestnam arrest date wors 2001 hold facil cpo central male date birth arrest address dcn CB 014709108 resid cer waller cer badg l rre mg genci cpd 720 ilc crimin trespass remain land arrest arrest arrest date 05 jan 2000 hoid cpd district 013 date birth arrest address dcn CE 014376409 resid cer szeszol cer badg 6183 arrest agenc cpd 4 720 ilc 570 0 402 possess controii substanc icourt chargesidisposit tatut charg class case pc possess control substanc find probabl caus disposit date 31 esent sentenc date ill dept correct parol arrest dcniarrestii local arrest cer ident agenc inform danwll correct center 24 nov 1999 arrest arrest arrest date date birth arrest address dcn CB 011063735 resid 18 1998 hold cer cer badgeii arrest agenc ti 1 pc possess Co'
# Split on punctuation and stem.
def split_by_punc_stem(text):
    """Split *text* into phrases on ',' or '/' and stem each word of a phrase.

    Args:
        text: Label to split, e.g. "Trespass/robbery, Racial slurs".

    Returns:
        List with one string per non-empty phrase; the words of each phrase
        are stemmed with the module-level ``stemmer`` and re-joined with
        single spaces.
    """
    # NOTE(review): unlike split_by_space_stop_punc_stem, stop words and other
    # punctuation such as parentheses are kept here, so phrases like
    # "neglect of duti" cannot occur in text produced by get_stemmed_text.
    # Confirm whether that is intended.
    phrases = (part.strip() for part in re.split(r'[,/]', text))
    return [
        ' '.join(stemmer.stem(word) for word in phrase.split())
        for phrase in phrases
        # Blank phrases (from "a,,b" or a trailing separator) would otherwise
        # yield '' entries and, downstream, empty matcher patterns.
        if phrase
    ]
# Quick check on a label that uses '/' as its separator; the notebook displays the result.
split_by_punc_stem("searching minors/patting down minors/arresting minors")
['search minor', 'pat down minor', 'arrest minor']
# Phrase matcher keyed on the LOWER token attribute; it will hold the stemmed
# phrases to be matched.
matcher4 = PhraseMatcher(nlp.vocab, attr='LOWER')

# Preview the stemmed phrases produced for each category label.
for t in terminology_list:
    print(t, ": ", split_by_punc_stem(t))
Nudity, Penetration : ['nuditi', 'penetr'] sexual harassment, Sexual remarks : ['sexual harass', 'sexual remark'] Sexual humiliation, Sexual extortion, Prostitution/sex work : ['sexual humili', 'sexual extort', 'prostitut', 'sex work'] Taser, Baton, physical touch, gun : ['taser', 'baton', 'physic touch', 'gun'] Trespass/robbery, Racial slurs, xenophobic remarks : ['trespass', 'robberi', 'racial slur', 'xenophob remark'] Undocumented status, ICE : ['undocu statu', 'ice'] Planting drug, planting guns : ['plant drug', 'plant gun'] Neglect of duty, failure to serve : ['neglect of duti', 'failur to serv'] Refusing to provide medical assistance : ['refus to provid medic assist'] Irrational, Aggressive, Unstable : ['irrat', 'aggress', 'unstabl'] searching minors/patting down minors/arresting minors : ['search minor', 'pat down minor', 'arrest minor']
# Register each category with matcher4: the category label is the match key and
# every stemmed phrase derived from that label becomes one pattern Doc.
for t in terminology_list:
    terms = split_by_punc_stem(t)
    patterns = list(map(nlp.make_doc, terms))
    matcher4.add(t, None, *patterns)
# Parse the stemmed text and run the stemmed-phrase matcher over it.
doc_stemmed = nlp(text1_stemmed)
matches4 = matcher4(doc_stemmed)

# Print one line per hit.
for match_id, start, end in matches4:
    # match_id is a hash; the string store maps it back to the category label.
    string_id = nlp.vocab.strings[match_id]
    # Token slice covered by this hit.
    span = doc_stemmed[start:end]
    print("Category: {} Word: {}".format(string_id, span.text))
Category: Planting drug, planting guns Word: plant drug Category: Planting drug, planting guns Word: plant drug Category: Irrational, Aggressive, Unstable Word: irrat Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: robberi Category: Trespass/robbery, Racial slurs, xenophobic remarks Word: trespass
# Distinct values of the allegation_name column, followed by how many there are.
allegation_name_set = set(document_tags['allegation_name'])
print(allegation_name_set, len(allegation_name_set), sep="\n")
{'Unnecessary Physical Contact / Off Duty - No Injury', 'Fail To Obtain A Complaint Register Number', 'Unnecessary Physical Contact -On Duty', 'Shots Fired - No Hits', 'Taser (Probe Discharge)', 'Traffic Violations (Other Than D.U.I.)', 'Improper Search Of Vehicle', 'Seat Belts', 'Inventory Procedures', 'Firearm Discharge No Hits - Handgun', 'Slow / No Response', 'Court Attendance Irregularities', 'Injury', 'Firearm Discharge With Hits / Off Duty', 'Proper Care - Injury / Death', 'Knee Strike', 'U Converted To C.R. (Records Keeping Only, Initial)', 'Misconduct During Issuance Of Citation', 'Insubordination', 'Reports', 'Sexual Orientation', 'Excessive Force - Use Of Firearm / Off Duty - No Injury', 'Inadequate / Failure To Provide Service', 'Gang Affiliation', 'Excessive Force / On Duty - No Injury', 'Neglect Of Duty', 'Misuse Of Department Equipment / Supplies', 'Excessive Force - Use Of Firearm / Off Duty - Injury', 'Firearm Discharge With Hits / On Duty', 'Search - Person / Property', 'Parking Complaints', 'Improper Detention', 'Improper Processing / Reporting / Procedures', 'Excessive Force / On Duty - Injury', 'Theft', 'Excessive Force - Use Of Firearm / On Duty - Injury', 'No Injury', 'Indebtedness To City', 'Residency', 'Just Cause', 'Motor Vehicle Fatality - On Duty', 'Coercion - Threat Of Arrest/Charges', 'Eeo Investigations', 'Federal Civil Suit', 'Initiate Proper Action', 'Positive Drug Screen - Originated From Complaint', 'Telephone / Attorney / Relative Privileges', 'Criminal Sexual Assault', 'Firearm Discharge With Hits - Handgun', 'Secondary/Special Employment', 'Street', 'Drugs / Controlled Substance, Possession Or Sale', 'State Civil Suit', 'Threats', 'Civil Suits - Third Party', 'Judicial Process/Directive - Contempt', 'Violation (Other Than D.U.I.) 
- On Duty', 'Harassment', 'Altercation / Disturbance - Traffic', 'Other Felony', 'Escape', 'Racial Profiling', 'Abuse Of Authority', 'Excessive Force / Off Duty - No Injury', 'Use Of Profanity', 'Unnecessary Display Of Weapon / On Duty', 'Excessive Force - Use Of Firearm / On Duty - No Injury', 'Sexual Misconduct', 'Use / Abuse Drugs / Controlled Substance - On Duty', 'Push/Pull/Grab', 'Absent Without Permission', 'Open Hand Strike (Slap)', 'Failure To Ensure - Civil Rights', 'Racial / Ethnic, Etc.', 'Miscellaneous', 'Closed Hand Strike (Punch)', 'Search Of Premise Without Warrant', 'Domestic Altercation - Physical Abuse', 'Positive Drug Screen - Other Physical Exam', 'Shoplifting', 'Solicit / Accept Bribe (Non-Traffic)', 'Use Of Official Position', 'Unlawful / Excessive Investigative Detention (Witness)', 'Illegal Arrest / False Arrest', 'Firearm Used As An Impact Weapon', 'Dragged', 'Take Down (Thrown To Ground)', 'Domestic Incident - Not Physical', 'Impact Weapon - Unauthorized (Flashlight, Handcuffs, Etc)', 'Excessive Force / Off Duty - Injury', 'Intoxicated Off Duty', 'Weapon / Ammunition', 'Choked', 'Unnecessary Display Of Weapon / Off Duty', 'Improper Search Of Person', 'Assault / Battery, Etc.', 'Use / Abuse Drugs / Controlled Substance - Off Duty', 'Auto Theft', 'D.U.I. - Off Duty', 'Medical Roll Violation', 'Damage / Trespassing To Property', 'Kicked', 'Unnecessary Physical Contact / On Duty - No Injury', 'Prisoners Property', 'Association With Felon', 'Conspiracy To Commit A Crime', 'Misuse Of Department Records', 'Recommend Professional Service'} 108
# Distinct values of the allegation_category column, and how many there are.
allegation_category_set = set(document_tags['allegation_category'])
print(allegation_category_set)
print(len(allegation_category_set))
# None is not a usable category label, so drop it. discard() is used instead
# of remove() so the cell does not raise KeyError when the column has no None.
allegation_category_set.discard(None)
print(len(allegation_category_set))
{'Bribery / Official Corruption', None, 'Operation/Personnel Violations', 'Lockup Procedures', 'Traffic', 'Domestic', 'Conduct Unbecoming (Off-Duty)', 'Use Of Force', 'Criminal Misconduct', 'Supervisory Responsibilities', 'Illegal Search', 'Racial Profiling', 'Unknown', 'Medical', 'Drug / Alcohol Abuse', 'Verbal Abuse', 'Excessive Force', 'False Arrest'} 18 17
# Use the allegation categories as a second terminology list.
terminology_list2 = list(allegation_category_set)

# Phrase matcher keyed on the LOWER token attribute; it will hold the stemmed
# category phrases to be matched.
matcher5 = PhraseMatcher(nlp.vocab, attr='LOWER')

# Preview the stemmed phrases produced for each category.
for t in terminology_list2:
    print(t, ": ", split_by_punc_stem(t))
Bribery / Official Corruption : ['briberi', 'offici corrupt'] Operation/Personnel Violations : ['oper', 'personnel violat'] Lockup Procedures : ['lockup procedur'] Traffic : ['traffic'] Domestic : ['domest'] Conduct Unbecoming (Off-Duty) : ['conduct unbecom (off-duty)'] Use Of Force : ['use Of forc'] Criminal Misconduct : ['crimin misconduct'] Supervisory Responsibilities : ['supervisori respons'] Illegal Search : ['illeg search'] Racial Profiling : ['racial profil'] Unknown : ['unknown'] Medical : ['medic'] Drug / Alcohol Abuse : ['drug', 'alcohol abus'] Verbal Abuse : ['verbal abus'] Excessive Force : ['excess forc'] False Arrest : ['fals arrest']
# Register each allegation category with matcher5: the category is the match
# key and every stemmed phrase derived from it becomes one pattern Doc.
for t in terminology_list2:
    terms = split_by_punc_stem(t)
    patterns = list(map(nlp.make_doc, terms))
    matcher5.add(t, None, *patterns)
# Parse the stemmed text and run the allegation-category matcher over it.
doc_stemmed = nlp(text1_stemmed)
matches5 = matcher5(doc_stemmed)

# Print one line per hit.
for match_id, start, end in matches5:
    # match_id is a hash; the string store maps it back to the category label.
    string_id = nlp.vocab.strings[match_id]
    # Token slice covered by this hit.
    span = doc_stemmed[start:end]
    print("Category: {} Word: {}".format(string_id, span.text))
Category: Drug / Alcohol Abuse Word: drug Category: False Arrest Word: fals arrest Category: Medical Word: medic Category: Medical Word: medic Category: Unknown Word: unknown Category: Drug / Alcohol Abuse Word: drug Category: False Arrest Word: fals arrest Category: Domestic Word: domest Category: Drug / Alcohol Abuse Word: drug Category: False Arrest Word: fals arrest Category: Domestic Word: domest Category: Medical Word: medic Category: Medical Word: medic Category: Medical Word: medic Category: Drug / Alcohol Abuse Word: drug
# Convert the narrative to a plain string so it can be fed to the text
# helpers below; the bare expression on the last line displays the value
# as the notebook cell's output.
text_narrative_str = str(text_narrative)
text_narrative_str
' The complainant alleges he was searched three times without justi?cation at the ?rst location as he took out the garbage. The complainant alleges at the second location he was searched again, drugs were mysteriously found and he was falsely arrested. The complainant further alleges his State Identi?cation Card, house keys, wallet containing miscellaneous papers and $60.00 U.S.C. were taken and not inventoried or returned. The complainant states the case was dismissed on 10 AUG 06. WVES .'
Now let's try stemming this text and then see what we get by running the matcher.
# Pass the narrative through get_stemmed_text (defined earlier in the
# notebook) and display the result as the cell's output.
# NOTE(review): judging by the output, the helper also strips punctuation
# and stop words in addition to stemming; confirm against its definition.
text_narrative_stemmed = get_stemmed_text(text_narrative_str)
text_narrative_stemmed
' complain alleg search time justi cation rst locat took garbag complain alleg second locat search drug mysteri found fals arrest complain alleg state identi cation card hous key wallet contain miscellan paper 60 00 U S C taken inventori return complain state case dismiss 10 aug 06 wve '
# Run the pipeline on the stemmed narrative and reuse the category matcher
# that was populated above.
doc_narrative_stemmed = nlp(text_narrative_stemmed)
matches6 = matcher5(doc_narrative_stemmed)

# Each match is a (match_id, start, end) triple; report the category key
# together with the text of the matched token span.
for match_id, start, end in matches6:
    span = doc_narrative_stemmed[start:end]  # tokens covered by this match
    string_id = nlp.vocab.strings[match_id]  # look up the key's string form
    print("Category: {} Word: {}".format(string_id, span.text))
Category: Drug / Alcohol Abuse Word: drug Category: False Arrest Word: fals arrest