Georgia grades child placing agencies on a 100-point scale. (Due to "incentive credits," it's possible to score above 100.) The state provided BuzzFeed News with reports covering the previous 10 fiscal quarters: FY2013-Q1 through FY2015-Q2.
The Georgia reports come as PDFs, which BuzzFeed News then converted to XML documents using pdftohtml. The section of code below parses the XML documents to extract key details from each provider's summary page.
import pandas as pd
import lxml.html
# XML conversions of the state's Provider Profile Guide PDFs, one file per
# fiscal year, listed newest first.
report_paths = [
    "../reports/RBWO_FY2015_Provider_Profile_Guide.xml",
    "../reports/RBWO_FY2014_Provider_Profile_Guide.xml",
    "../reports/RBWO_FY2013_Provider_Profile_Guide.xml",
]
class Page(object):
    """A single ``page`` element of a report converted to XML.

    Every accessor finds a ``text`` child by a label substring and reads
    the value either from that element's own text or from the element
    returned by its ``getnext()``.
    """

    def __init__(self, el):
        """Wrap *el* and parse its ``number`` attribute as an int."""
        self.el = el
        self.page_num = int(el.attrib["number"])

    def get_text_el(self, search_string):
        """Return the first ``text`` element whose text contains
        *search_string*, or ``None`` when nothing matches."""
        # Stop at the first hit rather than collecting every match.
        return next(
            (sub_el for sub_el in self.el.cssselect("text")
             if search_string in sub_el.text_content()),
            None,
        )

    @property
    def year(self):
        """The two characters following the first "FY" in the "(FY" header
        element (presumably the two-digit fiscal year)."""
        return self.get_text_el("(FY").text_content().split("FY")[1][:2]

    @property
    def is_profile_page(self):
        """True when the page text mentions "SHINES Resource ID"."""
        return "SHINES Resource ID" in self.el.text_content()

    @property
    def provider_name(self):
        """Text of the element following the "RBWO Provider Profile" one."""
        prev = self.get_text_el("RBWO Provider Profile")
        return prev.getnext().text_content()

    @property
    def total_children(self):
        """Integer read from the element following "Total Children:"."""
        label = self.get_text_el("Total Children:")
        return int(label.getnext().text_content())

    @property
    def vendor_id(self):
        """Integer taken from the last space-separated token of the
        "Vendor ID:" element's text."""
        label = self.get_text_el("Vendor ID:")
        return int(label.text_content().split(" ")[-1])

    @property
    def license_type(self):
        """License type string from the "Type:" element.

        The value sits either after ": " in the label element itself or,
        when the label carries no value, in the following element.
        """
        label = self.get_text_el("Type:")
        label_text = label.text_content().strip()
        label_split = label_text.split(": ")
        if len(label_split) > 1:
            return label_split[1]
        return label.getnext().text_content().strip()

    def extract_score_from_el(self, el):
        """Parse a score from *el*'s text.

        Returns ``None`` when the text contains "N/A" or when the piece
        after the first non-breaking space is blank; otherwise returns
        that piece, minus its final character (presumably "%"), as a float.
        """
        text = el.text_content()
        if "N/A" in text:
            return None
        split = text.split(u"\xa0")
        pct = split[1].strip()
        if not pct:
            return None
        return float(pct[:-1])

    @property
    def scores(self):
        """List of four ``{"quarter", "score"}`` dicts, one per quarter,
        read from the four elements that follow the "(FY" header."""
        # Resolve the year once: each `self.year` access rescans the page.
        year = self.year
        el = self.get_text_el("(FY")
        _scores = []
        for quarter_num in range(1, 5):
            el = el.getnext()
            _scores.append({
                "quarter": "FY{0}Q{1}".format(year, quarter_num),
                "score": self.extract_score_from_el(el),
            })
        return _scores
class Report(object):
    """One report file, parsed into pages and a table of providers.

    Attributes set by the constructor:
      dom            parsed document root
      pages          a ``Page`` for every ``page`` element
      profile_pages  the pages whose ``is_profile_page`` is true
      providers      DataFrame with one row per profile page
    """

    def __init__(self, path):
        """Parse the XML file at *path* and build the provider table."""
        # Read raw bytes. Text mode would decode with the platform default
        # encoding, and lxml can reject str input that carries an XML
        # encoding declaration.
        with open(path, "rb") as f:
            self.dom = lxml.html.fromstring(f.read())
        self.pages = [Page(page_el) for page_el in self.dom.cssselect("page")]
        self.profile_pages = [p for p in self.pages if p.is_profile_page]
        # A concrete list of records rather than a lazy generator.
        self.providers = pd.DataFrame([
            {
                "vendor_id": p.vendor_id,
                "provider_name": p.provider_name,
                "license_type": p.license_type,
                "total_children": p.total_children,
                "scores": p.scores,
                "year": "FY" + p.year,
            }
            for p in self.profile_pages
        ])
# Parse every report eagerly. A list, unlike a lazy map object, can still be
# inspected after it has been consumed by the concat below.
reports = [Report(path) for path in report_paths]
# Stack the per-report provider tables into a single DataFrame.
providers = pd.concat([report.providers for report in reports])
Here, we place the scores into their own dataframe, so that we can compute aggregate statistics.
# Flatten each provider's list of quarterly score dicts into one record per
# (provider, quarter). Building a single frame from the flat list avoids
# concatenating many tiny frames and still works when there are no rows.
score_rows = [
    {
        "vendor_id": vendor_id,
        "provider_name": provider_name,
        "quarter": score["quarter"],
        "score": score["score"],
    }
    for vendor_id, provider_name, provider_scores
    in providers[["vendor_id", "provider_name", "scores"]].itertuples(index=False)
    for score in provider_scores
]
# Drop quarters without a score, then renumber the rows from zero.
scores = pd.DataFrame(
    score_rows,
    columns=["vendor_id", "provider_name", "quarter", "score"],
).dropna().reset_index(drop=True)
# One row per distinct (license_type, vendor_id) pair, indexed by vendor_id.
license_types = providers[["license_type", "vendor_id"]].drop_duplicates().set_index("vendor_id")
Here, we separate out child placing agencies (CPAs) from child caring institutions (CCIs):
# Keep only the vendors whose license type is exactly "CPA".
is_cpa = license_types["license_type"] == "CPA"
cpas = license_types[is_cpa].copy()

# Summarise every vendor's quarterly scores in one row per vendor_id.
score_by_vendor = scores.groupby("vendor_id")
vendor_scores = score_by_vendor["score"]
vendor_names = score_by_vendor["provider_name"]
aggregate_scores = pd.DataFrame({
    "avg_score": vendor_scores.mean().round(2),
    "high_score": vendor_scores.max(),
    "low_score": vendor_scores.min(),
    "n_quarters": vendor_scores.size(),
    "provider_name": vendor_names.first(),
})
Here, we rank all CPAs that received scores in all 10 quarters, by average score:
# Attach the aggregate statistics to each CPA and order rows from the lowest
# to the highest average. DataFrame.sort() has been removed from pandas;
# sort_values() is its replacement.
ranked = cpas.join(aggregate_scores).sort_values("avg_score")
# Keep only providers that have a score in all 10 quarters.
all_quarters = ranked[ranked["n_quarters"] == 10].copy()
# Rank 1 goes to the highest average score.
all_quarters["avg_score_rank"] = all_quarters["avg_score"].rank(ascending=False)
main_cols = ["provider_name", "n_quarters", "avg_score", "avg_score_rank"]
all_quarters[main_cols]
provider_name | n_quarters | avg_score | avg_score_rank | |
---|---|---|---|---|
vendor_id | ||||
35509 | New Horizons Initiatives, Inc. (968) | 10 | 45.04 | 54 |
35508 | New Horizons Community Services | 10 | 74.25 | 53 |
35443 | Laurel Heights Hospital -Universal | 10 | 78.81 | 52 |
35249 | Bethany Christian Services Atlanta (573) | 10 | 80.75 | 51 |
35497 | Mentor Network Mentor Athens (734) | 10 | 81.58 | 50 |
40080 | New Beginnings, Life Changing | 10 | 82.55 | 49 |
84761 | National Youth Advocate Program | 10 | 82.71 | 48 |
35219 | All God's Children (861) | 10 | 82.80 | 47 |
35387 | National Youth Advocate Program | 10 | 83.44 | 46 |
108643 | Elks Aidmore Children's Center Child Placing A... | 10 | 84.10 | 45 |
35493 | Mentor Network Mentor Atlanta (736) | 10 | 84.32 | 44 |
35296 | Creative Community Services (612) | 10 | 84.33 | 43 |
35494 | Mentor Network Mentor Savannah (742) | 10 | 84.79 | 42 |
35248 | Bethany Christian Services Columbus | 10 | 85.09 | 41 |
44182 | Universal Health Services of | 10 | 85.70 | 40 |
53071 | Morningstar Children and Family | 10 | 85.74 | 39 |
99720 | Benchmark Family Services, Inc | 10 | 86.56 | 38 |
84514 | ENA, Inc., dba NECCO (formerly GA | 10 | 87.60 | 37 |
99719 | Benchmark Family Services, Inc | 10 | 87.66 | 36 |
35495 | Mentor Network Mentor Augusta | 10 | 88.26 | 35 |
84513 | ENA, Inc., dba NECCO (formerly GA | 10 | 88.49 | 34 |
35505 | Neighbor to Family Fulton County (774) | 10 | 88.70 | 33 |
35498 | Mentor Network Mentor Albany (733) | 10 | 89.82 | 32 |
35611 | Twin Cedars Youth Services Foster | 10 | 90.29 | 31 |
35451 | Lutheran Services of Georgia Lutheran of Atlanta | 10 | 90.95 | 30 |
35384 | Meritan, Inc. d/b/a Meritan Stepping Stones | 10 | 91.11 | 29 |
35496 | Mentor Network Mentor Macon (740) | 10 | 91.27 | 28 |
35335 | Families First Foster Care Program | 10 | 91.50 | 27 |
84510 | ENA, Inc., dba NECCO (formerly GA | 10 | 91.86 | 26 |
35448 | Lookout Mountain Community | 10 | 92.18 | 25 |
35450 | Lutheran Services of Georgia | 10 | 92.27 | 24 |
35452 | Lutheran Services of Georgia | 10 | 92.62 | 23 |
35446 | Lighthouse Therapeutic Foster Care | 10 | 93.10 | 22 |
35415 | Hillside Connections Program (700) | 10 | 93.49 | 21 |
35506 | Neighbor to Family Richmond | 10 | 93.98 | 20 |
35385 | Meritan, Inc. d/b/a Meritan Stepping Stones Macon | 10 | 94.53 | 19 |
62037 | Lutheran Services of Georgia | 10 | 95.16 | 18 |
82494 | Faithbridge Foster Care Atlanta (974) | 10 | 95.63 | 17 |
40245 | Trinity J and D, LLC Trinity J and D, | 10 | 96.24 | 16 |
35502 | Neighbor to Family Douglas County | 10 | 96.50 | 15 |
89583 | Neighbor to Family Chatham County | 10 | 96.85 | 14 |
35356 | Georgia Agape (655) | 10 | 97.57 | 13 |
62038 | Neighbor to Family Henry County | 10 | 97.83 | 12 |
35504 | Neighbor to Family Gwinnett County | 10 | 98.36 | 11 |
35378 | Georgia Parent Support Network (670) | 10 | 98.54 | 10 |
35503 | Neighbor to Family Dekalb County | 10 | 99.84 | 9 |
40276 | Giving Children A Chance of Georgia | 10 | 99.95 | 8 |
84512 | ENA, Inc., dba NECCO (formerly GA | 10 | 100.31 | 7 |
35305 | Devereux GA Treatment Network | 10 | 100.42 | 6 |
35275 | Choices for Life Of GA Valdosta (943) | 10 | 101.28 | 5 |
35292 | Community Connections (586) | 10 | 101.55 | 4 |
35485 | Murphy-Harpst Children's Centers | 10 | 102.10 | 3 |
66182 | Georgia Baptist Children's Home & | 10 | 102.72 | 2 |
45624 | United Methodist Children Home of the North GA... | 10 | 103.21 | 1 |
For ease of reference, here are Mentor's scores and ranks alone:
# Select the rows whose provider name contains "Mentor" and show only the
# main columns.
mentor_mask = ["Mentor" in name for name in all_quarters["provider_name"]]
all_quarters.loc[mentor_mask, main_cols]
provider_name | n_quarters | avg_score | avg_score_rank | |
---|---|---|---|---|
vendor_id | ||||
35497 | Mentor Network Mentor Athens (734) | 10 | 81.58 | 50 |
35493 | Mentor Network Mentor Atlanta (736) | 10 | 84.32 | 44 |
35494 | Mentor Network Mentor Savannah (742) | 10 | 84.79 | 42 |
35495 | Mentor Network Mentor Augusta | 10 | 88.26 | 35 |
35498 | Mentor Network Mentor Albany (733) | 10 | 89.82 | 32 |
35496 | Mentor Network Mentor Macon (740) | 10 | 91.27 | 28 |