"""Notebook-style script: build plain-English sentences from nomis claimant data.

The script downloads CSV extracts from the nomisweb API with pandas and then
experiments with progressively richer text templates describing the latest
figure, how it compares with the previous month / year, and how the local
claimant rate compares with regional and UK rates.

Bare expressions such as ``tmp[:3]`` or ``df`` are leftovers from notebook
cells (they displayed a value there); they are kept so the statement order
matches the original notebook.
"""
import decimal
import random

# Library to support natural language text generation
# !pip3 install git+https://github.com/pwdyson/inflect.py
import inflect
import pandas as pd

# Created up front: the first sentence that needs an indefinite article
# (txt4) is built before the point where the notebook originally did this.
p = inflect.engine()


# We can download a CSV file containing measures for a particular region.
# For example, Jobseeker allowance on the Isle of Wight
tmp = pd.read_csv('http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?geography=2038431803&sex=5&item=1&measures=20100')
tmp[:3]
tmp.columns

# We can project just a subset of the columns using the select parameter
baseURL = 'http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?'
url = baseURL + 'geography=2038431803&sex=5,6,7&item=1&measures=20100'
# Projection
url += '&select=sex_name,geography_name,measures_name,date_code,date_name,obs_value'
tmp = pd.read_csv(url)
tmp[:9]

# And can project just a subset of the columns, and also reshape them using
# the rows and cols parameters
url += '&rows=date_code,date_name,geography_name,measures_name,&cols=sex_name'
tmp = pd.read_csv(url)
tmp[:3]

# We can also limit the time - for example, over the previous year
url += '&time=latest,prevyear'
df = pd.read_csv(url)
df

# Last row of the time-limited extract is treated as the latest observation.
mostRecent = df.iloc[-1]
txt1 = 'The most recent figure ({0}) for {1} for the {2} area is {3}.'.format(
    mostRecent['DATE_NAME'], mostRecent['MEASURES_NAME'],
    mostRecent['GEOGRAPHY_NAME'], mostRecent['Total'])
print(txt1)


# http://stackoverflow.com/a/3847369/454773
def decase(s):
    """Return *s* with its first character lower-cased ('' for empty input)."""
    return s[:1].lower() + s[1:] if s else ''


txt1 = 'The most recent figure ({0}) for {1} for the {2} area is {3}.'.format(
    mostRecent['DATE_NAME'], decase(mostRecent['MEASURES_NAME']),
    mostRecent['GEOGRAPHY_NAME'], mostRecent['Total'])
print(txt1)

# First row of the extract is treated as the figure from a year ago.
yearago = df.iloc[0]
txt2 = 'This compares with a figure of {0} from a year ago ({1}).'.format(
    yearago['Total'], yearago['DATE_NAME'])
print(txt1)
print(txt2)

# txt2[:-1] drops the trailing full stop so a clause can be appended.
txt3 = txt2[:-1]
yeardelta = mostRecent['Total'] - yearago['Total']
if yeardelta == 0:
    txt3 += ', exactly the same amount.'
else:
    txt3 += ', a change of {0}.'.format(yeardelta)
print(txt1)
print(txt3)

txt4 = txt2[:-1]
if yeardelta == 0:
    txt4 += ', exactly the same amount.'
else:
    if yeardelta < 0:
        direction = 'decrease'
    else:
        direction = 'increase'
    txt4 += ', {0} of {1}.'.format(p.a(direction), abs(yeardelta))
print(txt1)
print(txt4)


def _txt5():
    """Return txt2 extended with a randomly worded year-on-year change clause.

    Reads the module-level ``txt2`` and ``yeardelta``.
    """
    txt5 = txt2[:-1]
    if yeardelta == 0:
        # Fixed: this branch used to append to txt4, which is not a local
        # here and raised UnboundLocalError.
        txt5 += ', exactly the same amount.'
    else:
        if yeardelta < 0:
            direction = random.choice(['decrease', 'fall'])
        else:
            direction = random.choice(['increase', 'rise'])
        txt5 += ', and represents {0} of {1} since then.'.format(
            p.a(direction), abs(yeardelta))
    return txt5


print(txt1)
print(_txt5())
print(_txt5())
print(_txt5())


def _txt6(filler=','):
    """Return txt2 extended with a change clause that also grades the change.

    Args:
        filler: Text placed between the txt2 stem and the change phrase
            (for example ``','`` or ``' and represents'``).

    Reads the module-level ``txt2``, ``yeardelta`` and ``mostRecent``.
    """
    def _magnitude(term):
        # A heuristic here: grade the change by its size relative to the
        # most recent total.
        if propDelta < 0.05:
            mod = random.choice(['slight'])
        elif propDelta < 0.10:
            mod = random.choice(['significant'])
        else:
            mod = random.choice(['considerable', 'large'])
        term = ' '.join([mod, term])
        return p.a(term)

    txt6 = txt2[:-1]
    if yeardelta == 0:
        txt6 += ', exactly the same amount.'
    else:
        # Only needed (and only meaningful) when there is a change.
        propDelta = abs(yeardelta) / mostRecent['Total']
        if yeardelta < 0:
            direction = _magnitude(random.choice(['decrease', 'fall']))
        else:
            direction = _magnitude(random.choice(['increase', 'rise']))
        # _magnitude already prefixes the article, so it is not applied again.
        txt6 += '{_filler} {0} of {1} since then.'.format(
            direction, abs(yeardelta), _filler=filler)
    return txt6


print(txt1)
print(_txt6())
print(_txt6())
print(_txt6(' and represents'))
print(_txt6())
print(_txt6())

lastMonth = df.iloc[-2]


def updown(now, then, amount=False, magnitude=False):
    """Describe the direction of change from *then* to *now* as a short phrase.

    Args:
        now: Current value.
        then: Earlier value to compare against.
        amount: Unused in the recoverable part of the original.
        magnitude: When true, prefix an adverb graded by the proportional
            change relative to *now*.

    NOTE(review): the original text of this function is missing from
    ``elif now`` onwards (it looks as if a ``<...>`` run was stripped as
    markup). The 'down'/'unchanged' branches and the joined return value are
    a reconstruction - confirm against the original notebook.
    """
    words = []
    delta = now - then
    if magnitude and delta != 0 and now != 0:
        # Fixed: the thresholds used to be compared with the raw delta, so
        # any fall was "slightly" and almost any rise was "considerably".
        propdelta = abs(delta / now)
        if propdelta < 0.05:
            words.append(random.choice(['slightly']))
        elif propdelta < 0.10:
            words.append(random.choice(['significantly']))
        else:
            words.append(random.choice(['considerably']))
    if now > then:
        words.append('up')
    elif now < then:
        words.append('down')
    else:
        words.append('unchanged')
    return ' '.join(words)


def c(number):
    """Format *number* with thousands separators, e.g. 1234 -> '1,234'.

    NOTE(review): ``c`` is called throughout the script but its definition is
    not present in the recovered source; a thousands-separator formatter is
    assumed - confirm against the original notebook.
    """
    if isinstance(number, float) and number.is_integer():
        number = int(number)
    return '{0:,}'.format(number)


def otwRiseFall(now, then, amount=False):
    """Return a noun phrase such as 'a rise' or 'a fall of 1,234'.

    Args:
        now: Current value.
        then: Earlier value to compare against.
        amount: When true, append ' of <absolute difference>'.

    NOTE(review): the ``def`` line and the first statements were lost with
    the same stripped span as the tail of ``updown``; the signature is
    inferred from the call sites (three positional arguments).
    """
    delta = now - then
    if delta > 0:
        txt = p.a(random.choice(['rise', 'increase']))
    elif delta < 0:
        txt = p.a(random.choice(['fall', 'decrease']))
    else:
        # Previously txt was left unbound when the two values were equal.
        return 'no change'
    if amount:
        txt += ' of {0}'.format(c(abs(delta)))
    return txt


otw1 = '''
Figures came out for {date} for unemployed claimants which show that {currNum} people on {place} \
were claiming Job Seekers Allowance (JSA) in {currDate}. \
Unemployment Benefit figures released by the Office for National Statistics show \
{monthChange} since {lastMonthDate}, which reported {lastMonthNum} JSA claimants, \
and {yearChange} from {yearagoDate} ({yearagoNum}).
'''.format(date=mostRecent['DATE_NAME'],
           currNum=c(mostRecent['Total']),
           place='the ' + mostRecent['GEOGRAPHY_NAME'],
           currDate=mostRecent['DATE_NAME'],
           monthChange=otwRiseFall(mostRecent['Total'], lastMonth['Total'], True),
           lastMonthDate=lastMonth['DATE_NAME'],
           lastMonthNum=c(lastMonth['Total']),
           yearChange=otwRiseFall(mostRecent['Total'], yearago['Total'], True),
           yearagoDate=yearago['DATE_NAME'],
           yearagoNum=c(yearago['Total']))
print(otw1)


def getLatestJSA(code):
    """Fetch the latest NM_1_1 extract for geography *code* as a DataFrame.

    Requests sex=7, item=1, measures=20100 with a fixed column projection and
    drops rows containing missing values.
    """
    baseURL = 'http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?'
    url = baseURL + 'geography={code}&date=latest&sex=7&item=1&measures=20100'.format(code=code)
    # Projection
    url += '&select=sex_name,geography_name,measures_name,date_code,date_name,obs_value'
    return pd.read_csv(url).dropna()


def get16_64Population(code):
    """Fetch the latest NM_17_1 extract (cell 402720769) for geography *code*.

    Presumably the resident population aged 16-64, going by the function
    name; rows containing missing values are dropped.
    """
    url = 'http://www.nomisweb.co.uk/api/v01/dataset/NM_17_1.data.csv?date=latest&geography={code}&measures=20100&cell=402720769'.format(code=code)
    url += '&select=date_code,date_name,geography_name,measures_name,cell_name,obs_value,cell_code'  # &item=1&measures=20100
    return pd.read_csv(url).dropna()


def JSA_rate(code):
    """Return latest claimant count divided by population for *code* (a fraction)."""
    return getLatestJSA(code)['OBS_VALUE'].iloc[0] / get16_64Population(code)['OBS_VALUE'].iloc[0]


def _popType(pop):
    """Pull the population description out of the first CELL_NAME value.

    Takes the text after the first '(' and before ' -', then lower-cases its
    first character.
    """
    return decase(pop['CELL_NAME'].iloc[0].split('(')[1].split(' -')[0])


iwCode = 2038431803
ukCode = 2092957697
seCode = 2013265928

getLatestJSA(iwCode)
get16_64Population(iwCode)


def rateGetter(code):
    """Print a sentence giving the claimant rate for geography *code*."""
    jsa = getLatestJSA(code)
    pop = get16_64Population(code)
    # Same ratio as JSA_rate(code), computed from the frames already fetched
    # instead of downloading both extracts a second time.
    rate = jsa['OBS_VALUE'].iloc[0] / pop['OBS_VALUE'].iloc[0]
    txt = '''
The percentage rate of {claim} for the {loc} region in {date} was {rate}, based on {num} claimants and \
population {poptype} of {popnum} ({popDate} figure).
'''.format(claim=decase(jsa['MEASURES_NAME'].iloc[0]),
           loc=jsa['GEOGRAPHY_NAME'].iloc[0],
           date=jsa['DATE_NAME'].iloc[0],
           rate='{0:.2f}%'.format(100 * rate),
           num=c(int(jsa['OBS_VALUE'].iloc[0])),
           poptype=_popType(pop),
           popnum=c(int(pop['OBS_VALUE'].iloc[0])),
           popDate=pop['DATE_NAME'].iloc[0])
    print(txt)


rateGetter(iwCode)
rateGetter(seCode)
rateGetter(ukCode)

localCode = iwCode
regionCode = seCode

# Fixed: these used the misspelt name `localcode`, which raised NameError.
jsaLocal = getLatestJSA(localCode)
jsaRegion = getLatestJSA(regionCode)

jsaLocal_rate = JSA_rate(localCode)
jsaRegion_rate = JSA_rate(regionCode)
jsaUK_rate = JSA_rate(ukCode)

# Rounding modes offered by the decimal module:
#   ROUND_CEILING (towards Infinity),
#   ROUND_DOWN (towards zero),
#   ROUND_FLOOR (towards -Infinity),
#   ROUND_HALF_DOWN (to nearest with ties going towards zero),
#   ROUND_HALF_EVEN (to nearest with ties going to nearest even integer),
#   ROUND_HALF_UP (to nearest with ties going away from zero), or
#   ROUND_UP (away from zero).
#   ROUND_05UP (away from zero if last digit after rounding towards zero
#   would have been 0 or 5; otherwise towards zero)


def pc(amount, rounding=''):
    """Format the fraction *amount* as a percentage string with one decimal.

    Args:
        amount: Fraction to format (0.0312 -> '3.1%').
        rounding: 'down' for ROUND_DOWN, 'up' for ROUND_UP; anything else
            selects ROUND_HALF_UP.
    """
    if rounding == 'down':
        rounding = decimal.ROUND_DOWN
    elif rounding == 'up':
        rounding = decimal.ROUND_UP
    else:
        rounding = decimal.ROUND_HALF_UP
    ramount = float(decimal.Decimal(100 * amount).quantize(
        decimal.Decimal('.1'), rounding=rounding))
    return '{0:.1f}%'.format(ramount)


def otwMoreLess(now, then):
    """Return 'more' or 'less' depending on how *now* compares with *then*.

    Returns 'different' when the two are equal; the original left the result
    unbound in that case and raised UnboundLocalError.
    """
    delta = now - then
    if delta > 0:
        txt = random.choice(['more'])
    elif delta < 0:
        txt = random.choice(['less'])
    else:
        txt = 'different'
    return txt


def otwPCmoreLess(this, that):
    """Return e.g. '0.4% more': the absolute gap as a percentage plus direction."""
    delta = this - that
    return '{delta} {diff}'.format(delta=pc(abs(delta)), diff=otwMoreLess(this, that))


otw3 = '''
That means {localrate} of the resident {localarea} population {poptype} are {claim} \
– {regiondiff} than the rest of the {region} ({regionrate}), \
and {ukdiff} than the whole of the UK ({ukrate}).
'''.format(localrate=pc(jsaLocal_rate),
           localarea=jsaLocal['GEOGRAPHY_NAME'].iloc[0],
           poptype=_popType(get16_64Population(localCode)),
           claim=decase(jsaLocal['MEASURES_NAME'].iloc[0]),
           regiondiff=otwPCmoreLess(jsaLocal_rate, jsaRegion_rate),
           region=jsaRegion['GEOGRAPHY_NAME'].iloc[0],
           # Reuse the rate fetched above rather than downloading it again.
           regionrate=pc(jsaRegion_rate),
           ukdiff=otwPCmoreLess(jsaLocal_rate, jsaUK_rate),
           ukrate=pc(jsaUK_rate))
print(otw3)

JSA_rate(seCode)
JSA_rate(ukCode)

# .....doodles and fragments...

# Geographies codelist
# http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957697.def.htm
# http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957697TYPE480.def.htm
#
# CL_1_1_GEOGRAPHY
# value        Description
# 2013265921   North East
# 2013265922   North West
# 2013265923   Yorkshire and The Humber
# 2013265924   East Midlands
# 2013265925   West Midlands
# 2013265926   East
# 2013265927   London
# 2013265928   South East
# 2013265929   South West
# 2013265930   Wales
# 2013265931   Scotland
# 2013265932   Northern Ireland