import pandas as pd

#We can download a CSV file containing measures for a particular region
#For example, Jobseeker allowance on the Isle of Wight
tmp=pd.read_csv('http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?geography=2038431803&sex=5&item=1&measures=20100')
#Bare expression: shows the first three rows in an interactive session, has no effect in a plain script
tmp[:3]

#Bare expression: the column labels of the downloaded frame
tmp.columns

#We can project just a subset of the columns using the select parameter
baseURL='http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?'
url=baseURL+'geography=2038431803&sex=5,6,7&item=1&measures=20100'
#Projection
url+='&select=sex_name,geography_name,measures_name,date_code,date_name,obs_value'
tmp=pd.read_csv(url)
tmp[:9]

#And can project just a subset of the columns, and also reshape them using the rows and cols parameters
#url is extended in place, so this request still carries the select parameter added above
#NOTE(review): the rows list ends with a comma just before &cols - confirm the empty entry is intentional
url+='&rows=date_code,date_name,geography_name,measures_name,&cols=sex_name'
tmp=pd.read_csv(url)
tmp[:3]

#We can also limit the time - for example, over the previous year
#df is the frame that the rest of the file reads from
url+='&time=latest,prevyear'
df=pd.read_csv(url)
df

#The final row of the frame is treated as the latest observation
mostRecent = df.iloc[-1]
#Fill the sentence template positionally from the named fields of that row
txt1 = 'The most recent figure ({0}) for {1} for the {2} area is {3}.'.format(
    *[mostRecent[field] for field in ('DATE_NAME', 'MEASURES_NAME', 'GEOGRAPHY_NAME', 'Total')]
)
print(txt1)

#http://stackoverflow.com/a/3847369/454773
def decase(s):
    """Return s with only its first character lower-cased; a falsy s gives ''."""
    if not s:
        return ''
    return s[:1].lower() + s[1:]

#Same sentence as before, but the measure name has its first letter lower-cased
#so that it reads naturally in the middle of the sentence
txt1 = 'The most recent figure ({0}) for {1} for the {2} area is {3}.'.format(
    mostRecent['DATE_NAME'],
    decase(mostRecent['MEASURES_NAME']),
    mostRecent['GEOGRAPHY_NAME'],
    mostRecent['Total'],
)
print(txt1)

#The first row of the frame is used as the comparison point
#(presumably the figure from a year earlier, given the time=latest,prevyear query - confirm)
yearago = df.iloc[0]
txt2 = 'This compares with a figure of {0} from a year ago ({1}).'.format(
    yearago['Total'],
    yearago['DATE_NAME'],
)

#One sentence per line
print(txt1, txt2, sep='\n')

#Difference between the latest total and the comparison total
yeardelta = mostRecent['Total'] - yearago['Total']

#Reuse txt2 without its final full stop and append a clause describing the change
txt3 = txt2[:-1] + (
    ', exactly the same amount.' if yeardelta == 0
    else ', a change of {0}.'.format(yeardelta)
)

print(txt1, txt3, sep='\n')

#The inflect engine p is only created further down the file, so set it up here as well:
#without this, p.a(...) below raises NameError when the file is run from top to bottom
import inflect
p = inflect.engine()

#Start from txt2 without its final full stop
txt4=txt2[:-1]

if yeardelta==0:
    txt4+=', exactly the same amount.'
else:
    #Name the direction of the change and report its size as a positive number
    direction='decrease' if yeardelta<0 else 'increase'
    #p.a() puts an indefinite article in front of the word
    txt4+=', {0} of {1}.'.format(p.a(direction),abs(yeardelta))

print(txt1)
print(txt4)

#Library to support natural language text generation
#!pip3 install git+https://github.com/pwdyson/inflect.py
import inflect
p = inflect.engine()

import random

def _txt5():
    """Return txt2 extended with a randomly worded clause about the yearly change.

    Reads the module-level txt2, yeardelta and the inflect engine p.
    The noun for the change is picked at random, so repeated calls can differ.
    """
    #Drop txt2's final full stop so that a clause can be appended
    txt5=txt2[:-1]

    if yeardelta==0:
        #This used to append to txt4, which is never assigned inside the function
        #and therefore raised UnboundLocalError whenever there was no change
        txt5+=', exactly the same amount.'
    else:
        if yeardelta <0: direction=random.choice(['decrease', 'fall'])
        else: direction=random.choice(['increase','rise'])
        #p.a() puts an indefinite article in front of the chosen noun
        txt5+=', and represents {0} of {1} since then.'.format(p.a(direction),abs(yeardelta))
    return txt5

print(txt1)
#Generate the follow-on sentence three times to show the random variation in wording
for _attempt in range(3):
    print(_txt5())

def _txt6(filler=','):
    """Return txt2 extended with a clause that also grades the size of the yearly change.

    filler is the text placed between txt2 (minus its full stop) and the clause.
    Reads the module-level txt2, yeardelta, mostRecent and the inflect engine p.
    """
    #Size of the change as a fraction of the latest total
    propDelta = abs(yeardelta) / mostRecent['Total']

    def _magnitude(term):
        #A heuristic here
        if propDelta < 0.05:
            options = ['slight']
        elif propDelta < 0.10:
            options = ['significant']
        else:
            options = ['considerable', 'large']
        qualified = '{0} {1}'.format(random.choice(options), term)
        return p.a(qualified)

    stem = txt2[:-1]
    if yeardelta == 0:
        return stem + ', exactly the same amount.'

    #Pick the noun first, then the adjective, then add the article
    nouns = ['decrease', 'fall'] if yeardelta < 0 else ['increase', 'rise']
    direction = _magnitude(random.choice(nouns))
    clause = '{_filler} {0} of {1} since then.'.format(
        p.a(direction),
        abs(yeardelta),
        _filler=filler,
    )
    return stem + clause

print(txt1)
#Default filler twice, one custom filler, then the default twice more
for _filler in (',', ',', ' and represents', ',', ','):
    print(_txt6(_filler))

#Second-to-last row of the frame; presumably the month before mostRecent - confirm the rows are in date order
lastMonth=df.iloc[-2]

def updown(now, then,amount=False, magnitude=False):
    """Describe how now compares with then as a short phrase.

    now, then: the two numbers being compared.
    amount: when true, the absolute difference is appended to the phrase.
    magnitude: when true and the numbers differ, an adverb grading the size of
        the change relative to now is put in front of the direction.

    Returns a string such as 'up', 'slightly down 12' or 'no change'.
    """
    delta=now - then
    words=[]

    if magnitude and delta!=0:
        #Grade the change by its size as a proportion of now. The thresholds used
        #to be compared with the raw delta, so every fall came out as 'slightly'
        #and almost every rise as 'considerably'. A now of zero cannot be divided
        #by, so it is graded as the largest change.
        propdelta=abs(delta/now) if now else float('inf')
        if propdelta<0.05:
            words.append(random.choice(['slightly']))
        elif propdelta<0.10:
            words.append(random.choice(['significantly']))
        else:
            words.append(random.choice(['considerably']))

    if now>then: words.append('up')
    elif now<then: words.append('down')
    else: words.append('no change')

    txt=' '.join(words)

    if amount:
        txt='{0} {1}'.format(txt, abs(delta))

    return txt
    
def _txt7(amount=False,magnitude=False):
    """Return the module-level txt6 extended with a month-on-month comparison.

    amount and magnitude are passed straight through to updown().
    """
    #Strip txt6's final full stop before appending the new clause
    stem = txt6[:-1]
    change = updown(mostRecent['Total'], lastMonth['Total'], amount, magnitude)
    return stem + ', {0} on the previous month.'.format(change)

#Regenerate txt6 before every call so each line starts from freshly chosen wording,
#then show each combination of the amount and magnitude switches
for _amount, _magnitude in ((False, False), (True, False), (False, True), (True, True)):
    txt6 = _txt6()
    print(_txt7(amount=_amount, magnitude=_magnitude))

def _txt8():
    """Return a sentence splitting the latest total into male and female counts.

    Reads the module-level mostRecent; the word ' figure' is included at random.
    """
    template = ('Of the current total{_figure}, {maleCount} applicants '
                'were male and {femaleCount} female.')
    females = mostRecent['Female']
    males = mostRecent['Male']
    figure = random.choice(['', ' figure'])
    return template.format(femaleCount=females, maleCount=males, _figure=figure)

#Four samples to show the random variation
for _attempt in range(4):
    print(_txt8())

def _txt9():
    """Return a male/female breakdown sentence with randomised order and wording.

    Reads the module-level mostRecent. The gender mentioned first, the noun after
    'total', the optional second 'were' and the opening period are all chosen at random.
    """
    first = ('male', mostRecent['Male'])
    second = ('female', mostRecent['Female'])
    #Coin flip on which gender is mentioned first
    if random.choice([True, False]):
        first, second = second, first

    figure = random.choice(['', ' figure', ' amount'])
    spaceWere = random.choice([' ', ' were '])
    period = random.choice(["this month's", 'the current'])

    template = ('Of {_period} total{_figure}, {A_count} applicants '
                'were {A_gender} and {B_count}{_space_were}{B_gender}.')
    return template.format(
        A_count=first[1], A_gender=first[0],
        B_count=second[1], B_gender=second[0],
        _figure=figure,
        _space_were=spaceWere,
        _period=period,
    )

#Five samples to show the random variation
for _attempt in range(5):
    print(_txt9())

def _txt10(data, period,randGender=False, opener='Of',total=True,showTotal=False,filler=False):
    genderCount=[ ('male',data['Male']),('female',data['Female'])]
    if randGender and random.choice([True, False]):
        genderCount=[genderCount[1],genderCount[0]]
        
    txt10='''\
{_opener} {_period}{_total},{_filler} {A_count} applicants \
were {A_gender} and {B_count}{_space_were}{B_gender}.\
'''.format(A_count=genderCount[0][1], A_gender=genderCount[0][0],
           B_count=genderCount[1][1], B_gender=genderCount[1][0],
           _space_were=random.choice([' ',' were ']),
           _period=random.choice(period),
           _opener=opener,
           _total = ' total{_figure}{_showTotal}'.format(_figure=random.choice(['',' figure',' amount']),
                                                         _showTotal= '' if not showTotal else ' of {0}'.format(data['Total'])) if total else '',
           _filler='' if not filler else filler,
           
           )
    return txt10

#Sentences for the latest row; randGender=True lets the gender mentioned first vary between calls
print(_txt10(mostRecent,["this month's",'the current','the most recent']))
print(_txt10(mostRecent,["this month's",'the current','the most recent'],randGender=True))
print(_txt10(mostRecent,["this month's",'the current','the most recent'],randGender=True))
#Other rows and openers; showTotal=True adds the row's Total to the sentence
print(_txt10(yearago,["the same month last year's",],opener='From',showTotal=True))
print(_txt10(lastMonth,["last month's"]))
print(_txt10(mostRecent,['the most recent'],opener='From',showTotal=True))
#Blank line between the two groups of output
print()
#Three sentences meant to be read together: latest row, previous row, first row
print(_txt10(mostRecent,["this month's",'the current','the most recent']))
print(_txt10(lastMonth,["last month's"],opener='This compares with',filler=' of which'))
print(_txt10(yearago,["the same time last year"],opener='At',total=False))

#A longer passage assembled from the sentence generators defined above
print(txt1)
print(_txt6())
print(_txt10(lastMonth,["month saw a"],opener='Last',filler=' of which',showTotal=True))
print(_txt10(mostRecent,["this month's",'the current','the most recent']))
print(_txt10(yearago,["the same time last year"],opener='At',total=False))

from ggplot import *
#Reshape to long form: one row per date and per Female/Male/Total column,
#with the column name in 'variable' and its number in 'value'
df_long=pd.melt(df, id_vars=['DATE_CODE','DATE_NAME','GEOGRAPHY_NAME','MEASURES_NAME'], value_vars= ['Female','Male','Total'] )
#Parse DATE_CODE (expected in year-month form) into a datetime column for the x axis
df_long['date']=pd.to_datetime(df_long['DATE_CODE'], format='%Y-%m')
df_long[:5]

#One line per variable; the y axis is fixed to the range 0 to 3500
ggplot(df_long,aes(x='date',y='value',colour='variable')) + geom_line() + ylim(0,3500) + ggtitle('JSA Claims, Isle of Wight')


#Thousands separator
def c(amount):
    """Return amount as text with commas between each group of three digits."""
    return format(amount, ',')

def otwRiseFall(now,then,amount=False):
    """Describe the move from then to now as a noun phrase, e.g. 'a rise' or 'an increase of 1,200'.

    now, then: the two numbers being compared.
    amount: when true, ' of <difference>' is appended, with the difference
        formatted by c().

    Returns 'no change' when the two numbers are equal. Otherwise the noun is
    chosen at random and given an indefinite article by the inflect engine p.
    """
    delta=now-then
    if delta==0:
        #Equal values used to fall through both branches, leaving txt unassigned
        #and raising UnboundLocalError; there is also no amount worth reporting
        return 'no change'
    if delta>0:
        txt=p.a(random.choice(['rise','increase']))
    elif delta<0:
        txt=p.a(random.choice(['fall','decrease']))
    if amount:
        txt+=' of {0}'.format(c(abs(delta)))
    return txt
    
#Opening paragraph of the generated story: the latest total, then the change since the
#previous row and since the first row of df, each worded by otwRiseFall()
otw1='''
Figures came out for {date} for unemployed claimants which show that {currNum} people on {place} \
were claiming Job Seekers Allowance (JSA) in {currDate}. \
Unemployment Benefit figures released by the Office for National Statistics show \
{monthChange} since {lastMonthDate}, which reported {lastMonthNum} JSA claimants, \
and {yearChange} from {yearagoDate} ({yearagoNum}). 
'''.format(date=mostRecent['DATE_NAME'],
           currNum=c(mostRecent['Total']),
           place='the '+mostRecent['GEOGRAPHY_NAME'],
           currDate=mostRecent['DATE_NAME'],
           monthChange=otwRiseFall(mostRecent['Total'],lastMonth['Total'],True),
           lastMonthDate=lastMonth['DATE_NAME'],
           lastMonthNum=c(lastMonth['Total']),
           yearChange=otwRiseFall(mostRecent['Total'],yearago['Total'],True),
           yearagoDate=yearago['DATE_NAME'],
           yearagoNum=c(yearago['Total'])
           )
print(otw1)

def getLatestJSA(code):
    """Download the latest NM_1_1 observation for geography code as a DataFrame.

    The request is limited to sex=7, item=1, measures=20100 and to the columns
    named in the select parameter; rows containing missing values are dropped.
    """
    endpoint = 'http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1.data.csv?'
    query = 'geography={code}&date=latest&sex=7&item=1&measures=20100'.format(code=code)
    #Projection
    columns = '&select=sex_name,geography_name,measures_name,date_code,date_name,obs_value'
    frame = pd.read_csv(endpoint + query + columns)
    return frame.dropna()

def get16_64Population(code):
    """Download the latest NM_17_1 observation for cell 402720769 and geography code.

    Going by the function name this is the population aged 16-64 - confirm against
    the Nomis cell definition. Rows containing missing values are dropped.
    """
    query = 'http://www.nomisweb.co.uk/api/v01/dataset/NM_17_1.data.csv?date=latest&geography={code}&measures=20100&cell=402720769'.format(code=code)
    columns = '&select=date_code,date_name,geography_name,measures_name,cell_name,obs_value,cell_code'#&item=1&measures=20100
    frame = pd.read_csv(query + columns)
    return frame.dropna()

def JSA_rate(code):
    """Return the latest JSA figure divided by the latest population figure for code.

    Both figures are downloaded on every call.
    """
    claimants = getLatestJSA(code)['OBS_VALUE'].iloc[0]
    population = get16_64Population(code)['OBS_VALUE'].iloc[0]
    return claimants / population

#Nomis geography codes used in the comparisons below
#Isle of Wight - the same geography as the queries at the top of the file
iwCode=2038431803
#Presumably the UK as a whole, going by the name - confirm against the Nomis codelist
ukCode=2092957697
#South East, per the geography codelist quoted at the end of the file
seCode=2013265928

#Bare expression: the latest JSA frame for the Isle of Wight
getLatestJSA(iwCode)

#Bare expression: the latest population frame for the Isle of Wight
get16_64Population(iwCode)

def rateGetter(code):
    """Print a sentence giving the claimant rate for the geography code.

    Downloads the latest JSA and population frames for code, divides the first
    claimant figure by the first population figure, and prints the rate together
    with both underlying numbers. Returns None.
    """
    jsa=getLatestJSA(code)
    pop=get16_64Population(code)
    #Work the rate out from the two frames already downloaded; going through
    #JSA_rate(code) here fetched both datasets a second time
    rate=jsa['OBS_VALUE'].iloc[0]/pop['OBS_VALUE'].iloc[0]
    txt='''
The percentage rate of {claim} for the {loc} region in {date} was {rate}, based on {num} claimants and \
population {poptype} of {popnum} ({popDate} figure).
'''.format(claim=decase(jsa['MEASURES_NAME'].iloc[0]),
               loc=jsa['GEOGRAPHY_NAME'].iloc[0],
               date=jsa['DATE_NAME'].iloc[0],
               rate='{0:.2f}%'.format(100 * rate),
               num=c(int(jsa['OBS_VALUE'].iloc[0])),
               #Keep the part of the cell name between the first '(' and the following ' -'
               poptype=decase(pop['CELL_NAME'].iloc[0].split('(')[1].split(' -')[0]),
               popnum=c(int(pop['OBS_VALUE'].iloc[0])),
               popDate=pop['DATE_NAME'].iloc[0])
    print(txt)

#Print the rate sentence for each of the three geographies
rateGetter(iwCode)
rateGetter(seCode)
rateGetter(ukCode)

#The local area and the region it is compared against
localCode=iwCode
regionCode=seCode


#This was getLatestJSA(localcode); names are case-sensitive, so it raised NameError
jsaLocal=getLatestJSA(localCode)
jsaRegion=getLatestJSA(regionCode)

#Claimant rates for the local area, its region and the UK
jsaLocal_rate=JSA_rate(localCode)
jsaRegion_rate=JSA_rate(regionCode)
jsaUK_rate=JSA_rate(ukCode)

import decimal

'''
ROUND_CEILING (towards Infinity),
ROUND_DOWN (towards zero),
ROUND_FLOOR (towards -Infinity),
ROUND_HALF_DOWN (to nearest with ties going towards zero),
ROUND_HALF_EVEN (to nearest with ties going to nearest even integer),
ROUND_HALF_UP (to nearest with ties going away from zero), or
ROUND_UP (away from zero).
ROUND_05UP (away from zero if last digit after rounding towards zero would have been 0 or 5; otherwise towards zero)
'''

def pc(amount,rounding=''):
    """Return the proportion amount as a percentage string with one decimal place.

    rounding: 'down' rounds towards zero, 'up' rounds away from zero, and any
    other value rounds to nearest with ties going away from zero.
    """
    mode = (decimal.ROUND_DOWN if rounding == 'down'
            else decimal.ROUND_UP if rounding == 'up'
            else decimal.ROUND_HALF_UP)

    #Scale to a percentage, then round to one decimal place with the chosen mode
    scaled = decimal.Decimal(100 * amount)
    ramount = float(scaled.quantize(decimal.Decimal('.1'), rounding=mode))
    return '{0:.1f}%'.format(ramount)

def otwMoreLess(now,then):
    """Return 'more' if now is greater than then, 'less' if it is smaller.

    Equal values give 'no more or less'; they used to leave txt unassigned,
    which raised UnboundLocalError.
    """
    delta=now-then
    if delta>0:
        txt=random.choice(['more'])
    elif delta<0:
        txt=random.choice(['less'])
    else:
        txt='no more or less'
    return txt

def otwPCmoreLess(this,that):
    """Describe the gap between two rates, e.g. '0.4% more' or '1.2% less'.

    this, that: the two proportions being compared; the gap is formatted by pc()
    and the comparative word comes from otwMoreLess().

    Returns 'no more or less' when the two are equal.
    """
    delta=this-that
    if delta==0:
        #Equal rates have no direction to report; handle them here rather than
        #producing a '0.0% ...' phrase
        return 'no more or less'
    return '{delta} {diff}'.format(delta=pc(abs(delta)),diff=otwMoreLess(this,that))

#Comparison paragraph: the local rate set against the regional and UK rates
otw3='''
That means {localrate} of the resident {localarea} population {poptype} are {claim} \
– {regiondiff} than the rest of the {region} ({regionrate}), \
and {ukdiff} than the whole of the UK ({ukrate}).
'''.format(localrate=pc(jsaLocal_rate),
           localarea=jsaLocal['GEOGRAPHY_NAME'].iloc[0],
           #This was get16_64Population(localcode); the variable is localCode, so it raised NameError
           poptype=decase(get16_64Population(localCode)['CELL_NAME'].iloc[0].split('(')[1].split(' -')[0]),
           claim=decase(jsaLocal['MEASURES_NAME'].iloc[0]),
           regiondiff=otwPCmoreLess(jsaLocal_rate,jsaRegion_rate),
           region=jsaRegion['GEOGRAPHY_NAME'].iloc[0],
           #Reuse the rate already computed for regionCode instead of downloading it again
           regionrate=pc(jsaRegion_rate),
           ukdiff=otwPCmoreLess(jsaLocal_rate,jsaUK_rate),
           ukrate=pc(jsaUK_rate))

print(otw3)

#Bare expressions: show the raw South East and UK rates in an interactive session
#(each call downloads the data again)
JSA_rate(seCode)

JSA_rate(ukCode)

#.....doodles and fragments...

#Geographies codelist
#http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957697.def.htm
#http://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957697TYPE480.def.htm
'''
CL_1_1_GEOGRAPHY
value	Description
2013265921	North East
2013265922	North West
2013265923	Yorkshire and The Humber
2013265924	East Midlands
2013265925	West Midlands
2013265926	East
2013265927	London
2013265928	South East
2013265929	South West
2013265930	Wales
2013265931	Scotland
2013265932	Northern Ireland
'''