from Goulib.notebook import *
from Goulib.table import *
small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data
small
1 | 2 | 3 |
4 | 5 |
#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)
from Goulib.image import Image
lena=Image('../tests/data/lena.png').resize((128,128))
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])
complex | content | |||||
---|---|---|---|---|---|---|
| $\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$ | |||||
# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)
t=Table('../tests/data/test.xls')
print(t.titles) #Tables have optional column headers
['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']
t.setcol('Total',None)
Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation
#notice the Order Dates are messy because of Excel representaion of dates
OrderDate | Région | Rep | Item | Unités | Cost | Total |
---|---|---|---|---|---|---|
41061 | East | Jones | Pencil | 95 | 1.99 | |
1/23/2012 | Central | Kivell | Binder | 50 | 19.99 | |
41154 | Central | Jardine | Pencil | 36 | 4.99 | |
2/26/2012 | Central | Gill | Pen | 27 | 19.99 | |
3/15/2012 | West | Sorvino | Pencil | 56 | 2.99 |
t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title
(4.99, 4.99)
print(t[:5,'Cost']) # indexing supports slices too
[1.99, 19.99, 4.99, 19.99, 2.99]
# handle the mess in Excel Cell types ...
t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...
t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats
True
# math between columns is still a bit tedious...
from Goulib.math2 import vecmul
t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))
#it's easy to make a "total" line from columns
#with a list of reduce-like functions applied to each column
from Goulib.stats import avg
from Goulib.itertools2 import count_unique
t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])
t.footer #result is stored in a separated footer field
[datetime.date(2013, 12, 21), 3, 11, 5, 2121, 20.308604651162796, 19627.88000000001]
h(t.html(start=5,stop=10)) # a way to shorten long tables
OrderDate | Région | Rep | Item | Unités | Cost | Total |
---|---|---|---|---|---|---|
... | ... | ... | ... | ... | ... | ... |
2012-01-04 | East | Jones | Binder | 60 | 4.99 | 299.40 |
2012-04-18 | Central | Andrews | Pencil | 75 | 1.99 | 149.25 |
2012-05-05 | Central | Jardine | Pencil | 90 | 4.99 | 449.10 |
2012-05-22 | West | Thompson | Pencil | 32 | 1.99 | 63.68 |
2012-08-06 | East | Jones | Binder | 60 | 8.99 | 539.40 |
... | ... | ... | ... | ... | ... | ... |
2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
t.sort('Total',reverse=True) # Tables can be sorted by column easily
h(t.html(stop=5)) # show only the 5 lines with highest total
OrderDate | Région | Rep | Item | Unités | Cost | Total |
---|---|---|---|---|---|---|
2013-04-12 | Central | Jardine | Binder | 94 | 19.99 | 1879.06 |
2012-07-29 | East | Parent | Binder | 81 | 19.99 | 1619.19 |
2013-01-02 | Central | Smith | Binder | 87 | 15.00 | 1305.00 |
2012-12-29 | East | Parent | Pen Set | 74 | 15.99 | 1183.26 |
2013-10-14 | West | Thompson | Binder | 57 | 19.99 | 1139.43 |
... | ... | ... | ... | ... | ... | ... |
2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support
region['East'] # isn't it nice ?
OrderDate | Rep | Item | Unités | Cost | Total |
---|---|---|---|---|---|
2012-07-29 | Parent | Binder | 81 | 19.99 | 1619.19 |
2012-12-29 | Parent | Pen Set | 74 | 15.99 | 1183.26 |
2012-10-22 | Jones | Pen | 64 | 8.99 | 575.36 |
2012-08-06 | Jones | Binder | 60 | 8.99 | 539.40 |
2013-04-27 | Howard | Pen | 96 | 4.99 | 479.04 |
2013-04-07 | Jones | Pen Set | 62 | 4.99 | 309.38 |
2012-08-11 | Parent | Pen | 15 | 19.99 | 299.85 |
2012-01-04 | Jones | Binder | 60 | 4.99 | 299.40 |
2012-09-18 | Jones | Pen Set | 16 | 15.99 | 255.84 |
2012-06-01 | Jones | Pencil | 95 | 1.99 | 189.05 |
2012-08-15 | Jones | Pencil | 35 | 4.99 | 174.65 |
2012-12-07 | Howard | Binder | 29 | 1.99 | 57.71 |
2013-02-18 | Jones | Binder | 4 | 4.99 | 19.96 |
#a row can be extracted as a dict where column titles are keys
t.rowasdict(1)
OrderedDict([('OrderDate', datetime.date(2013, 1, 2)), ('Région', 'Central'), ('Rep', 'Smith'), ('Item', 'Binder'), ('Unités', 87), ('Cost', 15.0), ('Total', 1305.0)])
t.json()[:250]+'...' #rowasdict is handy to build json representation
'[{"OrderDate": "2013-04-12", "R\\u00e9gion": "Central", "Rep": "Jardine", "Item": "Binder", "Unit\\u00e9s": 94, "Cost": 19.99, "Total": 1879.06}, {"OrderDate": "2013-01-02", "R\\u00e9gion": "Central", "Rep": "Smith", "Item": "Binder", "Unit\\u00e9s": 87,...'
from Goulib.math2 import *
from Goulib.itertools2 import *
res=Table(t) #copy
s=len(res)
for i in range(s-1):
line=res[i]
d=[hamming(line,res[j]) for j in range(i+1,s)]
j=index_min(d)[0]+i
res[i+1],res[j]=res[j],res[i+1] #swap
res
OrderDate | Région | Rep | Item | Unités | Cost | Total |
---|---|---|---|---|---|---|
2013-04-12 | Central | Jardine | Binder | 94 | 19.99 | 1879.06 |
2013-07-08 | Central | Kivell | Pen Set | 42 | 23.95 | 1005.90 |
2012-01-23 | Central | Kivell | Binder | 50 | 19.99 | 999.50 |
2013-05-31 | Central | Gill | Binder | 80 | 8.99 | 719.20 |
2012-02-26 | Central | Gill | Pen | 27 | 19.99 | 539.73 |
2012-06-25 | Central | Morgan | Pencil | 90 | 4.99 | 449.10 |
2012-11-25 | Central | Kivell | Pen Set | 96 | 4.99 | 479.04 |
2013-06-17 | Central | Kivell | Desk | 5 | 125.00 | 625.00 |
2012-01-09 | Central | Smith | Desk | 2 | 125.00 | 250.00 |
2012-05-10 | Central | Morgan | Binder | 28 | 8.99 | 251.72 |
2013-07-21 | Central | Morgan | Pen Set | 55 | 12.49 | 686.95 |
2013-01-02 | Central | Smith | Binder | 87 | 15.00 | 1305.00 |
2012-05-05 | Central | Jardine | Pencil | 90 | 4.99 | 449.10 |
2013-03-24 | Central | Jardine | Pen Set | 50 | 4.99 | 249.50 |
2013-01-15 | Central | Gill | Binder | 46 | 8.99 | 413.54 |
2012-04-18 | Central | Andrews | Pencil | 75 | 1.99 | 149.25 |
2013-12-21 | Central | Andrews | Binder | 28 | 4.99 | 139.72 |
2013-05-14 | Central | Gill | Pencil | 53 | 1.29 | 68.37 |
2012-12-12 | Central | Smith | Pencil | 67 | 1.29 | 86.43 |
2013-10-31 | Central | Andrews | Pencil | 14 | 1.29 | 18.06 |
2012-09-02 | Central | Jardine | Pencil | 36 | 4.99 | 179.64 |
2013-11-17 | Central | Jardine | Binder | 11 | 4.99 | 54.89 |
2012-08-11 | East | Parent | Pen | 15 | 19.99 | 299.85 |
2013-10-09 | Central | Gill | Pencil | 7 | 1.29 | 9.03 |
2013-04-07 | East | Jones | Pen Set | 62 | 4.99 | 309.38 |
2013-10-04 | Central | Andrews | Pencil | 66 | 1.99 | 131.34 |
2012-09-18 | East | Jones | Pen Set | 16 | 15.99 | 255.84 |
2012-07-29 | East | Parent | Binder | 81 | 19.99 | 1619.19 |
2013-04-27 | East | Howard | Pen | 96 | 4.99 | 479.04 |
2012-08-06 | East | Jones | Binder | 60 | 8.99 | 539.40 |
2012-01-04 | East | Jones | Binder | 60 | 4.99 | 299.40 |
2012-10-22 | East | Jones | Pen | 64 | 8.99 | 575.36 |
2012-06-01 | East | Jones | Pencil | 95 | 1.99 | 189.05 |
2012-08-15 | East | Jones | Pencil | 35 | 4.99 | 174.65 |
2012-12-07 | East | Howard | Binder | 29 | 1.99 | 57.71 |
2013-02-18 | East | Jones | Binder | 4 | 4.99 | 19.96 |
2013-10-14 | West | Thompson | Binder | 57 | 19.99 | 1139.43 |
2013-08-24 | West | Sorvino | Desk | 3 | 275.00 | 825.00 |
2012-03-15 | West | Sorvino | Pencil | 56 | 2.99 | 167.44 |
2013-09-27 | West | Sorvino | Pen | 76 | 1.99 | 151.24 |
2013-07-03 | West | Sorvino | Binder | 7 | 19.99 | 139.93 |
2012-05-22 | West | Thompson | Pencil | 32 | 1.99 | 63.68 |
2012-12-29 | East | Parent | Pen Set | 74 | 15.99 | 1183.26 |
2013-12-21 | 3 | 11 | 5 | 2121 | 20.31 | 19627.88 |
hamming(t[1],t[2])
6
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])
complex | content | |||||
---|---|---|---|---|---|---|
| $\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$ | |||||