In [2]:
%matplotlib inline
In [30]:
import pandas as pd
import matplotlib as mpl
In [31]:
mpl.rc('font', family='Ubuntu', size=16)
mpl.rc('figure', figsize=(16, 10))
In [20]:
balsavimai = pd.read_csv('balsavimai/balsavimai.csv.gz')
In [24]:
balsavimai.shape
Out[24]:
(5530115, 9)
In [21]:
balsavimai.head()
Out[21]:
data laikas rezultatai.p_asm_id rezultatai.vardas rezultatai.frakcija rezultatai.už rezultatai.prieš rezultatai.susilaikė key
0 2017-05-04 10:05:43 79161 Ačienė Vida LVŽSF + NaN NaN http://www.lrs.lt/sip/portal.show?p_r=15275&p_...
1 2017-05-04 10:05:43 48690 Adomėnas Mantas TS-LKDF + NaN NaN http://www.lrs.lt/sip/portal.show?p_r=15275&p_...
2 2017-05-04 10:05:43 79162 Alekna Virgilijus LSF + NaN NaN http://www.lrs.lt/sip/portal.show?p_r=15275&p_...
3 2017-05-04 10:05:43 79163 Andrikis Rimas TTF + NaN NaN http://www.lrs.lt/sip/portal.show?p_r=15275&p_...
4 2017-05-04 10:05:43 53916 Anušauskas Arvydas TS-LKDF NaN NaN NaN http://www.lrs.lt/sip/portal.show?p_r=15275&p_...

Balsų skaičius per metus

Pasirodo, lrs.lt svetainėje balsavimų rezultatai pateikiami tik nuo 1997 metų.

In [33]:
balsavimai.data.str.slice(None, 4).value_counts().sort_index().plot.bar(grid=True)
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fbbcb892668>

Balsavimų skaičius per metus

In [44]:
balsavimai.groupby('key').agg({'data': 'first'}).data.str.slice(None, 4).value_counts().sort_index().plot.bar(grid=True)
Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fbbd2e696a0>

Viso balsų, pagal partijas

In [42]:
balsavimai['rezultatai.frakcija'].value_counts().sort_values(ascending=True).plot.barh(grid=True, figsize=(16, 16))
Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fbbd735fa90>

Viso balsų pagal seimo narius (top 50 daugiausiai balsavusių)

In [35]:
balsavimai['rezultatai.vardas'].value_counts().head(50)
Out[35]:
Sysas Algirdas                  39560
Salamakinas Algimantas          39560
Glaveckas Kęstutis              39560
Gražulis Petras                 39560
Šiaulienė Irena                 39560
Olekas Juozas                   39560
Butkevičius Algirdas            39560
Razma Jurgis                    39560
Degutienė Irena                 39560
Kirkilas Gediminas              39549
Kubilius Andrius                39542
Juknevičienė Rasa               38000
Simulik Valerijus               35803
Vareikis Egidijus               35775
Steponavičius Gintaras          35773
Sabatauskas Julius              35773
Bradauskas Bronius              34829
Vėsaitė Birutė                  34829
Teišerskytė Dalia               34829
Mikutienė Dangutė               34829
Kašėta Algis                    34665
Masiulis Eligijus               33499
Lydeka Arminas                  31623
Vidžiūnas Arvydas               30763
Ažubalis Audronius              30614
Zingeris Emanuelis              30614
Dagys Rimantas Jonas            30614
Matulas Antanas                 30614
Aleknaitė Abramikienė Vilija    30099
Juršėnas Česlovas               30017
Kupčinskas Rytas                29670
Karosas Justinas                28998
Sinkevičius Rimantas            28357
Bastys Mindaugas                28357
Bukauskas Valentinas            26829
Baškienė Rima                   26829
Starkevičius Kazys              26827
Veselka Julius                  26254
Šukys Raimondas                 26230
Klumbys Egidijus                26230
Skardžius Artūras               25905
Jankauskas Donatas              25883
Daukšys Kęstutis                25883
Petrauskienė Milda              25883
Graužinienė Loreta              25883
Dumčius Arimantas               25883
Čigriejienė Vida Marija         25883
Bucevičius Saulius              25883
Margevičienė Vincė Vaidevutė    25883
Žakaris Edvardas                25883
Name: rezultatai.vardas, dtype: int64
In [45]:
balsavimai['balsavo'] = (
    (balsavimai['rezultatai.už'] == '+') |
    (balsavimai['rezultatai.prieš'] == '+') |
    (balsavimai['rezultatai.susilaikė'] == '+')
)

Mažiausiai balsuojantys Seimo nariai

Žemiau yra top 30 Seimo narių, kurie balsavimo metu nebalsavo. Tai nereišikia, kad jie iš vis nedalyvavo posėdyje. Grafike pateikiama tik ar buvo skaičiai, kiek kartų seimo narys balsavo ir kiek iš viso buvo balsavimų.

In [73]:
frame = balsavimai.groupby('rezultatai.p_asm_id').agg({'balsavo': ['sum', 'count'], 'rezultatai.vardas': 'first'})
frame['balsavo'] = frame[('balsavo', 'sum')]
frame['count'] = frame[('balsavo', 'count')]
frame['Seimo narys'] = frame[('rezultatai.vardas', 'first')]
frame['nebalsavo'] = frame['count'] - frame['balsavo']
frame['nebalsavo_procentais'] = frame['count'] / frame['nebalsavo'] * 100
frame = frame.set_index('Seimo narys', drop=True)
frame = frame.sort_values('nebalsavo_procentais')[['balsavo', 'nebalsavo']]
frame.head(30).plot.barh(stacked=True, grid=True)
/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/indexes/base.py:2653: RuntimeWarning: unorderable types: str() < int(), sort order is undefined for incomparable objects
  return this.join(other, how=how, return_indexers=return_indexers)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2103             try:
-> 2104                 return self._engine.get_loc(key)
   2105             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()

KeyError: 'nebalsavo'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3644         try:
-> 3645             loc = self.items.get_loc(item)
   3646         except KeyError:

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/indexes/multi.py in get_loc(self, key, method)
   1596         if not isinstance(key, tuple):
-> 1597             loc = self._get_level_indexer(key, level=0)
   1598             return _maybe_to_slice(loc)

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/indexes/multi.py in _get_level_indexer(self, key, level, indexer)
   1858 
-> 1859             loc = level_index.get_loc(key)
   1860             if level > 0 or self.lexsort_depth == 0:

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2105             except KeyError:
-> 2106                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2107 

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()

KeyError: 'nebalsavo'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-73-6008f95132d3> in <module>()
      3 frame['count'] = frame[('balsavo', 'count')]
      4 frame['Seimo narys'] = frame[('rezultatai.vardas', 'first')]
----> 5 frame['nebalsavo'] = frame['count'] - frame['balsavo']
      6 frame['nebalsavo_procentais'] = frame['count'] / frame['nebalsavo'] * 100
      7 frame = frame.set_index('Seimo narys', drop=True)

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2415         else:
   2416             # set column
-> 2417             self._set_item(key, value)
   2418 
   2419     def _setitem_slice(self, key, value):

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2482         self._ensure_valid_index(value)
   2483         value = self._sanitize_column(key, value)
-> 2484         NDFrame._set_item(self, key, value)
   2485 
   2486         # check if we are modifying a copy

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   1498 
   1499     def _set_item(self, key, value):
-> 1500         self._data.set(key, value)
   1501         self._clear_item_cache()
   1502 

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3646         except KeyError:
   3647             # This item wasn't present, just insert at end
-> 3648             self.insert(len(self.items), item, value)
   3649             return
   3650 

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
   3747 
   3748         block = make_block(values=value, ndim=self.ndim,
-> 3749                            placement=slice(loc, loc + 1))
   3750 
   3751         for blkno, count in _fast_count_smallints(self._blknos[loc:]):

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2662                      placement=placement, dtype=dtype)
   2663 
-> 2664     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2665 
   2666 # TODO: flexible with index=None and/or items=None

/home/sirex/.venvs/databot/lib/python3.5/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
    108             raise ValueError('Wrong number of items passed %d, placement '
    109                              'implies %d' % (len(self.values),
--> 110                                              len(self.mgr_locs)))
    111 
    112     @property

ValueError: Wrong number of items passed 527, placement implies 1
In [70]:
frame.columns.levels
Out[70]:
FrozenList([['rezultatai.vardas', 'balsavo'], ['count', 'first', 'sum']])