import pandas as pd
import numpy as np
pd
<module 'pandas' from '/Users/kei/anaconda3/lib/python3.7/site-packages/pandas/__init__.py'>
np
<module 'numpy' from '/Users/kei/anaconda3/lib/python3.7/site-packages/numpy/__init__.py'>
help(pd.any)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-5-7eea23420119> in <module>
----> 1 help(pd.any)

~/anaconda3/lib/python3.7/site-packages/pandas/__init__.py in __getattr__(name)
    212
    213         return Panel
--> 214     raise AttributeError("module 'pandas' has no attribute '{}'".format(name))
    215
    216

AttributeError: module 'pandas' has no attribute 'any'
pd
<module 'pandas' from '/Users/kei/anaconda3/lib/python3.7/site-packages/pandas/__init__.py'>
pd.any
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-28821a5f7027> in <module>
----> 1 pd.any

~/anaconda3/lib/python3.7/site-packages/pandas/__init__.py in __getattr__(name)
    212
    213         return Panel
--> 214     raise AttributeError("module 'pandas' has no attribute '{}'".format(name))
    215
    216

AttributeError: module 'pandas' has no attribute 'any'
help(np.any)
Help on function any in module numpy:

any(a, axis=None, out=None, keepdims=<no value>)
    Test whether any array element along a given axis evaluates to True.

    Returns single boolean unless `axis` is not ``None``

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : None or int or tuple of ints, optional
        Axis or axes along which a logical OR reduction is performed.
        The default (`axis` = `None`) is to perform a logical OR over all
        the dimensions of the input array. `axis` may be negative, in
        which case it counts from the last to the first axis.

        .. versionadded:: 1.7.0

        If this is a tuple of ints, a reduction is performed on multiple
        axes, instead of a single axis or all the axes as before.
    out : ndarray, optional
        Alternate output array in which to place the result. It must have
        the same shape as the expected output and its type is preserved
        (e.g., if it is of type float, then it will remain so, returning
        1.0 for True and 0.0 for False, regardless of the type of `a`).
        See `doc.ufuncs` (Section "Output arguments") for details.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the input array.

        If the default value is passed, then `keepdims` will not be
        passed through to the `any` method of sub-classes of
        `ndarray`, however any non-default value will be. If the
        sub-class' method does not implement `keepdims` any
        exceptions will be raised.

    Returns
    -------
    any : bool or ndarray
        A new boolean or `ndarray` is returned unless `out` is specified,
        in which case a reference to `out` is returned.

    See Also
    --------
    ndarray.any : equivalent method

    all : Test whether all elements along a given axis evaluate to True.

    Notes
    -----
    Not a Number (NaN), positive infinity and negative infinity evaluate
    to `True` because these are not equal to zero.

    Examples
    --------
    >>> np.any([[True, False], [True, True]])
    True

    >>> np.any([[True, False], [False, False]], axis=0)
    array([ True, False])

    >>> np.any([-1, 0, 5])
    True

    >>> np.any(np.nan)
    True

    >>> o=np.array(False)
    >>> z=np.any([-1, 4, 5], out=o)
    >>> z, o
    (array(True), array(True))
    >>> # Check now that z is a reference to o
    >>> z is o
    True
    >>> id(z), id(o) # identity of z and o # doctest: +SKIP
    (191614240, 191614240)
np.any([[True, False]])
True
np.any([[True, False]], axis=0)
array([ True, False])
np.any([[True, False]], axis=1)
array([ True])
data = [[True, False, False], [True, False, False], [True, False, True]]
data = np.array([[True, False, False], [True, False, False], [True, False, True]])
data
array([[ True, False, False], [ True, False, False], [ True, False, True]])
np.any(data)
True
np.any(data, axis=(0,1))
True
np.any(data, axis=[0,1])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-f4177b3ca390> in <module>
----> 1 np.any(data, axis=[0,1])

<__array_function__ internals> in any(*args, **kwargs)

~/anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py in any(a, axis, out, keepdims)
   2268
   2269     """
-> 2270     return _wrapreduction(a, np.logical_or, 'any', axis, None, out, keepdims=keepdims)
   2271
   2272

~/anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
     88                 return reduction(axis=axis, out=out, **passkwargs)
     89
---> 90     return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
     91
     92

TypeError: 'list' object cannot be interpreted as an integer
np.any(data)
True
data
array([[ True, False, False], [ True, False, False], [ True, False, True]])
np.any(data, axis=0)
array([ True, False, True])
np.any(data, axis=1)
array([ True, True, True])
np.any(data, axis=0, keepdims=True)
array([[ True, False, True]])
np.any(data, axis=1, keepdims=True)
array([[ True], [ True], [ True]])
df = pd.DataFrame(np.arange(5 * 4).reshape((5, 4)))
sampler = nparray([3, 1, 4, 2, 0])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-fac5f0ba09da> in <module>
----> 1 sampler = nparray([3, 1, 4, 2, 0])

NameError: name 'nparray' is not defined
sampler = np.array([3, 1, 4, 2, 0])
sampler
array([3, 1, 4, 2, 0])
df
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
| 3 | 12 | 13 | 14 | 15 |
| 4 | 16 | 17 | 18 | 19 |
df.take(sampler)
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 3 | 12 | 13 | 14 | 15 |
| 1 | 4 | 5 | 6 | 7 |
| 4 | 16 | 17 | 18 | 19 |
| 2 | 8 | 9 | 10 | 11 |
| 0 | 0 | 1 | 2 | 3 |
df.loc[sampler,:]
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 3 | 12 | 13 | 14 | 15 |
| 1 | 4 | 5 | 6 | 7 |
| 4 | 16 | 17 | 18 | 19 |
| 2 | 8 | 9 | 10 | 11 |
| 0 | 0 | 1 | 2 | 3 |
df.iloc[sampler,:]
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 3 | 12 | 13 | 14 | 15 |
| 1 | 4 | 5 | 6 | 7 |
| 4 | 16 | 17 | 18 | 19 |
| 2 | 8 | 9 | 10 | 11 |
| 0 | 0 | 1 | 2 | 3 |
df[sampler]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-32-d101416d9913> in <module>
----> 1 df[sampler]

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2984             if is_iterator(key):
   2985                 key = list(key)
-> 2986             indexer = self.loc._convert_to_indexer(key, axis=1, raise_missing=True)
   2987
   2988         # take() does not accept boolean indexers

~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
   1283                 # When setting, missing keys are not allowed, even with .loc:
   1284                 kwargs = {"raise_missing": True if is_setter else raise_missing}
-> 1285                 return self._get_listlike_indexer(obj, axis, **kwargs)[1]
   1286         else:
   1287             try:

~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1090
   1091         self._validate_read_indexer(
-> 1092             keyarr, indexer, o._get_axis_number(axis), raise_missing=raise_missing
   1093         )
   1094         return keyarr, indexer

~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1183                 if not (self.name == "loc" and not raise_missing):
   1184                     not_found = list(set(key) - set(ax))
-> 1185                     raise KeyError("{} not in index".format(not_found))
   1186
   1187             # we skip the warning on Categorical/Interval

KeyError: '[4] not in index'
df
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
| 3 | 12 | 13 | 14 | 15 |
| 4 | 16 | 17 | 18 | 19 |
df[[2,3]]
|   | 2 | 3 |
|---|---|---|
| 0 | 2 | 3 |
| 1 | 6 | 7 |
| 2 | 10 | 11 |
| 3 | 14 | 15 |
| 4 | 18 | 19 |
df[2]
0     2
1     6
2    10
3    14
4    18
Name: 2, dtype: int64
np.random.sample([1,1])
array([[0.92366697]])
help(np.random.sample)
Help on built-in function sample in module numpy.random.mtrand:

sample(...)
    This is an alias of `random_sample`. See `random_sample` for the complete
    documentation.
help(np.random.random_sample)
Help on built-in function random_sample:

random_sample(...) method of numpy.random.mtrand.RandomState instance
    random_sample(size=None)

    Return random floats in the half-open interval [0.0, 1.0).

    Results are from the "continuous uniform" distribution over the
    stated interval. To sample :math:`Unif[a, b), b > a` multiply
    the output of `random_sample` by `(b-a)` and add `a`::

      (b - a) * random_sample() + a

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape. If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn. Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : float or ndarray of floats
        Array of random floats of shape `size` (unless ``size=None``, in which
        case a single float is returned).

    Examples
    --------
    >>> np.random.random_sample()
    0.47108547995356098 # random
    >>> type(np.random.random_sample())
    <class 'float'>
    >>> np.random.random_sample((5,))
    array([ 0.30220482, 0.86820401, 0.1654503 , 0.11659149, 0.54323428]) # random

    Three-by-two array of random numbers from [-5, 0):

    >>> 5 * np.random.random_sample((3, 2)) - 5
    array([[-3.99149989, -0.52338984], # random
           [-2.99091858, -0.79479508],
           [-1.23204345, -1.75224494]])
import random
random.sample
<bound method Random.sample of <random.Random object at 0x7fe980884c20>>
random.sample([1,3])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-41-eb7af90a1d91> in <module>
----> 1 random.sample([1,3])

TypeError: sample() missing 1 required positional argument: 'k'
random.sample([1,3], 4)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-42-48d45532ac96> in <module>
----> 1 random.sample([1,3], 4)

~/anaconda3/lib/python3.7/random.py in sample(self, population, k)
    319         n = len(population)
    320         if not 0 <= k <= n:
--> 321             raise ValueError("Sample larger than population or is negative")
    322         result = [None] * k
    323         setsize = 21 # size of a small set minus size of an empty list

ValueError: Sample larger than population or is negative
help(random.sample)
Help on method sample in module random:

sample(population, k) method of random.Random instance
    Chooses k unique random elements from a population sequence or set.

    Returns a new list containing elements from the population while
    leaving the original population unchanged. The resulting list is
    in selection order so that all sub-slices will also be valid random
    samples. This allows raffle winners (the sample) to be partitioned
    into grand prize and second place winners (the subslices).

    Members of the population need not be hashable or unique. If the
    population contains repeats, then each occurrence is a possible
    selection in the sample.

    To choose a sample in a range of integers, use range as an argument.
    This is especially fast and space efficient for sampling from a
    large population: sample(range(10000000), 60)
import random
random.sample([1,2], 3)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-45-ccb5b8484189> in <module>
----> 1 random.sample([1,2], 3)

~/anaconda3/lib/python3.7/random.py in sample(self, population, k)
    319         n = len(population)
    320         if not 0 <= k <= n:
--> 321             raise ValueError("Sample larger than population or is negative")
    322         result = [None] * k
    323         setsize = 21 # size of a small set minus size of an empty list

ValueError: Sample larger than population or is negative
random.sample([1,2], 2)
[1, 2]
random.sample([1,2,3], 2)
[3, 2]
help(pd.get_dummies)
Help on function get_dummies in module pandas.core.reshape.reshape:

get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None)
    Convert categorical variable into dummy/indicator variables.

    Parameters
    ----------
    data : array-like, Series, or DataFrame
        Data of which to get dummy indicators.
    prefix : str, list of str, or dict of str, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : str, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix`.
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `object` or `category` dtype will be converted.
    sparse : bool, default False
        Whether the dummy-encoded columns should be backed by
        a :class:`SparseArray` (True) or a regular NumPy array (False).
    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.

        .. versionadded:: 0.18.0

    dtype : dtype, default np.uint8
        Data type for new columns. Only a single dtype is allowed.

        .. versionadded:: 0.23.0

    Returns
    -------
    DataFrame
        Dummy-coded data.

    See Also
    --------
    Series.str.get_dummies : Convert Series to dummy codes.

    Examples
    --------
    >>> s = pd.Series(list('abca'))

    >>> pd.get_dummies(s)
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0

    >>> s1 = ['a', 'b', np.nan]

    >>> pd.get_dummies(s1)
       a  b
    0  1  0
    1  0  1
    2  0  0

    >>> pd.get_dummies(s1, dummy_na=True)
       a  b  NaN
    0  1  0    0
    1  0  1    0
    2  0  0    1

    >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
    ...                    'C': [1, 2, 3]})

    >>> pd.get_dummies(df, prefix=['col1', 'col2'])
       C  col1_a  col1_b  col2_a  col2_b  col2_c
    0  1       1       0       0       1       0
    1  2       0       1       1       0       0
    2  3       1       0       0       0       1

    >>> pd.get_dummies(pd.Series(list('abcaa')))
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0
    4  1  0  0

    >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
       b  c
    0  0  0
    1  1  0
    2  0  1
    3  0  0
    4  0  0

    >>> pd.get_dummies(pd.Series(list('abc')), dtype=float)
         a    b    c
    0  1.0  0.0  0.0
    1  0.0  1.0  0.0
    2  0.0  0.0  1.0
pd.unique([1,1,2])
array([1, 2])
type(pd.unique([1,1,2]))
numpy.ndarray
random.choice([1,2,3])
3
random.choice([1,2,3])
3
help(random.choice)
Help on method choice in module random:

choice(seq) method of random.Random instance
    Choose a random element from a non-empty sequence.
random.choices([1,2,3], k=5)
[2, 2, 2, 2, 2]
random.choices([1,2,3], 2)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-55-6667b11b7ef1> in <module>
----> 1 random.choices([1,2,3], 2)

~/anaconda3/lib/python3.7/random.py in choices(self, population, weights, cum_weights, k)
    355                 total = len(population)
    356                 return [population[_int(random() * total)] for i in range(k)]
--> 357             cum_weights = list(_itertools.accumulate(weights))
    358         elif weights is not None:
    359             raise TypeError('Cannot specify both weights and cumulative weights')

TypeError: 'int' object is not iterable
random.choices([1,2,3], [1,1,1])
[3]
random.choices([1,2,3], [1,2])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-57-5490eea5d8e8> in <module>
----> 1 random.choices([1,2,3], [1,2])

~/anaconda3/lib/python3.7/random.py in choices(self, population, weights, cum_weights, k)
    359             raise TypeError('Cannot specify both weights and cumulative weights')
    360         if len(cum_weights) != len(population):
--> 361             raise ValueError('The number of weights does not match the population')
    362         bisect = _bisect.bisect
    363         total = cum_weights[-1]

ValueError: The number of weights does not match the population
random.choices([1,2,3])
[3]
help(random.choices)
Help on method choices in module random:

choices(population, weights=None, *, cum_weights=None, k=1) method of random.Random instance
    Return a k sized list of population elements chosen with replacement.

    If the relative weights or cumulative weights are not specified,
    the selections are made with equal probability.
random.choices([1,2,3], k=5)
[3, 1, 1, 3, 2]
help(np.random.random_sample)
Help on built-in function random_sample:

random_sample(...) method of numpy.random.mtrand.RandomState instance
    random_sample(size=None)

    Return random floats in the half-open interval [0.0, 1.0).

    Results are from the "continuous uniform" distribution over the
    stated interval. To sample :math:`Unif[a, b), b > a` multiply
    the output of `random_sample` by `(b-a)` and add `a`::

      (b - a) * random_sample() + a

    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape. If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn. Default is None, in which case a
        single value is returned.

    Returns
    -------
    out : float or ndarray of floats
        Array of random floats of shape `size` (unless ``size=None``, in which
        case a single float is returned).

    Examples
    --------
    >>> np.random.random_sample()
    0.47108547995356098 # random
    >>> type(np.random.random_sample())
    <class 'float'>
    >>> np.random.random_sample((5,))
    array([ 0.30220482, 0.86820401, 0.1654503 , 0.11659149, 0.54323428]) # random

    Three-by-two array of random numbers from [-5, 0):

    >>> 5 * np.random.random_sample((3, 2)) - 5
    array([[-3.99149989, -0.52338984], # random
           [-2.99091858, -0.79479508],
           [-1.23204345, -1.75224494]])