NumPy arrays are important because they let you express batch operations on data without writing explicit for loops. This practice is usually called vectorization.
import pandas as pd
import numpy as np
# creating an array
data1 = [2, 3, 4, 5, 6, 7]
np.array(data1)
array([2, 3, 4, 5, 6, 7])
data2 = ([1, 2, 3, 4], [5, 6, 7, 8])
arr2 = np.array(data2)
arr2
array([[1, 2, 3, 4], [5, 6, 7, 8]])
arr2.shape
(2, 4)
np.zeros(10)
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.ones((2, 3))
array([[ 1., 1., 1.], [ 1., 1., 1.]])
np.arange(15)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
data = np.array([[2, 4, 6],
[3, 5, 9]])
data
array([[2, 4, 6], [3, 5, 9]])
data + data + data
array([[ 6, 12, 18], [ 9, 15, 27]])
data * 2
array([[ 4, 8, 12], [ 6, 10, 18]])
data.shape
(2, 3)
data.dtype
dtype('int64')
df = pd.DataFrame(data)
df
|   | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 2 | 4 | 6 |
| 1 | 3 | 5 | 9 |
data1 = ([1, 2, 4.5], [1.1, 3.4, 3.9], [0, 0.88, 0.45])
arr1 = np.array(data1)
arr1
array([[ 1. , 2. , 4.5 ], [ 1.1 , 3.4 , 3.9 ], [ 0. , 0.88, 0.45]])
arr1.dtype
dtype('float64')
arr1.astype(np.int32)
array([[1, 2, 4], [1, 3, 3], [0, 0, 0]], dtype=int32)
arr1[0]
array([ 1. , 2. , 4.5])
arr1[0][1]
2.0
arr1[1]
array([ 1.1, 3.4, 3.9])
arr2 = np.random.randn(2, 3).astype(np.float64)
arr2
array([[ 1.18302338, -1.14660054, -0.76273185], [ 1.16602911, 0.08769594, 1.23586991]])
arr3 = np.dot(arr2.T, arr2)
arr3
array([[ 2.7591682 , -1.25419923, 0.53873069], [-1.25419923, 1.32238338, 0.98292953], [ 0.53873069, 0.98292953, 2.10913431]])
np.sqrt(arr3)
/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in sqrt
  """Entry point for launching an IPython kernel.
array([[ 1.66107441, nan, 0.73398276], [ nan, 1.1499493 , 0.99142802], [ 0.73398276, 0.99142802, 1.45228589]])
arr2.mean()
0.29388099170824028
arr2.std()
0.97232450582277508
arr2.sum()
1.7632859502494416
arr2.sum(0)
array([ 2.34905249, -1.0589046 , 0.47313806])