主要是针对《利用Python进行数据分析-第二版》进行第三次学习,并将其中关于numpy和pandas的部分代码进行整理。
numpy
代码语言:javascript复制import numpy as np
my_arr = np.arange(1000000)
代码语言:javascript复制my_list = list(range(1000000))
代码语言:javascript复制%time for _ in range(10): my_arr2 = my_arr * 2
代码语言:javascript复制CPU times: user 21.2 ms, sys: 11.2 ms, total: 32.4 ms
Wall time: 32.6 ms
代码语言:javascript复制%time for _ in range(10): my_list = [x * 2 for x in my_list]
代码语言:javascript复制CPU times: user 1.21 s, sys: 252 ms, total: 1.47 s
Wall time: 1.71 s
代码语言:javascript复制data = np.random.randn(2,3)
data
代码语言:javascript复制array([[ 0.90479239, -3.06481532, -1.43044274],
[ 0.77066475, 0.1601102 , 0.48827119]])
代码语言:javascript复制data * 10
代码语言:javascript复制array([[ 9.04792392, -30.64815316, -14.30442743],
[ 7.70664748, 1.60110198, 4.8827119 ]])
代码语言:javascript复制data + data
代码语言:javascript复制array([[ 1.80958478, -6.12963063, -2.86088549],
[ 1.5413295 , 0.3202204 , 0.97654238]])
代码语言:javascript复制data.shape
代码语言:javascript复制(2, 3)
代码语言:javascript复制data.dtype
代码语言:javascript复制dtype('float64')
代码语言:javascript复制data1 = [1,3,54,23]
arr1 = np.array(data1)
arr1
代码语言:javascript复制array([ 1, 3, 54, 23])
代码语言:javascript复制data2 = [[1,3,2,2], [93,2,4,3]]
arr2 = np.array(data2)
arr2
代码语言:javascript复制array([[ 1, 3, 2, 2],
[93, 2, 4, 3]])
查看属性
代码语言:javascript复制arr2.ndim
代码语言:javascript复制2
代码语言:javascript复制arr2.shape
代码语言:javascript复制(2, 4)
代码语言:javascript复制np.size(arr2)
代码语言:javascript复制8
代码语言:javascript复制np.zeros(10)
代码语言:javascript复制array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
代码语言:javascript复制np.zeros((2,3,4))
代码语言:javascript复制array([[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]])
代码语言:javascript复制np.empty((2,3,4))
代码语言:javascript复制array([[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]])
代码语言:javascript复制np.arange(15)
代码语言:javascript复制array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
numpy类型转换
代码语言:javascript复制arr1 = np.array([1,2,3], dtype=np.float64)
arr1.dtype
代码语言:javascript复制dtype('float64')
代码语言:javascript复制arr = np.array([1,2,3,4,5])
arr.dtype
代码语言:javascript复制dtype('int64')
代码语言:javascript复制float_arr = arr.astype(np.float64)
float_arr.dtype
代码语言:javascript复制dtype('float64')
代码语言:javascript复制arr = np.array([1, 3.8, 0.9, 8.3, 1.2, 2.4])
arr.astype(np.int32)
代码语言:javascript复制array([1, 3, 0, 8, 1, 2], dtype=int32)
代码语言:javascript复制numeric_strings = np.array(['1.2', '-9.6', '3.4'])
numeric_strings.astype(float)
代码语言:javascript复制array([ 1.2, -9.6, 3.4])
代码语言:javascript复制int_array = np.arange(10)
calibers = np.array([.22, .270, .357])
代码语言:javascript复制int_array.astype(calibers.dtype)
代码语言:javascript复制array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
numpy运算
代码语言:javascript复制arr = np.array([[1.2, 2.0, 3.0], [4.0,5.0,6.]])
arr
代码语言:javascript复制array([[1.2, 2. , 3. ],
[4. , 5. , 6. ]])
代码语言:javascript复制arr * arr
代码语言:javascript复制array([[ 1.44, 4. , 9. ],
[16. , 25. , 36. ]])
代码语言:javascript复制arr ** 0.5
代码语言:javascript复制array([[1.09544512, 1.41421356, 1.73205081],
[2. , 2.23606798, 2.44948974]])
代码语言:javascript复制arr2 = np.array([[1.8, 4., 1.],[7., 2., 12.]])
arr2
代码语言:javascript复制array([[ 1.8, 4. , 1. ],
[ 7. , 2. , 12. ]])
代码语言:javascript复制arr2 > arr
代码语言:javascript复制array([[ True, True, False],
[ True, False, True]])
索引和切片
切片是原数组的视图(view)而不是副本:改变切片中的数据,会同时改变原数组中的值。
代码语言:javascript复制arr = np.arange(10)
arr
代码语言:javascript复制array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
代码语言:javascript复制arr[3]
代码语言:javascript复制3
代码语言:javascript复制arr[4:8]
代码语言:javascript复制array([4, 5, 6, 7])
代码语言:javascript复制arr[4:8] = 12 # 切片是引用传递;改变切片数据,原数组中的数据也会改变
代码语言:javascript复制arr
代码语言:javascript复制array([ 0, 1, 2, 3, 12, 12, 12, 12, 8, 9])
代码语言:javascript复制arr_slice = arr[4:8]
arr_slice
代码语言:javascript复制array([12, 12, 12, 12])
代码语言:javascript复制arr_slice[1] = 12345
代码语言:javascript复制arr
代码语言:javascript复制array([ 0, 1, 2, 3, 12, 12345, 12, 12, 8,
9])
代码语言:javascript复制arr_slice
代码语言:javascript复制array([ 12, 12345, 12, 12])
代码语言:javascript复制arr_slice[:] = 64
代码语言:javascript复制arr_slice
代码语言:javascript复制array([64, 64, 64, 64])
高维数组
代码语言:javascript复制arr2d = np.array([[1,2,3],
[4,5,6],
[7,8,9]])
arr2d
代码语言:javascript复制array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
代码语言:javascript复制arr2d[2]
代码语言:javascript复制array([7, 8, 9])
代码语言:javascript复制arr2d[0][2]
代码语言:javascript复制3
代码语言:javascript复制arr2d[0,2] # axis=0表示行,axis=1表示列
代码语言:javascript复制3
代码语言:javascript复制arr3d = np.array([[[1,2,3],[4,5,6]],
[[7,8,9],[10,11,12]]])
arr3d
代码语言:javascript复制array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
代码语言:javascript复制old_values = arr3d[0].copy()
代码语言:javascript复制arr3d[0] = 42 # 将整个第一维的数据进行赋值42
代码语言:javascript复制arr3d
代码语言:javascript复制array([[[42, 42, 42],
[42, 42, 42]],
[[ 7, 8, 9],
[10, 11, 12]]])
代码语言:javascript复制arr3d[0] = old_values
代码语言:javascript复制arr3d
代码语言:javascript复制array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
代码语言:javascript复制arr3d[1,0]
代码语言:javascript复制array([7, 8, 9])
代码语言:javascript复制x = arr3d[1]
x
代码语言:javascript复制array([[ 7, 8, 9],
[10, 11, 12]])
代码语言:javascript复制x[0]
代码语言:javascript复制array([7, 8, 9])
切片索引
使用数字进行行和列上的切片
代码语言:javascript复制arr
代码语言:javascript复制array([ 0, 1, 2, 3, 64, 64, 64, 64, 8, 9])
代码语言:javascript复制arr[1:6]
代码语言:javascript复制array([ 1, 2, 3, 64, 64])
代码语言:javascript复制arr2d
代码语言:javascript复制array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
代码语言:javascript复制arr2d[:2]
代码语言:javascript复制array([[1, 2, 3],
[4, 5, 6]])
代码语言:javascript复制arr2d[:2, 1:]
代码语言:javascript复制array([[2, 3],
[5, 6]])
代码语言:javascript复制arr2d[1, :2]
代码语言:javascript复制array([4, 5])
代码语言:javascript复制arr2d[:2, 2]
代码语言:javascript复制array([3, 6])
代码语言:javascript复制arr2d[:,:1]
代码语言:javascript复制array([[1],
[4],
[7]])
代码语言:javascript复制arr2d[:2,:] = 10
代码语言:javascript复制arr2d
代码语言:javascript复制array([[10, 10, 10],
[10, 10, 10],
[ 7, 8, 9]])
布尔索引
代码语言:javascript复制names = np.array(['Bob', 'Joe', 'Will', 'Bob','Will', 'Joe', 'Joe'])
代码语言:javascript复制data = np.random.randn(7,4)
代码语言:javascript复制data
代码语言:javascript复制array([[-0.10772198, -0.29184497, 1.3724022 , 0.07641245],
[-0.56546012, -0.21549842, 0.65314245, 2.03431214],
[ 2.01365241, -1.14999284, -0.14336555, -2.08048877],
[ 1.34347561, -0.35609267, -0.55492504, -1.40518892],
[ 0.26469331, 0.61377631, 0.45934999, 0.63168711],
[ 0.37609032, 1.593136 , 1.95688825, -0.53681938],
[ 1.21065416, -0.87146262, -1.26135673, 1.2322734 ]])
代码语言:javascript复制names == "Bob"
代码语言:javascript复制array([ True, False, False, True, False, False, False])
代码语言:javascript复制data[names == "Bob"] # 选择出Bob对应的行
代码语言:javascript复制array([[-0.10772198, -0.29184497, 1.3724022 , 0.07641245],
[ 1.34347561, -0.35609267, -0.55492504, -1.40518892]])
代码语言:javascript复制data[0]
代码语言:javascript复制array([-0.10772198, -0.29184497, 1.3724022 , 0.07641245])
代码语言:javascript复制data[3]
代码语言:javascript复制array([ 1.34347561, -0.35609267, -0.55492504, -1.40518892])
代码语言:javascript复制data[names == 'Bob',2:]
代码语言:javascript复制array([[ 1.3724022 , 0.07641245],
[-0.55492504, -1.40518892]])
代码语言:javascript复制data[names != "Bob"]
代码语言:javascript复制array([[-0.56546012, -0.21549842, 0.65314245, 2.03431214],
[ 2.01365241, -1.14999284, -0.14336555, -2.08048877],
[ 0.26469331, 0.61377631, 0.45934999, 0.63168711],
[ 0.37609032, 1.593136 , 1.95688825, -0.53681938],
[ 1.21065416, -0.87146262, -1.26135673, 1.2322734 ]])
代码语言:javascript复制data[~(names == "Bob")] # 取反的另一种操作
代码语言:javascript复制array([[-0.56546012, -0.21549842, 0.65314245, 2.03431214],
[ 2.01365241, -1.14999284, -0.14336555, -2.08048877],
[ 0.26469331, 0.61377631, 0.45934999, 0.63168711],
[ 0.37609032, 1.593136 , 1.95688825, -0.53681938],
[ 1.21065416, -0.87146262, -1.26135673, 1.2322734 ]])
代码语言:javascript复制cond = (names == "Bob")
data[~cond]
代码语言:javascript复制array([[-0.56546012, -0.21549842, 0.65314245, 2.03431214],
[ 2.01365241, -1.14999284, -0.14336555, -2.08048877],
[ 0.26469331, 0.61377631, 0.45934999, 0.63168711],
[ 0.37609032, 1.593136 , 1.95688825, -0.53681938],
[ 1.21065416, -0.87146262, -1.26135673, 1.2322734 ]])
代码语言:javascript复制data[(names == "Bob") | (names == "Joe")]
代码语言:javascript复制array([[-0.10772198, -0.29184497, 1.3724022 , 0.07641245],
[-0.56546012, -0.21549842, 0.65314245, 2.03431214],
[ 1.34347561, -0.35609267, -0.55492504, -1.40518892],
[ 0.37609032, 1.593136 , 1.95688825, -0.53681938],
[ 1.21065416, -0.87146262, -1.26135673, 1.2322734 ]])
代码语言:javascript复制data[data < 0] = 0 # 将负值设为0
data
代码语言:javascript复制array([[0. , 0. , 1.3724022 , 0.07641245],
[0. , 0. , 0.65314245, 2.03431214],
[2.01365241, 0. , 0. , 0. ],
[1.34347561, 0. , 0. , 0. ],
[0.26469331, 0.61377631, 0.45934999, 0.63168711],
[0.37609032, 1.593136 , 1.95688825, 0. ],
[1.21065416, 0. , 0. , 1.2322734 ]])
代码语言:javascript复制data[names != "Joe"] = 7 # 将不是Joe的行赋值为7
data
代码语言:javascript复制array([[7. , 7. , 7. , 7. ],
[0. , 0. , 0.65314245, 2.03431214],
[7. , 7. , 7. , 7. ],
[7. , 7. , 7. , 7. ],
[7. , 7. , 7. , 7. ],
[0.37609032, 1.593136 , 1.95688825, 0. ],
[1.21065416, 0. , 0. , 1.2322734 ]])
花式索引
花式索引使用整数数组进行索引,并且总是将数据复制到新数组中。只传入一个整数数组时,选取的是对应的整行;当为每个轴都传入一个整数数组时,结果总是一维的。
代码语言:javascript复制arr = np.empty((8,4))
arr
代码语言:javascript复制array([[-2.31584178e+077, -2.31584178e+077, 1.33397724e-322,
0.00000000e+000],
[ 0.00000000e+000, 5.02034658e+175, 2.70088625e-056,
3.69670980e-057],
[ 4.88947235e-033, 5.18944342e+170, 1.47763641e+248,
1.16096346e-028],
[ 7.69165785e+218, 1.35617292e+248, 5.98432176e-067,
4.01584398e-057],
[ 1.09392309e-042, 6.28964548e-066, 4.30240881e-096,
6.32299154e+233],
[ 6.48224638e+170, 5.22411352e+257, 5.74020278e+180,
8.37174974e-144],
[ 1.41529402e+161, 9.16651763e-072, 4.95296723e+097,
7.50274445e-067],
[ 3.07800883e+126, 2.08600674e-308, -2.31584178e+077,
-2.31584178e+077]])
代码语言:javascript复制for i in range(8):
    arr[i] = i
代码语言:javascript复制arr
代码语言:javascript复制array([[0., 0., 0., 0.],
[1., 1., 1., 1.],
[2., 2., 2., 2.],
[3., 3., 3., 3.],
[4., 4., 4., 4.],
[5., 5., 5., 5.],
[6., 6., 6., 6.],
[7., 7., 7., 7.]])
代码语言:javascript复制arr[[4,3,0,6]]
代码语言:javascript复制array([[4., 4., 4., 4.],
[3., 3., 3., 3.],
[0., 0., 0., 0.],
[6., 6., 6., 6.]])
代码语言:javascript复制arr = np.arange(32).reshape((8,4))
arr
代码语言:javascript复制array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23],
[24, 25, 26, 27],
[28, 29, 30, 31]])
代码语言:javascript复制arr[[1,5,7,2],[0,3,1,2]]
代码语言:javascript复制array([ 4, 23, 29, 10])
代码语言:javascript复制arr[[1,5,7,2]][:,[0,3,1,2]]
代码语言:javascript复制array([[ 4, 7, 5, 6],
[20, 23, 21, 22],
[28, 31, 29, 30],
[ 8, 11, 9, 10]])
代码语言:javascript复制arr[[1,5,7,2]]
代码语言:javascript复制array([[ 4, 5, 6, 7],
[20, 21, 22, 23],
[28, 29, 30, 31],
[ 8, 9, 10, 11]])
代码语言:javascript复制arr[[1,5,7,2]][:,[0,3,1,2]]
代码语言:javascript复制array([[ 4, 7, 5, 6],
[20, 23, 21, 22],
[28, 31, 29, 30],
[ 8, 11, 9, 10]])
数据转换和轴对换
代码语言:javascript复制arr = np.arange(15).reshape((3,5))
arr
代码语言:javascript复制array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
代码语言:javascript复制arr.T # 求转置
代码语言:javascript复制array([[ 0, 5, 10],
[ 1, 6, 11],
[ 2, 7, 12],
[ 3, 8, 13],
[ 4, 9, 14]])
代码语言:javascript复制np.dot(arr.T, arr)
代码语言:javascript复制array([[125, 140, 155, 170, 185],
[140, 158, 176, 194, 212],
[155, 176, 197, 218, 239],
[170, 194, 218, 242, 266],
[185, 212, 239, 266, 293]])
代码语言:javascript复制arr
代码语言:javascript复制array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
代码语言:javascript复制arr.swapaxes(1,0)
代码语言:javascript复制array([[ 0, 5, 10],
[ 1, 6, 11],
[ 2, 7, 12],
[ 3, 8, 13],
[ 4, 9, 14]])
通用函数
代码语言:javascript复制arr
代码语言:javascript复制array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
代码语言:javascript复制np.sqrt(arr)
代码语言:javascript复制array([[0. , 1. , 1.41421356, 1.73205081, 2. ],
[2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ],
[3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739]])
代码语言:javascript复制np.exp(arr)
代码语言:javascript复制array([[1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
5.45981500e+01],
[1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,
8.10308393e+03],
[2.20264658e+04, 5.98741417e+04, 1.62754791e+05, 4.42413392e+05,
1.20260428e+06]])
代码语言:javascript复制x = np.random.randn(8)
代码语言:javascript复制y = np.random.randn(8)
代码语言:javascript复制np.maximum(x,y)
代码语言:javascript复制array([-0.53590291, -0.52641016, 1.55493034, 0.45145878, 0.56083064,
0.98578306, 1.34527153, 0.23153891])
代码语言:javascript复制reminder, whole_apart = np.modf(x)
print(reminder) # 小数部分
print(whole_apart) # 整数部分
代码语言:javascript复制[-0.96304512 -0.52641016 0.55493034 0.40595377 -0.02314699 0.56459784
0.34527153 0.04450806]
[-0. -0. 1. 0. -1. 0. 1. 0.]
利用数组进行数据处理
代码语言:javascript复制points = np.arange(-5, 5, 0.01)
代码语言:javascript复制x, y = np.meshgrid(points,points)
print(x)
print(y)
代码语言:javascript复制[[-5. -4.99 -4.98 ... 4.97 4.98 4.99]
[-5. -4.99 -4.98 ... 4.97 4.98 4.99]
[-5. -4.99 -4.98 ... 4.97 4.98 4.99]
...
[-5. -4.99 -4.98 ... 4.97 4.98 4.99]
[-5. -4.99 -4.98 ... 4.97 4.98 4.99]
[-5. -4.99 -4.98 ... 4.97 4.98 4.99]]
[[-5. -5. -5. ... -5. -5. -5. ]
[-4.99 -4.99 -4.99 ... -4.99 -4.99 -4.99]
[-4.98 -4.98 -4.98 ... -4.98 -4.98 -4.98]
...
[ 4.97 4.97 4.97 ... 4.97 4.97 4.97]
[ 4.98 4.98 4.98 ... 4.98 4.98 4.98]
[ 4.99 4.99 4.99 ... 4.99 4.99 4.99]]
代码语言:javascript复制z = np.sqrt(x ** 2 + y ** 2)
z
代码语言:javascript复制array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
7.06400028],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
...,
[7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
7.04279774],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568]])
代码语言:javascript复制import matplotlib.pyplot as plt
代码语言:javascript复制plt.imshow(z, cmap=plt.cm.gray)
代码语言:javascript复制<matplotlib.image.AxesImage at 0x119c72e10>
重点:where方法使用
代码语言:javascript复制x = np.array([2.2, 1.3, 9.3, 6.2, 7.4])
y = np.array([8.2, 1.7, 5.3, 4.9, 9.2])
代码语言:javascript复制cond = np.array([True, False, True, False, True])
代码语言:javascript复制arr = np.where(cond, x, y) # True的时候选择x,False的时候选择y
arr
代码语言:javascript复制array([2.2, 1.7, 9.3, 4.9, 7.4])
代码语言:javascript复制z = np.random.randn(4, 4)
z
代码语言:javascript复制array([[ 0.66304028, 1.56104402, -1.64553308, 1.38520089],
[ 0.11778316, 0.73150194, 0.93620809, -1.04122122],
[ 1.02459426, 2.17382769, -0.20349334, -0.58694299],
[ 1.00603491, -1.44444792, -0.11128948, -0.69493726]])
代码语言:javascript复制z > 0
代码语言:javascript复制array([[ True, True, False, True],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False]])
代码语言:javascript复制np.where(z > 0, 2, -2)
代码语言:javascript复制array([[ 2, 2, -2, 2],
[ 2, 2, 2, -2],
[ 2, 2, -2, -2],
[ 2, -2, -2, -2]])
代码语言:javascript复制np.where(z > 0, 2, z)
代码语言:javascript复制array([[ 2. , 2. , -1.64553308, 2. ],
[ 2. , 2. , 2. , -1.04122122],
[ 2. , 2. , -0.20349334, -0.58694299],
[ 2. , -1.44444792, -0.11128948, -0.69493726]])
数组和统计方法
代码语言:javascript复制arr
代码语言:javascript复制array([2.2, 1.7, 9.3, 4.9, 7.4])
代码语言:javascript复制print(np.mean(arr))
arr.mean()
代码语言:javascript复制5.1
5.1
代码语言:javascript复制arr.sum()
代码语言:javascript复制25.5
代码语言:javascript复制arr.mean(axis=0) # arr是一维数组,axis=0是唯一的轴,结果与arr.mean()相同
代码语言:javascript复制5.1
代码语言:javascript复制arr.cumsum(axis=0)
代码语言:javascript复制array([ 2.2, 3.9, 13.2, 18.1, 25.5])
代码语言:javascript复制np.argmin(arr)
代码语言:javascript复制1
代码语言:javascript复制np.argmax(arr)
代码语言:javascript复制2
sum()函数统计True的个数
代码语言:javascript复制arr = np.random.randn(100)
arr
代码语言:javascript复制array([-3.22314012e-01, -4.20983200e-01, -8.60895143e-01, 1.19762451e+00,
-1.18101654e+00, -1.40988582e+00, 2.69878711e-01, 9.49317233e-01,
-6.17494119e-01, -5.93705233e-01, -1.73454244e+00, 2.44835633e-02,
-1.58724929e+00, -1.93150694e-01, 3.68590165e-01, 5.57850914e-02,
-4.08251237e-01, -3.18700903e-01, 1.10942971e-01, 1.45641795e+00,
1.57429326e+00, -7.83492435e-01, -2.17803238e+00, 7.95258546e-01,
-1.86616031e-01, 8.36777476e-01, 2.80993789e-01, 1.32231167e+00,
4.52074029e-01, 1.34281313e+00, 5.91006237e-01, -8.52972682e-01,
-2.98589281e-01, -3.15472509e-01, -1.53592401e-01, -2.91603279e-01,
2.00179770e-01, 8.93278122e-01, -1.92902599e+00, -6.42802967e-02,
-2.26946105e-01, -4.14173881e-01, -5.61157264e-01, 5.21075788e-01,
1.34999883e+00, -4.56088898e-01, -6.28535428e-02, -1.71763669e+00,
-1.09604392e+00, 5.71857881e-01, -2.40084694e-02, -1.47217876e+00,
2.03613785e+00, 2.58368897e-01, 5.21694750e-01, 3.57046139e-01,
-8.90487809e-02, 1.05867421e+00, -3.73744876e-01, -1.44889846e+00,
1.73627536e+00, -1.23398347e+00, 1.24276070e+00, -8.35275697e-01,
3.95759416e-01, -1.41817016e+00, -1.13538227e-01, -5.51357114e-01,
-1.22022368e+00, 6.42102384e-02, 4.05726450e-01, -9.20956259e-04,
1.78650830e+00, -1.82602641e-01, 4.55384759e-01, -9.64795416e-01,
-1.10006078e+00, -7.36474954e-01, -1.81803987e+00, -6.56197052e-01,
2.33504470e-01, 5.16974528e-01, 4.59053845e-01, 4.65928281e-01,
9.68000084e-01, 1.05966457e+00, -9.25582483e-01, -1.17524366e+00,
3.16498243e-01, -3.36469567e-01, 3.79026924e-01, -3.63310587e-01,
7.75914268e-01, 1.93695657e-01, 1.17826822e+00, 1.20396230e+00,
1.53059394e+00, 7.45524766e-01, 3.62200452e-02, 2.34353279e-01])
代码语言:javascript复制(arr > 0).sum()
代码语言:javascript复制49
代码语言:javascript复制arr_bool = arr > 0
arr_bool
代码语言:javascript复制array([False, False, False, True, False, False, True, True, False,
False, False, True, False, False, True, True, False, False,
True, True, True, False, False, True, False, True, True,
True, True, True, True, False, False, False, False, False,
True, True, False, False, False, False, False, True, True,
False, False, False, False, True, False, False, True, True,
True, True, False, True, False, False, True, False, True,
False, True, False, False, False, False, True, True, False,
True, False, True, False, False, False, False, False, True,
True, True, True, True, True, False, False, True, False,
True, False, True, True, True, True, True, True, True,
True])
代码语言:javascript复制arr_bool.any()
代码语言:javascript复制True
代码语言:javascript复制arr_bool.all()
代码语言:javascript复制False
唯一值及成员资格
成员资格:判断一个数组中的元素是否在另一个数组中,返回的是布尔型数组
代码语言:javascript复制names
代码语言:javascript复制array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
代码语言:javascript复制np.unique(names)
代码语言:javascript复制array(['Bob', 'Joe', 'Will'], dtype='<U4')
代码语言:javascript复制sorted(set(names)) # python的方法,先去重再排序
代码语言:javascript复制['Bob', 'Joe', 'Will']
代码语言:javascript复制np.in1d(names, ["Joe", "Bob"])
代码语言:javascript复制array([ True, True, False, True, False, True, True])
线性代数部分
代码语言:javascript复制x = np.array([[9,8,7],[6,5,4]])
代码语言:javascript复制y = np.array([[1,2],[3,4],[5,6]])
y
代码语言:javascript复制array([[1, 2],
[3, 4],
[5, 6]])
代码语言:javascript复制print(np.dot(x,y))
x.dot(y)
代码语言:javascript复制[[68 92]
[41 56]]
array([[68, 92],
[41, 56]])
代码语言:javascript复制np.dot(x, np.ones(3))
代码语言:javascript复制array([24., 15.])
代码语言:javascript复制x @ np.ones(3)
代码语言:javascript复制array([24., 15.])
代码语言:javascript复制# numpy.linalg
from numpy.linalg import inv, qr
代码语言:javascript复制x = np.random.randn(5,5)
代码语言:javascript复制mat = x.T.dot(x)
代码语言:javascript复制inv(mat)
代码语言:javascript复制array([[ 3.97162611, 10.06047893, 20.33957725, 41.96110271,
-13.90272187],
[ 10.06047893, 27.43032073, 55.05907039, 113.90833794,
-37.73094005],
[ 20.33957725, 55.05907039, 111.1893652 , 229.79046577,
-76.19206977],
[ 41.96110271, 113.90833794, 229.79046577, 475.7391915 ,
-157.59917145],
[ -13.90272187, -37.73094005, -76.19206977, -157.59917145,
52.39033219]])
代码语言:javascript复制mat.dot(inv(mat))
代码语言:javascript复制array([[ 1.00000000e+00, -4.07659961e-15, 9.20702620e-15,
-4.41896636e-15, -1.84962408e-15],
[-4.17848390e-15, 1.00000000e+00, 3.46416569e-15,
3.80287405e-15, -7.28660199e-15],
[-1.50445857e-14, -9.63178031e-15, 1.00000000e+00,
-2.43789390e-13, -1.83785834e-15],
[ 6.79492973e-15, -8.93413272e-15, 3.28937408e-14,
1.00000000e+00, 1.59360368e-14],
[-5.98805626e-15, -8.68880470e-15, -9.44099027e-14,
-3.39755600e-14, 1.00000000e+00]])
代码语言:javascript复制q, r = qr(mat)
代码语言:javascript复制print("r:\n", r)
q
代码语言:javascript复制r:
[[-4.54992004e+00 -9.08567164e-01 4.77266232e+00 -8.88857419e-01
2.40794600e+00]
[ 0.00000000e+00 -7.07679685e+00 2.64829180e+00 1.05836155e+00
1.94029724e+00]
[ 0.00000000e+00 0.00000000e+00 -5.97731101e+00 1.81171976e+00
-3.24859061e+00]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 -1.85854206e+00
-5.60796440e+00]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
5.34488990e-03]]
array([[-0.91170893, 0.10377487, -0.38996846, -0.02050499, -0.07430852],
[-0.02064984, -0.95935654, -0.16282362, -0.10966861, -0.20166772],
[ 0.38363091, 0.18615893, -0.78064734, 0.20716887, -0.40723822],
[-0.05534304, 0.16086956, 0.35220049, -0.37075471, -0.84235022],
[ 0.1346287 , 0.09127778, -0.2965765 , -0.89842882, 0.28002056]])
伪随机数生成
代码语言:javascript复制samples = np.random.normal(size=(4,4))
samples
代码语言:javascript复制array([[-0.91715594, -1.39832053, 1.59702221, 1.10642995],
[-0.67526269, 0.54100686, 0.31399945, -0.94730476],
[-0.74915956, 0.33955031, 1.09219803, -0.603868 ],
[ 2.01827315, 0.80366446, 0.14261334, 0.99553088]])
代码语言:javascript复制# 随机种子
np.random.seed(1234) # 全局种子
代码语言:javascript复制rng = np.random.RandomState(1234)
rng.randn(10)
代码语言:javascript复制array([ 0.47143516, -1.19097569, 1.43270697, -0.3126519 , -0.72058873,
0.88716294, 0.85958841, -0.6365235 , 0.01569637, -2.24268495])
随机漫步demo
代码语言:javascript复制# python
import random
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    step = 1 if random.randint(0,1) else -1
    position += step
    walk.append(position)
代码语言:javascript复制plt.plot(walk[:100])
代码语言:javascript复制[<matplotlib.lines.Line2D at 0x11b0a6dd8>]
代码语言:javascript复制# numpy
nsteps = 1000
代码语言:javascript复制draws = np.random.randint(0,2,size=nsteps)
代码语言:javascript复制steps = np.where(draws > 0, 1, -1) # draws只取0或1:大于0填充1,否则(等于0)填充-1
代码语言:javascript复制walk = steps.cumsum()
代码语言:javascript复制walk.min()
代码语言:javascript复制-9
代码语言:javascript复制walk.max()
代码语言:javascript复制60