需要记住的是: numpy arrays consistently abide by the rule that operations are applied element-wise. Thus, if a and b are numpy arrays, then a*b is the array formed by multiplying the components element-wise。
即对于ndarray, * 表示的是multiplying the components element-wise 必要时需要广播,按照广播规则进行广播,再进行multiplying the components element-wise.
举例:
>>> import numpy as np
>>> a = np.array(range(6)).reshape((2,3))
>>> b = np.array([1,0,1])
>>> a
array([[0, 1, 2],
[3, 4, 5]])
>>> b
array([1, 0, 1])
>>> c= a*b
>>> c
array([[0, 0, 2],
[3, 0, 5]])
>>> d = a*b.T
>>> d
array([[0, 0, 2],
[3, 0, 5]])
而对于matrix,* 则表示矩阵相乘,运算必须满足矩阵相乘的法则(左矩阵的列数必须等于右矩阵的行数),否则会报错。
例如:
P=matrix([[ 0.11 , 0.06 , 0.25 ],
[ 0.04 , 0.4 , 0.31 ],
[ 0.07 , 0.07 , 0.11 ],
[ 0.1 , 0.1 , 0.15 ],
[ 0.01 , 0.4 , 0.4 ],
[ 0.005, 0.2 , 0.4 ],
[ 0.005, 0.12 , 0.38 ]])
Q=matrix([[-0.43935806],
[-0.83242645],
[-0.21580417],
[-0.29555744],
[-1.00681157],
[-0.85996415],
[-0.76464043]])
P*Q
# 会报错
ValueError: shapes (7,3) and (7,1) not aligned: 3 (dim 1) != 7 (dim 0)
>>> A=np.matrix(a)
>>> B=np.matrix(b)
>>> A
matrix([[0, 1, 2],
[3, 4, 5]])
>>> B
matrix([[1, 0, 1]])
>>> C=A*B
Traceback (most recent call last):
File "" , line 1, in
File "/usr/lib/python2.7/dist-packages/numpy/matrixlib/defmatrix.py", line 341, in __mul__
return N.dot(self, asmatrix(other))
ValueError: objects are not aligned
# 必须遵守矩阵相乘的法则
>>> C=A*B.T
>>> C
matrix([[2],
[8]])
官方文档: Dot product of two arrays.
For 2-D arrays it is equivalent to matrix multiplication, and for 1-D arrays to inner product of vectors (without complex conjugation). For N dimensions it is a sum product over the last axis of a and the second-to-last of b.
所以对于ndarray ,一般情况下,都是进行矩阵乘法或者向量的内积运算。但这仅仅是等价于矩阵相乘,并不等于就是矩阵相乘。对于ndarray,有时,dot的运算并不要求操作数像矩阵相乘的要求那么严格,当然相乘的结果的格式也不是矩阵,而是数组,举例:
>>> np.dot(a,b)
array([2, 8]) # a 2-D数组, b 1-D数组,不论b是否转置,得到的都得到相同的1-D数组
>>> np.dot(a,b.T)
array([2, 8])
但是对于matrix,矩阵相乘就是矩阵相乘,铁板钉钉,所以必须满足矩阵相乘的条件,举例:
>>> np.dot(A,B)
Traceback (most recent call last):
File "" , line 1, in
ValueError: objects are not aligned
>>> np.dot(A,B.T)
matrix([[2],
[8]])
# 必须遵守矩阵相乘的法则, 相乘的结果也保证了格式还是矩阵
multiply是numpy的ufunc函数,执行方法是对应元素相乘,而不是线性代数中的矩阵运算方式,类似于matlab中的点乘,当矩阵的维度不相同时,会根据一定的广播规则将维数扩充到一致的形式. 如果不能广播相同的size,multiply就会失败,举例:
>>> np.multiply(a,b)
array([[0, 0, 2],
[3, 0, 5]])
>>> np.multiply(a,b.T)
array([[0, 0, 2],
[3, 0, 5]])
>>> np.multiply(A,B)
matrix([[0, 0, 2],
[3, 0, 5]])
>>> np.multiply(A,B.T)
Traceback (most recent call last):
File "" , line 1, in
ValueError: operands could not be broadcast together with shapes (2,3) (3,1)
q=np.arange(0,16,1)
# array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
g=np.argwhere(q==7)
# array([[7]], dtype=int64)
np.squeeze(g)
# array(7, dtype=int64)
>>> x = np.arange(6).reshape(2,3)
>>> x
array([[0, 1, 2],
[3, 4, 5]])
>>> np.argwhere(x>1)
array([[0, 2],
[1, 0],
[1, 1],
[1, 2]])
a
"""
array([[0.03766775, 0.50672796],
[0.79020832, 0.3360658 ],
[0.42454363, 0.00087503]])
"""
# reshape 并不改变原始数组
a.reshape(2, 3)
"""
array([[0.03766775, 0.50672796, 0.79020832],
[0.3360658 , 0.42454363, 0.00087503]])
"""
# resize 会改变原始数组
a.resize(2, 3)
"""
array([[0.03766775, 0.50672796, 0.79020832],
[0.3360658 , 0.42454363, 0.00087503]])
"""
a.ravel() #展平数组
"""
array([0.03766775, 0.50672796, 0.79020832, 0.3360658 , 0.42454363,
0.00087503])
"""
np.vstack((a, b)) # 垂直拼合数组
np.hstack((a, b)) # 水平拼合数组
np.hsplit(a, 3) # 沿横轴分割数组
np.vsplit(a, 3) # 沿纵轴分割数组
>> type(np.newaxis)
NoneType
>> np.newaxis == None
True
# np.newaxis 在使用和功能上等价于 None,其实就是 None 的一个别名。
#------------------------------------------------------------
>> x = np.arange(3)
>> x
array([0, 1, 2])
>> x.shape
(3,)
>> x[:, np.newaxis]
array([[0],
[1],
[2]])
>> x[:, None]
array([[0],
[1],
[2]])
>> x[:, np.newaxis].shape
(3, 1)
##########################################
>>> b = x[None] # equals b = x[np.newaxis] and b = x[np.newaxis,:]
>>> b
array([[0, 1, 2]])
>>> x.shape
(3,)
>>> b.shape
(1, 3)
Z1 = np.random.randint(0,10,10)
Z2 = np.random.randint(0,10,10)
print("Z1:", Z1)
print("Z2:", Z2)
np.intersect1d(Z1,Z2)
"""
Z1: [6 6 9 9 0 1 0 3 4 2]
Z2: [5 4 2 7 8 8 7 1 0 0]
array([0, 1, 2, 4])
"""
Z = np.zeros((10,10),dtype=int)
Z[1::2,::2] = 1
Z[::2,1::2] = 1
Z
"""
array([[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
[0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]])
"""
Z = np.arange(11)
Z[(1 < Z) & (Z <= 9)] *= -1 # 将 (1, 9] 之间的数全部反转成负数
Z[Z.argmax()] = 0 # 将其中最大值替换成 0
#从随机一维数组中找出距离给定数值(0.5)最近的数
Z = np.random.uniform(0,1,(20,1))
print("随机数组: \n", Z)
z = 0.5
m = Z.flat[np.abs(Z - z).argmin()]
#将二维数组的前两行,前两列进行顺序交换
A = np.arange(25).reshape(5,5)
A[[0,1]] = A[[1,0]] # 前两行
A[:,[0,1]]=A[:,[1,0]] # 前两列
# 找到随机一维数组中前 p 个最大值
Z = np.random.randint(1,100,100)
p = 5
Z[np.argsort(Z)[-p:]] # np.argsort() 从小到大排序,输出索引
Z = np.random.random((5,5))
np.set_printoptions(precision=2)
# 生成含缺失值的 2 维数组
Z = np.random.rand(10,10)
a=np.random.randint(10, size=5)
b=np.random.randint(10, size=5)
Z[a,b] = np.nan
print("缺失值总数: \n", np.isnan(Z).sum())
print("缺失值索引: \n", np.where(np.isnan(Z)))
Z[np.sum(np.isnan(Z), axis=1) == 0] # 从随机数组中删除包含缺失值的行
Z = np.random.randint(0,100,25).reshape(5,5)
np.unique(Z, return_counts=True) # 返回值中,第 2 个数组对应第 1 个数组元素的数量
arr3=np.random.randint(-1,3,size=(2,3,4))
arr3
#array([[[ 0, 2, -1, 1],
# [ 2, 1, -1, -1],
# [ 0, 0, -1, 0]],
# [[ 0, -1, 2, 2],
# [ 1, -1, -1, 1],
# [ 0, 2, 0, 1]]])
arr3.sum(0)
#array([[ 0, 1, 1, 3],
# [ 3, 0, -2, 0],
# [ 0, 2, -1, 1]])
arr3.sum(1)
#array([[ 2, 3, -3, 0],
# [ 1, 0, 1, 4]])
arr3.sum(2)
#array([[ 2, 1, -1],
# [ 3, 0, 3]])
arr3.sum((2,1))
#array([2, 6])
arr3.sum((1,2))
#array([2, 6])
np.max(a, axis=0) # 返回每列最大值
np.min(a, axis=1) # 返回每行最小值
np.argmax(a, axis=0) # 返回每列最大值索引
np.argmin(a, axis=1) # 返回每行最小值索引
>>> a = np.array([[1, np.nan], [3, 4]])
#[[1, np.nan],
# [3, 4 ]]
>>> np.nanmean(a) #(1+3+4)/3
2.6666666666666665
>>> np.nanmean(a, axis=0)
array([ 2., 4.])
>>> np.nanmean(a, axis=1)
array([ 1., 3.5])
a=array([[ 3., nan, -1., 0.],
[ -1., -2., nan, 1.],
[ nan, nan, nan, nan]])
np.isnan(a)
#array([[False, True, False, False],
# [False, False, True, False],
# [ True, True, True, True]], dtype=bool)
np.isnan(a).all()
#False
np.isnan(a).all(0)
#array([False, False, False, False], dtype=bool)
np.isnan(a).all(1)
#array([False, False, True], dtype=bool)
np.isnan(a).any()
#True
np.isnan(a).any(0)
#array([ True, True, True, True], dtype=bool)
np.isnan(a).any(1)
#array([ True, True, True], dtype=bool)
使用np.corrcoef(a)
可计算行与行之间的相关系数。np.corrcoef(a,rowvar=0)
用于计算各列之间的相关系数,输出为相关系数矩阵。
a
"""
array([[1, 1, 2, 2, 3], # 特征 A
[2, 2, 3, 3, 5], # 特征 B
[1, 4, 2, 2, 3]]) # 特征 C
"""
np.corrcoef(a)
"""
[A] [B] [C]
array([[ 1. , 0.976, 0.105], [A]
[ 0.976, 1. , 0.179], [B]
[ 0.105, 0.179, 1. ]]) [C]
"""
np.corrcoef(a,rowvar=0)
"""
array([[ 1. , -0.189, 1. , 1. , 1. ],
[-0.189, 1. , -0.189, -0.189, -0.189],
[ 1. , -0.189, 1. , 1. , 1. ],
[ 1. , -0.189, 1. , 1. , 1. ],
[ 1. , -0.189, 1. , 1. , 1. ]])
"""
a = np.array([1, 2])
b = np.array([7, 8])
# 数学计算方法
print(np.sqrt(np.power((8-2), 2) + np.power((7-1), 2)))
# NumPy 计算
np.linalg.norm(b-a)
Z-Score 标准化公式:$z = \dfrac{x - \mu}{\sigma}$,其中 $\mu$ 为均值,$\sigma$ 为标准差。
# 根据公式定义函数
def zscore(x, axis=None):
    """Standardize ``x`` with the Z-score formula ``(x - mean) / std``.

    By default the whole array is standardized as one population. Pass
    ``axis=0`` to standardize each column independently, or ``axis=1`` to
    standardize each row independently.

    Parameters
    ----------
    x : array_like
        Input data. Lists and tuples are accepted as well as arrays.
    axis : int or None, optional
        Axis along which the mean and standard deviation are computed.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``. A slice whose standard
        deviation is zero (constant data) maps to all zeros.
    """
    x = np.asanyarray(x)
    # keepdims=True keeps the reduced axis so the statistics broadcast
    # back against ``x`` for any choice of ``axis``.
    center = x.mean(axis=axis, keepdims=True)
    spread = x.std(axis=axis, keepdims=True)
    # A constant slice has zero spread; dividing by 1 there yields 0
    # instead of 0/0 = NaN and a RuntimeWarning.
    safe_spread = np.where(spread == 0, 1, spread)
    return (x - center) / safe_spread
# 生成随机数据
Z = np.random.randint(10, size=(5,5))
print(Z)
zscore(Z)
Min-Max 标准化公式:$x' = \dfrac{x - x_{\min}}{x_{\max} - x_{\min}}$,结果落在 $[0, 1]$ 区间内。
# 根据公式定义函数
def min_max(x, axis=None):
    """Rescale ``x`` into ``[0, 1]`` with ``(x - min) / (max - min)``.

    By default the whole array is scaled as one population. Pass
    ``axis=0`` to scale each column independently, or ``axis=1`` to scale
    each row independently.

    Parameters
    ----------
    x : array_like
        Input data. Lists and tuples are accepted as well as arrays.
    axis : int or None, optional
        Axis along which the minimum and maximum are computed.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``. A slice whose minimum equals
        its maximum (constant data) maps to all zeros.
    """
    x = np.asanyarray(x)
    # Local names avoid shadowing the ``min``/``max`` builtins.
    lowest = x.min(axis=axis, keepdims=True)
    highest = x.max(axis=axis, keepdims=True)
    span = highest - lowest
    # A constant slice has zero span; dividing by 1 there yields 0
    # instead of 0/0 = NaN and a RuntimeWarning.
    safe_span = np.where(span == 0, 1, span)
    return (x - lowest) / safe_span
# 生成随机数据
Z = np.random.randint(10, size=(5,5))
print(Z)
min_max(Z)
M = np.matrix([[1,2,3], [4,5,6], [7,8,9]])
w, v = np.linalg.eig(M)
# w 对应特征值,v 对应特征向量
Z = np.random.randint(1,10,10)
print(Z)
# 计算 Z 两相邻元素差值
print(np.diff(Z, n=1))
# 重复计算 2 次
print(np.diff(Z, n=2))
# 重复计算 3 次
print(np.diff(Z, n=3))
Z = np.random.randint(1,10,10)
print(Z)
"""
[第一个元素, 第一个元素 + 第二个元素, 第一个元素 + 第二个元素 + 第三个元素, ...]
"""
np.cumsum(Z)
"""
[9 7 8 1 8 4 7 5 7 3]
array([ 9, 16, 24, 25, 33, 37, 44, 49, 56, 59])
"""
# 按列连接两个数组(要求行数一致;一维数组会被当作列向量,长度需相同)
M1 = np.array([1, 2, 3])
M2 = np.array([4, 5, 6])
np.c_[M1, M2]
"""
array([[1, 4],
[2, 5],
[3, 6]])
"""
# 按行连接两个数组(二维数组要求列数一致;一维数组则直接首尾相接)
M1 = np.array([1, 2, 3])
M2 = np.array([4, 5, 6])
np.r_[M1, M2]
"""
array([1, 2, 3, 4, 5, 6])
"""
100 numpy exercises