7. SVD (Singular Value Decomposition; 特異値分解)
https://ohke.hateblo.jp/entry/2017/12/14/230500
7.1. SVDとは
7.2. scikit-learnを使った実験
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.datasets import load_iris
#from sklearn.decomposition import TruncatedSVD
from scipy.linalg import svd
from sklearn.exceptions import NotFittedError
# Load the iris dataset as a single DataFrame (feature columns plus the
# "target" label column) and preview its first rows.
iris = load_iris(as_frame=True)
df = iris.frame
df.head()
| sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | target |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
7.3. NumPyを使って実装する
class MySVD():
    """Thin SVD computed from the eigendecomposition of the Gram matrix X^T X."""

    def fit_transform(self, X, y=None):
        r"""Decompose ``X`` as ``U @ diag(s) @ Vt`` (i.e. X = U \Sigma V^T).

        For an input of shape (D, F) the *thin* factors are returned:

        Parameters
        ----------
        X : array-like of shape (D, F)
            Data matrix. Converted to a float ndarray, so a DataFrame or a
            nested list is accepted as well.
        y : ignored
            Unused; accepted only so the signature mirrors the usual
            ``fit_transform(X, y)`` convention.

        Returns
        -------
        U : ndarray of shape (D, F)
            Left singular vectors, one per column. Columns that belong to a
            (numerically) zero singular value are filled with zeros, so they
            are not unit vectors; ``U @ diag(s) @ Vt`` still reproduces ``X``.
        s : ndarray of shape (F,)
            Singular values in descending order (the diagonal of Sigma).
        Vt : ndarray of shape (F, F)
            Transposed right singular vectors (rows are the vectors).

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional.

        Notes
        -----
        The sign of each singular-vector pair is arbitrary and may differ
        from what other SVD routines return.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got ndim={X.ndim}")
        n_rows, n_cols = X.shape

        # Eigen-decomposition of X^T X. The Gram matrix is symmetric, so use
        # eigh: it guarantees real eigenvalues and orthonormal eigenvectors,
        # whereas the general-purpose eig may return complex output.
        eigen_values, eigen_vectors = np.linalg.eigh(X.T @ X)  # (F,), (F, F)

        # Largest eigenvalue (hence largest singular value) first.
        order = np.argsort(eigen_values)[::-1]  # (F,)
        eigen_values = eigen_values[order]

        # Right singular vectors: eigenvectors of X^T X, in the same order.
        V = eigen_vectors[:, order]  # (F, F)

        # Round-off can leave eigenvalues of a rank-deficient matrix slightly
        # negative (sqrt -> NaN) or slightly positive. Anything below the
        # noise floor of the eigen-solver is treated as an exact zero.
        noise_floor = (
            eigen_values.max(initial=0.0)
            * max(n_rows, n_cols)
            * np.finfo(float).eps
        )
        eigen_values = np.where(eigen_values > noise_floor, eigen_values, 0.0)

        # Singular values are the square roots of the eigenvalues of X^T X.
        sigma_diag = np.sqrt(eigen_values)  # (F,)

        # Left singular vectors: u_i = X v_i / sigma_i, done for all columns
        # at once. Columns with sigma_i == 0 are left as zeros instead of
        # dividing by zero.
        projected = X @ V  # (D, F)
        U = np.divide(
            projected,
            sigma_diag,
            out=np.zeros_like(projected),
            where=sigma_diag > 0,
        )
        return U, sigma_diag, V.T
# Feature matrix: every column of the frame except the class label.
X = df.drop(columns="target")

# Decompose the same matrix with the hand-written class and with SciPy.
myoutput = MySVD().fit_transform(X)
output = svd(X)

# Print the shapes of the three returned factors, one line per implementation.
for factors in (myoutput, output):
    print(*(factor.shape for factor in factors[:3]))
(150, 4) (4,) (4, 4)
(150, 150) (4,) (4, 4)
# myoutput[2] is the third factor returned by MySVD (V transposed), so
# transposing it back and taking column 1 selects the second right singular
# vector; project every row of X onto it.
second_right_vector = myoutput[2].T[:, 1]
a = X.dot(second_right_vector)
a.shape
(150,)
X.shape
(150, 4)
output[2]
array([[-0.75110816, -0.38008617, -0.51300886, -0.16790754],
[ 0.2841749 , 0.5467445 , -0.70866455, -0.34367081],
[ 0.50215472, -0.67524332, -0.05916621, -0.53701625],
[ 0.32081425, -0.31725607, -0.48074507, 0.75187165]])
output[1][:2]
array([95.95991387, 17.76103366])