Principal Component Analysis

主成分分析

In [1]:
import torch, torchvision
from torch.distributions.multivariate_normal import MultivariateNormal

import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

from vlkit.ops import AdditiveAngularMargin

还是以二维为例,方便可视化

生成数据点

In [2]:
mean = torch.tensor([5, 5]).float()
# set random state
torch.manual_seed(7)

cov = torch.tensor([[1, 0.6], [0.6, 1]]).float()

print(mean)
print(cov)

n = 10000

dist = MultivariateNormal(loc=mean, covariance_matrix=cov)
samples = dist.sample(sample_shape=(n,))
tensor([5., 5.])
tensor([[1.0000, 0.6000],
        [0.6000, 1.0000]])

可视化数据

In [3]:
colors = np.random.rand(n)
print(samples[:, 0].shape)
plt.scatter(samples[:, 0], samples[:, 1], alpha=0.5, c=colors)
plt.gca().set_aspect('equal', adjustable='box')
plt.grid()
torch.Size([10000])

估计均值和协方差

In [4]:
# batch-nize data
samples = samples.view(-1, 50, 2)

sigma = torch.eye(2)
mean = torch.zeros(1, 2)

t = 0
n = 0
for batch in samples:
    bs = batch.size(0)
    t = t + 1
    n = n + bs

    m = batch.mean(dim=0)

    mean1 = (mean * bs * (t-1) + m * bs) / n
    sigma = sigma + batch.t().mm(batch) + (n + bs -1) * (mean.t().mm(mean) - mean1.t().mm(mean1)) - bs * mean.t().mm(mean)
    mean = mean1
sigma = sigma / (n-1)
In [5]:
print(mean)
print(sigma)
tensor([[4.9987, 4.9988]])
tensor([[0.8889, 0.4837],
        [0.4837, 0.8675]])

主成分分析

对协方差矩阵做特征值分解

In [6]:
eigval, eigvec = torch.linalg.eig(sigma)
In [7]:
eigval = eigval.real.abs()
eigvec = eigvec.real

根据特征值绝对值大小排序

In [8]:
eigval, idx = eigval.sort(descending=True)
eigvec = eigvec[idx, ]

可以看到一个特征值很大,一个很小

In [9]:
print(eigval)
print(eigvec)
tensor([1.3620, 0.3944])
tensor([[ 0.7149, -0.6992],
        [ 0.6992,  0.7149]])

上面特征向量矩阵的每一列可以看作是新空间的坐标轴,特征值表示数据点投影过去之后在每个坐标轴上的“方差”。 可以看下面的图理解。

将原始数据投影到特征向量方向上,再投影回去

首先将原数据减均值

In [10]:
samples = samples.view(-1, 2)

centered = samples - mean
In [11]:
eigvec.mm(eigvec.t())
Out[11]:
tensor([[1.0000e+00, 1.3013e-08],
        [1.3013e-08, 1.0000e+00]])

可视化

In [12]:
centered_proj = centered.mm(eigvec)

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

axes[0].scatter(centered[:, 0], centered[:, 1], alpha=0.5, c=colors)
axes[0].set_aspect('equal', adjustable='box')
axes[0].grid()
axes[0].set_title('original data')

axes[1].scatter(centered_proj[:, 0], centered_proj[:, 1], alpha=0.5, c=colors)
axes[1].set_aspect('equal', adjustable='box')
axes[1].grid()
axes[1].set_title('Eigen transform')

centered_recon = centered_proj.mm(eigvec.t())

axes[2].scatter(centered_recon[:, 0], centered_recon[:, 1], alpha=0.5, c=colors)
axes[2].set_aspect('equal', adjustable='box')
axes[2].grid()
axes[2].set_title('Reverse')
Out[12]:
Text(0.5, 1.0, 'Reverse')

根据主成分降维,然后返映射回去

In [13]:
centered_proj = centered.mm(eigvec[:, 0:1])

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

axes[0].scatter(centered[:, 0], centered[:, 1], alpha=0.5, c=colors)
axes[0].set_aspect('equal', adjustable='box')
axes[0].grid()
axes[0].set_title('original data')

axes[1].scatter(centered_proj[:, 0], torch.zeros_like(centered_proj[:, 0]), alpha=0.5, c=colors)
axes[1].set_aspect('equal', adjustable='box')
axes[1].set_ylim(-2, 2)
axes[1].grid()
axes[1].set_title('Eigen transform')

centered_recon = centered_proj.mm(eigvec[:, 0:1].t())

axes[2].scatter(centered_recon[:, 0], centered_recon[:, 1], alpha=0.5, c=colors)
axes[2].set_aspect('equal', adjustable='box')
axes[2].grid()
axes[2].set_title('Reverse')
Out[13]:
Text(0.5, 1.0, 'Reverse')

用 nn.Linear 层重建数据

用128维的特征为例,先进性 pca 降维,然后用 Linear 层进行重建

In [14]:
n = 100000
d = 128

mean = torch.randn(d) + 5

cov = torch.randn(d, d)
cov = cov.mm(cov.t())

dist = MultivariateNormal(loc=mean, covariance_matrix=cov)
samples = dist.sample(sample_shape=(n,))

这次不用 通过样本估协方差了,直接用分布的协方差

In [15]:
eigval, eigvec = torch.linalg.eig(cov)

eigval = eigval.real.abs()
eigvec = eigvec.real

eigval, idx = eigval.sort(descending=True)
eigvec = eigvec[idx, ]
In [16]:
cum = eigval.cumsum(dim=0) / eigval.sum()
plt.plot(cum)
plt.grid()

d1 = 100
print(cum[d1-1])
tensor(0.9920)
In [17]:
proj = torch.nn.Linear(d, d1, bias=False)

proj.weight.data.copy_(eigvec[:, 0:d1].t())

inv = torch.nn.Linear(d1, d, bias=False)
inv.weight.data.copy_(eigvec[:, 0:d1])
Out[17]:
tensor([[-0.0597, -0.0681,  0.0275,  ..., -0.0392,  0.1097,  0.0043],
        [ 0.0069,  0.0157,  0.1070,  ..., -0.0128, -0.0752,  0.1185],
        [-0.0835, -0.0073, -0.0715,  ..., -0.0456, -0.0491, -0.1001],
        ...,
        [ 0.0882, -0.0108,  0.0981,  ...,  0.0089, -0.0406, -0.1386],
        [-0.1998, -0.0054,  0.0330,  ...,  0.1342, -0.0256,  0.0122],
        [-0.1411, -0.0446, -0.0901,  ..., -0.1473,  0.0897,  0.2352]])
In [18]:
torch.nn.functional.l1_loss(samples, inv(proj(samples - mean)) + mean)
Out[18]:
tensor(3.8662, grad_fn=<L1LossBackward0>)

我们的数据

In [19]:
cov = torch.from_numpy(np.loadtxt('sigma_512.txt')).float()

classes = 1000
d = 512

# 这里很重要!! 同时也需要估特征的均值
mean = torch.zeros(1, d)

原始模型: backbone model + arcface

In [20]:
model = torchvision.models.mobilenet_v3_small()
model = torch.nn.Sequential(model.features, torch.nn.AdaptiveAvgPool2d((1, 1)), torch.nn.Flatten(), torch.nn.Linear(576, 512))

arcface_linear = AdditiveAngularMargin(d, classes, m=0.5)

对原始模型来说,pipeline是这样的:model 提特征 -> arcface 计算logits

In [21]:
f = model(torch.randn(2, 3, 224, 224))

logits = arcface_linear(f)

print(f.shape, logits.shape)
torch.Size([2, 512]) torch.Size([2, 1000])

采样出一些特征,模拟pca

In [22]:
n = 100
dist = MultivariateNormal(loc=mean, covariance_matrix=cov)
features = dist.sample(sample_shape=(n,)).view(n, -1)
labels = torch.randint(0, classes, size=(n, 1))
In [23]:
eigval, eigvec = torch.linalg.eig(cov)

eigval = eigval.real.abs()
eigvec = eigvec.real.float()

eigval, idx = eigval.sort(descending=True)
eigvec = eigvec[idx, ]
In [24]:
cum = eigval.cumsum(dim=0) / eigval.sum()
plt.plot(cum)
plt.grid()

d = 512
d1 = 400
print(cum[d1-1])
tensor(0.9999)

对于新模型来说,有两点不一样:

  1. 模型提完特征之后,原来的特征要从d维降到d1,因此插入一个 proj 层进行特征映射
  2. 原来的arcface输入特征是d维,新的arcface输入特征是d1 维,因此 arcface 也要对应调整。
In [25]:
proj = torch.nn.Linear(d, d1, bias=False)
proj.weight.data.copy_(eigvec[:, 0:d1].t())

arcface_linear_new = AdditiveAngularMargin(d1, classes, m=0.5)
arcface_linear_new.weight.data.copy_(arcface_linear.weight.data.mm(eigvec[:, 0:d1]))
Out[25]:
tensor([[ 0.0059,  0.0010,  0.0045,  ..., -0.0028,  0.0001,  0.0071],
        [-0.0020, -0.0102, -0.0065,  ...,  0.0022,  0.0094,  0.0243],
        [ 0.0061,  0.0098, -0.0044,  ..., -0.0084, -0.0086, -0.0023],
        ...,
        [-0.0120,  0.0075,  0.0034,  ...,  0.0129, -0.0070,  0.0089],
        [ 0.0019, -0.0015, -0.0037,  ..., -0.0013,  0.0070, -0.0008],
        [ 0.0148, -0.0024, -0.0054,  ..., -0.0037,  0.0007,  0.0054]])

新的模型 pipeline: model 提特征 -> proj 层将特征映射到低维(d1维) -> 新的 arcface 计算 logits

In [26]:
logits_original, _ = arcface_linear(features, label=labels)
In [27]:
feature_reduced = proj(features - mean)
logits_new, _ = arcface_linear_new(feature_reduced, label=labels)
In [28]:
print(logits_original)
tensor([[-1.6710, -0.7606, -1.1665,  ...,  0.8226, -0.6867,  2.1994],
        [ 2.2844, -1.1466,  1.4867,  ..., -1.0680, -0.1780, -1.2772],
        [ 0.4817, -0.1350,  1.5191,  ..., -1.1739,  2.3401, -0.6101],
        ...,
        [-1.0971,  0.0981, -3.1137,  ..., -1.7442,  0.8926, -1.1644],
        [-1.9279, -1.7382,  1.2848,  ...,  0.2243,  0.8894, -1.2594],
        [ 0.2948, -1.2515,  1.2627,  ..., -0.1513,  0.8517,  1.1642]],
       grad_fn=<MulBackward0>)
In [29]:
print(logits_new)
tensor([[-1.7847, -0.2265, -0.2569,  ..., -0.2336, -0.4844,  1.6626],
        [ 1.0632, -1.5317,  0.9792,  ..., -2.1686, -1.0364, -1.1139],
        [ 0.4749, -0.6999,  1.5516,  ..., -1.5065,  1.8958, -0.8365],
        ...,
        [-1.5060,  0.0674, -4.5541,  ..., -1.5675,  1.4957, -0.7974],
        [-1.9697, -2.1687,  1.9962,  ...,  1.1042,  0.6858, -0.2074],
        [ 0.1850, -0.3918,  0.9386,  ..., -0.0917, -0.0225,  1.3249]],
       grad_fn=<MulBackward0>)
In [30]:
logits_original.argmax(dim=1)
Out[30]:
tensor([534, 718, 187, 739, 470, 775,  88, 585, 305, 887, 784, 544, 613, 296,
         19, 305, 420, 513, 432, 409, 862, 705, 283,  59, 543, 600, 262, 413,
        213, 706, 832, 876, 692, 987,  88, 955, 368, 264, 233,  80, 172, 327,
        508, 902, 252, 284, 461,  36, 772, 651,  51, 722, 144,  68, 813, 728,
        670, 245, 191, 912, 763, 742, 527, 572, 831, 691, 639, 324,  55, 572,
        894, 227, 395, 781, 513, 212, 292, 373, 371, 849, 353, 941, 890, 279,
        997, 276, 546, 642, 663, 503, 347, 110, 408, 951, 997,  81, 918, 291,
        680, 109])
In [31]:
logits_new.argmax(dim=1)
Out[31]:
tensor([282, 548, 972,  24, 288, 994, 135, 102,  18, 418, 784, 544, 613, 467,
         19,  98, 420, 348, 145, 363, 396, 705, 539, 651, 880, 989, 484, 413,
        608, 706, 832, 456, 692, 860, 381, 810, 368, 777, 416, 847, 172, 327,
        275, 902, 252,  31, 319,  36, 772, 975,  51, 542, 285,  68, 813, 728,
        670, 245, 145, 912, 763, 371, 623, 709, 831, 100, 426, 935,  55, 380,
        618, 227, 112, 781, 904, 196, 492, 886, 371, 349, 353, 941, 906, 820,
        997, 276, 546, 599, 304, 503, 347, 728, 408, 587, 827,  81, 901, 291,
        908, 109])
In [32]:
(logits_original.argmax(dim=1) == logits_new.argmax(dim=1)).float().mean()
Out[32]:
tensor(0.4100)
In [ ]:
 
For more notebooks, visit https://kaizhao.net/misc.