11

现在已经 18:06 了，但是我感觉自己没做多少事情。

我从今天 10:30 开始做事。先写完了普物作业。然后去吃饭，之后看了看关于 Linux 的文件夹意义。然后就开始研究如何再神经网络上实现差分隐私。

我发现，虽然现成的神经网络的库往往都功能强大，实现起来也比你手动实现要简单，但其实为了实现复杂的叠加网络，它们用到了 OOP 设计范式，然后里面耦合关系也挺复杂。如果你之前经验不足，就容易想当然，忽略里面的耦合关系，然后全是 bug。另外，对于张量处理，细节也很多（如：张量的轴的处理，等等）。最后，数据本身的 obtain 也是个问题：二元分类问题不能用“烂大街”的 MNIST，所以找数据也要费精力。当然可以自己构造数据，但是其实有时构造参数的设定也不好搞。

今天不再搞这个了，还是应该规划一下期末考试的复习。主要是离散和数分。毕竟普物就那些东西，线代我理解地也是比较好的。数分的傅里叶变换倒是有些麻烦。

计划之后再看看 Mackay 讲的 Advanced Monte Carlo Methods。刚刚听完了 Slice Sampling，确实很通用，对 step size 也不太敏感。之前的 Hamiltonian Monte Carlo Method 是基于哈密顿力学（动力学）的，本质上和多次迭代的 Metropolis Monte Carlo 都是四处走动的，但是由于惯性的引入，使得距离为时间根号的随机游走行为得到了一定的抑制。

之后再听听 Exact Sampling，再最后总结一下所有的 Monte Carlo Method 的优缺点，大概就可以了。

另外，这里再附上哈密顿取样和 Metropolis-Hastings 采样的 Python 例程。

注意：采样中点之间没有连线，您可以自行添加功能；另外，您也可以自己调整参数

import numpy as np
import random
import math
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

# 目标分布（二元高斯分布）

## 定义协方差矩阵和均值向量
cov_matrix = np.array([[1, 0.98], [0.98, 2.5]])
mean_vector = np.array([0, 1])

## 定义二元高斯分布函数
def bivariate_gaussian(x, y, cov, mean):
    pos = np.dstack((x, y))
    rv = multivariate_normal(mean, cov)
    return rv.pdf(pos)

## 定义二元高斯分布函数对x的偏导数
def bivariate_gaussian_dx(x, y, cov, mean):
    pos = np.dstack((x, y))
    rv = multivariate_normal(mean, cov)
    return -(rv.pdf(pos) * np.dot(np.linalg.inv(cov), [x - mean[0], y - mean[1]]))[0]

## 定义二元高斯分布函数对y的偏导数
def bivariate_gaussian_dy(x, y, cov, mean):
    pos = np.dstack((x, y))
    rv = multivariate_normal(mean, cov)
    return -(rv.pdf(pos) * np.dot(np.linalg.inv(cov), [x - mean[0], y - mean[1]]))[1]

## 计算给定点的二元高斯分布函数值和其梯度
def bivariate_gaussian_at_point(x, y, cov_matrix, mean_vector):
    # 计算概率密度函数值
    p = bivariate_gaussian(x, y, cov_matrix, mean_vector)

    # 计算梯度
    dp_dx = bivariate_gaussian_dx(x, y, cov_matrix, mean_vector)
    dp_dy = bivariate_gaussian_dy(x, y, cov_matrix, mean_vector)
    grad = np.array([dp_dx, dp_dy])

    return p, grad

def target_distribution(x, y):
    return bivariate_gaussian(x, y, cov_matrix, mean_vector)

def gradient(x, y):
    dp_dx = bivariate_gaussian_dx(x, y, cov_matrix, mean_vector)
    dp_dy = bivariate_gaussian_dy(x, y, cov_matrix, mean_vector)
    return dp_dx, dp_dy

def target_distribution_hmc(x, y): # exp(-target(x, y)) = gauss(x, y) => target(x, y) = -ln(gauss(x, y))
    return -np.log(target_distribution(x, y))

def gradient_hmc(x, y): # grad(target) = - 1 / gauss(x, y) * grad(gauss)
    factor = - 1 / target_distribution(x, y)
    dp_dx = bivariate_gaussian_dx(x, y, cov_matrix, mean_vector) * factor
    dp_dy = bivariate_gaussian_dy(x, y, cov_matrix, mean_vector) * factor
    return dp_dx, dp_dy

# Metropolis-Hastings采样函数
def metropolis_hastings(target_distribution, num_samples=100):
    samples = []
    x, y = 0, 0  # 初始位置
    for i in range(num_samples):
        # 从当前位置生成一个随机游走
        x_new, y_new = x + random.gauss(0, 1), y + random.gauss(0, 1)
        # 计算接受概率
        accept_prob = min(1, target_distribution(x_new, y_new) / target_distribution(x, y))
        # 接受新状态或保持原状态不变
        if random.random() < accept_prob:
            x, y = x_new, y_new
        samples.append((x, y))
    return samples

# HMC采样函数
def hamiltonian_monte_carlo(target_distribution, num_samples=30, num_steps=10, step_size=0.1):
    samples = []
    x, y = 0, 0  # 初始位置
    for i in range(num_samples):
        # 从动量分布中抽取一个动量
        p_x, p_y = np.random.normal(size=2)
        # 计算动能
        kinetic_energy = 0.5 * (p_x**2 + p_y**2)
        # 计算势能和梯度
        potential_energy = target_distribution_hmc(x, y)
        d_potential_dx, d_potential_dy = gradient_hmc(x, y)
        # 使用Leapfrog算法进行状态转移
        x_new, y_new, p_x_new, p_y_new = x, y, p_x, p_y
        for j in range(num_steps):
            p_x_new -= step_size * gradient_hmc(x_new, y_new)[0]
            p_y_new -= step_size * gradient_hmc(x_new, y_new)[1]
            x_new += step_size * p_x_new
            y_new += step_size * p_y_new
        # 计算接受概率
        potential_energy_new = target_distribution_hmc(x_new, y_new)
        kinetic_energy_new = 0.5 * (p_x_new**2 + p_y_new**2)
        accept_prob = min(1, math.exp(potential_energy + kinetic_energy - potential_energy_new - kinetic_energy_new))
        # 接受新状态或保持原状态不变
        if random.random() < accept_prob:
            x, y = x_new, y_new
        samples.append((x, y))
    return samples

# 绘制采样结果
def plot_samples(samples, title):
    x, y = zip(*samples)
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(x, y, alpha=0.5)
    plt.title(title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.xlim(-5,5)
    plt.ylim(-5, 5)
    plt.subplot(1, 2, 2)
    x_range = np.linspace(-5, 5, 100)
    y_range = np.linspace(-5, 5, 100)
    X, Y = np.meshgrid(x_range, y_range)
    Z = target_distribution(X, Y)
    plt.contourf(X, Y, Z, cmap='coolwarm')
    plt.colorbar()
    plt.scatter(x, y, alpha=0.5)
    plt.title('Probability Density Contours')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.xlim(-5, 5)
    plt.ylim(-5, 5)
    plt.show()

# 生成Metropolis-Hastings采样结果
samples_mh = metropolis_hastings(target_distribution)
plot_samples(samples_mh, 'Metropolis-Hastings')

# 生成HMC采样结果
samples_hmc = hamiltonian_monte_carlo(target_distribution)
plot_samples(samples_hmc, 'Hamiltonian Monte Carlo')