高斯混合模型(GMM)
2020-06-28 12:44:24 来源:易采站长站 作者:易采站长站整理
文章目录:原理、算法、实验、生成数据、高斯混合模型、利用高斯混合模型聚类、画出概率密度函数
原理
有空再更新吧
算法

实验
生成数据
import numpy as np
import matplotlib.pyplot as plt


def gen_clusters(n_per_cluster=100):
    """Draw a 2-D toy data set made of three Gaussian clusters.

    The clusters are sampled in a fixed order with
    ``np.random.multivariate_normal`` and stacked row-wise, so the first
    ``n_per_cluster`` rows belong to the first cluster, and so on.

    Args:
        n_per_cluster: Number of points drawn from each cluster.

    Returns:
        Array of shape ``(3 * n_per_cluster, 2)`` rounded to 4 decimals.
    """
    # (mean, covariance) of each cluster, in sampling order.
    cluster_params = (
        ([0, 0], [[1, 0], [0, 10]]),
        ([10, 10], [[10, 0], [0, 1]]),
        ([10, 0], [[3, 0], [0, 4]]),
    )
    samples = [
        np.random.multivariate_normal(mean, cov, n_per_cluster)
        for mean, cov in cluster_params
    ]
    return np.round(np.vstack(samples), 4)
def show_scatter(data):
    """Scatter-plot 2-D points on the current matplotlib axes.

    Args:
        data: Array whose transpose unpacks into exactly two sequences,
            the x and y coordinates (i.e. shape ``(n, 2)``).
    """
    x, y = data.T
    plt.scatter(x, y)
    plt.axis()
    plt.title("scatter")
    plt.xlabel("x")
    plt.ylabel("y")
# Draw one random sample from the three-cluster generator and scatter-plot it.
data = gen_clusters()
show_scatter(data)

高斯混合模型
class GMM:
    """Gaussian mixture model with full covariances, fitted by EM.

    Attributes are plain numpy containers so they can be inspected directly:
    ``mus`` is a ``(k, dim)`` array of component means, ``sigmas`` is a list
    of ``k`` covariance matrices of shape ``(dim, dim)``, and ``alphas`` is a
    length-``k`` array of mixing weights that sums to 1.
    """

    # Small ridge added to every re-estimated covariance so that it stays
    # invertible even when a component collapses onto very few points.
    _REG_COVAR = 1e-6

    def __init__(self, k, dim):
        """Create a mixture of ``k`` components over ``dim``-dimensional data.

        Means are random in ``[0, 1)``, covariances start as the identity and
        mixing weights are random but normalised to sum to 1.
        """
        self.k = k
        self.dim = dim
        self.mus = np.random.rand(k, dim)
        self.sigmas = [np.eye(dim) for _ in range(k)]
        weights = np.random.rand(k)
        # Mixing weights must form a probability distribution.
        self.alphas = weights / weights.sum()

    def gaussian_prob(self, x, mu, sigma):
        """Evaluate the multivariate normal density N(x | mu, sigma).

        Args:
            x: A single point of shape ``(dim,)`` or a batch ``(n, dim)``.
            mu: Mean vector of shape ``(dim,)``.
            sigma: Covariance matrix of shape ``(dim, dim)``.

        Returns:
            1-D array of densities, one per row of ``x`` (length 1 when a
            single point is given).
        """
        x = np.asarray(x, dtype=float)
        if x.ndim == 1:
            x = x[np.newaxis, :]
        mu = np.asarray(mu, dtype=float)
        sigma = np.asarray(sigma, dtype=float)
        dim = mu.shape[-1]
        denom = np.sqrt((2 * np.pi) ** dim * np.abs(np.linalg.det(sigma)))
        diffs = x - mu
        # Solve sigma @ y = diffs.T once for the whole batch instead of
        # inverting sigma separately for every sample.
        solved = np.linalg.solve(sigma, diffs.T)
        mahalanobis = np.sum(diffs * solved.T, axis=1)
        return np.exp(-0.5 * mahalanobis) / denom

    def multi_gaussian_prob(self, x):
        """Evaluate the mixture density sum_i alpha_i * N(x | mu_i, sigma_i).

        Args:
            x: A single point ``(dim,)`` or a batch ``(n, dim)``.

        Returns:
            1-D array of mixture densities, one per row of ``x``.
        """
        x = np.asarray(x)
        assert self.dim == x.shape[-1]
        prob = 0
        for mu, sigma, alpha in zip(self.mus, self.sigmas, self.alphas):
            prob += alpha * self.gaussian_prob(x, mu, sigma)
        return prob

    def fit(self, X, steps=10):
        """Fit means, covariances and mixing weights with the EM algorithm.

        Means are re-initialised from randomly chosen rows of ``X`` before
        the first iteration.

        Args:
            X: Training data of shape ``(n, dim)``.
            steps: Number of EM iterations to run.

        Raises:
            ValueError: If ``X`` is empty or is not of shape ``(n, dim)``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.dim:
            raise ValueError(
                f"X must have shape (n, {self.dim}), got {X.shape}"
            )
        N = len(X)
        if N == 0:
            raise ValueError("X must contain at least one sample")
        K = self.k

        # Pick distinct rows as starting means whenever there are enough
        # samples; duplicated means would make two components identical.
        chosen = np.random.choice(N, K, replace=K > N)
        self.mus = X[chosen].astype(float)

        z = np.zeros((N, K))
        tiny = np.finfo(float).eps
        ridge = self._REG_COVAR * np.eye(self.dim)
        for _ in range(steps):
            # E-step: responsibility of component i for sample j.
            for i in range(K):
                z[:, i] = self.alphas[i] * self.gaussian_prob(
                    X, self.mus[i], self.sigmas[i]
                )
            row_sums = z.sum(axis=1, keepdims=True)
            # If every component density underflowed to 0 for a sample,
            # share it evenly rather than dividing by zero.
            dead = row_sums[:, 0] <= 0
            if dead.any():
                z[dead] = 1.0 / K
                row_sums[dead] = 1.0
            z /= row_sums

            # M-step: re-estimate every parameter from the responsibilities.
            counts = z.sum(axis=0)
            for i in range(K):
                if counts[i] <= tiny:
                    # Component owns no mass; keep its previous mean and
                    # covariance instead of dividing by ~0.
                    continue
                weights = z[:, i]
                self.mus[i] = weights @ X / counts[i]
                diffs = X - self.mus[i]
                self.sigmas[i] = (
                    (weights[:, np.newaxis] * diffs).T @ diffs / counts[i]
                    + ridge
                )
            self.alphas = counts / N













闽公网安备 35020302000061号