基于Python——Kmeans聚类算法的实现

2020-06-28 08:55:55 来源：易采站长站 作者：易采站长站整理

min = 1 << 30
min_index = -1
# 遍历每个数据，计算与k个簇的质心的距离
for index, cluster in enumerate(clusters):
# 计算每个点与对应质心的距离
dis = self.algorithm(x, cluster)
if dis loss_value:
# 重新计算每个簇的质心
self.calculate_centroid(clusters_collection, clusters)
elif now_loss_value < loss_value:
print("算法正在运行，迭代次数:{}".format(count))
# 重新计算每个簇的质心
self.calculate_centroid(clusters_collection, clusters)
elif now_loss_value == loss_value:
self.cluster_centers_ = clusters
self.loss = now_loss_value
return self
# 更新损失值
loss_value = now_loss_value

def predict(self, X):
    """
    Assign each data point to its nearest cluster centre.

    Distances are computed with ``self.algorithm(point, centre)`` against
    every centre stored in ``self.cluster_centers_``.

    :param X: iterable of data points to classify
    :return: numpy array holding, for each point in X, the index of the
        centre with the smallest distance (the first one wins on ties);
        -1 if no centre produced a comparable distance
    """
    result = []
    for x in X:
        min_index = -1
        # Start from infinity rather than a fixed large constant so that
        # points very far from every centre still receive a valid label.
        min_dis = float("inf")
        for index, cluster in enumerate(self.cluster_centers_):
            dis = self.algorithm(x, cluster)
            if min_dis > dis:
                min_dis = dis
                min_index = index
        result.append(min_index)
    return np.array(result)

def random_choose_cluster(self, data, k):
    """
    Pick k distinct data points at random to serve as initial centres.

    :param data: indexable dataset supporting ``len()``
    :param k: number of centres to pick
    :return: numpy array containing the k selected data points
    :raises ValueError: propagated from ``random.sample`` when k is
        negative or larger than ``len(data)``
    """
    # Sample positions (not values) so duplicate rows in data cannot
    # cause the same position to be chosen twice.
    positions = random.sample(range(len(data)), k)
    return np.array([data[i] for i in positions])

def calculate_centroid(self, collection, clusters):
    """
    Recompute the centre of every non-empty cluster in place.

    Each centre becomes the coordinate-wise mean of the points assigned
    to it. Clusters with no assigned points keep their previous centre.

    :param collection: mapping from cluster index to the points
        currently assigned to that cluster
    :param clusters: indexable container of centres; entry ``i`` is
        overwritten with the new centre of cluster ``i``
    :return: None (``clusters`` is modified in place)
    """
    for i, members in collection.items():
        # Use len() rather than truthiness: members may be a numpy array,
        # whose truth value is ambiguous.
        if len(members) > 0:
            clusters[i] = np.mean(members, axis=0)

def loss_function(self, data, clusters):
"""
衡量K-means算法停止迭代的损失函数
:param data: 所有簇集合
:param clusters: 每个簇对应的质心
:return: 返回损失值
"""
total = 0
for i in data:
for x in data[i]:
total += self.algorithm(x, clusters[i])

2/5 首页 上一页 1 2 3 4 5 下一页 尾页