import pandas as pd
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Split the columns into numeric and categorical groups by dtype.
# NOTE(review): columns whose dtype is in neither list (e.g. bool, datetime,
# int32/float32) are selected by neither group, and ColumnTransformer's
# default remainder="drop" leaves them out of the feature matrix — confirm
# that this is intended for df_cl.
numeric_cols = df_cl.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = df_cl.select_dtypes(include=['object', 'category']).columns

# Preprocessing: standardize numeric features, one-hot encode categorical ones.
# sparse_threshold=0 makes the transformer always return a dense array.
# With the default threshold (0.3) the result is sparse only when the stacked
# output is sparse enough; otherwise it is already a dense ndarray and an
# unconditional .toarray() call would raise AttributeError.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(), categorical_cols),
    ],
    sparse_threshold=0,
)

# Fit the preprocessing steps and transform the data into a dense matrix.
X_processed = preprocessor.fit_transform(df_cl)
# Silhouette scores: fit K-Means once per candidate cluster count and record
# the silhouette score of the resulting labelling on the same feature matrix.
silhouette_scores = []
K = range(2, 11)
for k in K:
    kmeans = KMeans(
        n_clusters=k,
        init='k-means++',
        random_state=42,  # fixed seed so repeated runs give the same scores
        n_init=10,
    )
    cluster_labels = kmeans.fit_predict(X_processed)
    score = silhouette_score(X_processed, cluster_labels)
    silhouette_scores.append(score)
# Plot: silhouette score against the number of clusters, one marker per k.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(K, silhouette_scores, marker='o')
ax.set_title("Silhouette Analizi")
ax.set_xlabel("Küme Sayısı")
ax.set_ylabel("Silhouette Skoru")
ax.set_xticks(K)  # one tick per evaluated cluster count
ax.grid(True)
plt.show()
# Best cluster count: the k whose silhouette score is highest.
# max() returns the first maximal pair, so ties resolve to the smallest k.
best_k, best_score = max(
    zip(K, silhouette_scores),
    key=lambda pair: pair[1],
)
print("En iyi küme sayısı:", best_k)
print("En yüksek silhouette skoru:", best_score)
# Page footer text carried over from the original post (not code):
# Silhouette Method
# 0 Beğeni
# 48 okunma

