Created
July 1, 2024 16:21
-
-
Save bijancot/ea7a526474da6c2728e268d8b86b75dd to your computer and use it in GitHub Desktop.
map generate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.cluster import KMeans | |
import folium | |
from math import ceil | |
# Load the shop dataset. It must provide LONG_SHOP and LAT_SHOP columns.
file_data = "finna-rute-exist.csv"
data = pd.read_csv(file_data)  # Adjust this path to your dataset location

# Coordinates may arrive as text with stray characters, so keep only digits,
# dots and minus signs before parsing. Values that still do not parse (blank
# cells, a lone "-", "1.2.3", ...) become NaN instead of aborting the run.
for column in ('LONG_SHOP', 'LAT_SHOP'):
    cleaned = data[column].astype(str).str.replace(r'[^0-9.-]', '', regex=True)
    data[column] = pd.to_numeric(cleaned, errors='coerce').astype(float)

# Rows without a usable coordinate pair can be neither clustered nor plotted.
jumlah_baris_awal = len(data)
data = data.dropna(subset=['LONG_SHOP', 'LAT_SHOP']).reset_index(drop=True)
if len(data) < jumlah_baris_awal:
    print("Jumlah toko dilewati (koordinat tidak valid):", jumlah_baris_awal - len(data))
if data.empty:
    # KMeans cannot be fitted with zero samples / zero clusters; fail early
    # with a message that points at the input file.
    raise ValueError(f"No rows with valid coordinates found in {file_data}")

jumlah_toko = len(data)
print("Jumlah toko:", jumlah_toko)

# Maximum number of shops per cluster; the initial cluster count is the
# smallest number of clusters that could respect that limit.
jumlah_toko_perhari = 40
jumlah_cluster = ceil(jumlah_toko / jumlah_toko_perhari)
print("Jumlah cluster awal:", jumlah_cluster)

# Feature matrix: column 0 is longitude, column 1 is latitude.
X = data[['LONG_SHOP', 'LAT_SHOP']].values

# Initial K-means pass. K-means does not balance cluster sizes, so some
# clusters may still hold more than jumlah_toko_perhari shops afterwards.
kmeans = KMeans(n_clusters=jumlah_cluster, n_init=10)
kmeans.fit(X)

# Initial cluster label of every shop, aligned with the rows of X.
labels = kmeans.labels_
# Define re-cluster function | |
def recluster_subset(X, num_clusters, random_state=None):
    """Cluster the rows of ``X`` with K-means and return one label per row.

    Args:
        X: 2-D array-like of samples, one row per point.
        num_clusters: Number of clusters requested from ``KMeans``.
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` keeps the previous unseeded behaviour; pass an int to
            obtain the same split on every run.

    Returns:
        The ``labels_`` array of the fitted model, aligned with the rows
        of ``X``.
    """
    kmeans_subset = KMeans(
        n_clusters=num_clusters,
        n_init=10,
        random_state=random_state,
    )
    return kmeans_subset.fit(X).labels_
# Define recursive re-clustering function | |
def recluster_until_fit(X, labels, jumlah_toko_perhari, start_label):
    """Split oversized clusters until none exceeds ``jumlah_toko_perhari`` points.

    Each pass finds every cluster with more than ``jumlah_toko_perhari``
    members and re-clusters just those members with ``recluster_subset``.
    The resulting sub-clusters receive fresh label values, and passes repeat
    until every cluster fits.

    Args:
        X: Array-like of samples, one row per point, aligned with ``labels``.
        labels: Current cluster label of every point. It is not modified.
        jumlah_toko_perhari: Maximum number of points allowed per cluster
            (must be at least 1).
        start_label: First label value to hand out to new sub-clusters. It
            should be larger than every value in ``labels`` so new labels do
            not collide with existing ones.

    Returns:
        A new label array in which no label occurs more than
        ``jumlah_toko_perhari`` times.

    Raises:
        ValueError: If ``jumlah_toko_perhari`` is smaller than 1.
    """
    if jumlah_toko_perhari < 1:
        raise ValueError("jumlah_toko_perhari must be at least 1")

    X = np.asarray(X)
    final_labels = np.array(labels)  # copy: the caller's labels stay untouched
    new_label_start = start_label

    # Iterate instead of recursing so that many refinement passes cannot
    # exhaust the interpreter's recursion limit.
    while True:
        cluster_ids, cluster_sizes = np.unique(final_labels, return_counts=True)
        oversized_ids = cluster_ids[cluster_sizes > jumlah_toko_perhari]
        if oversized_ids.size == 0:
            return final_labels

        # Resolve membership of every oversized cluster before relabelling
        # anything, so labels written during this pass cannot leak into the
        # membership of a cluster handled later in the same pass.
        members = [
            np.where(final_labels == cluster_id)[0]
            for cluster_id in oversized_ids
        ]

        for cluster_indices in members:
            cluster_size = len(cluster_indices)
            # Always >= 2 here because cluster_size > jumlah_toko_perhari.
            new_cluster_count = ceil(cluster_size / jumlah_toko_perhari)
            points = X[cluster_indices]

            new_labels = None
            # Points that are all identical cannot be separated by K-means;
            # skip the fit rather than loop on the same cluster forever.
            if not (points == points[0]).all():
                new_labels = recluster_subset(points, new_cluster_count)
                if np.unique(new_labels).size < 2:
                    # The fit made no progress; treat it as unsplittable.
                    new_labels = None

            if new_labels is None:
                # Deterministic fallback: cut the members, in order, into
                # new_cluster_count nearly equal chunks so that the size
                # limit is still honoured and the loop terminates.
                new_labels = np.arange(cluster_size) * new_cluster_count // cluster_size

            final_labels[cluster_indices] = new_labels + new_label_start
            new_label_start += new_cluster_count
# New sub-cluster labels are numbered after the largest initial label so they
# never collide with labels that are already in use.
new_label_start = max(labels) + 1

# Split every cluster that holds more than jumlah_toko_perhari shops.
final_labels = recluster_until_fit(X, labels, jumlah_toko_perhari, new_label_start)

# From here on, work with the refined labels.
labels = final_labels

# Visualization with Folium. X stores (longitude, latitude) per row, while
# folium expects [latitude, longitude], hence the swapped column order.
map_center = [np.mean(X[:, 1]), np.mean(X[:, 0])]
mymap = folium.Map(location=map_center, zoom_start=10)

# Palette for the clusters. CircleMarker colours are CSS colours, so every
# entry must be a valid CSS colour name: 'darkpurple' and 'lightred' are
# folium Icon names only and were replaced by 'darkmagenta' and 'lightcoral'.
colors = [
    'blue', 'green', 'red', 'orange', 'purple', 'pink', 'black', 'gray',
    'brown', 'darkblue', 'darkgreen', 'darkred', 'darkmagenta', 'darkorange',
    'lightblue', 'lightgreen', 'lightcoral', 'lightgray', 'lightyellow',
    'cadetblue', 'beige', 'darkgray', 'darkcyan', 'lightcyan',
]

# Labels are sparse after re-clustering, so two clusters could share a colour
# modulo the palette size even when there are fewer clusters than colours.
# Ranking the labels (0, 1, 2, ...) keeps colours distinct for as long as the
# palette allows; beyond that the palette is cycled.
_, color_index = np.unique(labels, return_inverse=True)

# One small circle per shop, coloured by its cluster.
for (longitude, latitude), rank in zip(X, color_index):
    cluster_color = colors[rank % len(colors)]
    folium.CircleMarker(
        location=[latitude, longitude],
        radius=2,
        color=cluster_color,
    ).add_to(mymap)

# Write the map to an HTML file that can be opened in a browser.
mymap.save("rute.html")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment