import cv2
import numpy as np
import os
from sklearn.decomposition import PCA
from scipy.spatial.distance import euclidean
import matplotlib.pyplot as plt
# Directory whose image files are listed for feature extraction; it is also
# used to build the full paths for the cluster overview and the video export.
folder_path = "images/EasyDiffusion-Images-LRI758YQ/"
from keras.applications.vgg19 import VGG19
# Load VGG19 with ImageNet weights in order to inspect its layer names.
# NOTE: `model` is rebound to a ResNet50V2 model later in this file, so this
# VGG19 instance is not the one used for feature extraction.
model = VGG19(weights='imagenet')
# Print every layer's index together with its name
for i, layer in enumerate(model.layers):
    print(i, layer.name)
# Output: 0 input_1 1 block1_conv1 2 block1_conv2 3 block1_pool 4 block2_conv1 5 block2_conv2 6 block2_pool 7 block3_conv1 8 block3_conv2 9 block3_conv3 10 block3_conv4 11 block3_pool 12 block4_conv1 13 block4_conv2 14 block4_conv3 15 block4_conv4 16 block4_pool 17 block5_conv1 18 block5_conv2 19 block5_conv3 20 block5_conv4 21 block5_pool 22 flatten 23 fc1 24 fc2 25 predictions
from keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from keras.preprocessing import image
from keras.models import Model
import numpy as np
from scipy.spatial.distance import cdist
# ImageNet-pretrained ResNet50V2 with its classification head kept
# (include_top=True, softmax over 1000 classes): the network output is
# therefore the 1000-way class-probability vector.
resnet_classifier = ResNet50V2(
    weights="imagenet",
    include_top=True,
    classes=1000,
    classifier_activation="softmax",
    pooling=None,
    input_shape=None,
    input_tensor=None,
)
# Re-wrap input -> final output as a Model bound to the name `model`.
model = Model(
    inputs=resnet_classifier.input,
    outputs=resnet_classifier.output,
)
# experimental feature to test individual layers of network
# layer_name = 'conv5_block2_out'
# intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
def extract_features(img_path, model):
    """Run one image through ``model`` and return its output as a flat vector.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through ``preprocess_input`` and then through
    ``model.predict``.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict(batch)``.

    Returns:
        The flattened prediction for the image, or ``None`` when the file
        cannot be opened or decoded as an image.
    """
    try:
        img = image.load_img(img_path, target_size=(224, 224))
    except OSError as err:
        # Missing, truncated or undecodable files surface as OSError
        # subclasses; report and let the caller skip this file instead of
        # aborting the whole run.
        print(f"Could not load image {img_path}: {err}")
        return None
    img_array = image.img_to_array(img)
    # predict() works on batches, so add a batch axis of size 1.
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    return features.flatten()
# model = model_res
model_name = "ResNet50V2"
# File extensions (lower-case) that are treated as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')
# features_list[k] is the feature vector of filenames[k]; the two lists are
# always appended to together so they stay index-aligned.
features_list = []
filenames = []
# os.listdir returns entries in arbitrary order; sorting makes the order of
# features_list / filenames reproducible between runs.
for filename in sorted(os.listdir(folder_path)):
    img_path = os.path.join(folder_path, filename)
    if not img_path.lower().endswith(IMAGE_EXTENSIONS):
        print(f"Skipping non-image file: {img_path}")
        continue
    features = extract_features(img_path, model)
    # Only keep files for which a feature vector was actually produced.
    if features is not None:
        features_list.append(features)
        filenames.append(filename)
# Output: 1/1 [==============================] - 1s 592ms/step 1/1 [==============================] - 0s 52ms/step ... 1/1 [==============================] - 0s 49ms/step 1/1 [==============================] - 0s 47ms/step 1/1 [==============================] - 0s 47ms/step
# Notebook inspection cell: evaluates the container type of features_list
# (the value is only displayed in an interactive session).
type(features_list)
# Output: list
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Tabular view of the extracted features: one row per entry of features_list
df_features = pd.DataFrame(features_list)

# Histogram (with KDE overlay) of the first feature column
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_features[0], kde=True, ax=ax)
ax.set_title('Distribution of Feature 1')
ax.set_xlabel('Feature 1 Value')
ax.set_ylabel('Frequency')
plt.show()

# Pairwise scatter/hist grid for the leading columns; widen or narrow the
# slice as needed (this gets expensive quickly for large datasets)
leading_features = df_features.iloc[:, :5]
sns.pairplot(leading_features)
plt.show()
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
# Stack the per-image feature vectors into a single 2-D array
features_array = np.array(features_list)

# Project the features down to two dimensions with t-SNE (fixed seed)
tsne = TSNE(random_state=42, n_components=2)
features_reduced = tsne.fit_transform(features_array)

# Scatter plot of the two t-SNE components
tsne_x = features_reduced[:, 0]
tsne_y = features_reduced[:, 1]
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(tsne_x, tsne_y)
ax.set_title('t-SNE of Features')
ax.set_xlabel('Component 1')
ax.set_ylabel('Component 2')
plt.show()
from sklearn.cluster import KMeans
# Example: partition the feature vectors into 6 groups with KMeans (fixed seed)
kmeans = KMeans(random_state=42, n_clusters=6)
kmeans.fit(features_list)
clusters = kmeans.labels_

# Colour the existing t-SNE projection by KMeans label
# (requires features_reduced from the dimensionality-reduction step)
tsne_x = features_reduced[:, 0]
tsne_y = features_reduced[:, 1]
plt.scatter(tsne_x, tsne_y, c=clusters)
plt.title('Features Clustering with t-SNE')
plt.show()
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
import matplotlib.pyplot as plt
# Agglomerative clustering of the feature vectors.
# 'ward' linkage minimizes the variance of the clusters being merged.
Z = linkage(features_list, method='ward')

# Draw the dendrogram as a visual aid for choosing the number of clusters
plt.figure(figsize=(25, 10))
dendrogram(Z)
plt.show()

# Cut the tree so that at most `desired_clusters` flat clusters are formed
desired_clusters = 12
clusters = fcluster(Z, t=desired_clusters, criterion='maxclust')
formed_count = clusters.max()
print(f"Formed {formed_count} clusters.")
# Output: Formed 12 clusters.
# Step 1: centroid (mean feature vector) of every cluster id 1..max(clusters)
feature_matrix = np.array(features_list)
cluster_labels = np.asarray(clusters)
cluster_centroids = {
    label: feature_matrix[cluster_labels == label].mean(axis=0)
    for label in range(1, max(clusters) + 1)
}

# Step 2: order the clusters by how far their centroid lies from the
# (unweighted) mean of all cluster centroids, nearest first
centroid_stack = np.array(list(cluster_centroids.values()))
overall_centroid = centroid_stack.mean(axis=0)
cluster_distances = {}
for label, label_centroid in cluster_centroids.items():
    cluster_distances[label] = np.linalg.norm(label_centroid - overall_centroid)
sorted_cluster_indices = sorted(
    cluster_distances,
    key=lambda label: cluster_distances[label],
)
from scipy.spatial.distance import cdist
# Build the final filename sequence: clusters in their sorted order and,
# inside each cluster, members from nearest to farthest from the centroid.
sorted_filenames = []
for cluster_id in sorted_cluster_indices:
    # Positions (into features_list / filenames) of this cluster's members
    member_positions = [
        position
        for position, label in enumerate(clusters)
        if label == cluster_id
    ]
    member_features = np.array(
        [features_list[position] for position in member_positions]
    )
    # Euclidean distance of every member to the cluster centroid
    offsets = member_features - cluster_centroids[cluster_id]
    distances = np.linalg.norm(offsets, axis=1)
    # Emit this cluster's filenames in ascending distance order
    for rank in np.argsort(distances):
        sorted_filenames.append(filenames[member_positions[rank]])
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
def show_clusters_overview(clusters, sorted_filenames, folder_path,
                           images_per_cluster=5, *, filenames=None):
    """Plot a grid with one row per cluster and up to N sample images per row.

    Args:
        clusters: Cluster label per image.
        sorted_filenames: Filenames in display order; within a row, images
            appear in the order they occur in this sequence.
        folder_path: Directory the filenames are joined onto.
        images_per_cluster: Maximum number of images (grid columns) per row.
        filenames: Filenames in the same order as ``clusters``. When given,
            each image's cluster is looked up by name, which is required
            whenever ``sorted_filenames`` is ordered differently from
            ``clusters``. When omitted, ``clusters[k]`` is taken to be the
            label of ``sorted_filenames[k]`` (the previous behaviour).
    """
    # Map every filename to its cluster label. Pairing by position is only
    # correct if both sequences share the same order, hence `filenames`.
    label_source = sorted_filenames if filenames is None else filenames
    label_by_name = dict(zip(label_source, clusters))

    # Group the filenames per cluster while keeping the display order.
    names_by_cluster = {}
    for name in sorted_filenames:
        if name in label_by_name:
            names_by_cluster.setdefault(label_by_name[name], []).append(name)

    unique_clusters = np.unique(clusters)
    rows = len(unique_clusters)
    plt.figure(figsize=(15, rows * 3))  # Adjust the figure size as needed
    for row_index, cluster_id in enumerate(unique_clusters):
        row_names = names_by_cluster.get(cluster_id, [])[:images_per_cluster]
        for col_index, filename in enumerate(row_names):
            img_path = os.path.join(folder_path, filename)
            img = cv2.imread(img_path)
            if img is None:
                # imread signals an unreadable/unsupported file with None;
                # leave this grid cell empty instead of crashing in cvtColor.
                print(f"Could not read image: {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Compute the cell from (row, column) so that a cluster with
            # fewer images than columns does not shift the following rows.
            cell = row_index * images_per_cluster + col_index + 1
            plt.subplot(rows, images_per_cluster, cell)
            plt.imshow(img)
            plt.title(f"Cluster {cluster_id}")
            plt.axis('off')
    plt.tight_layout()
    plt.show()


# `clusters` is aligned with `filenames` (extraction order), not with
# `sorted_filenames`, so pass `filenames` for a correct label lookup.
show_clusters_overview(clusters, sorted_filenames, folder_path,
                       images_per_cluster=12, filenames=filenames)
from moviepy.editor import ImageSequenceClip
# Parameters
fps = 25
image_duration = 1 / fps  # Duration each image should display

# Assemble the sorted images into a clip, one frame per image.
sorted_full_paths = [os.path.join(folder_path, filename) for filename in sorted_filenames]
clip = ImageSequenceClip(sorted_full_paths, fps=fps)
clip = clip.set_duration(image_duration * len(sorted_full_paths))

# Define the output filename
base_filename = "images/output/EasyDiffusion-Images-LRI758YQ_sorted_sequence"
extension = ".mov"
# Create the output directory up front so the write below does not fail
# merely because the directory is missing.
os.makedirs(os.path.dirname(base_filename), exist_ok=True)

# Pick the first numbered filename that does not exist yet, so earlier
# renders are never overwritten.
i = 1
current_filename = f"{base_filename}_{desired_clusters}_{i}{extension}"
while os.path.exists(current_filename):
    i += 1
    current_filename = f"{base_filename}_{desired_clusters}_{i}{extension}"

# Write the video file
clip.write_videofile(current_filename, codec="libx264")