From 79284d0a5cc144cfa0c546dd9e6f7d2f29b8da2d Mon Sep 17 00:00:00 2001
From: ilia
Date: Wed, 16 Jul 2025 12:52:02 -0800
Subject: [PATCH] Add punimtag.py script and update requirements.txt

---
 punimtag.py      | 147 ++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   5 ++
 2 files changed, 152 insertions(+)
 create mode 100644 punimtag.py

diff --git a/punimtag.py b/punimtag.py
new file mode 100644
index 0000000..fafd461
--- /dev/null
+++ b/punimtag.py
@@ -0,0 +1,147 @@
+"""Detect faces in photos, cluster them with HDBSCAN, and store them in SQLite."""
+
+import os
+import sqlite3
+
+import face_recognition
+from PIL import Image
+import numpy as np
+import hdbscan
+
+# Constants
+PHOTOS_DIR = 'photos'
+DB_FILE = 'faces.db'
+IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
+MIN_CLUSTER_SIZE = 2
+
+
+# Create or connect to database
+def init_db():
+    """Open DB_FILE and create the images, faces and clusters tables if missing.
+
+    Returns:
+        An open sqlite3 connection; the caller is responsible for closing it.
+    """
+    conn = sqlite3.connect(DB_FILE)
+    c = conn.cursor()
+    c.execute('''CREATE TABLE IF NOT EXISTS images
+                 (id INTEGER PRIMARY KEY, path TEXT UNIQUE)''')
+    c.execute('''CREATE TABLE IF NOT EXISTS faces
+                 (id INTEGER PRIMARY KEY, image_id INTEGER, location TEXT, encoding BLOB, cluster_id INTEGER,
+                 FOREIGN KEY(image_id) REFERENCES images(id))''')
+    c.execute('''CREATE TABLE IF NOT EXISTS clusters
+                 (id INTEGER PRIMARY KEY, label TEXT)''')
+    conn.commit()
+    return conn
+
+
+# Walk folder and process images
+def process_images(conn):
+    """Walk PHOTOS_DIR, register every image, and collect its face encodings.
+
+    Args:
+        conn: Open sqlite3 connection whose schema was created by init_db().
+
+    Returns:
+        A tuple (encodings, face_data). encodings is a numpy array built from
+        the per-face encodings; face_data is a parallel list of
+        (image_id, location string, encoding bytes) tuples.
+    """
+    c = conn.cursor()
+    encodings = []
+    face_data = []
+
+    for root, _, files in os.walk(PHOTOS_DIR):
+        for file in files:
+            if not file.lower().endswith(IMAGE_EXTENSIONS):
+                continue
+            path = os.path.join(root, file)
+            try:
+                image = face_recognition.load_image_file(path)
+                locations = face_recognition.face_locations(image)
+                face_encodings = face_recognition.face_encodings(image, locations)
+
+                c.execute("INSERT OR IGNORE INTO images (path) VALUES (?)", (path,))
+                c.execute("SELECT id FROM images WHERE path = ?", (path,))
+                image_id = c.fetchone()[0]
+
+                for loc, enc in zip(locations, face_encodings):
+                    encodings.append(enc)
+                    face_data.append((image_id, str(loc), enc.tobytes()))
+            except Exception as e:
+                # Best effort: one unreadable image must not abort the whole scan.
+                print(f"Error processing {path}: {e}")
+
+    conn.commit()
+    return np.array(encodings), face_data
+
+
+# Cluster encodings
+def cluster_encodings(encodings):
+    """Assign a cluster label to every face encoding using HDBSCAN.
+
+    Args:
+        encodings: Sequence of face encodings, one per detected face.
+
+    Returns:
+        One integer label per encoding; -1 marks a face that belongs to no
+        cluster. An empty list is returned when there are no encodings.
+    """
+    if len(encodings) == 0:
+        return []
+    if len(encodings) < MIN_CLUSTER_SIZE:
+        # No cluster can reach MIN_CLUSTER_SIZE members, so skip fitting and
+        # report every face as noise.
+        return [-1] * len(encodings)
+    clusterer = hdbscan.HDBSCAN(min_cluster_size=MIN_CLUSTER_SIZE)
+    return clusterer.fit_predict(encodings)
+
+
+# Store clusters and update faces
+def store_clusters(conn, labels, face_data):
+    """Insert one clusters row per non-noise label and one faces row per face.
+
+    Args:
+        conn: Open sqlite3 connection whose schema was created by init_db().
+        labels: Cluster label for each face; -1 means noise (no cluster).
+        face_data: (image_id, location string, encoding bytes) tuples, in
+            the same order as labels.
+
+    Raises:
+        ValueError: If labels and face_data differ in length.
+    """
+    labels = [int(label) for label in labels]
+    if len(labels) != len(face_data):
+        raise ValueError("labels and face_data must have the same length")
+
+    c = conn.cursor()
+    cluster_ids = {-1: None}  # Noise faces are stored without a cluster
+    for label in sorted(set(labels) - {-1}):
+        c.execute("INSERT INTO clusters (label) VALUES (?)", (f"Cluster {label}",))
+        cluster_ids[label] = c.lastrowid
+
+    # Single pass over the faces instead of rescanning them once per label.
+    c.executemany(
+        "INSERT INTO faces (image_id, location, encoding, cluster_id) VALUES (?, ?, ?, ?)",
+        [(image_id, loc_str, enc_blob, cluster_ids[label])
+         for (image_id, loc_str, enc_blob), label in zip(face_data, labels)],
+    )
+    conn.commit()
+
+
+# Main function
+def main():
+    """Run the pipeline: open the database, scan photos, cluster, and persist."""
+    conn = init_db()
+    try:
+        encodings, face_data = process_images(conn)
+        labels = cluster_encodings(encodings)
+        store_clusters(conn, labels, face_data)
+    finally:
+        # Close the connection even when a pipeline stage raises.
+        conn.close()
+    print(f"Processing complete. Data stored in {DB_FILE}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index ea1f4fb..0b8f57a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,10 @@ click==8.2.1
 dlib==20.0.0
 face-recognition==1.3.0
 face-recognition-models==0.3.0
+hdbscan==0.8.40
+joblib==1.5.1
 numpy==2.2.6
 pillow==11.3.0
+scikit-learn==1.7.0
+scipy==1.15.3
+threadpoolctl==3.6.0