import sklearn.decomposition
import numpy as np
import sqlite3
import asyncio
import aiohttp
import base64

# URL constants. meme_search_url is the prefix to which emb_url() appends an
# encoded embedding.
meme_search_backend = "http://localhost:1707/"
memes_url = "https://i.osmarks.net/memes-or-something/"
meme_search_url = "https://mse.osmarks.net/?e="

# Module-level SQLite connection shared by the functions below; rows are
# returned as sqlite3.Row objects.
db = sqlite3.connect("/srv/mse/data.sqlite3")
db.row_factory = sqlite3.Row

def fetch_all_files():
    """Load every stored embedding from the `files` table.

    Selects the `embedding` column of all rows where it is not NULL, decodes
    each blob as a float16 buffer and returns the decoded vectors combined
    into a single numpy array.
    """
    csr = db.execute("SELECT embedding FROM files WHERE embedding IS NOT NULL")
    # np.frombuffer yields a view onto the blob's bytes; .copy() gives each
    # vector its own independent storage.
    x = [ np.frombuffer(row[0], dtype="float16").copy() for row in csr.fetchall() ]
    csr.close()
    # NOTE(review): presumably all blobs have the same length, so this is a
    # 2-D (rows, dims) array -- confirm against the writer of `files`.
    return np.array(x)

# Script body: load all embeddings, fit a PCA with default settings on them,
# and print the per-component explained variance ratios and the components.
embeddings = fetch_all_files()
print("loaded")
pca = sklearn.decomposition.PCA()
pca.fit(embeddings)
print(pca.explained_variance_ratio_)
print(pca.components_)

def emb_url(embedding):
    """Return a search URL that carries `embedding` in its query string.

    The embedding is cast to float16, serialised to raw bytes, encoded as
    URL-safe base64 and appended to `meme_search_url`.
    """
    return meme_search_url + base64.urlsafe_b64encode(embedding.astype(np.float16).tobytes()).decode("utf-8")

async def get_exemplars():
    with open("components.html", "w") as f:
        f.write("""