1
0
mirror of https://github.com/osmarks/meme-search-engine.git synced 2025-02-22 14:00:09 +00:00
meme-search-engine/perf_test.py
osmarks e57931d47f Multithread query server
While profiling suggests that most operations are cheap and IO-bound rather than CPU-bound, the GEMM for deduplication is pretty slow. As such, use multiple threads for higher throughput.
2025-01-31 13:47:47 +00:00

30 lines
890 B
Python

import numpy as np
import aiohttp
import asyncio
import sys
# Benchmark workload: 1000 random query vectors of length 1152, drawn from a
# standard normal distribution. Each row is sent as one search embedding.
queries = np.random.standard_normal(size=(1000, 1152))
async def main(
    url: str = "http://localhost:5601",
    *,
    concurrency: int = 100,
    k: int = 10,
) -> None:
    """POST one search request per row of ``queries`` and wait for all of them.

    At most ``concurrency`` requests are in flight at any time. A ``.`` is
    written to stdout for each response, before its JSON body is decoded, so
    progress is visible while the benchmark runs.

    Args:
        url: Endpoint each query is POSTed to.
        concurrency: Maximum number of requests allowed in flight at once.
        k: Value sent as the ``"k"`` field of every request body.

    Raises:
        Any exception raised by a request propagates out of the task group,
        which cancels the remaining tasks and aborts the run.
    """
    # Bounds the number of outstanding tasks: a slot is taken before a task
    # is spawned and handed back when that task finishes.
    sem = asyncio.Semaphore(concurrency)

    async with aiohttp.ClientSession() as sess, asyncio.TaskGroup() as tg:

        async def lookup(embedding):
            """Send a single embedding query and return its ``"matches"``."""
            payload = {
                # ndarray.tolist() yields plain Python floats, which the JSON
                # encoder accepts (NumPy scalars are not serialisable).
                "terms": [{"embedding": embedding.tolist()}],
                "k": k,
            }
            async with sess.post(url, json=payload) as res:
                sys.stdout.write(".")
                sys.stdout.flush()
                return (await res.json())["matches"]

        async def dispatch(embedding):
            """Run one lookup, always returning its concurrency slot."""
            try:
                await lookup(embedding)
            finally:
                # Release even on failure so the slot count stays correct.
                sem.release()

        # Iterate the rows directly so the request count always follows the
        # size of ``queries`` instead of a duplicated literal.
        for embedding in queries:
            await sem.acquire()
            tg.create_task(dispatch(embedding))
# Only launch the benchmark when this file is executed as a script, so that
# importing the module does not immediately start firing requests.
if __name__ == "__main__":
    asyncio.run(main())