diff --git a/.sqlx/query-fccbb4262990c387079141c60a96d4e030ff82b93975f13d96539957b24f3c13.json b/.sqlx/query-fccbb4262990c387079141c60a96d4e030ff82b93975f13d96539957b24f3c13.json
new file mode 100644
index 0000000..a90ef33
--- /dev/null
+++ b/.sqlx/query-fccbb4262990c387079141c60a96d4e030ff82b93975f13d96539957b24f3c13.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "INSERT OR REPLACE INTO files (filename, embedding_time, thumbnail_time) VALUES (?, ?, ?)",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 3
+ },
+ "nullable": []
+ },
+ "hash": "fccbb4262990c387079141c60a96d4e030ff82b93975f13d96539957b24f3c13"
+}
diff --git a/clipfront2/src/App.svelte b/clipfront2/src/App.svelte
index b0fa151..6b6ca54 100644
--- a/clipfront2/src/App.svelte
+++ b/clipfront2/src/App.svelte
@@ -67,7 +67,7 @@
border: 1px solid gray
*
display: block
- .result img
+ .result img, .result video
width: 100%
@@ -84,6 +84,7 @@
Capitalization is ignored.
Only English is supported. Other languages might work slightly.
Sliders are generated from PCA on the index. The human-readable labels are approximate.
+ Want your own deployment? Use the open-source code on GitHub.
@@ -138,15 +139,21 @@
{#key `${queryCounter}${result.file}`}
-
+ {#if util.hasFormat(results, result, "VIDEO")}
+
+ {:else}
+
+ {/if}
{/key}
@@ -240,7 +247,10 @@
let displayedResults = []
const runSearch = async () => {
if (!resultPromise) {
- let args = {"terms": queryTerms.filter(x => x.text !== "").map(x => ({ image: x.imageData, text: x.text, embedding: x.embedding, predefined_embedding: x.predefinedEmbedding, weight: x.weight * { "+": 1, "-": -1 }[x.sign] }))}
+ let args = {
+ "terms": queryTerms.filter(x => x.text !== "").map(x => ({ image: x.imageData, text: x.text, embedding: x.embedding, predefined_embedding: x.predefinedEmbedding, weight: x.weight * { "+": 1, "-": -1 }[x.sign] })),
+ "include_video": true
+ }
queryCounter += 1
resultPromise = util.doQuery(args).then(res => {
error = null
diff --git a/src/common.rs b/src/common.rs
index 86e705e..7b207df 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -12,18 +12,20 @@ pub struct InferenceServerConfig {
pub embedding_size: usize,
}
+pub fn resize_for_embed_sync
+ Send + 'static>(config: InferenceServerConfig, image: T) -> Result> {
+ let new = image.borrow().resize(
+ config.image_size.0,
+ config.image_size.1,
+ FilterType::Lanczos3
+ );
+ let mut buf = Vec::new();
+ let mut csr = Cursor::new(&mut buf);
+ new.write_to(&mut csr, ImageFormat::Png)?;
+ Ok::, anyhow::Error>(buf)
+}
+
pub async fn resize_for_embed + Send + 'static>(config: InferenceServerConfig, image: T) -> Result> {
- let resized = tokio::task::spawn_blocking(move || {
- let new = image.borrow().resize(
- config.image_size.0,
- config.image_size.1,
- FilterType::Lanczos3
- );
- let mut buf = Vec::new();
- let mut csr = Cursor::new(&mut buf);
- new.write_to(&mut csr, ImageFormat::Png)?;
- Ok::, anyhow::Error>(buf)
- }).await??;
+ let resized = tokio::task::spawn_blocking(move || resize_for_embed_sync(config, image)).await??;
Ok(resized)
}
diff --git a/src/main.rs b/src/main.rs
index 1a6b740..d9e4e67 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
use std::{collections::HashMap, io::Cursor};
use std::path::Path;
use std::sync::Arc;
@@ -12,11 +13,15 @@ use axum::{
Router,
http::StatusCode
};
+use common::resize_for_embed_sync;
+use ffmpeg_the_third::device::input::video;
+use image::RgbImage;
use image::{imageops::FilterType, io::Reader as ImageReader, DynamicImage, ImageFormat};
use reqwest::Client;
use serde::{Deserialize, Serialize};
+use sqlx::SqliteConnection;
use sqlx::{sqlite::SqliteConnectOptions, SqlitePool};
-use tokio::sync::{broadcast, mpsc};
+use tokio::sync::{broadcast, mpsc, RwLock};
use tokio::task::JoinHandle;
use walkdir::WalkDir;
use base64::prelude::*;
@@ -31,6 +36,7 @@ use ndarray::ArrayBase;
mod ocr;
mod common;
+mod video_reader;
use crate::ocr::scan_image;
use crate::common::{InferenceServerConfig, resize_for_embed, EmbeddingRequest, get_backend_config, query_clip_server};
@@ -41,6 +47,8 @@ lazy_static! {
static ref TERMS_COUNTER: IntCounterVec = register_int_counter_vec!("mse_terms", "terms used in queries, by type", &["type"]).unwrap();
static ref IMAGES_LOADED_COUNTER: IntCounter = register_int_counter!("mse_loads", "images loaded by ingest process").unwrap();
static ref IMAGES_LOADED_ERROR_COUNTER: IntCounter = register_int_counter!("mse_load_errors", "image load fails by ingest process").unwrap();
+ static ref VIDEOS_LOADED_COUNTER: IntCounter = register_int_counter!("mse_video_loads", "video loaded by ingest process").unwrap();
+ static ref VIDEOS_LOADED_ERROR_COUNTER: IntCounter = register_int_counter!("mse_video_load_errors", "video load fails by ingest process").unwrap();
static ref IMAGES_EMBEDDED_COUNTER: IntCounter = register_int_counter!("mse_embeds", "images embedded by ingest process").unwrap();
static ref IMAGES_OCRED_COUNTER: IntCounter = register_int_counter!("mse_ocrs", "images OCRed by ingest process").unwrap();
static ref IMAGES_OCRED_ERROR_COUNTER: IntCounter = register_int_counter!("mse_ocr_errors", "image OCR fails by ingest process").unwrap();
@@ -72,7 +80,7 @@ struct Config {
#[derive(Debug)]
struct IIndex {
vectors: scalar_quantizer::ScalarQuantizerIndexImpl,
- filenames: Vec,
+ filenames: Vec,
format_codes: Vec,
format_names: Vec,
}
@@ -89,35 +97,20 @@ CREATE TABLE IF NOT EXISTS files (
thumbnails BLOB
);
-CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
- filename,
- ocr,
- tokenize='unicode61 remove_diacritics 2',
- content='files'
-);
-
CREATE TABLE IF NOT EXISTS predefined_embeddings (
name TEXT NOT NULL PRIMARY KEY,
embedding BLOB NOT NULL
);
-CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
- INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
-END;
-
-CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
- INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
-END;
-
-CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
- INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
- INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
-END;
+DROP TRIGGER IF EXISTS ocr_fts_upd;
+DROP TRIGGER IF EXISTS ocr_fts_ins;
+DROP TRIGGER IF EXISTS ocr_fts_del;
+DROP TABLE IF EXISTS ocr_fts;
"#;
-#[derive(Debug, sqlx::FromRow, Clone, Default)]
-struct FileRecord {
- filename: String,
+#[derive(Debug, sqlx::FromRow, Clone)]
+struct RawFileRecord {
+ filename: Vec,
embedding_time: Option,
ocr_time: Option,
thumbnail_time: Option,
@@ -128,6 +121,14 @@ struct FileRecord {
thumbnails: Option>,
}
+#[derive(Debug, Clone)]
+struct FileRecord {
+ filename: String,
+ needs_embed: bool,
+ needs_ocr: bool,
+ needs_thumbnail: bool
+}
+
#[derive(Debug, Clone)]
struct WConfig {
backend: InferenceServerConfig,
@@ -138,14 +139,49 @@ struct WConfig {
#[derive(Debug)]
struct LoadedImage {
image: Arc,
- filename: String,
- original_size: usize,
+ filename: Filename,
+ original_size: Option,
+ fast_thumbnails_only: bool
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
+enum Filename {
+ Actual(String),
+ VideoFrame(String, u64)
+}
+
+// this is a somewhat horrible hack, but probably nobody has NUL bytes at the start of filenames?
+impl Filename {
+ fn decode(buf: Vec) -> Result {
+ Ok(match buf.strip_prefix(&[0]) {
+ Some(remainder) => rmp_serde::from_read(&*remainder)?,
+ None => Filename::Actual(String::from_utf8(buf)?.to_string())
+ })
+ }
+
+ fn encode(&self) -> Result> {
+ match self {
+ Self::Actual(s) => Ok(s.to_string().into_bytes()),
+ x => {
+ let mut out = rmp_serde::to_vec(x).context("should not happen")?;
+ out.insert(0, 0);
+ Ok(out)
+ }
+ }
+ }
+
+ fn container_filename(&self) -> String {
+ match self {
+ Self::Actual(s) => s.to_string(),
+ Self::VideoFrame(s, _) => s.to_string()
+ }
+ }
}
#[derive(Debug)]
struct EmbeddingInput {
image: Vec,
- filename: String,
+ filename: Filename,
}
fn timestamp() -> i64 {
@@ -155,21 +191,25 @@ fn timestamp() -> i64 {
#[derive(Debug, Clone)]
struct ImageFormatConfig {
target_width: u32,
- target_filesize: u32,
+ target_filesize: usize,
quality: u8,
format: ImageFormat,
extension: String,
+ is_fast: bool
}
-fn generate_filename_hash(filename: &str) -> String {
+fn generate_filename_hash(filename: &Filename) -> String {
use std::hash::{Hash, Hasher};
let mut hasher = fnv::FnvHasher::default();
- filename.hash(&mut hasher);
+ match filename {
+ Filename::Actual(x) => x.hash(&mut hasher),
+ _ => filename.hash(&mut hasher)
+ };
BASE64_URL_SAFE_NO_PAD.encode(hasher.finish().to_le_bytes())
}
fn generate_thumbnail_filename(
- filename: &str,
+ filename: &Filename,
format_name: &str,
format_config: &ImageFormatConfig,
) -> String {
@@ -200,6 +240,7 @@ fn image_formats(_config: &Config) -> HashMap {
quality: 70,
format: ImageFormat::Jpeg,
extension: "jpg".to_string(),
+ is_fast: true
},
);
formats.insert(
@@ -210,6 +251,7 @@ fn image_formats(_config: &Config) -> HashMap {
quality: 80,
format: ImageFormat::Jpeg,
extension: "jpg".to_string(),
+ is_fast: true
},
);
formats.insert(
@@ -220,6 +262,7 @@ fn image_formats(_config: &Config) -> HashMap {
quality: 0,
format: ImageFormat::Jpeg,
extension: "jpg".to_string(),
+ is_fast: false
},
);
formats.insert(
@@ -230,6 +273,7 @@ fn image_formats(_config: &Config) -> HashMap {
quality: 80,
format: ImageFormat::Avif,
extension: "avif".to_string(),
+ is_fast: false
},
);
formats.insert(
@@ -240,11 +284,19 @@ fn image_formats(_config: &Config) -> HashMap {
quality: 70,
format: ImageFormat::Avif,
extension: "avif".to_string(),
+ is_fast: false
},
);
formats
}
+async fn ensure_filename_record_exists(conn: &mut SqliteConnection, filename_enc: &Vec) -> Result<()> {
+ sqlx::query!("INSERT OR IGNORE INTO files (filename) VALUES (?)", filename_enc)
+ .execute(conn)
+ .await?;
+ Ok(())
+}
+
async fn ingest_files(config: Arc) -> Result<()> {
let pool = initialize_database(&config.service).await?;
let client = Client::new();
@@ -258,47 +310,89 @@ async fn ingest_files(config: Arc) -> Result<()> {
let cpus = num_cpus::get();
+ let video_lengths = Arc::new(RwLock::new(HashMap::new()));
+ let video_thumb_times = Arc::new(RwLock::new(HashMap::new()));
+ let video_embed_times = Arc::new(RwLock::new(HashMap::new()));
+
// Image loading and preliminary resizing
let image_loading: JoinHandle> = tokio::spawn({
let config = config.clone();
+ let video_lengths = video_lengths.clone();
let stream = ReceiverStream::new(to_process_rx).map(Ok);
stream.try_for_each_concurrent(Some(cpus), move |record| {
let config = config.clone();
let to_embed_tx = to_embed_tx.clone();
let to_thumbnail_tx = to_thumbnail_tx.clone();
let to_ocr_tx = to_ocr_tx.clone();
+ let video_lengths = video_lengths.clone();
async move {
let path = Path::new(&config.service.files).join(&record.filename);
let image: Result> = tokio::task::block_in_place(|| Ok(Arc::new(ImageReader::open(&path)?.with_guessed_format()?.decode()?)));
let image = match image {
Ok(image) => image,
Err(e) => {
- log::error!("Could not read {}: {}", record.filename, e);
+ log::warn!("Could not read {} as image: {}", record.filename, e);
+ let filename = record.filename.clone();
IMAGES_LOADED_ERROR_COUNTER.inc();
+ let video_length = tokio::task::spawn_blocking(move || -> Result