// Mirror of https://github.com/osmarks/meme-search-engine.git (synced 2026-06-08 22:02:11 +00:00); 177 lines, 5.4 KiB, Rust.
use anyhow::{anyhow, Result};
|
|
use image::{DynamicImage, GenericImageView, ImageFormat};
|
|
use regex::Regex;
|
|
use reqwest::{
|
|
header::{HeaderMap, HeaderValue},
|
|
multipart::{Form, Part},
|
|
Client,
|
|
};
|
|
use serde_json::Value;
|
|
use std::{io::Cursor, time::{SystemTime, UNIX_EPOCH}};
|
|
use serde::{Deserialize, Serialize};
|
|
use tracing::instrument;
|
|
|
|
// Pattern matched against the upload response body. Capture group 1 is the object literal
// passed to `AF_initDataCallback` whose key is `ds:1`; `scan_image_chunk` parses it as JSON5.
const CALLBACK_REGEX: &str = r">AF_initDataCallback\((\{key: 'ds:1'.*?\})\);</script>";
|
|
// Size limit in pixels: `scan_image` downscales images wider than this to this width and
// splits the image into horizontal chunks at most this tall.
const MAX_DIM: u32 = 1024;
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct SegmentCoords {
|
|
pub x: i32,
|
|
pub y: i32,
|
|
pub w: i32,
|
|
pub h: i32,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize, Serialize)]
|
|
pub struct Segment {
|
|
pub coords: SegmentCoords,
|
|
pub text: String,
|
|
}
|
|
|
|
/// The text segments recognised in an image (or image chunk).
pub type ScanResult = Vec<Segment>;
|
|
|
|
fn rationalize_coords_format1(
|
|
image_w: f64,
|
|
image_h: f64,
|
|
center_x_fraction: f64,
|
|
center_y_fraction: f64,
|
|
width_fraction: f64,
|
|
height_fraction: f64,
|
|
) -> SegmentCoords {
|
|
SegmentCoords {
|
|
x: ((center_x_fraction - width_fraction / 2.0) * image_w).round() as i32,
|
|
y: ((center_y_fraction - height_fraction / 2.0) * image_h).round() as i32,
|
|
w: (width_fraction * image_w).round() as i32,
|
|
h: (height_fraction * image_h).round() as i32,
|
|
}
|
|
}
|
|
|
|
#[instrument(skip(client, image))]
|
|
async fn scan_image_chunk(
|
|
client: &Client,
|
|
image: &[u8],
|
|
image_width: u32,
|
|
image_height: u32,
|
|
) -> Result<ScanResult> {
|
|
let timestamp = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.unwrap()
|
|
.as_micros();
|
|
|
|
let part = Part::bytes(image.to_vec())
|
|
.file_name(format!("ocr{}.png", timestamp))
|
|
.mime_str("image/png")?;
|
|
|
|
let form = Form::new().part("encoded_image", part);
|
|
|
|
let mut headers = HeaderMap::new();
|
|
headers.insert(
|
|
"User-Agent",
|
|
HeaderValue::from_static("Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36"),
|
|
);
|
|
headers.insert("Cookie", HeaderValue::from_str(&format!("SOCS=CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg; stcs={}", timestamp))?);
|
|
|
|
let response = client
|
|
.post(&format!("https://lens.google.com/v3/upload?stcs={}", timestamp))
|
|
.multipart(form)
|
|
.headers(headers)
|
|
.send()
|
|
.await?;
|
|
|
|
let body = response.text().await?;
|
|
|
|
let re = Regex::new(CALLBACK_REGEX)?;
|
|
let captures = re
|
|
.captures(&body)
|
|
.ok_or_else(|| anyhow!("invalid API response"))?;
|
|
let match_str = captures.get(1).unwrap().as_str();
|
|
|
|
let lens_object: Value = json5::from_str(match_str)?;
|
|
|
|
if lens_object.get("errorHasStatus").is_some() {
|
|
return Err(anyhow!("lens failed"));
|
|
}
|
|
|
|
let root = lens_object["data"].as_array().unwrap();
|
|
|
|
let mut text_segments = Vec::new();
|
|
let mut text_regions = Vec::new();
|
|
|
|
let text_segments_raw = root[3][4][0][0]
|
|
.as_array()
|
|
.ok_or_else(|| anyhow!("invalid text segments"))?;
|
|
let text_regions_raw = root[2][3][0]
|
|
.as_array()
|
|
.ok_or_else(|| anyhow!("invalid text regions"))?;
|
|
|
|
for region in text_regions_raw {
|
|
let region_data = region.as_array().unwrap();
|
|
if region_data[11].as_str().unwrap().starts_with("text:") {
|
|
let raw_coords = region_data[1].as_array().unwrap();
|
|
let coords = rationalize_coords_format1(
|
|
image_width as f64,
|
|
image_height as f64,
|
|
raw_coords[0].as_f64().unwrap(),
|
|
raw_coords[1].as_f64().unwrap(),
|
|
raw_coords[2].as_f64().unwrap(),
|
|
raw_coords[3].as_f64().unwrap(),
|
|
);
|
|
text_regions.push(coords);
|
|
}
|
|
}
|
|
|
|
for segment in text_segments_raw {
|
|
let text_segment = segment.as_str().unwrap().to_string();
|
|
text_segments.push(text_segment);
|
|
}
|
|
|
|
Ok(text_segments
|
|
.into_iter()
|
|
.zip(text_regions.into_iter())
|
|
.map(|(text, coords)| Segment { text, coords })
|
|
.collect())
|
|
}
|
|
|
|
#[instrument(skip(client))]
|
|
pub async fn scan_image(client: &Client, image: &DynamicImage) -> Result<ScanResult> {
|
|
let mut result = ScanResult::new();
|
|
let (width, height) = image.dimensions();
|
|
|
|
let (width, height, image) = if width > MAX_DIM {
|
|
let height = ((height as f64) * (MAX_DIM as f64) / (width as f64)).round() as u32;
|
|
let new_image = tokio::task::block_in_place(|| image.resize_exact(MAX_DIM, height, image::imageops::FilterType::CatmullRom));
|
|
(MAX_DIM, height, std::borrow::Cow::Owned(new_image))
|
|
} else {
|
|
(width, height, std::borrow::Cow::Borrowed(image))
|
|
};
|
|
|
|
let mut y = 0;
|
|
while y < height {
|
|
let chunk_height = (height - y).min(MAX_DIM);
|
|
let chunk = tokio::task::block_in_place(|| {
|
|
let chunk = image.view(0, y, width, chunk_height).to_image();
|
|
let mut buf = Vec::new();
|
|
let mut csr = Cursor::new(&mut buf);
|
|
chunk.write_to(&mut csr, ImageFormat::Png)?;
|
|
Ok::<Vec<u8>, anyhow::Error>(buf)
|
|
})?;
|
|
|
|
let res = scan_image_chunk(client, &chunk, width, chunk_height).await?;
|
|
for segment in res {
|
|
result.push(Segment {
|
|
text: segment.text,
|
|
coords: SegmentCoords {
|
|
y: segment.coords.y + y as i32,
|
|
x: segment.coords.x,
|
|
w: segment.coords.w,
|
|
h: segment.coords.h,
|
|
},
|
|
});
|
|
}
|
|
|
|
y += chunk_height;
|
|
}
|
|
|
|
Ok(result)
|
|
}
|