1
0
mirror of https://github.com/janeczku/calibre-web synced 2024-09-20 19:29:46 +00:00
This commit is contained in:
quarz12 2023-07-31 19:35:42 +02:00 committed by GitHub
commit a9a83c8cb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 72 additions and 27 deletions

View File

@ -20,6 +20,7 @@
import os import os
import re import re
import json import json
import traceback
from datetime import datetime from datetime import datetime
from urllib.parse import quote from urllib.parse import quote
import unidecode import unidecode
@ -32,6 +33,7 @@ from sqlalchemy.orm import relationship, sessionmaker, scoped_session
from sqlalchemy.orm.collections import InstrumentedList from sqlalchemy.orm.collections import InstrumentedList
from sqlalchemy.ext.declarative import DeclarativeMeta from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.exc import OperationalError from sqlalchemy.exc import OperationalError
try: try:
# Compatibility with sqlalchemy 2.0 # Compatibility with sqlalchemy 2.0
from sqlalchemy.orm import declarative_base from sqlalchemy.orm import declarative_base
@ -40,6 +42,7 @@ except ImportError:
from sqlalchemy.pool import StaticPool from sqlalchemy.pool import StaticPool
from sqlalchemy.sql.expression import and_, true, false, text, func, or_ from sqlalchemy.sql.expression import and_, true, false, text, func, or_
from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy import desc
from flask_login import current_user from flask_login import current_user
from flask_babel import gettext as _ from flask_babel import gettext as _
from flask_babel import get_locale from flask_babel import get_locale
@ -49,7 +52,10 @@ from . import logger, ub, isoLanguages
from .pagination import Pagination from .pagination import Pagination
from weakref import WeakSet from weakref import WeakSet
from thefuzz.fuzz import partial_ratio, partial_token_set_ratio, partial_token_sort_ratio, ratio
# Minimum fuzzy-match score (in percent) that max_ratio() must reach for a word to count as a hit
# in search_query(): 100 means exact match, 75 allows exactly 1 wrong character in a 4 letter word
FUZZY_SEARCH_ACCURACY = 75
log = logger.create() log = logger.create()
@ -399,8 +405,20 @@ class Books(Base):
def __repr__(self): def __repr__(self):
return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}')>".format(self.title, self.sort, self.author_sort, return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}')>".format(self.title, self.sort, self.author_sort,
self.timestamp, self.pubdate, self.series_index, self.timestamp, self.pubdate, self.series_index,
self.last_modified, self.path, self.has_cover) self.last_modified, self.path, self.has_cover)
def __str__(self):
    """Return the title plus all tag, series, author and publisher names, space-separated."""

    def joined_names(items):
        # Collapse a related collection into its space-separated names
        return " ".join([item.name for item in items])

    parts = [
        self.title,
        joined_names(self.tags),
        joined_names(self.series),
        joined_names(self.authors),
        joined_names(self.publishers),
    ]
    return "{0} {1} {2} {3} {4}".format(*parts)
@property @property
def atom_timestamp(self): def atom_timestamp(self):
@ -442,13 +460,15 @@ class CustomColumns(Base):
content['category_sort'] = "value" content['category_sort'] = "value"
content['is_csp'] = False content['is_csp'] = False
content['is_editable'] = self.editable content['is_editable'] = self.editable
content['rec_index'] = sequence + 22 # toDo why ?? content['rec_index'] = sequence + 22 # toDo why ??
if isinstance(value, datetime): if isinstance(value, datetime):
content['#value#'] = {"__class__": "datetime.datetime", "__value__": value.strftime("%Y-%m-%dT%H:%M:%S+00:00")} content['#value#'] = {"__class__": "datetime.datetime",
"__value__": value.strftime("%Y-%m-%dT%H:%M:%S+00:00")}
else: else:
content['#value#'] = value content['#value#'] = value
content['#extra#'] = extra content['#extra#'] = extra
content['is_multiple2'] = {} if not self.is_multiple else {"cache_to_list": "|", "ui_to_list": ",", "list_to_ui": ", "} content['is_multiple2'] = {} if not self.is_multiple else {"cache_to_list": "|", "ui_to_list": ",",
"list_to_ui": ", "}
return json.dumps(content, ensure_ascii=False) return json.dumps(content, ensure_ascii=False)
@ -469,7 +489,7 @@ class AlchemyEncoder(json.JSONEncoder):
el = list() el = list()
# ele = None # ele = None
for ele in data: for ele in data:
if hasattr(ele, 'value'): # converter for custom_column values if hasattr(ele, 'value'): # converter for custom_column values
el.append(str(ele.value)) el.append(str(ele.value))
elif ele.get: elif ele.get:
el.append(ele.get()) el.append(ele.get())
@ -508,7 +528,6 @@ class CalibreDB:
if init: if init:
self.init_db(expire_on_commit) self.init_db(expire_on_commit)
def init_db(self, expire_on_commit=True): def init_db(self, expire_on_commit=True):
if self._init: if self._init:
self.init_session(expire_on_commit) self.init_session(expire_on_commit)
@ -680,13 +699,13 @@ class CalibreDB:
if not read_column: if not read_column:
bd = (self.session.query(Books, ub.ReadBook.read_status, ub.ArchivedBook.is_archived).select_from(Books) bd = (self.session.query(Books, ub.ReadBook.read_status, ub.ArchivedBook.is_archived).select_from(Books)
.join(ub.ReadBook, and_(ub.ReadBook.user_id == int(current_user.id), ub.ReadBook.book_id == book_id), .join(ub.ReadBook, and_(ub.ReadBook.user_id == int(current_user.id), ub.ReadBook.book_id == book_id),
isouter=True)) isouter=True))
else: else:
try: try:
read_column = cc_classes[read_column] read_column = cc_classes[read_column]
bd = (self.session.query(Books, read_column.value, ub.ArchivedBook.is_archived).select_from(Books) bd = (self.session.query(Books, read_column.value, ub.ArchivedBook.is_archived).select_from(Books)
.join(read_column, read_column.book == book_id, .join(read_column, read_column.book == book_id,
isouter=True)) isouter=True))
except (KeyError, AttributeError, IndexError): except (KeyError, AttributeError, IndexError):
log.error("Custom Column No.{} does not exist in calibre database".format(read_column)) log.error("Custom Column No.{} does not exist in calibre database".format(read_column))
# Skip linking read column and return None instead of read status # Skip linking read column and return None instead of read status
@ -739,11 +758,11 @@ class CalibreDB:
pos_cc_list = current_user.allowed_column_value.split(',') pos_cc_list = current_user.allowed_column_value.split(',')
pos_content_cc_filter = true() if pos_cc_list == [''] else \ pos_content_cc_filter = true() if pos_cc_list == [''] else \
getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \ getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \
any(cc_classes[self.config.config_restricted_column].value.in_(pos_cc_list)) any(cc_classes[self.config.config_restricted_column].value.in_(pos_cc_list))
neg_cc_list = current_user.denied_column_value.split(',') neg_cc_list = current_user.denied_column_value.split(',')
neg_content_cc_filter = false() if neg_cc_list == [''] else \ neg_content_cc_filter = false() if neg_cc_list == [''] else \
getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \ getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \
any(cc_classes[self.config.config_restricted_column].value.in_(neg_cc_list)) any(cc_classes[self.config.config_restricted_column].value.in_(neg_cc_list))
except (KeyError, AttributeError, IndexError): except (KeyError, AttributeError, IndexError):
pos_content_cc_filter = false() pos_content_cc_filter = false()
neg_content_cc_filter = true() neg_content_cc_filter = true()
@ -823,18 +842,18 @@ class CalibreDB:
element = 0 element = 0
while indx: while indx:
if indx >= 3: if indx >= 3:
query = query.outerjoin(join[element], join[element+1]).outerjoin(join[element+2]) query = query.outerjoin(join[element], join[element + 1]).outerjoin(join[element + 2])
indx -= 3 indx -= 3
element += 3 element += 3
elif indx == 2: elif indx == 2:
query = query.outerjoin(join[element], join[element+1]) query = query.outerjoin(join[element], join[element + 1])
indx -= 2 indx -= 2
element += 2 element += 2
elif indx == 1: elif indx == 1:
query = query.outerjoin(join[element]) query = query.outerjoin(join[element])
indx -= 1 indx -= 1
element += 1 element += 1
query = query.filter(db_filter)\ query = query.filter(db_filter) \
.filter(self.common_filters(allow_show_archived)) .filter(self.common_filters(allow_show_archived))
entries = list() entries = list()
pagination = list() pagination = list()
@ -904,12 +923,18 @@ class CalibreDB:
.filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first() .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()
def search_query(self, term, config, *join): def search_query(self, term, config, *join):
term.strip().lower() term = term.strip().lower()
self.session.connection().connection.connection.create_function("lower", 1, lcase) self.session.connection().connection.connection.create_function("lower", 1, lcase)
q = list() self.session.connection().connection.connection.create_function("max_ratio", 2, max_ratio)
author_terms = re.split("[, ]+", term) # splits search term into single words
for author_term in author_terms: words = re.split("[,\s]+", term)
q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%"))) # put the longest words first to make queries more efficient
words.sort(key=len, reverse=True)
words=list(filter(lambda w:len(w)>3,words))
# no word in search term is longer than 3 letters -> return empty query #TODO give some kind of error message
if len(words)==0:
return self.session.query(Books).filter(False)
query = self.generate_linked_query(config.config_read_column, Books) query = self.generate_linked_query(config.config_read_column, Books)
if len(join) == 6: if len(join) == 6:
query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5]) query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5])
@ -920,19 +945,28 @@ class CalibreDB:
elif len(join) == 1: elif len(join) == 1:
query = query.outerjoin(join[0]) query = query.outerjoin(join[0])
filter_expression = []
cc = self.get_cc_columns(config, filter_config_custom_read=True) cc = self.get_cc_columns(config, filter_config_custom_read=True)
filter_expression = [Books.tags.any(func.lower(Tags.name).ilike("%" + term + "%")),
Books.series.any(func.lower(Series.name).ilike("%" + term + "%")),
Books.authors.any(and_(*q)),
Books.publishers.any(func.lower(Publishers.name).ilike("%" + term + "%")),
func.lower(Books.title).ilike("%" + term + "%")]
for c in cc: for c in cc:
if c.datatype not in ["datetime", "rating", "bool", "int", "float"]: if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
filter_expression.append( filter_expression.append(
getattr(Books, getattr(Books,
'custom_column_' + str(c.id)).any( 'custom_column_' + str(c.id)).any(
func.lower(cc_classes[c.id].value).ilike("%" + term + "%"))) func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
return query.filter(self.common_filters(True)).filter(or_(*filter_expression)) # filter out multiple languages and archived books,
results = query.filter(self.common_filters(True))
filters=[filter_expression] if filter_expression else []
# search tags, series and titles, also add author queries
for word in words:
filters.append(or_(*[
Books.tags.any(func.max_ratio(func.lower(Tags.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.series.any(func.max_ratio(func.lower(Series.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.authors.any(func.max_ratio(func.lower(Authors.name), word) >= FUZZY_SEARCH_ACCURACY),
Books.publishers.any(func.max_ratio(func.lower(Publishers.name), word) >= FUZZY_SEARCH_ACCURACY),
func.max_ratio(func.lower(Books.title), word) >= FUZZY_SEARCH_ACCURACY
]))
results = results.filter(and_(*filters))
return results
def get_cc_columns(self, config, filter_config_custom_read=False): def get_cc_columns(self, config, filter_config_custom_read=False):
tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all() tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all()
@ -955,6 +989,7 @@ class CalibreDB:
order = order[0] if order else [Books.sort] order = order[0] if order else [Books.sort]
pagination = None pagination = None
result = self.search_query(term, config, *join).order_by(*order).all() result = self.search_query(term, config, *join).order_by(*order).all()
result = sorted(result,key=lambda query:partial_token_sort_ratio(str(query[0]),term),reverse=True)
result_count = len(result) result_count = len(result)
if offset != None and limit != None: if offset != None and limit != None:
offset = int(offset) offset = int(offset)
@ -974,8 +1009,8 @@ class CalibreDB:
if with_count: if with_count:
if not languages: if not languages:
languages = self.session.query(Languages, func.count('books_languages_link.book'))\ languages = self.session.query(Languages, func.count('books_languages_link.book')) \
.join(books_languages_link).join(Books)\ .join(books_languages_link).join(Books) \
.filter(self.common_filters(return_all_languages=return_all_languages)) \ .filter(self.common_filters(return_all_languages=return_all_languages)) \
.group_by(text('books_languages_link.lang_code')).all() .group_by(text('books_languages_link.lang_code')).all()
tags = list() tags = list()
@ -1073,6 +1108,11 @@ def lcase(s):
return s.lower() return s.lower()
def max_ratio(string: str, term):
    """Return the best fuzzy-match score between ``term`` and any single word of ``string``.

    ``string`` is split on whitespace, surrounding colons are stripped from each
    word, and every remaining word longer than 3 characters is compared with
    ``term`` via ``ratio``. Words of 3 characters or fewer are skipped because
    they are too generic to give a meaningful match.

    :param string: text to search in; an empty value or ``None`` yields 0
    :param term: the search word to compare against
    :return: the highest ``ratio`` score found, or 0 when no word qualifies
    """
    # Registered as an SQL function, so a NULL column value may arrive here as None.
    if not string:
        return 0
    stripped_words = (word.strip(":") for word in string.split())
    # default=0 keeps max() from raising ValueError when no word is long enough
    # (including empty or whitespace-only input).
    return max((ratio(word, term) for word in stripped_words if len(word) > 3), default=0)
class Category: class Category:
name = None name = None
id = None id = None
@ -1085,6 +1125,7 @@ class Category:
self.rating = rating self.rating = rating
self.count = 1 self.count = 1
'''class Count: '''class Count:
count = None count = None

View File

@ -5,6 +5,7 @@
{% if entries|length < 1 %} {% if entries|length < 1 %}
<h2>{{_('No Results Found')}}</h2> <h2>{{_('No Results Found')}}</h2>
<p>{{_('Search Term:')}} {{adv_searchterm}}</p> <p>{{_('Search Term:')}} {{adv_searchterm}}</p>
<p>{{_('Words smaller than 3 letters are not considered')}}</p>
{% else %} {% else %}
<h2>{{result_count}} {{_('Results for:')}} {{adv_searchterm}}</h2> <h2>{{result_count}} {{_('Results for:')}} {{adv_searchterm}}</h2>
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}

View File

@ -17,3 +17,6 @@ flask-wtf>=0.14.2,<1.2.0
chardet>=3.0.0,<4.1.0 chardet>=3.0.0,<4.1.0
advocate>=1.0.0,<1.1.0 advocate>=1.0.0,<1.1.0
Flask-Limiter>=2.3.0,<3.4.0 Flask-Limiter>=2.3.0,<3.4.0
thefuzz~=0.19.0
Levenshtein~=0.21.0