From ada0cc477d93a8ba3119344ab3cb6fd3b5b6c81b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 29 Apr 2023 17:11:52 +0200 Subject: [PATCH 1/2] fuzzy matching for all but cc --- cps/db.py | 32 ++++++++++++++++++-------------- requirements.txt | 8 ++++++++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/cps/db.py b/cps/db.py index 53fa422c..9e9d70d4 100644 --- a/cps/db.py +++ b/cps/db.py @@ -50,7 +50,10 @@ from . import logger, ub, isoLanguages from .pagination import Pagination from weakref import WeakSet -from fuzzywuzzy.fuzz import ratio +from thefuzz.fuzz import partial_ratio + +# %-level, 100 means exact match +FUZZY_SEARCH_ACCURACY=80 log = logger.create() @@ -886,7 +889,7 @@ class CalibreDB: def search_query(self, term, config, *join): term.strip().lower() self.session.connection().connection.connection.create_function("lower", 1, lcase) - self.session.connection().connection.connection.create_function("ratio", 2, ratio) + self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio) q = list() #splits search term into single words words = re.split("[, ]+", term) @@ -894,7 +897,7 @@ class CalibreDB: words.sort(key=len,reverse=True) #search authors for match for word in words: - q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%"))) + q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY)) query = self.generate_linked_query(config.config_read_column, Books) if len(join) == 6: @@ -917,18 +920,19 @@ class CalibreDB: # filter out multiple languages and archived books, results=query.filter(self.common_filters(True)) - # for word in words: - # filter_expression=[ - # Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")), - # Books.series.any(func.lower(Series.name).ilike("%" + word + "%")), - # #change to or_ to allow mix of title and author in query term - # Books.authors.any(or_(*q)), - # Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")), - # func.lower(Books.title).ilike("%" + word + "%") - # ] - # results=results.filter(or_(*filter_expression)) + #search tags, series and titles, also add author queries + for word in words: + filter_expression=[ + Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY), + Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY), + #change to or_ to allow mix of title and author in query term + Books.authors.any(or_(*q)), + Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY), + func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY + ] + results=results.filter(or_(*filter_expression)) - try: return results.filter(func.ratio(Books.title,term)>80) + try: return results except Exception: print(traceback.format_exc()) diff --git a/requirements.txt b/requirements.txt index f0cd81c0..2c6a859b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,11 @@ flask-wtf>=0.14.2,<1.2.0 chardet>=3.0.0,<4.1.0 advocate>=1.0.0,<1.1.0 Flask-Limiter>=2.3.0,<3.4.0 + +thefuzz~=0.19.0 +MarkupSafe~=2.1.1 +Jinja2~=3.1.2 +Levenshtein~=0.21.0 +greenlet~=1.1.3 +cryptography~=38.0.1 +setuptools~=57.0.0 From 8e8c9a14a8b01fba943cacd234b8a918d8945c23 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 29 Apr 2023 17:11:52 +0200 Subject: [PATCH 2/2] fuzzy matching for all categories but cc --- cps/db.py | 32 ++++++++++++++++++-------------- requirements.txt | 8 ++++++++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/cps/db.py b/cps/db.py index 53fa422c..9e9d70d4 100644 --- a/cps/db.py +++ b/cps/db.py @@ -50,7 +50,10 @@ from . import logger, ub, isoLanguages from .pagination import Pagination from weakref import WeakSet -from fuzzywuzzy.fuzz import ratio +from thefuzz.fuzz import partial_ratio + +# %-level, 100 means exact match +FUZZY_SEARCH_ACCURACY=80 log = logger.create() @@ -886,7 +889,7 @@ class CalibreDB: def search_query(self, term, config, *join): term.strip().lower() self.session.connection().connection.connection.create_function("lower", 1, lcase) - self.session.connection().connection.connection.create_function("ratio", 2, ratio) + self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio) q = list() #splits search term into single words words = re.split("[, ]+", term) @@ -894,7 +897,7 @@ class CalibreDB: words.sort(key=len,reverse=True) #search authors for match for word in words: - q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%"))) + q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY)) query = self.generate_linked_query(config.config_read_column, Books) if len(join) == 6: @@ -917,18 +920,19 @@ class CalibreDB: # filter out multiple languages and archived books, results=query.filter(self.common_filters(True)) - # for word in words: - # filter_expression=[ - # Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")), - # Books.series.any(func.lower(Series.name).ilike("%" + word + "%")), - # #change to or_ to allow mix of title and author in query term - # Books.authors.any(or_(*q)), - # Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")), - # func.lower(Books.title).ilike("%" + word + "%") - # ] - # results=results.filter(or_(*filter_expression)) + #search tags, series and titles, also add author queries + for word in words: + filter_expression=[ + Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY), + Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY), + #change to or_ to allow mix of title and author in query term + Books.authors.any(or_(*q)), + Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY), + func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY + ] + results=results.filter(or_(*filter_expression)) - try: return results.filter(func.ratio(Books.title,term)>80) + try: return results except Exception: print(traceback.format_exc()) diff --git a/requirements.txt b/requirements.txt index f0cd81c0..2c6a859b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,11 @@ flask-wtf>=0.14.2,<1.2.0 chardet>=3.0.0,<4.1.0 advocate>=1.0.0,<1.1.0 Flask-Limiter>=2.3.0,<3.4.0 + +thefuzz~=0.19.0 +MarkupSafe~=2.1.1 +Jinja2~=3.1.2 +Levenshtein~=0.21.0 +greenlet~=1.1.3 +cryptography~=38.0.1 +setuptools~=57.0.0