From 5233f78d033ca8604827177df239453294a9d5bd Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Tue, 25 Apr 2023 20:07:27 +0200
Subject: [PATCH 01/25] comments

---
 cps/db.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/cps/db.py b/cps/db.py
index 70b4105b..8eef992d 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -886,9 +886,16 @@ class CalibreDB:
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         q = list()
+        #splits search term into single words
         author_terms = re.split("[, ]+", term)
+
+        #search authors for match
         for author_term in author_terms:
             q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%")))
+
+
+
+
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
             query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5])
@@ -900,17 +907,23 @@ class CalibreDB:
             query = query.outerjoin(join[0])
 
         cc = self.get_cc_columns(config, filter_config_custom_read=True)
+
+        #search each category for exact matches with the tag
         filter_expression = [Books.tags.any(func.lower(Tags.name).ilike("%" + term + "%")),
                              Books.series.any(func.lower(Series.name).ilike("%" + term + "%")),
                              Books.authors.any(and_(*q)),
                              Books.publishers.any(func.lower(Publishers.name).ilike("%" + term + "%")),
                              func.lower(Books.title).ilike("%" + term + "%")]
+
+
         for c in cc:
             if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
                 filter_expression.append(
                     getattr(Books,
                             'custom_column_' + str(c.id)).any(
                         func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
+
+        #filter out multiple languages and archived books, then return all that match at least one of filter_expression
         return query.filter(self.common_filters(True)).filter(or_(*filter_expression))
 
     def get_cc_columns(self, config, filter_config_custom_read=False):

From 4b2e7b883b26b163139b4815ceab5b69b7da6edf Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 26 Apr 2023 15:27:41 +0200
Subject: [PATCH 02/25] Changed the quick search behavior so that it allows
 title and author in the same query. Word order no longer matters. Also added
 some more comments.

---
 cps/db.py | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 8eef992d..644041da 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -887,14 +887,12 @@ class CalibreDB:
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         q = list()
         #splits search term into single words
-        author_terms = re.split("[, ]+", term)
-
+        words = re.split("[, ]+", term)
+        #put the longest words first to make queries more efficient
+        words.sort(key=len,reverse=True)
         #search authors for match
-        for author_term in author_terms:
-            q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%")))
-
-
-
+        for word in words:
+            q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%")))
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -906,25 +904,29 @@ class CalibreDB:
         elif len(join) == 1:
             query = query.outerjoin(join[0])
 
+        filter_expression=[]
         cc = self.get_cc_columns(config, filter_config_custom_read=True)
-
-        #search each category for exact matches with the tag
-        filter_expression = [Books.tags.any(func.lower(Tags.name).ilike("%" + term + "%")),
-                             Books.series.any(func.lower(Series.name).ilike("%" + term + "%")),
-                             Books.authors.any(and_(*q)),
-                             Books.publishers.any(func.lower(Publishers.name).ilike("%" + term + "%")),
-                             func.lower(Books.title).ilike("%" + term + "%")]
-
-
         for c in cc:
             if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
                 filter_expression.append(
                     getattr(Books,
                             'custom_column_' + str(c.id)).any(
-                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
+                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%"))) #TODO ?
+        # filter out multiple languages and archived books,
+        results=query.filter(self.common_filters(True))
 
-        #filter out multiple languages and archived books, then return all that match at least one of filter_expression
-        return query.filter(self.common_filters(True)).filter(or_(*filter_expression))
+        for word in words:
+            filter_expression=[
+                Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
+                Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
+                #change to or_ to allow mix of title and author in query term
+                Books.authors.any(or_(*q)),
+                Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
+                func.lower(Books.title).ilike("%" + word + "%")
+            ]
+            results=results.filter(or_(*filter_expression))
+
+        return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
         tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all()

From d13d4653bebf9af2836edc1ab5900b811c2c6f3a Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 26 Apr 2023 22:04:41 +0200
Subject: [PATCH 03/25] proof of concept fuzzy matching

---
 cps/db.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 644041da..53fa422c 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -20,6 +20,7 @@
 import os
 import re
 import json
+import traceback
 from datetime import datetime
 from urllib.parse import quote
 import unidecode
@@ -49,7 +50,7 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-
+from fuzzywuzzy.fuzz import ratio
 
 log = logger.create()
 
@@ -885,6 +886,7 @@ class CalibreDB:
     def search_query(self, term, config, *join):
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
+        self.session.connection().connection.connection.create_function("ratio", 2, ratio)
         q = list()
         #splits search term into single words
         words = re.split("[, ]+", term)
@@ -915,18 +917,20 @@ class CalibreDB:
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 
-        for word in words:
-            filter_expression=[
-                Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
-                Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
-                #change to or_ to allow mix of title and author in query term
-                Books.authors.any(or_(*q)),
-                Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
-                func.lower(Books.title).ilike("%" + word + "%")
-            ]
-            results=results.filter(or_(*filter_expression))
+        # for word in words:
+        #     filter_expression=[
+        #         Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
+        #         Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
+        #         #change to or_ to allow mix of title and author in query term
+        #         Books.authors.any(or_(*q)),
+        #         Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
+        #         func.lower(Books.title).ilike("%" + word + "%")
+        #     ]
+        #     results=results.filter(or_(*filter_expression))
 
-        return results
+        try: return results.filter(func.ratio(Books.title,term)>80)
+        except Exception:
+            print(traceback.format_exc())
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
         tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all()

From ada0cc477d93a8ba3119344ab3cb6fd3b5b6c81b Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 29 Apr 2023 17:11:52 +0200
Subject: [PATCH 04/25] fuzzy matching for all but cc

---
 cps/db.py        | 32 ++++++++++++++++++--------------
 requirements.txt |  8 ++++++++
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 53fa422c..9e9d70d4 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -50,7 +50,10 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-from fuzzywuzzy.fuzz import ratio
+from thefuzz.fuzz import partial_ratio
+
+# %-level, 100 means exact match
+FUZZY_SEARCH_ACCURACY=80
 
 log = logger.create()
 
@@ -886,7 +889,7 @@ class CalibreDB:
     def search_query(self, term, config, *join):
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
-        self.session.connection().connection.connection.create_function("ratio", 2, ratio)
+        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()
         #splits search term into single words
         words = re.split("[, ]+", term)
@@ -894,7 +897,7 @@ class CalibreDB:
         words.sort(key=len,reverse=True)
         #search authors for match
         for word in words:
-            q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%")))
+            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY))
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -917,18 +920,19 @@ class CalibreDB:
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 
-        # for word in words:
-        #     filter_expression=[
-        #         Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
-        #         Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
-        #         #change to or_ to allow mix of title and author in query term
-        #         Books.authors.any(or_(*q)),
-        #         Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
-        #         func.lower(Books.title).ilike("%" + word + "%")
-        #     ]
-        #     results=results.filter(or_(*filter_expression))
+        #search tags, series and titles, also add author queries
+        for word in words:
+            filter_expression=[
+                Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY),
+                Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY),
+                #change to or_ to allow mix of title and author in query term
+                Books.authors.any(or_(*q)),
+                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY),
+                func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
+            ]
+            results=results.filter(or_(*filter_expression))
 
-        try: return results.filter(func.ratio(Books.title,term)>80)
+        try: return results
         except Exception:
             print(traceback.format_exc())
 
diff --git a/requirements.txt b/requirements.txt
index f0cd81c0..2c6a859b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,11 @@ flask-wtf>=0.14.2,<1.2.0
 chardet>=3.0.0,<4.1.0
 advocate>=1.0.0,<1.1.0
 Flask-Limiter>=2.3.0,<3.4.0
+
+thefuzz~=0.19.0
+MarkupSafe~=2.1.1
+Jinja2~=3.1.2
+Levenshtein~=0.21.0
+greenlet~=1.1.3
+cryptography~=38.0.1
+setuptools~=57.0.0

From 8e8c9a14a8b01fba943cacd234b8a918d8945c23 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 29 Apr 2023 17:11:52 +0200
Subject: [PATCH 05/25] fuzzy matching for all categories but cc

---
 cps/db.py        | 32 ++++++++++++++++++--------------
 requirements.txt |  8 ++++++++
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 53fa422c..9e9d70d4 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -50,7 +50,10 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-from fuzzywuzzy.fuzz import ratio
+from thefuzz.fuzz import partial_ratio
+
+# %-level, 100 means exact match
+FUZZY_SEARCH_ACCURACY=80
 
 log = logger.create()
 
@@ -886,7 +889,7 @@ class CalibreDB:
     def search_query(self, term, config, *join):
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
-        self.session.connection().connection.connection.create_function("ratio", 2, ratio)
+        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()
         #splits search term into single words
         words = re.split("[, ]+", term)
@@ -894,7 +897,7 @@ class CalibreDB:
         words.sort(key=len,reverse=True)
         #search authors for match
         for word in words:
-            q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%")))
+            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY))
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -917,18 +920,19 @@ class CalibreDB:
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 
-        # for word in words:
-        #     filter_expression=[
-        #         Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
-        #         Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
-        #         #change to or_ to allow mix of title and author in query term
-        #         Books.authors.any(or_(*q)),
-        #         Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
-        #         func.lower(Books.title).ilike("%" + word + "%")
-        #     ]
-        #     results=results.filter(or_(*filter_expression))
+        #search tags, series and titles, also add author queries
+        for word in words:
+            filter_expression=[
+                Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY),
+                Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY),
+                #change to or_ to allow mix of title and author in query term
+                Books.authors.any(or_(*q)),
+                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY),
+                func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
+            ]
+            results=results.filter(or_(*filter_expression))
 
-        try: return results.filter(func.ratio(Books.title,term)>80)
+        try: return results
         except Exception:
             print(traceback.format_exc())
 
diff --git a/requirements.txt b/requirements.txt
index f0cd81c0..2c6a859b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,11 @@ flask-wtf>=0.14.2,<1.2.0
 chardet>=3.0.0,<4.1.0
 advocate>=1.0.0,<1.1.0
 Flask-Limiter>=2.3.0,<3.4.0
+
+thefuzz~=0.19.0
+MarkupSafe~=2.1.1
+Jinja2~=3.1.2
+Levenshtein~=0.21.0
+greenlet~=1.1.3
+cryptography~=38.0.1
+setuptools~=57.0.0

From 61f1e20489e805294b2631a99558faeb4657f64b Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 29 Apr 2023 17:11:52 +0200
Subject: [PATCH 06/25] fuzzy matching for all categories but cc

---
 cps/db.py        | 32 ++++++++++++++++++--------------
 requirements.txt |  8 ++++++++
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 53fa422c..9e9d70d4 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -50,7 +50,10 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-from fuzzywuzzy.fuzz import ratio
+from thefuzz.fuzz import partial_ratio
+
+# %-level, 100 means exact match
+FUZZY_SEARCH_ACCURACY=80
 
 log = logger.create()
 
@@ -886,7 +889,7 @@ class CalibreDB:
     def search_query(self, term, config, *join):
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
-        self.session.connection().connection.connection.create_function("ratio", 2, ratio)
+        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()
         #splits search term into single words
         words = re.split("[, ]+", term)
@@ -894,7 +897,7 @@ class CalibreDB:
         words.sort(key=len,reverse=True)
         #search authors for match
         for word in words:
-            q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + word + "%")))
+            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY))
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -917,18 +920,19 @@ class CalibreDB:
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 
-        # for word in words:
-        #     filter_expression=[
-        #         Books.tags.any(func.lower(Tags.name).ilike("%" + word + "%")),
-        #         Books.series.any(func.lower(Series.name).ilike("%" + word + "%")),
-        #         #change to or_ to allow mix of title and author in query term
-        #         Books.authors.any(or_(*q)),
-        #         Books.publishers.any(func.lower(Publishers.name).ilike("%" + word + "%")),
-        #         func.lower(Books.title).ilike("%" + word + "%")
-        #     ]
-        #     results=results.filter(or_(*filter_expression))
+        #search tags, series and titles, also add author queries
+        for word in words:
+            filter_expression=[
+                Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY),
+                Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY),
+                #change to or_ to allow mix of title and author in query term
+                Books.authors.any(or_(*q)),
+                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY),
+                func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
+            ]
+            results=results.filter(or_(*filter_expression))
 
-        try: return results.filter(func.ratio(Books.title,term)>80)
+        try: return results
         except Exception:
             print(traceback.format_exc())
 
diff --git a/requirements.txt b/requirements.txt
index f0cd81c0..2c6a859b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,11 @@ flask-wtf>=0.14.2,<1.2.0
 chardet>=3.0.0,<4.1.0
 advocate>=1.0.0,<1.1.0
 Flask-Limiter>=2.3.0,<3.4.0
+
+thefuzz~=0.19.0
+MarkupSafe~=2.1.1
+Jinja2~=3.1.2
+Levenshtein~=0.21.0
+greenlet~=1.1.3
+cryptography~=38.0.1
+setuptools~=57.0.0

From f497cc0b031744a0031e456c2d91005c7893c533 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 29 Apr 2023 17:47:04 +0200
Subject: [PATCH 07/25] removed todo

---
 cps/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cps/db.py b/cps/db.py
index 9e9d70d4..35d9d6a2 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -916,7 +916,7 @@ class CalibreDB:
                 filter_expression.append(
                     getattr(Books,
                             'custom_column_' + str(c.id)).any(
-                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%"))) #TODO ?
+                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 

From 2e3c93b9e4d299b7c53dd1b7b9f781df139b1b68 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 29 Apr 2023 17:47:04 +0200
Subject: [PATCH 08/25] removed debugging help

---
 cps/db.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 9e9d70d4..a4c6633c 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -916,7 +916,7 @@ class CalibreDB:
                 filter_expression.append(
                     getattr(Books,
                             'custom_column_' + str(c.id)).any(
-                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%"))) #TODO ?
+                        func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
         results=query.filter(self.common_filters(True))
 
@@ -932,9 +932,7 @@ class CalibreDB:
             ]
             results=results.filter(or_(*filter_expression))
 
-        try: return results
-        except Exception:
-            print(traceback.format_exc())
+        return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
         tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all()

From af40feee861b6981efe9cd4869c36df853686323 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 3 May 2023 19:43:25 +0200
Subject: [PATCH 09/25] I accidentally used PyCharm's auto-add-to-requirements
 feature, which resulted in many more dependencies than needed. These are the
 actually relevant requirements for this PR.

---
 requirements.txt | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 2c6a859b..da518c9a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,9 +20,4 @@ advocate>=1.0.0,<1.1.0
 Flask-Limiter>=2.3.0,<3.4.0
 
 thefuzz~=0.19.0
-MarkupSafe~=2.1.1
-Jinja2~=3.1.2
 Levenshtein~=0.21.0
-greenlet~=1.1.3
-cryptography~=38.0.1
-setuptools~=57.0.0

From 9fc0d54fde2331fd09b7f064d2d64f89e288f772 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Tue, 9 May 2023 00:36:41 +0200
Subject: [PATCH 10/25] idea for weighted sorting

---
 cps/db.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/cps/db.py b/cps/db.py
index a4c6633c..6cbaa29f 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -931,7 +931,15 @@ class CalibreDB:
                 func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
             ]
             results=results.filter(or_(*filter_expression))
+        #TODO sort
 
+        # score = 0
+        # for word in words:
+        #     score += max(
+        #         attr1 % word,
+        #         attr2 % word,
+        #     )
+        # sort by score desc
         return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):

From 4ba3b4e4943c72b044b06c6fa66c33c5eda3103b Mon Sep 17 00:00:00 2001
From: quarz12 <danielmartinqwe@gmail.com>
Date: Tue, 9 May 2023 17:35:22 +0200
Subject: [PATCH 11/25] typing for query

---
 cps/db.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cps/db.py b/cps/db.py
index 6cbaa29f..a29f7912 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -23,6 +23,8 @@ import json
 import traceback
 from datetime import datetime
 from urllib.parse import quote
+
+import sqlalchemy
 import unidecode
 
 from sqlite3 import OperationalError as sqliteOperationalError
@@ -918,7 +920,7 @@ class CalibreDB:
                             'custom_column_' + str(c.id)).any(
                         func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
-        results=query.filter(self.common_filters(True))
+        results:sqlalchemy.orm.Query=query.filter(self.common_filters(True))
 
         #search tags, series and titles, also add author queries
         for word in words:
@@ -933,6 +935,10 @@ class CalibreDB:
             results=results.filter(or_(*filter_expression))
         #TODO sort
 
+        results.order_by(lambda Book:Book.title+Book.tags+Book.authors)
+        # v1
+        # results.order_by(desc(lambda Book:levenshtein(Book.title+Book.tags+Book.authors,term)))
+        # v2
         # score = 0
         # for word in words:
         #     score += max(

From 932c7968ce28f2acf13865198c6cbcb75cb18760 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 10 May 2023 18:55:40 +0200
Subject: [PATCH 12/25] fixed an issue where the lowering and stripping of the
 search term was not saved

---
 cps/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cps/db.py b/cps/db.py
index a4c6633c..1cf11216 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -887,7 +887,7 @@ class CalibreDB:
             .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()
 
     def search_query(self, term, config, *join):
-        term.strip().lower()
+        term=term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()

From 97c94f2117c5374472a5b51d82a775ed76e3bb0f Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Thu, 11 May 2023 01:34:01 +0200
Subject: [PATCH 13/25] moved sorting back to original place

---
 cps/db.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index a29f7912..57e3f283 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -40,6 +40,7 @@ try:
     from sqlalchemy.orm import declarative_base
 except ImportError:
     from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy import desc,asc
 from sqlalchemy.pool import StaticPool
 from sqlalchemy.sql.expression import and_, true, false, text, func, or_
 from sqlalchemy.ext.associationproxy import association_proxy
@@ -585,7 +586,7 @@ class CalibreDB:
             return False, False
         try:
             check_engine = create_engine('sqlite://',
-                                         echo=False,
+                                         echo=True,
                                          isolation_level="SERIALIZABLE",
                                          connect_args={'check_same_thread': False},
                                          poolclass=StaticPool)
@@ -935,7 +936,6 @@ class CalibreDB:
             results=results.filter(or_(*filter_expression))
         #TODO sort
 
-        results.order_by(lambda Book:Book.title+Book.tags+Book.authors)
         # v1
         # results.order_by(desc(lambda Book:levenshtein(Book.title+Book.tags+Book.authors,term)))
         # v2
@@ -966,9 +966,12 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
+        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
+        self.session.connection().connection.connection.create_function("sort", 1, lambda tags :print(f"<Book:  {tags} >") or 3)
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(*order).all()
+        result = self.search_query(term, config, *join).order_by(*order).all()#*order
+        #result = self.search_query(term, config, *join).order_by(desc(func.sort(Books.tags))).all()#*order
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)

From 45d8d637839c1fe2985016206e4d85a63e5800ae Mon Sep 17 00:00:00 2001
From: quarz12 <danielmartinqwe@gmail.com>
Date: Thu, 11 May 2023 16:17:59 +0200
Subject: [PATCH 14/25] sort using only authorsort and title

---
 cps/db.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 57e3f283..3204f94f 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -889,7 +889,7 @@ class CalibreDB:
         return self.session.query(Books) \
             .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()
 
-    def search_query(self, term, config, *join):
+    def search_query(self, term, config, *join)->sqlalchemy.orm.Query:
         term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
@@ -921,7 +921,7 @@ class CalibreDB:
                             'custom_column_' + str(c.id)).any(
                         func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
-        results:sqlalchemy.orm.Query=query.filter(self.common_filters(True))
+        results=query.filter(self.common_filters(True))
 
         #search tags, series and titles, also add author queries
         for word in words:
@@ -934,18 +934,6 @@ class CalibreDB:
                 func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
             ]
             results=results.filter(or_(*filter_expression))
-        #TODO sort
-
-        # v1
-        # results.order_by(desc(lambda Book:levenshtein(Book.title+Book.tags+Book.authors,term)))
-        # v2
-        # score = 0
-        # for word in words:
-        #     score += max(
-        #         attr1 % word,
-        #         attr2 % word,
-        #     )
-        # sort by score desc
         return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
@@ -970,7 +958,11 @@ class CalibreDB:
         self.session.connection().connection.connection.create_function("sort", 1, lambda tags :print(f"<Book:  {tags} >") or 3)
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(*order).all()#*order
+        #result = self.search_query(term, config, *join).order_by(*order).all()#*order
+        result = self.search_query(term, config, *join).order_by(desc(func.partial_ratio(Books.title+" "+Books.author_sort,term))).all()
+        for row in result:
+            print(row)
+
         #result = self.search_query(term, config, *join).order_by(desc(func.sort(Books.tags))).all()#*order
         result_count = len(result)
         if offset != None and limit != None:

From c115fe92954484268f648f8d17346a1dd6c718a5 Mon Sep 17 00:00:00 2001
From: quarz12 <danielmartinqwe@gmail.com>
Date: Thu, 11 May 2023 16:21:17 +0200
Subject: [PATCH 15/25] use partial token set ratio instead

---
 cps/db.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 3204f94f..c87c1f07 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -53,7 +53,7 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-from thefuzz.fuzz import partial_ratio
+from thefuzz.fuzz import partial_ratio, partial_token_set_ratio
 
 # %-level, 100 means exact match
 FUZZY_SEARCH_ACCURACY=80
@@ -954,7 +954,7 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
-        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
+        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2, partial_token_set_ratio)
         self.session.connection().connection.connection.create_function("sort", 1, lambda tags :print(f"<Book:  {tags} >") or 3)
         order = order[0] if order else [Books.sort]
         pagination = None

From 086527f5eebf65cc32ba4f889c9f1e7e77a3cbf6 Mon Sep 17 00:00:00 2001
From: quarz12 <danielmartinqwe@gmail.com>
Date: Thu, 11 May 2023 16:43:58 +0200
Subject: [PATCH 16/25] test at home

---
 cps/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cps/db.py b/cps/db.py
index c87c1f07..eee5b841 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -959,7 +959,7 @@ class CalibreDB:
         order = order[0] if order else [Books.sort]
         pagination = None
         #result = self.search_query(term, config, *join).order_by(*order).all()#*order
-        result = self.search_query(term, config, *join).order_by(desc(func.partial_ratio(Books.title+" "+Books.author_sort,term))).all()
+        result = self.search_query(term, config, *join).order_by(desc(func.partial_ratio(Books.title.name+" "+Books.author_sort.name+" "+Books.tags.get(),term))).all()
         for row in result:
             print(row)
 

From ad5313ee79cbfa2c393568b13f485d7edf0ddb1b Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Mon, 15 May 2023 18:17:47 +0200
Subject: [PATCH 17/25] new idea

---
 cps/db.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index eee5b841..133c4b50 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -954,12 +954,10 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
-        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2, partial_token_set_ratio)
-        self.session.connection().connection.connection.create_function("sort", 1, lambda tags :print(f"<Book:  {tags} >") or 3)
         order = order[0] if order else [Books.sort]
         pagination = None
-        #result = self.search_query(term, config, *join).order_by(*order).all()#*order
-        result = self.search_query(term, config, *join).order_by(desc(func.partial_ratio(Books.title.name+" "+Books.author_sort.name+" "+Books.tags.get(),term))).all()
+        result = self.search_query(term, config, *join).order_by(*order).all()
+        #sort here
         for row in result:
             print(row)
 

From e45619f2268cfffb218f5cd0c83206f7ce052133 Mon Sep 17 00:00:00 2001
From: quarz12 <danielmartinqwe@gmail.com>
Date: Wed, 17 May 2023 11:11:14 +0200
Subject: [PATCH 18/25] progress building string of book

---
 cps/db.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 133c4b50..f77a30d0 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -23,8 +23,6 @@ import json
 import traceback
 from datetime import datetime
 from urllib.parse import quote
-
-import sqlalchemy
 import unidecode
 
 from sqlite3 import OperationalError as sqliteOperationalError
@@ -40,7 +38,6 @@ try:
     from sqlalchemy.orm import declarative_base
 except ImportError:
     from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy import desc,asc
 from sqlalchemy.pool import StaticPool
 from sqlalchemy.sql.expression import and_, true, false, text, func, or_
 from sqlalchemy.ext.associationproxy import association_proxy
@@ -384,9 +381,11 @@ class Books(Base):
         self.has_cover = (has_cover != None)
 
     def __repr__(self):
-        return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}')>".format(self.title, self.sort, self.author_sort,
+        return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}{9}{10}')>".format(self.title, self.sort, self.author_sort,
                                                                  self.timestamp, self.pubdate, self.series_index,
-                                                                 self.last_modified, self.path, self.has_cover)
+                                                                 self.last_modified, self.path, self.has_cover,
+                                                                       [tag.name for tag in self.tags],
+                                                                       [series.name for series in self.series])
 
     @property
     def atom_timestamp(self):
@@ -586,7 +585,7 @@ class CalibreDB:
             return False, False
         try:
             check_engine = create_engine('sqlite://',
-                                         echo=True,
+                                         echo=False,
                                          isolation_level="SERIALIZABLE",
                                          connect_args={'check_same_thread': False},
                                          poolclass=StaticPool)
@@ -889,8 +888,8 @@ class CalibreDB:
         return self.session.query(Books) \
             .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()
 
-    def search_query(self, term, config, *join)->sqlalchemy.orm.Query:
-        term.strip().lower()
+    def search_query(self, term, config, *join):
+        term=term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()
@@ -957,11 +956,10 @@ class CalibreDB:
         order = order[0] if order else [Books.sort]
         pagination = None
         result = self.search_query(term, config, *join).order_by(*order).all()
-        #sort here
-        for row in result:
-            print(row)
-
-        #result = self.search_query(term, config, *join).order_by(desc(func.sort(Books.tags))).all()#*order
+        sorted(result,key=lambda book:1)
+        for res in result:
+            print(res[0])
+            print(f"{res[0].title} {[tag.name for tag in res[0].tags]} {[series.name for series in res[0].series]}")
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)

From a936a333a8de4ea73c3e8a78334ed550bf218f51 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 27 May 2023 15:55:54 +0200
Subject: [PATCH 19/25] updated Books string repr

---
 cps/db.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index f77a30d0..4f7dfaab 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -381,11 +381,12 @@ class Books(Base):
         self.has_cover = (has_cover != None)
 
     def __repr__(self):
-        return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}{9}{10}')>".format(self.title, self.sort, self.author_sort,
+        return "<Books('{0} , {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11}')>".format(self.title, self.sort, self.author_sort,
                                                                  self.timestamp, self.pubdate, self.series_index,
                                                                  self.last_modified, self.path, self.has_cover,
-                                                                       [tag.name for tag in self.tags],
-                                                                       [series.name for series in self.series])
+                                                                       " ".join([tag.name for tag in self.tags]),
+                                                                       " ".join([series.name for series in self.series]), " ".join([author.name for author in self.authors])," ".join([publisher.name for publisher in self.publishers]))
+
 
     @property
     def atom_timestamp(self):
@@ -959,7 +960,6 @@ class CalibreDB:
         sorted(result,key=lambda book:1)
         for res in result:
             print(res[0])
-            print(f"{res[0].title} {[tag.name for tag in res[0].tags]} {[series.name for series in res[0].series]}")
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)

From cb5e66facd9713745239117119c84dce5f080fec Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sun, 28 May 2023 13:11:50 +0200
Subject: [PATCH 20/25] add partial token set ratio to db

---
 cps/db.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 4f7dfaab..7c43ea8e 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -954,10 +954,10 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
+        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2, partial_token_set_ratio)
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(*order).all()
-        sorted(result,key=lambda book:1)
+        result = self.search_query(term, config, *join).order_by(func.desc(func.partial_token_set_ratio(str(Books),term))).all()
         for res in result:
             print(res[0])
         result_count = len(result)

From 025a888906326e923465663450ebfb221074d29c Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sun, 28 May 2023 15:01:35 +0200
Subject: [PATCH 21/25] rolled back string repr of book, moved that part to a
 new method

---
 cps/db.py | 91 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 38 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 7c43ea8e..fa190015 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -33,6 +33,7 @@ from sqlalchemy.orm import relationship, sessionmaker, scoped_session
 from sqlalchemy.orm.collections import InstrumentedList
 from sqlalchemy.ext.declarative import DeclarativeMeta
 from sqlalchemy.exc import OperationalError
+
 try:
     # Compatibility with sqlalchemy 2.0
     from sqlalchemy.orm import declarative_base
@@ -41,6 +42,7 @@ except ImportError:
 from sqlalchemy.pool import StaticPool
 from sqlalchemy.sql.expression import and_, true, false, text, func, or_
 from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy import desc
 from flask_login import current_user
 from flask_babel import gettext as _
 from flask_babel import get_locale
@@ -53,7 +55,7 @@ from weakref import WeakSet
 from thefuzz.fuzz import partial_ratio, partial_token_set_ratio
 
 # %-level, 100 means exact match
-FUZZY_SEARCH_ACCURACY=80
+FUZZY_SEARCH_ACCURACY = 80
 
 log = logger.create()
 
@@ -381,12 +383,21 @@ class Books(Base):
         self.has_cover = (has_cover != None)
 
     def __repr__(self):
-        return "<Books('{0} , {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11}')>".format(self.title, self.sort, self.author_sort,
-                                                                 self.timestamp, self.pubdate, self.series_index,
-                                                                 self.last_modified, self.path, self.has_cover,
-                                                                       " ".join([tag.name for tag in self.tags]),
-                                                                       " ".join([series.name for series in self.series]), " ".join([author.name for author in self.authors])," ".join([publisher.name for publisher in self.publishers]))
+        return "<Books('{0},{1}{2}{3}{4}{5}{6}{7}{8}')>".format(self.title, self.sort, self.author_sort,
+                                                                self.timestamp, self.pubdate, self.series_index,
+                                                                self.last_modified, self.path, self.has_cover)
 
+    def __sort_str(self):
+        return "{0} {1} {2} {3} {4}".format(self.title, " ".join([tag.name for tag in self.tags]),
+                                                " ".join(
+                                                    [series.name for series
+                                                     in self.series]),
+                                                " ".join(
+                                                    [author.name for author
+                                                     in self.authors]),
+                                                " ".join([publisher.name for
+                                                          publisher in
+                                                          self.publishers]))
 
     @property
     def atom_timestamp(self):
@@ -428,13 +439,15 @@ class CustomColumns(Base):
         content['category_sort'] = "value"
         content['is_csp'] = False
         content['is_editable'] = self.editable
-        content['rec_index'] = sequence + 22     # toDo why ??
+        content['rec_index'] = sequence + 22  # toDo why ??
         if isinstance(value, datetime):
-            content['#value#'] = {"__class__": "datetime.datetime", "__value__": value.strftime("%Y-%m-%dT%H:%M:%S+00:00")}
+            content['#value#'] = {"__class__": "datetime.datetime",
+                                  "__value__": value.strftime("%Y-%m-%dT%H:%M:%S+00:00")}
         else:
             content['#value#'] = value
         content['#extra#'] = extra
-        content['is_multiple2'] = {} if not self.is_multiple else {"cache_to_list": "|", "ui_to_list": ",", "list_to_ui": ", "}
+        content['is_multiple2'] = {} if not self.is_multiple else {"cache_to_list": "|", "ui_to_list": ",",
+                                                                   "list_to_ui": ", "}
         return json.dumps(content, ensure_ascii=False)
 
 
@@ -455,7 +468,7 @@ class AlchemyEncoder(json.JSONEncoder):
                         el = list()
                         # ele = None
                         for ele in data:
-                            if hasattr(ele, 'value'):       # converter for custom_column values
+                            if hasattr(ele, 'value'):  # converter for custom_column values
                                 el.append(str(ele.value))
                             elif ele.get:
                                 el.append(ele.get())
@@ -494,7 +507,6 @@ class CalibreDB:
         if init:
             self.init_db(expire_on_commit)
 
-
     def init_db(self, expire_on_commit=True):
         if self._init:
             self.init_session(expire_on_commit)
@@ -666,13 +678,13 @@ class CalibreDB:
         if not read_column:
             bd = (self.session.query(Books, ub.ReadBook.read_status, ub.ArchivedBook.is_archived).select_from(Books)
                   .join(ub.ReadBook, and_(ub.ReadBook.user_id == int(current_user.id), ub.ReadBook.book_id == book_id),
-                  isouter=True))
+                        isouter=True))
         else:
             try:
                 read_column = cc_classes[read_column]
                 bd = (self.session.query(Books, read_column.value, ub.ArchivedBook.is_archived).select_from(Books)
                       .join(read_column, read_column.book == book_id,
-                      isouter=True))
+                            isouter=True))
             except (KeyError, AttributeError, IndexError):
                 log.error("Custom Column No.{} does not exist in calibre database".format(read_column))
                 # Skip linking read column and return None instead of read status
@@ -725,11 +737,11 @@ class CalibreDB:
                 pos_cc_list = current_user.allowed_column_value.split(',')
                 pos_content_cc_filter = true() if pos_cc_list == [''] else \
                     getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \
-                    any(cc_classes[self.config.config_restricted_column].value.in_(pos_cc_list))
+                        any(cc_classes[self.config.config_restricted_column].value.in_(pos_cc_list))
                 neg_cc_list = current_user.denied_column_value.split(',')
                 neg_content_cc_filter = false() if neg_cc_list == [''] else \
                     getattr(Books, 'custom_column_' + str(self.config.config_restricted_column)). \
-                    any(cc_classes[self.config.config_restricted_column].value.in_(neg_cc_list))
+                        any(cc_classes[self.config.config_restricted_column].value.in_(neg_cc_list))
             except (KeyError, AttributeError, IndexError):
                 pos_content_cc_filter = false()
                 neg_content_cc_filter = true()
@@ -809,18 +821,18 @@ class CalibreDB:
         element = 0
         while indx:
             if indx >= 3:
-                query = query.outerjoin(join[element], join[element+1]).outerjoin(join[element+2])
+                query = query.outerjoin(join[element], join[element + 1]).outerjoin(join[element + 2])
                 indx -= 3
                 element += 3
             elif indx == 2:
-                query = query.outerjoin(join[element], join[element+1])
+                query = query.outerjoin(join[element], join[element + 1])
                 indx -= 2
                 element += 2
             elif indx == 1:
                 query = query.outerjoin(join[element])
                 indx -= 1
                 element += 1
-        query = query.filter(db_filter)\
+        query = query.filter(db_filter) \
             .filter(self.common_filters(allow_show_archived))
         entries = list()
         pagination = list()
@@ -890,17 +902,17 @@ class CalibreDB:
             .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first()
 
     def search_query(self, term, config, *join):
-        term=term.strip().lower()
+        term = term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
         q = list()
-        #splits search term into single words
+        # splits search term into single words
         words = re.split("[, ]+", term)
-        #put the longest words first to make queries more efficient
-        words.sort(key=len,reverse=True)
-        #search authors for match
+        # put the longest words first to make queries more efficient
+        words.sort(key=len, reverse=True)
+        # search authors for match
         for word in words:
-            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name),word)>=FUZZY_SEARCH_ACCURACY))
+            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name), word) >= FUZZY_SEARCH_ACCURACY))
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -912,7 +924,7 @@ class CalibreDB:
         elif len(join) == 1:
             query = query.outerjoin(join[0])
 
-        filter_expression=[]
+        filter_expression = []
         cc = self.get_cc_columns(config, filter_config_custom_read=True)
         for c in cc:
             if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
@@ -921,19 +933,19 @@ class CalibreDB:
                             'custom_column_' + str(c.id)).any(
                         func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
-        results=query.filter(self.common_filters(True))
+        results = query.filter(self.common_filters(True))
 
-        #search tags, series and titles, also add author queries
+        # search tags, series and titles, also add author queries
         for word in words:
-            filter_expression=[
-                Books.tags.any(func.partial_ratio(func.lower(Tags.name),word)>=FUZZY_SEARCH_ACCURACY),
-                Books.series.any(func.partial_ratio(func.lower(Series.name),word)>=FUZZY_SEARCH_ACCURACY),
-                #change to or_ to allow mix of title and author in query term
+            filter_expression = [
+                Books.tags.any(func.partial_ratio(func.lower(Tags.name), word) >= FUZZY_SEARCH_ACCURACY),
+                Books.series.any(func.partial_ratio(func.lower(Series.name), word) >= FUZZY_SEARCH_ACCURACY),
+                # change to or_ to allow mix of title and author in query term
                 Books.authors.any(or_(*q)),
-                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name),word)>=FUZZY_SEARCH_ACCURACY),
-                func.partial_ratio(func.lower(Books.title),word)>=FUZZY_SEARCH_ACCURACY
+                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name), word) >= FUZZY_SEARCH_ACCURACY),
+                func.partial_ratio(func.lower(Books.title), word) >= FUZZY_SEARCH_ACCURACY
             ]
-            results=results.filter(or_(*filter_expression))
+            results = results.filter(or_(*filter_expression))
         return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
@@ -954,10 +966,12 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
-        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2, partial_token_set_ratio)
+        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2,
+                                                                        partial_token_set_ratio)
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(func.desc(func.partial_token_set_ratio(str(Books),term))).all()
+        result = self.search_query(term, config, *join).order_by(
+            desc(func.partial_token_set_ratio(str(Books), term))).all()
         for res in result:
             print(res[0])
         result_count = len(result)
@@ -979,8 +993,8 @@ class CalibreDB:
 
         if with_count:
             if not languages:
-                languages = self.session.query(Languages, func.count('books_languages_link.book'))\
-                    .join(books_languages_link).join(Books)\
+                languages = self.session.query(Languages, func.count('books_languages_link.book')) \
+                    .join(books_languages_link).join(Books) \
                     .filter(self.common_filters(return_all_languages=return_all_languages)) \
                     .group_by(text('books_languages_link.lang_code')).all()
             tags = list()
@@ -1090,6 +1104,7 @@ class Category:
         self.rating = rating
         self.count = 1
 
+
 '''class Count:
     count = None
 

From caf6079b6bcd29f27a87f3c9e0fa6322d7b8bb0d Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sun, 28 May 2023 22:04:41 +0200
Subject: [PATCH 22/25] moved author filter to the rest of the filters, ignore
 words shorter than 4 letters for searching, introduced max_ratio as new
 filter function

---
 cps/db.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index fa190015..fef610a7 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -52,7 +52,7 @@ from . import logger, ub, isoLanguages
 from .pagination import Pagination
 
 from weakref import WeakSet
-from thefuzz.fuzz import partial_ratio, partial_token_set_ratio
+from thefuzz.fuzz import partial_ratio, partial_token_set_ratio, partial_token_sort_ratio, ratio
 
 # %-level, 100 means exact match
 FUZZY_SEARCH_ACCURACY = 80
@@ -387,7 +387,7 @@ class Books(Base):
                                                                 self.timestamp, self.pubdate, self.series_index,
                                                                 self.last_modified, self.path, self.has_cover)
 
-    def __sort_str(self):
+    def __str__(self):
         return "{0} {1} {2} {3} {4}".format(self.title, " ".join([tag.name for tag in self.tags]),
                                                 " ".join(
                                                     [series.name for series
@@ -904,15 +904,13 @@ class CalibreDB:
     def search_query(self, term, config, *join):
         term = term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
-        self.session.connection().connection.connection.create_function("partial_ratio", 2, partial_ratio)
+        self.session.connection().connection.connection.create_function("max_ratio", 2, max_ratio)
         q = list()
         # splits search term into single words
         words = re.split("[, ]+", term)
         # put the longest words first to make queries more efficient
         words.sort(key=len, reverse=True)
-        # search authors for match
-        for word in words:
-            q.append(Books.authors.any(func.partial_ratio(func.lower(Authors.name), word) >= FUZZY_SEARCH_ACCURACY))
+        words=[x for x in filter(lambda w:len(w)>3,words)]
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -934,18 +932,17 @@ class CalibreDB:
                         func.lower(cc_classes[c.id].value).ilike("%" + term + "%")))
         # filter out multiple languages and archived books,
         results = query.filter(self.common_filters(True))
-
+        filters=[filter_expression] if filter_expression else []
         # search tags, series and titles, also add author queries
         for word in words:
-            filter_expression = [
-                Books.tags.any(func.partial_ratio(func.lower(Tags.name), word) >= FUZZY_SEARCH_ACCURACY),
-                Books.series.any(func.partial_ratio(func.lower(Series.name), word) >= FUZZY_SEARCH_ACCURACY),
-                # change to or_ to allow mix of title and author in query term
-                Books.authors.any(or_(*q)),
-                Books.publishers.any(func.partial_ratio(func.lower(Publishers.name), word) >= FUZZY_SEARCH_ACCURACY),
-                func.partial_ratio(func.lower(Books.title), word) >= FUZZY_SEARCH_ACCURACY
-            ]
-            results = results.filter(or_(*filter_expression))
+            filters.append(or_(*[
+                Books.tags.any(func.max_ratio(func.lower(Tags.name), word) >= FUZZY_SEARCH_ACCURACY),
+                Books.series.any(func.max_ratio(func.lower(Series.name), word) >= FUZZY_SEARCH_ACCURACY),
+                Books.authors.any(func.max_ratio(func.lower(Authors.name), word) >= FUZZY_SEARCH_ACCURACY),
+                Books.publishers.any(func.max_ratio(func.lower(Publishers.name), word) >= FUZZY_SEARCH_ACCURACY),
+                func.max_ratio(func.lower(Books.title), word) >= FUZZY_SEARCH_ACCURACY
+            ]))
+        results = results.filter(and_(*filters))
         return results
 
     def get_cc_columns(self, config, filter_config_custom_read=False):
@@ -966,14 +963,12 @@ class CalibreDB:
 
     # read search results from calibre-database and return it (function is used for feed and simple search
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
-        self.session.connection().connection.connection.create_function("partial_token_set_ratio", 2,
-                                                                        partial_token_set_ratio)
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(
-            desc(func.partial_token_set_ratio(str(Books), term))).all()
+        result = self.search_query(term, config, *join).order_by(*order).all()
+        result = sorted(result,key=lambda query:partial_token_sort_ratio(str(query[0]),term),reverse=True)
         for res in result:
-            print(res[0])
+            print(str(res[0]))
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)
@@ -1092,6 +1087,11 @@ def lcase(s):
         return s.lower()
 
 
+def max_ratio(string:str,term):
+    """applies ratio on each word of string and returns the max value"""
+    words=string.split()
+    return max([ratio(word.strip(":"),term) for word in words])
+
 class Category:
     name = None
     id = None

From b96d02c9212c9cdab59848f37a6bc5675fdf4909 Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 31 May 2023 01:03:33 +0200
Subject: [PATCH 23/25] now return empty list if all words of query are <= 3
 letters, only compare term to words > 3 letters of book attributes

---
 cps/db.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index fef610a7..852283e0 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -54,8 +54,8 @@ from .pagination import Pagination
 from weakref import WeakSet
 from thefuzz.fuzz import partial_ratio, partial_token_set_ratio, partial_token_sort_ratio, ratio
 
-# %-level, 100 means exact match
-FUZZY_SEARCH_ACCURACY = 80
+# %-level, 100 means exact match, 75 allows exactly 1 wrong character in a 4 letter word
+FUZZY_SEARCH_ACCURACY = 75
 
 log = logger.create()
 
@@ -911,6 +911,9 @@ class CalibreDB:
         # put the longest words first to make queries more efficient
         words.sort(key=len, reverse=True)
         words=[x for x in filter(lambda w:len(w)>3,words)]
+        # no word in search term is longer than 3 letters -> return empty query #TODO give some kind of error message
+        if not any([len(word)>3 for word in words]):
+            return self.session.query(Books).filter(False)
 
         query = self.generate_linked_query(config.config_read_column, Books)
         if len(join) == 6:
@@ -1090,7 +1093,7 @@ def lcase(s):
 def max_ratio(string:str,term):
     """applies ratio on each word of string and returns the max value"""
     words=string.split()
-    return max([ratio(word.strip(":"),term) for word in words])
+    return max([ratio(word.strip(":"),term) if len(word.strip(":")) > 3 else 0 for word in words]) # do not compare words of len <= 3 -> too generic
 
 class Category:
     name = None

From 5e0430e60e5047bf9a5217509ad026680ed316cf Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Wed, 31 May 2023 15:23:19 +0200
Subject: [PATCH 24/25] message when query returns 0 results

---
 cps/templates/search.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cps/templates/search.html b/cps/templates/search.html
index 41f52b98..b35a67c8 100644
--- a/cps/templates/search.html
+++ b/cps/templates/search.html
@@ -5,6 +5,7 @@
     {% if entries|length < 1 %}
       <h2>{{_('No Results Found')}}</h2>
       <p>{{_('Search Term:')}} {{adv_searchterm}}</p>
+        <p>{{_('Words smaller than 3 letters are not considered')}}</p>
     {% else %}
       <h2>{{result_count}} {{_('Results for:')}} {{adv_searchterm}}</h2>
       {% if current_user.is_authenticated %}

From 896e8fddc3c301d90e5cedec2b587ff62b1469cc Mon Sep 17 00:00:00 2001
From: Daniel <ge64tap@mytum.de>
Date: Sat, 3 Jun 2023 20:25:39 +0200
Subject: [PATCH 25/25] minor cleanup, removed unused code

---
 cps/db.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index 852283e0..51645346 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -905,14 +905,13 @@ class CalibreDB:
         term = term.strip().lower()
         self.session.connection().connection.connection.create_function("lower", 1, lcase)
         self.session.connection().connection.connection.create_function("max_ratio", 2, max_ratio)
-        q = list()
         # splits search term into single words
-        words = re.split("[, ]+", term)
+        words = re.split("[,\s]+", term)
         # put the longest words first to make queries more efficient
         words.sort(key=len, reverse=True)
-        words=[x for x in filter(lambda w:len(w)>3,words)]
+        words=list(filter(lambda w:len(w)>3,words))
         # no word in search term is longer than 3 letters -> return empty query #TODO give some kind of error message
-        if not any([len(word)>3 for word in words]):
+        if len(words)==0:
             return self.session.query(Books).filter(False)
 
         query = self.generate_linked_query(config.config_read_column, Books)
@@ -970,8 +969,6 @@ class CalibreDB:
         pagination = None
         result = self.search_query(term, config, *join).order_by(*order).all()
         result = sorted(result,key=lambda query:partial_token_sort_ratio(str(query[0]),term),reverse=True)
-        for res in result:
-            print(str(res[0]))
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)