From d2015dd99836ba6e8dac14e58d86e15482ffe38c Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Fri, 5 Dec 2025 16:33:30 +0100 Subject: [PATCH] Optimized search query --- cps/db.py | 172 +- pyproject.toml | 9 +- test/Calibre-Web TestSummary_Linux.html | 2596 ++++++++++++++++++++--- 3 files changed, 2456 insertions(+), 321 deletions(-) diff --git a/cps/db.py b/cps/db.py index 0cd891a2..bb460c6c 100644 --- a/cps/db.py +++ b/cps/db.py @@ -30,7 +30,7 @@ from sqlite3 import OperationalError as sqliteOperationalError from sqlalchemy import create_engine from sqlalchemy import Table, Column, ForeignKey, CheckConstraint from sqlalchemy import String, Integer, Boolean, TIMESTAMP, Float -from sqlalchemy.orm import relationship, sessionmaker, scoped_session +from sqlalchemy.orm import relationship, sessionmaker, scoped_session, selectinload from sqlalchemy.orm.collections import InstrumentedList from sqlalchemy.ext.declarative import DeclarativeMeta from sqlalchemy.exc import OperationalError @@ -901,29 +901,36 @@ class CalibreDB: for entry in entries: if combined: sort_authors = entry.Books.author_sort.split('&') - ids = [a.id for a in entry.Books.authors] - + authors_list = entry.Books.authors else: sort_authors = entry.author_sort.split('&') - ids = [a.id for a in entry.authors] - authors_ordered = list() - # error = False + authors_list = entry.authors + + # Create dictionary for O(1) lookup instead of nested loops + authors_by_sort = {} + authors_by_id = {} + for author in authors_list: + authors_by_sort[author.sort] = author + authors_by_id[author.id] = author + + authors_ordered = [] + ids_remaining = set(authors_by_id.keys()) + + # Order authors based on sort field using dictionary lookup for auth in sort_authors: auth = strip_whitespaces(auth) - results = self.session.query(Authors).filter(Authors.sort == auth).all() - # ToDo: How to handle not found author name - if not len(results): - book_id = entry.id if isinstance(entry, Books) else entry[0].id - log.error("Author '{}' of book {} not found to display name in right order".format(auth, book_id)) - # error = True - break - for r in results: - if r.id in ids: - authors_ordered.append(r) - ids.remove(r.id) - for author_id in ids: - result = self.session.query(Authors).filter(Authors.id == author_id).first() - authors_ordered.append(result) + if auth in authors_by_sort: + author = authors_by_sort[auth] + authors_ordered.append(author) + ids_remaining.discard(author.id) + else: + # This can happen if author_sort has stale data or formatting issues + book_id = entry.id if isinstance(entry, Books) else (entry.Books.id if combined else entry.id) + log.warning("Author '{}' of book {} not found in author list, skipping in sort order".format(auth, book_id)) + + # Add any remaining authors not in sort order + for author_id in ids_remaining: + authors_ordered.append(authors_by_id[author_id]) if list_return: if combined: @@ -956,36 +963,95 @@ class CalibreDB: .filter(and_(Books.authors.any(and_(*q)), func.lower(Books.title).ilike("%" + title + "%"))).first() def search_query(self, term, config, *join): - strip_whitespaces(term).lower() + term = strip_whitespaces(term).lower() self.create_functions() - # self.session.connection().connection.connection.create_function("lower", 1, lcase) + + # Try FTS5 search first for better performance + fts_ids = None + # Check if FTS5 table exists before attempting search + if not hasattr(self, '_fts_available'): + try: + result = self.session.execute( + text("SELECT name FROM sqlite_master WHERE type='table' AND name='books_fts'") + ).fetchone() + self._fts_available = result is not None + except Exception: + self._fts_available = False + + if self._fts_available: + try: + # Escape FTS5 special characters to prevent query errors + term_fts = term.replace('"', '""') + # Wrap in quotes for phrase matching and better accuracy + fts_results = self.session.execute( + text("SELECT DISTINCT rowid FROM books_fts WHERE books_fts MATCH :term"), + {"term": f'"{term_fts}"'} + ).fetchall() + if fts_results: + fts_ids = [r[0] for r in fts_results] + except Exception as ex: + # FTS5 query failed, fall back to traditional search + log.debug("FTS5 search failed for term '{}', using fallback: {}".format(term, ex)) + + # Build base query with optimized joins + base_query = self.generate_linked_query(config.config_read_column, Books) + base_query = base_query.filter(self.common_filters(True)) + + # Apply eager loading for authors to avoid N+1 queries + base_query = base_query.options(selectinload(Books.authors)) + + + if len(join) == 6: + base_query = base_query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5]) + if len(join) == 3: + base_query = base_query.outerjoin(join[0], join[1]).outerjoin(join[2]) + elif len(join) == 2: + base_query = base_query.outerjoin(join[0], join[1]) + elif len(join) == 1: + base_query = base_query.outerjoin(join[0]) + + # If FTS5 found results, use those IDs + if fts_ids: + return base_query.filter(Books.id.in_(fts_ids)) + + # Fallback to traditional search with optimized subqueries q = list() author_terms = re.split("[, ]+", term) - for author_term in author_terms: - q.append(Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%"))) - query = self.generate_linked_query(config.config_read_column, Books) - if len(join) == 6: - query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5]) - if len(join) == 3: - query = query.outerjoin(join[0], join[1]).outerjoin(join[2]) - elif len(join) == 2: - query = query.outerjoin(join[0], join[1]) - elif len(join) == 1: - query = query.outerjoin(join[0]) + # Use subquery for authors to avoid expensive .any() with OR + author_subquery = self.session.query(books_authors_link.c.book).join( + Authors, books_authors_link.c.author == Authors.id + ) + author_filters = [] + for author_term in author_terms: + author_filters.append(func.lower(Authors.name).ilike("%" + author_term + "%")) + if author_filters: + author_subquery = author_subquery.filter(or_(*author_filters)) + + # Build optimized filter expressions cc = self.get_cc_columns(config, filter_config_custom_read=True) - filter_expression = [Books.tags.any(func.lower(Tags.name).ilike("%" + term + "%")), - Books.series.any(func.lower(Series.name).ilike("%" + term + "%")), - Books.authors.any(and_(*q)), - Books.publishers.any(func.lower(Publishers.name).ilike("%" + term + "%")), - func.lower(Books.title).ilike("%" + term + "%")] + filter_expression = [ + Books.id.in_(self.session.query(books_tags_link.c.book).join( + Tags, books_tags_link.c.tag == Tags.id + ).filter(func.lower(Tags.name).ilike("%" + term + "%"))), + Books.id.in_(self.session.query(books_series_link.c.book).join( + Series, books_series_link.c.series == Series.id + ).filter(func.lower(Series.name).ilike("%" + term + "%"))), + Books.id.in_(author_subquery), + Books.id.in_(self.session.query(books_publishers_link.c.book).join( + Publishers, books_publishers_link.c.publisher == Publishers.id + ).filter(func.lower(Publishers.name).ilike("%" + term + "%"))), + func.lower(Books.title).ilike("%" + term + "%") + ] + for c in cc: if c.datatype not in ["datetime", "rating", "bool", "int", "float"]: filter_expression.append( getattr(Books, 'custom_column_' + str(c.id)).any( func.lower(cc_classes[c.id].value).ilike("%" + term + "%"))) - return query.filter(self.common_filters(True)).filter(or_(*filter_expression)) + + return base_query.filter(or_(*filter_expression)) def get_cc_columns(self, config, filter_config_custom_read=False): tmp_cc = self.session.query(CustomColumns).filter(CustomColumns.datatype.notin_(cc_exceptions)).all() @@ -1007,18 +1073,32 @@ class CalibreDB: def get_search_results(self, term, config, offset=None, order=None, limit=None, *join): order = order[0] if order else [Books.sort] pagination = None - result = self.search_query(term, config, *join).order_by(*order).all() - result_count = len(result) + if offset is not None and limit is not None: offset = int(offset) - limit_all = offset + int(limit) - pagination = Pagination((offset / (int(limit)) + 1), limit, result_count) + limit_int = int(limit) + + # Use LIMIT+1 pattern to estimate total count without expensive count() + query = self.search_query(term, config, *join).order_by(*order) + result = query.limit(offset + limit_int + 1).all() + + # Check if there are more results + has_more = len(result) > (offset + limit_int) + if has_more: + result_count = offset + limit_int + 1 # Estimate: at least this many + else: + result_count = len(result) + + # Extract the page of results + result = result[offset:offset + limit_int] + pagination = Pagination((offset / limit_int + 1), limit_int, result_count) else: - offset = 0 - limit_all = result_count + # No pagination, fetch all results + result = self.search_query(term, config, *join).order_by(*order).all() + result_count = len(result) ub.store_combo_ids(result) - entries = self.order_authors(result[offset:limit_all], list_return=True, combined=True) + entries = self.order_authors(result, list_return=True, combined=True) return entries, result_count, pagination diff --git a/pyproject.toml b/pyproject.toml index 61358d62..f5ea31aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "python-magic-bin>=0.4.0,<0.5.0;sys_platform=='win32'", "flask-httpAuth>=4.4.0,<5.0.0", "cryptography>=39.0.0,<45.0.0", + "certifi>=2024.7.4,<2025.8.24", ] dynamic = ["version"] @@ -69,7 +70,7 @@ content-type = "text/markdown" [project.optional-dependencies] gdrive = [ - "google-api-python-client>=1.7.11,<2.200.0", + "google-api-python-client>=2.73.00,<2.200.0", "gevent>20.6.0,<24.12.0", "greenlet>=0.4.17,<3.3.0", "httplib2>=0.9.2,<0.23.0", @@ -77,13 +78,13 @@ gdrive = [ "uritemplate>=3.0.0,<4.3.0", "pyasn1-modules>=0.0.8,<0.7.0", "pyasn1>=0.1.9,<0.7.0", - "PyDrive2>=1.3.1,<1.22.0", + "PyDrive2>=1.15.0,<1.22.0", "PyYAML>=3.12,<6.1", "rsa>=3.4.2,<4.10.0", ] gmail = [ - "google-auth-oauthlib>=0.4.3,<1.3.0", - "google-api-python-client>=1.7.11,<2.200.0", + "google-auth-oauthlib>=1.0.0,<1.3.0", + "google-api-python-client>=2.73.00,<2.200.0", ] goodreads = [ "goodreads>=0.3.2,<0.4.0", diff --git a/test/Calibre-Web TestSummary_Linux.html b/test/Calibre-Web TestSummary_Linux.html index 8de336e2..9fa8449f 100644 --- a/test/Calibre-Web TestSummary_Linux.html +++ b/test/Calibre-Web TestSummary_Linux.html @@ -37,20 +37,20 @@
-

Start Time: 2025-08-03 18:23:44

+

Start Time: 2025-12-04 20:11:40

-

Stop Time: 2025-08-04 01:42:17

+

Stop Time: 2025-12-05 03:15:12

-

Duration: 6h 6 min

+

Duration: 5h 48 min

@@ -462,11 +462,11 @@ - + TestCli 13 - 12 - 1 + 13 + 0 0 0 @@ -530,31 +530,11 @@ - +
TestCli - test_dryrun_update
- -
- FAIL -
- - - - + PASS @@ -1043,12 +1023,12 @@ AssertionError: True is not false - + TestEditAdditionalBooks 18 - 18 - 0 - 0 + 12 + 1 + 5 0 Detail @@ -1138,56 +1118,231 @@ AssertionError: True is not false - +
TestEditAdditionalBooks - test_title_sort
- PASS + +
+ FAIL +
+ + + + - +
TestEditAdditionalBooks - test_upload_cbz_coverformats
- PASS + +
+ ERROR +
+ + + + - +
TestEditAdditionalBooks - test_upload_edit_role
- PASS + +
+ ERROR +
+ + + + - +
TestEditAdditionalBooks - test_upload_metadata_cb7
- PASS + +
+ ERROR +
+ + + + - +
TestEditAdditionalBooks - test_upload_metadata_cbr
- PASS + +
+ ERROR +
+ + + + - +
TestEditAdditionalBooks - test_upload_metadata_cbt
- PASS + +
+ ERROR +
+ + + + @@ -1220,12 +1375,12 @@ AssertionError: True is not false - + TestEditBooks 38 - 37 - 0 - 0 + 29 + 1 + 7 1 Detail @@ -1279,74 +1434,241 @@ AssertionError: True is not false - +
TestEditBooks - test_edit_custom_categories
- PASS + +
+ FAIL +
+ + + + - +
TestEditBooks - test_edit_custom_comment
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_date
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_float
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_int
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_rating
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_single_select
- PASS + +
+ ERROR +
+ + + + - +
TestEditBooks - test_edit_custom_text
- PASS + +
+ ERROR +
+ + + + @@ -1690,12 +2012,12 @@ AssertionError: True is not false - + TestEditAuthorsGdrive 7 - 7 - 0 + 6 0 + 1 0 Detail @@ -1758,11 +2080,32 @@ AssertionError: True is not false - +
TestEditAuthorsGdrive - test_rename_capital_on_upload
- PASS + +
+ ERROR +
+ + + + @@ -1864,21 +2207,50 @@ AssertionError: True is not false - + TestEditBooksList - 19 - 19 - 0 + 20 + 17 + 3 0 0 - Detail + Detail - + + +
TestEditBooksList - test_booklist_archive_read
+ + +
+ FAIL +
+ + + + + + + + +
TestEditBooksList - test_booklist_xss
@@ -1887,7 +2259,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_author
@@ -1896,7 +2268,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_categories
@@ -1905,7 +2277,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_comment
@@ -1914,7 +2286,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_category
@@ -1923,7 +2295,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_comment
@@ -1932,7 +2304,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_enum
@@ -1941,7 +2313,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_float
@@ -1950,7 +2322,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_int
@@ -1959,7 +2331,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_ratings
@@ -1968,7 +2340,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_cust_text
@@ -1977,7 +2349,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_languages
@@ -1986,7 +2358,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_publisher
@@ -1995,7 +2367,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_series
@@ -2004,7 +2376,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_seriesindex
@@ -2013,7 +2385,7 @@ AssertionError: True is not false - +
TestEditBooksList - test_bookslist_edit_title
@@ -2022,16 +2394,36 @@ AssertionError: True is not false - +
TestEditBooksList - test_list_visibility
- PASS + +
+ FAIL +
+ + + + - +
TestEditBooksList - test_restricted_rights
@@ -2040,21 +2432,44 @@ AssertionError: True is not false - +
TestEditBooksList - test_search_books_list
- PASS + +
+ FAIL +
+ + + + - + TestLoadMetadata 1 - 1 0 + 1 0 0 @@ -2064,11 +2479,31 @@ AssertionError: True is not false - +
TestLoadMetadata - test_load_metadata
- PASS + +
+ FAIL +
+ + + + @@ -2260,11 +2695,11 @@ AssertionError: True is not false - + TestLoadMetadataScholar 1 - 1 0 + 1 0 0 @@ -2274,11 +2709,31 @@ AssertionError: True is not false - +
TestLoadMetadataScholar - test_load_metadata
- PASS + +
+ FAIL +
+ + + + @@ -2647,7 +3102,7 @@ AssertionError: True is not false
Traceback (most recent call last):
-  File "/home/ozzie/Development/calibre-web-test/test/test_filepicker.py", line 87, in test_two_filepickers
+  File "/home/ozzie/Development/calibre-web-test/test/test_filepicker.py", line 88, in test_two_filepickers
     accordions[0].click()
     ~~~~~~~~~~^^^
 IndexError: list index out of range
@@ -2727,11 +3182,11 @@ IndexError: list index out of range - + TestKoboSync 12 - 12 - 0 + 11 + 1 0 0 @@ -2822,11 +3277,31 @@ IndexError: list index out of range - +
TestKoboSync - test_sync_shelf
- PASS + +
+ FAIL +
+ + + + @@ -3053,13 +3528,13 @@ IndexError: list index out of range TestSecurity - 5 - 5 + 6 + 6 0 0 0 - Detail + Detail @@ -3109,14 +3584,23 @@ IndexError: list index out of range + + + +
TestSecurity - test_x_forwarded_host
+ + PASS + + + - + TestCalibreWebListOrders 16 - 16 - 0 - 0 + 13 + 1 + 2 0 Detail @@ -3179,29 +3663,91 @@ IndexError: list index out of range - +
TestCalibreWebListOrders - test_order_authors_all_links
- PASS + +
+ ERROR +
+ + + + - +
TestCalibreWebListOrders - test_order_series_all_links
- PASS + +
+ FAIL +
+ + + + - +
TestCalibreWebListOrders - test_publisher_click_none
- PASS + +
+ ERROR +
+ + + + @@ -3383,11 +3929,11 @@ IndexError: list index out of range - + TestLogin 19 - 19 - 0 + 17 + 2 0 0 @@ -3460,11 +4006,31 @@ IndexError: list index out of range - +
TestLogin - test_login_protected
- PASS + +
+ FAIL +
+ + + + @@ -3514,11 +4080,31 @@ IndexError: list index out of range - +
TestLogin - test_next
- PASS + +
+ FAIL +
+ + + + @@ -3569,15 +4155,15 @@ IndexError: list index out of range - + TestMassEditBooksList - 5 - 5 - 0 + 11 + 8 + 3 0 0 - Detail + Detail @@ -3592,18 +4178,40 @@ IndexError: list index out of range - +
TestMassEditBooksList - test_invalid_author_title
- PASS + +
+ FAIL +
+ + + + -
TestMassEditBooksList - test_protected_author_title
+
TestMassEditBooksList - test_mass_edit_archive
PASS @@ -3612,7 +4220,7 @@ IndexError: list index out of range -
TestMassEditBooksList - test_wrong_parameter_multi
+
TestMassEditBooksList - test_mass_edit_categories
PASS @@ -3620,6 +4228,104 @@ IndexError: list index out of range + +
TestMassEditBooksList - test_mass_edit_languages
+ + PASS + + + + + + +
TestMassEditBooksList - test_mass_edit_publisher
+ + PASS + + + + + + +
TestMassEditBooksList - test_mass_edit_read
+ + PASS + + + + + + +
TestMassEditBooksList - test_mass_edit_series
+ + +
+ FAIL +
+ + + + + + + + + + +
TestMassEditBooksList - test_protected_author_title
+ + +
+ FAIL +
+ + + + + + + + + + +
TestMassEditBooksList - test_wrong_parameter_multi
+ + PASS + + + + +
TestMassEditBooksList - test_wrong_parameter_single
@@ -3629,12 +4335,12 @@ IndexError: list index out of range - + TestMergeBooksList 2 - 2 - 0 0 + 1 + 1 0 Detail @@ -3643,20 +4349,61 @@ IndexError: list index out of range - +
TestMergeBooksList - test_book_merge
- PASS + +
+ FAIL +
+ + + + - +
TestMergeBooksList - test_delete_book
- PASS + +
+ ERROR +
+ + + + @@ -3695,12 +4442,12 @@ IndexError: list index out of range - + TestOPDSFeed 26 - 26 - 0 - 0 + 24 + 1 + 1 0 Detail @@ -3745,11 +4492,32 @@ IndexError: list index out of range - +
TestOPDSFeed - test_opds_books
- PASS + +
+ ERROR +
+ + + + @@ -3871,11 +4639,31 @@ IndexError: list index out of range - +
TestOPDSFeed - test_opds_search
- PASS + +
+ FAIL +
+ + + + @@ -4268,15 +5056,15 @@ IndexError: list index out of range - + TestShelf - 17 - 16 - 0 - 0 + 32 + 1 + 11 + 19 1 - Detail + Detail @@ -4291,127 +5079,871 @@ IndexError: list index out of range - +
TestShelf - test_add_shelf_from_search
- PASS + +
+ ERROR +
+ + + + - + + +
TestShelf - test_add_shelf_from_search
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_adv_search_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_adv_search_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_arrange_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_arrange_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_create_public_shelf
- PASS + +
+ ERROR +
+ + + + - + + +
TestShelf - test_create_public_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_create_public_shelf_no_permission
- PASS + +
+ ERROR +
+ + + + - + + +
TestShelf - test_create_public_shelf_no_permission
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_delete_book_of_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_delete_book_of_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_private_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_private_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_public_private_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_public_private_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_public_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_public_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_rename_shelf
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_rename_shelf
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_shelf_action_non_shelf_edit_role
- PASS + +
+ FAIL +
+ + + + - + + +
TestShelf - test_shelf_action_non_shelf_edit_role
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_shelf_anonymous
- PASS + +
+ ERROR +
+ + + + - + + +
TestShelf - test_shelf_anonymous
+ + +
+ ERROR +
+ + + + + + + + +
TestShelf - test_shelf_database_change
- SKIP + SKIP
-