From 791bc9621a6fbe9894040ec94cdc57e76cdc549f Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Sun, 22 Jan 2023 11:25:24 +0100 Subject: [PATCH] Improved parsing of pdf files, bugfix for pypdf2 > V3.0 --- cps/uploader.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cps/uploader.py b/cps/uploader.py index e605903d..94f2aaa4 100644 --- a/cps/uploader.py +++ b/cps/uploader.py @@ -64,7 +64,7 @@ except ImportError as e: def process(tmp_file_path, original_file_name, original_file_extension, rarExecutable): - meta = None + meta = default_meta(tmp_file_path, original_file_name, original_file_extension) extension_upper = original_file_extension.upper() try: if ".PDF" == extension_upper: @@ -81,11 +81,11 @@ def process(tmp_file_path, original_file_name, original_file_extension, rarExecu except Exception as ex: log.warning('cannot parse metadata, using default: %s', ex) - if meta and meta.title.strip() and meta.author.strip(): - if meta.author.lower() == 'unknown': - meta = meta._replace(author=_('Unknown')) - return meta - return default_meta(tmp_file_path, original_file_name, original_file_extension) + if not meta.title.strip(): + meta = original_file_name + if not meta.author.strip() or meta.author.lower() == 'unknown': + meta = meta._replace(author=_('Unknown')) + return meta def default_meta(tmp_file_path, original_file_name, original_file_extension): @@ -111,7 +111,7 @@ def parse_xmp(pdf_file): Parse XMP Metadata and prepare for BookMeta object """ try: - xmp_info = pdf_file.getXmpMetadata() + xmp_info = pdf_file.xmp_metadata except Exception as ex: log.debug('Can not read PDF XMP metadata {}'.format(ex)) return None @@ -158,9 +158,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): if use_pdf_meta: with open(tmp_file_path, 'rb') as f: pdf_file = PdfReader(f) - doc_info = pdf_file.getDocumentInfo() try: - doc_info = pdf_file.getDocumentInfo() + doc_info = pdf_file.metadata except Exception as exc: log.debug('Can not read PDF DocumentInfo {}'.format(exc)) xmp_info = parse_xmp(pdf_file)