1
0
mirror of https://github.com/janeczku/calibre-web synced 2025-01-08 00:10:31 +00:00

Merge branch 'pdf'

This commit is contained in:
Ozzie Isaacs 2024-10-19 06:47:32 +02:00
commit b9085129a7

View File

@ -39,17 +39,20 @@ except (ImportError, RuntimeError) as e:
try: try:
from pypdf import PdfReader from pypdf import PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as ex: except ImportError as ex:
log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex) log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex)
try: try:
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as ex: except ImportError as ex:
log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex) log.debug('PyPDF is recommended for best performance in metadata extracting from pdf files: %s', ex)
log.debug('PyPdf2 is also possible for metadata extracting from pdf files, but not recommended anymore') log.debug('PyPdf2 is also possible for metadata extracting from pdf files, but not recommended anymore')
try: try:
from PyPDF3 import PdfFileReader as PdfReader from PyPDF3 import PdfFileReader as PdfReader
from pypdf.generic import NullObject
use_pdf_meta = True use_pdf_meta = True
except ImportError as e: except ImportError as e:
log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e) log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e)
@ -205,10 +208,12 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension, no_cove
if subject == '': if subject == '':
subject = doc_info.subject or "" subject = doc_info.subject or ""
if tags == '' and '/Keywords' in doc_info: if tags == '' and '/Keywords' in doc_info:
if isinstance(doc_info['/Keywords'], bytes): keywords = doc_info['/Keywords']
tags = doc_info['/Keywords'].decode('utf-8') if not isinstance(keywords, NullObject):
else: if isinstance(keywords, bytes):
tags = doc_info['/Keywords'] tags = keywords.decode('utf-8')
else:
tags = keywords
else: else:
title = original_file_name title = original_file_name