1
0
mirror of https://github.com/janeczku/calibre-web synced 2024-11-24 18:47:23 +00:00

Fix detect correct encoding for txt-reader

This commit is contained in:
Ozzieisaacs 2020-12-09 11:04:29 +01:00
parent dcab8af8ab
commit d957b2d20f
2 changed files with 31 additions and 11 deletions

View File

@ -20,6 +20,7 @@ from __future__ import division, print_function, unicode_literals
import os import os
import json import json
import shutil import shutil
import chardet
from flask import Response, stream_with_context from flask import Response, stream_with_context
from sqlalchemy import create_engine from sqlalchemy import create_engine
@ -29,14 +30,23 @@ from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.exc import OperationalError, InvalidRequestError from sqlalchemy.exc import OperationalError, InvalidRequestError
try:
from apiclient import errors
from httplib2 import ServerNotFoundError
importError = None
gdrive_support = True
except ImportError as e:
importError = e
gdrive_support = False
try:
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from pydrive2.auth import RefreshError
except ImportError as err:
try: try:
from pydrive.auth import GoogleAuth from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive from pydrive.drive import GoogleDrive
from pydrive.auth import RefreshError from pydrive.auth import RefreshError
from apiclient import errors
from httplib2 import ServerNotFoundError
gdrive_support = True
importError = None
except ImportError as err: except ImportError as err:
importError = err importError = err
gdrive_support = False gdrive_support = False
@ -545,21 +555,24 @@ def partial(total_byte_len, part_size_limit):
return s return s
# downloads files in chunks from gdrive # downloads files in chunks from gdrive
def do_gdrive_download(df, headers): def do_gdrive_download(df, headers, convert_encoding=False):
total_size = int(df.metadata.get('fileSize')) total_size = int(df.metadata.get('fileSize'))
download_url = df.metadata.get('downloadUrl') download_url = df.metadata.get('downloadUrl')
s = partial(total_size, 1024 * 1024) # I'm downloading BIG files, so 100M chunk size is fine for me s = partial(total_size, 1024 * 1024) # I'm downloading BIG files, so 100M chunk size is fine for me
def stream(): def stream(convert_encoding):
for byte in s: for byte in s:
headers = {"Range": 'bytes=%s-%s' % (byte[0], byte[1])} headers = {"Range": 'bytes=%s-%s' % (byte[0], byte[1])}
resp, content = df.auth.Get_Http_Object().request(download_url, headers=headers) resp, content = df.auth.Get_Http_Object().request(download_url, headers=headers)
if resp.status == 206: if resp.status == 206:
if convert_encoding:
result = chardet.detect(content)
content = content.decode(result['encoding']).encode('utf-8')
yield content yield content
else: else:
log.warning('An error occurred: %s', resp) log.warning('An error occurred: %s', resp)
return return
return Response(stream_with_context(stream()), headers=headers) return Response(stream_with_context(stream(convert_encoding)), headers=headers)
_SETTINGS_YAML_TEMPLATE = """ _SETTINGS_YAML_TEMPLATE = """

View File

@ -29,13 +29,14 @@ import mimetypes
import traceback import traceback
import binascii import binascii
import re import re
import chardet # dependency of requests
from babel.dates import format_date from babel.dates import format_date
from babel import Locale as LC from babel import Locale as LC
from babel.core import UnknownLocaleError from babel.core import UnknownLocaleError
from flask import Blueprint, jsonify from flask import Blueprint, jsonify
from flask import render_template, request, redirect, send_from_directory, make_response, g, flash, abort, url_for from flask import render_template, request, redirect, send_from_directory, make_response, g, flash, abort, url_for
from flask import session as flask_session from flask import session as flask_session, send_file
from flask_babel import gettext as _ from flask_babel import gettext as _
from flask_login import login_user, logout_user, login_required, current_user, confirm_login from flask_login import login_user, logout_user, login_required, current_user, confirm_login
from sqlalchemy.exc import IntegrityError, InvalidRequestError, OperationalError from sqlalchemy.exc import IntegrityError, InvalidRequestError, OperationalError
@ -1495,8 +1496,14 @@ def serve_book(book_id, book_format, anyname):
headers = Headers() headers = Headers()
headers["Content-Type"] = mimetypes.types_map.get('.' + book_format, "application/octet-stream") headers["Content-Type"] = mimetypes.types_map.get('.' + book_format, "application/octet-stream")
df = getFileFromEbooksFolder(book.path, data.name + "." + book_format) df = getFileFromEbooksFolder(book.path, data.name + "." + book_format)
return do_gdrive_download(df, headers) return do_gdrive_download(df, headers, (book_format.upper() == 'TXT'))
else: else:
if book_format.upper() == 'TXT':
rawdata = open(os.path.join(config.config_calibre_dir, book.path, data.name + "." + book_format),
"rb").read()
result = chardet.detect(rawdata)
return make_response(
rawdata.decode(result['encoding']).encode('utf-8'))
return send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + book_format) return send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + book_format)