Avoid problems with percent-encoded UTF-8 abstracts on certain Chinese papers

2025-08-10 07:53:55 +00:00 · 2022-02-12 12:41:29 +01:00 · 2022-02-12 12:41:29 +01:00 · 7bb3cac7fb
commit 7bb3cac7fb
parent 9c5970bbfc
1 changed file with 2 additions and 2 deletions
--- a/cps/metadata_provider/scholar.py
+++ b/cps/metadata_provider/scholar.py
@@ -17,7 +17,7 @@
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 import itertools
 from typing import Dict, List, Optional
-from urllib.parse import quote
+from urllib.parse import quote, unquote

 try:
    from fake_useragent.errors import FakeUserAgentError
@@ -66,7 +66,7 @@ class scholar(Metadata):
        )

        match.cover = result.get("image", {}).get("original_url", generic_cover)
-        match.description = result["bib"].get("abstract", "")
+        match.description = unquote(result["bib"].get("abstract", ""))
        match.publisher = result["bib"].get("venue", "")
        match.publishedDate = result["bib"].get("pub_year") + "-01-01"
        match.identifiers = {"scholar": match.id}