Compare commits

...

189 Commits

Author SHA1 Message Date
Sergey M․
b6c9fe4162 release 2017.07.02 2017-07-02 20:17:10 +07:00
Sergey M․
4d9ba27bba [ChangeLog] Actualize 2017-07-02 20:12:40 +07:00
Sergey M․
50ae3f646e [thisoldhouse] Add more fallbacks for video id (closes #13541) 2017-07-02 20:06:15 +07:00
Parmjit Virk
99a7e76240 [thisoldhouse] Update test 2017-07-02 20:05:11 +07:00
Parmjit Virk
a3a6d01a96 [thisoldhouse] Fix video id extraction (closes #13540) 2017-07-02 20:04:51 +07:00
Sergey M․
02d61a65e2 [xfileshare] Extend format regex (closes #13536) 2017-07-02 08:00:22 +07:00
Sergey M․
9b35297be1 [extractors] Add import for tastytrade 2017-07-01 18:39:29 +07:00
Sergey M․
4917478803 [ted] Fix extraction (closes #13535)) 2017-07-01 18:39:01 +07:00
Sergey M․
54faac2235 [tastytrade] Add extractor (closes #13521) 2017-06-30 22:20:30 +07:00
Sergey M․
c69701c6ab [extractor/common] Improve _json_ld 2017-06-30 22:19:06 +07:00
Sergey M․
d4f8ce6e91 [dplayit] Relax video id regex (closes #13524) 2017-06-30 21:55:45 +07:00
Sergey M․
b311b0ead2 [generic] Extract more generic metadata (closes #13527) 2017-06-30 21:42:04 +07:00
Sergey M․
72d256c434 [bbccouk] Extend _VALID_URL 2017-06-29 22:29:28 +07:00
Sergey M․
b2ed954fc6 [bbccouk] Capture and output error message (closes #13518) 2017-06-29 22:27:53 +07:00
Sergey M․
a919ca0ad6 [cbsnews] Actualize test 2017-06-28 22:30:12 +07:00
Parmjit Virk
88d6b7c2bd [cbsnews] Relax video info regex (fixes #13284) 2017-06-28 22:21:35 +07:00
Sergey M․
fd1c5fba6b [facebook] Add test for plugin video embed (#13493) 2017-06-27 22:38:59 +07:00
Sergey M․
0646e34c7d [facebook] Add support for plugin video embeds and multiple embeds (closes #13493) 2017-06-27 22:38:54 +07:00
Sergey M․
bf2dc9cc6e [soundcloud] Fix tests 2017-06-27 21:26:46 +07:00
Viktor Szakats
f1c051009b [soundcloud] Switch to https for API requests 2017-06-27 21:20:18 +07:00
Sergey M․
33ffb645a6 [pandatv] Switch to https for API and download URLs 2017-06-26 22:11:09 +07:00
Xuan Hu (Sean)
35544690e4 [pandatv] Add support for https URLs 2017-06-26 22:00:31 +07:00
Yen Chi Hsuan
136503e302 [ChangeLog] Update after #13494 2017-06-26 19:56:07 +08:00
Luca Steeb
4a87de72df [niconico] fix sp subdomain links 2017-06-25 21:30:05 +02:00
Sergey M․
a7ce8f16c4 release 2017.06.25 2017-06-25 05:16:06 +07:00
Sergey M․
a5aea53fc8 [ChangeLog] Actualize 2017-06-25 05:13:12 +07:00
Sergey M․
0c7a631b61 [adobepass] Add support for ATTOTT MSO (DIRECTV NOW) (closes #13472) 2017-06-25 05:03:17 +07:00
Sergey M․
fd9ee4de8c [wsj] Add support for barrons.com (closes #13470) 2017-06-25 02:15:35 +07:00
Argn0
5744cf6c03 [ign] Add another video id pattern (closes #13328) 2017-06-25 01:59:15 +07:00
Sergey M․
9c48b5a193 [raiplay:live] Improve and add test (closes #13414) 2017-06-25 01:49:27 +07:00
james
449c665776 [raiplay:live] Add extractor 2017-06-25 01:48:54 +07:00
Sergey M․
23aec3d623 [redbulltv] Restore hls format prefix 2017-06-25 01:10:31 +07:00
Sergey M․
27449ad894 [redbulltv] Add support for lives and segments (closes #13486)) 2017-06-25 01:09:12 +07:00
Sergey M․
bd65f18153 [onetpl] Add support for videos embedded via pulsembed (closes #13482) 2017-06-24 18:33:31 +07:00
Sergey M․
73af5cc817 [YoutubeDL] Skip malformed formats for better extraction robustness 2017-06-23 21:18:33 +07:00
Sergey M․
b5f523ed62 [ooyala] Add test for missing stream['url']['data'] 2017-06-23 20:56:48 +07:00
Sergey M․
4f4dd8d797 [ooyala] Make more robust 2017-06-23 20:56:21 +07:00
Sergey M․
4cb18ab1b9 [ooyala] Skip empty format URLs (closes #13471, closes #13476) 2017-06-23 20:50:48 +07:00
Sergey M․
ac7409eec5 [hgtv.com:show] Fix typo 2017-06-23 02:54:12 +07:00
Sergey M․
170719414d release 2017.06.23 2017-06-23 02:13:21 +07:00
Sergey M․
38dad4737f [ChangeLog] Actualize 2017-06-23 02:10:54 +07:00
Sergey M․
ddbb4c5c3e [youtube] Adapt to new automatic captions rendition (closes #13467) 2017-06-23 02:00:19 +07:00
Sergey M․
fa3ea7223a [hgtv.com:show] Relax video config regex and update test (closes #13279, closes #13461) 2017-06-23 00:42:42 +07:00
Parmjit Virk
0f4a5a73e7 [drtuber] Fix formats extraction (fixes 12058) 2017-06-23 00:08:36 +07:00
Sergey M․
18166bb8e8 [youporn] Fix upload date extraction 2017-06-22 00:47:02 +07:00
Sergey M․
d4893e764b [youporn] Improve formats extraction 2017-06-22 00:40:15 +07:00
Sergey M․
97b6e30113 [youporn] Fix title extraction (closes #13456) 2017-06-22 00:20:45 +07:00
Sergey M․
9be9ec5980 [googledrive] Fix formats' sorting (closes #13443) 2017-06-20 22:58:33 +07:00
Giuseppe Fabiano
048b55804d [watchindianporn] Fix extraction (closes #13411) 2017-06-20 04:30:45 +07:00
Giuseppe Fabiano
6ce79d7ac0 [abcotvs] Fix test md5 2017-06-20 04:07:00 +07:00
Sergey M․
1641ca402d [vimeo] Add fallback mp4 extension for original format 2017-06-20 01:27:59 +07:00
Sergey M․
85cbcede5b [ruv] Improve, extract all formats and metadata (closes #13396) 2017-06-19 23:46:03 +07:00
Orn
a1de83e5f0 [ruv] Add extractor 2017-06-19 23:45:45 +07:00
Sergey M․
fee00b3884 [viu] Fix extraction on older python 2.6 2017-06-19 22:57:37 +07:00
Sergey M․
2d2132ac6e [adobepass] Fix extraction on older python 2.6 2017-06-19 22:54:53 +07:00
Yen Chi Hsuan
cc2ffe5afe [pandora.tv] Fix upload_date extraction (closes #12846) 2017-06-19 16:20:36 +08:00
Sergey M․
560050669b [asiancrush] Add extractor (closes #13420) 2017-06-18 20:18:51 +07:00
Sergey M․
eaa006d1bd release 2017.06.18 2017-06-18 00:16:49 +07:00
Sergey M․
a6f29820c6 [ChangeLog] Actualize 2017-06-18 00:15:43 +07:00
Sergey M․
1433734c35 [downloader/common] Use utils.shell_quote for debug command line 2017-06-17 23:50:21 +07:00
Sergey M․
aefce8e6dc [utils] Use compat_shlex_quote in shell_quote 2017-06-17 23:48:58 +07:00
Sergey M․
8b6ac49ecc [postprocessor/execafterdownload] Encode command line (closes #13407) 2017-06-17 23:16:53 +07:00
Sergey M․
b08e235f09 [compat] Fix compat_shlex_quote on Windows (closes #5889, closes #10254) 2017-06-17 23:14:24 +07:00
Sergey M․
be80986ed9 [postprocessor/metadatafromtitle] Fix missing optional meta fields (closes #13408) 2017-06-17 19:05:10 +07:00
Yen Chi Hsuan
473e87064b [devscripts/prepare_manpage] Fix deprecated escape sequence on py36 2017-06-17 17:37:25 +08:00
Yen Chi Hsuan
4f90d2aeac [Makefile] Excluding __pycache__ correctly (#13400) 2017-06-17 17:09:24 +08:00
Jakub Adam Wieczorek
b230fefc3c [polskieradio] Fix extraction 2017-06-16 04:57:56 +07:00
Sergey M․
96a2daa1ee [extractor/common] Improve jwplayer subtitles extraction 2017-06-15 23:40:39 +07:00
gfabiano
0ea6efbb7a [xfileshare] Add support for fastvideo.me 2017-06-15 21:41:49 +07:00
Yen Chi Hsuan
6a9cb29509 [extractor/common] Fix json dumping with --geo-bypass
The line "[debug] Using fake IP %s (%s) as X-Forwarded-For." was printed
to stdout even with -j/-J, which breaks the resultant JSON.
2017-06-15 13:04:36 +08:00
Yen Chi Hsuan
ca27037171 [bilibili] Fix extraction of videos with double quotes in titles
Closes #13387
2017-06-15 11:19:03 +08:00
gfabiano
0bf4b71b75 [4tube] Fix extraction (closes #13381) 2017-06-15 04:16:50 +07:00
Marcin Cieślak
5215f45327 [disney] Add support for disneychannel.de 2017-06-15 04:13:04 +07:00
Sergey M․
0a268c6e11 [extractor/common] Improve jwplayer formats extraction (closes #13379) 2017-06-14 22:02:15 +07:00
Sergey M․
7dd5415cd0 [npo] Improve _VALID_URL (closes #13376) 2017-06-14 21:33:40 +07:00
Sergey M․
b5dc33daa9 [corus] Add support for showcase.ca 2017-06-13 23:27:27 +07:00
Sergey M․
97fa1f8dc4 [corus] Add support for history.ca (closes #13359) 2017-06-13 23:16:21 +07:00
Sergey M․
b081f53b08 [compat] Add compat_HTMLParseError to __all__ 2017-06-12 02:36:43 +07:00
Sergey M․
cb1e6d8985 release 2017.06.12 2017-06-12 02:23:17 +07:00
Sergey M․
9932ac5c58 [ChangeLog] Actualize 2017-06-12 02:01:15 +07:00
Sergey M․
bf87c36c93 [xfileshare] PEP 8 2017-06-12 02:01:12 +07:00
Sergey M․
b4a3d461e4 [utils] Handle HTMLParseError in extract_attributes (closes #13349) 2017-06-12 01:52:24 +07:00
Sergey M․
72b409559c [compat] Introduce compat_HTMLParseError 2017-06-12 01:50:32 +07:00
Sergey M․
534863e057 [xfileshare] Add support for rapidvideo (closes #13348) 2017-06-12 00:16:47 +07:00
Sergey M․
16bc958287 [xfileshare] Modernize and pass referrer 2017-06-12 00:14:04 +07:00
Sergey M․
624bd0104c [rutv] Add support for testplayer.vgtrk.com (closes #13347) 2017-06-11 21:36:19 +07:00
Sergey M․
28a4d6cce8 [newgrounds] Extract more metadata (closes #13232) 2017-06-11 21:30:06 +07:00
Sergey M․
2ae2ffda5e [utils] Improve unified_timestamp 2017-06-11 21:27:22 +07:00
Sergey M․
70e7967202 [newgrounds:playlist] Add extractor (closes #10611) 2017-06-11 20:50:55 +07:00
Sergey M․
6e999fbc12 [newgrounds] Improve formats and uploader extraction (closes #13346) 2017-06-11 19:44:44 +07:00
Sergey M․
7409af9eb3 [msn] Fix formats extraction 2017-06-11 08:56:53 +07:00
Sergey M․
4e3637034c [extractor/generic] Ensure format id is unicode string 2017-06-10 23:56:20 +07:00
Sergey M․
1afd0b0da7 [extractor/common] Return unicode string from _match_id 2017-06-09 00:40:03 +07:00
Sergey M․
7515830422 [turbo] Ensure format id is string 2017-06-09 00:31:56 +07:00
Sergey M․
f5521ea209 [sexu] Ensure height is int 2017-06-09 00:30:23 +07:00
Sergey M․
34646967ba [jove] Ensure comment count is int 2017-06-09 00:29:20 +07:00
Sergey M․
e4d2e76d8e [golem] Ensure format id is string 2017-06-09 00:27:11 +07:00
Sergey M․
87f5646937 [gfycat] Ensure filesize is int 2017-06-09 00:24:23 +07:00
Sergey M․
cc69a3de1b [foxgay] Ensure height is int 2017-06-09 00:22:14 +07:00
Sergey M․
15aeeb1188 [flickr] Ensure format id is string 2017-06-09 00:20:07 +07:00
Sergey M․
1693bebe4d [sohu] Fix numeric fields 2017-06-09 00:16:42 +07:00
Sergey M․
4244a13a1d [safari] Improve authentication detection (closes #13319) 2017-06-08 23:20:48 +07:00
Sergey M․
931adf8cc1 [liveleak] Ensure height is int (closes #13313) 2017-06-08 22:54:30 +07:00
Sergey M․
c996943418 [YoutubeDL] Sanitize more fields (#13313) 2017-06-08 22:53:14 +07:00
Sergey M․
76e6378358 [README.md] Improve man page formatting 2017-06-08 22:02:42 +07:00
Sergey M․
a355b57f58 [README.md] Clarify output template references (closes #13316) 2017-06-08 21:52:19 +07:00
Sergey M․
1508da30c2 [streamango] Skip download for test (closes #13292) 2017-06-07 21:53:40 +07:00
Luca Steeb
eb703e5380 [streamango] Make title optional 2017-06-07 21:53:33 +07:00
Sergey M․
0a3924e746 [rtlnl] Improve _VALID_URL (closes #13295) 2017-06-06 21:21:44 +07:00
Sergey M․
e1db730d86 [tvplayer] Fix extraction (closes #13291) 2017-06-06 00:13:57 +07:00
Sergey M․
537191826f release 2017.06.05 2017-06-05 00:48:07 +07:00
Sergey M․
130880ba48 [ChangeLog] Actualize 2017-06-05 00:43:38 +07:00
Sergey M․
f8ba3fda4d Credit @jktjkt for dvtv formats (#13063) 2017-06-05 00:38:44 +07:00
Sergey M․
e1b90cc3db Credit @mikf for beam:vod (#13032) 2017-06-05 00:35:41 +07:00
Sergey M․
43e6579558 Credit @adamvoss for bandcamp:weekly (#12758) 2017-06-04 23:22:19 +07:00
Sergey M․
6d923aab35 [bandcamp:weekly] Improve and extract more metadata (closes #12758) 2017-06-04 23:21:30 +07:00
Adam Voss
62bafabc09 [bandcamp:weekly] Add extractor 2017-06-04 23:21:07 +07:00
Sergey M․
9edcdac90c [pornhub:uservideos] Add missing raise 2017-06-04 20:39:55 +07:00
Sergey M․
cd138d8bd4 [pornhub:playlist] Fix extraction (closes #13281) 2017-06-04 15:54:19 +07:00
Sergey M․
cd750b731c [godtv] Remove extractor (closes #13175) 2017-06-03 22:08:12 +07:00
CeruleanSky
4bede0d8f5 [YoutubeDL] Don't emit ANSI escape codes on Windows 2017-06-03 19:14:23 +07:00
Sergey M․
f129c3f349 [safari] Fix typo (closes #13252) 2017-06-02 01:03:51 +07:00
Sergey M․
39d4c1be4d [youtube] Improve chapters extraction (closes #13247) 2017-06-01 23:29:45 +07:00
Sergey M․
f7a747ce59 [1tv] Lower preference for http formats (closes #13246) 2017-06-01 22:19:52 +07:00
Sergey M․
4489d41816 [francetv] Relax _VALID_URL 2017-06-01 00:15:15 +07:00
Sergey M․
87b5184a0d [drbonanza] Fix extraction (closes #13231) 2017-05-31 23:56:32 +07:00
Remita Amine
c56ad5c975 [packtpub] Fix authentication(closes #13240) 2017-05-31 15:44:29 +01:00
Sergey M․
6b7ce85cdc [README.md] Mention http_dash_segments protocol 2017-05-30 23:50:48 +07:00
Yen Chi Hsuan
d10d0e3cf8 [README.md] Add an example for how to use .netrc on Windows
That's a Python bug: http://bugs.python.org/issue28334
Most likely it will be fixed in Python 3.7: https://github.com/python/cpython/pull/123
2017-05-29 14:58:07 +08:00
Sergey M․
941ea38ef5 release 2017.05.29 2017-05-29 00:42:18 +07:00
Sergey M․
99bea8d298 [ChangeLog] Actualize 2017-05-29 00:33:56 +07:00
Yen Chi Hsuan
a49eccdfa7 [youtube] Parse player_url if format URLs are encrypted or DASH MPDs are requested
Fixes #13211
2017-05-28 20:20:20 +08:00
Sergey M․
a846173d93 [xhamster] Simplify (closes #13216) 2017-05-28 07:55:56 +07:00
fiocfun
78e210dea5 [xhamster] Fix author and like/dislike count extraction 2017-05-28 07:55:07 +07:00
Sergey M․
8555204274 [xhamster] Extract categories (closes #11728) 2017-05-28 07:50:15 +07:00
Sergey M․
164fcbfeb7 [abcnews] Improve and remove duplicate test (closes #12851) 2017-05-28 07:06:56 +07:00
Tithen-Firion
bc22df29c4 [abcnews] Add support for embed URLs 2017-05-28 07:06:29 +07:00
Sergey M․
7e688d2f6a [gaskrank] Improve (closes #12493) 2017-05-28 06:47:38 +07:00
motophil
5a6d1da442 [gaskrank] Fix extraction 2017-05-28 06:47:30 +07:00
Sergey M․
703751add4 [medialaan] PEP 8 (closes #12774) 2017-05-28 06:27:57 +07:00
midas02
4050be78e5 [medialaan] Fix videos with missing videoUrl
A rough trick to get around the two different json styles medialaan seems to be using.
Fix for these example videos:
https://vtmkzoom.be/video?aid=45724
https://vtmkzoom.be/video?aid=45425
2017-05-28 06:27:52 +07:00
Sergey M․
4d9fc40100 [dvtv] Improve and fix playlists support (closes #13063) 2017-05-28 06:19:54 +07:00
Jan Kundrát
765522345f [dvtv] Parse adaptive formats as well
The old code hit an error when it attempted to parse the string
"adaptive" for video height. Actually parsing the returned playlists is
a good idea because it adds more output formats, including some
audio-only-ones.
2017-05-28 06:19:46 +07:00
Sergey M․
6bceb36b99 [beam] Improve and add support for mixer.com (closes #13032) 2017-05-28 05:43:04 +07:00
Mike Fährmann
1e0d65f0bd [beam:vod] Add extractor 2017-05-28 05:42:23 +07:00
Sergey M․
03327bc9a6 [cbsinteractive] Relax _VALID_URL (closes #13213) 2017-05-27 22:37:24 +07:00
Yen Chi Hsuan
b407d8533d [utils] Drop an compatibility wrapper for Python < 2.6
addinfourl.getcode is added since Python 2.6a1. As youtube-dl now
requires 2.6+, this is no longer necessary.

See 9b0d46db11
2017-05-27 23:05:02 +08:00
Remita Amine
20e2c9de04 [adn] fix formats extraction 2017-05-26 20:00:44 +01:00
Yen Chi Hsuan
d16c0121b9 [youku] Extract more metadata (closes #10433) 2017-05-27 00:08:37 +08:00
Sergey M․
7f4c3a7439 [cbsnews] Fix extraction (closes #13205) 2017-05-26 22:42:27 +07:00
Sergey M․
28dbde9cc3 release 2017.05.26 2017-05-26 22:29:16 +07:00
Sergey M․
cc304ce588 [ChangeLog] Actualize 2017-05-26 22:27:56 +07:00
Yen Chi Hsuan
98a0618941 [ChangeLog] Update after the fix for #11381 2017-05-26 23:22:54 +08:00
Yen Chi Hsuan
fd545fc6d1 Revert "[youtube] Don't use the DASH manifest from 'get_video_info' if 'use_cipher_signature' is True (#5118)"
This reverts commit 87dc451108.
2017-05-26 23:22:54 +08:00
Sergey M․
97067db2ae [bbc] Add support for authentication 2017-05-26 22:12:24 +07:00
Yen Chi Hsuan
c130f0a37b [tudou] Merge into youku extractor (fixes #12214)
Also, there are no tudou playlists anymore. All playlist URLs points to youku
playlists.
2017-05-26 23:04:42 +08:00
Yen Chi Hsuan
d3d4ba7f24 [youku:show] Fix extraction 2017-05-26 21:59:16 +08:00
Yen Chi Hsuan
5552c9eb0f [utils] Recognize more patterns in strip_jsonp()
Used in Youku Show pages
2017-05-26 21:58:18 +08:00
Yen Chi Hsuan
59ed87cbd9 [youku] Fix extraction (closes #13191) 2017-05-26 19:16:52 +08:00
Sergey M․
b7f8749304 [udemy] Fix extraction for outputs' format entries without URL (closes #13192) 2017-05-25 22:28:26 +07:00
Yen Chi Hsuan
5192ee17e7 [postprocessor/ffmpeg] Fix metadata filename handling on Python 2
Fixes #13182
2017-05-25 22:07:03 +08:00
Sergey M․
e834f04400 [vimeo] Fix formats' sorting (closes #13189) 2017-05-24 22:58:16 +07:00
Remita Amine
884d09f330 [cbsnews] fix extraction for 60 Minutes videos 2017-05-24 09:44:41 +01:00
remitamine
9e35298f97 Merge pull request #12861 from Tithen-Firion/cbsinteractive-fix
[cbsinteractive] update extractor and test cases
2017-05-24 10:21:33 +02:00
Sergey M․
0551f1b07b Credit @gritstub for vevo fix (#12879) 2017-05-23 21:47:40 +07:00
Sergey M․
de53511201 Credit @timendum for rai (#11790) and mediaset (#12964) 2017-05-23 00:41:53 +07:00
Sergey M․
2570e85167 release 2017.05.23 2017-05-23 00:17:48 +07:00
Sergey M․
9dc5ab041f [ChangeLog] Actualize 2017-05-23 00:15:44 +07:00
Sergey M․
01f3c8e290 Credit @fredbourni for noovo (#12792) 2017-05-23 00:08:14 +07:00
Sergey M․
06c1b3ce07 Credit @mphe for streamango (#12643) 2017-05-23 00:07:31 +07:00
Sergey M․
0b75e42dfb Credit @zurfyx for atresplayer improvements (#12548) 2017-05-23 00:00:49 +07:00
Sergey M․
a609e61a90 [downloader/external] Pass -loglevel to ffmpeg downloader (closes #13183) 2017-05-22 23:40:07 +07:00
Ondřej Caletka
afdb387cd8 [streamcz] Add support for subtitles 2017-05-21 15:41:52 +07:00
Sergey M․
dc4e4f90a2 [youtube] Modernize 2017-05-21 01:18:56 +07:00
Protuhj
fdc20f87a6 [youtube] Fix DASH manifest signature decryption (closes #8944) 2017-05-21 01:11:37 +07:00
Sergey M․
35a2d221a3 [toggle] Relax _VALID_URL (closes #13172) 2017-05-20 23:06:30 +07:00
Nii-90
daa4e9ff90 [adobepass] Add support for Brighthouse MSO 2017-05-20 20:50:46 +07:00
Sergey M․
2ca29f1aaf [toypics] Improve and modernize 2017-05-20 01:29:33 +07:00
vobe
77d682da9d [toypics] Fix extraction 2017-05-20 01:18:03 +07:00
Sergey M․
8fffac6927 [njpwworld] Fix extraction (closes #13162) 2017-05-19 23:11:02 +07:00
Sergey M․
5f6fbcea08 [hitbox] Add support for smashcast.tv (closes #13154) 2017-05-19 22:34:00 +07:00
Logan B
00cb0faca8 [mitele] Update app key regex 2017-05-19 21:54:57 +07:00
Sergey M․
bfdf6fcc66 release 2017.05.18.1 2017-05-18 23:00:03 +07:00
Sergey M․
bcaa1dd060 [ChangeLog] Actualize 2017-05-18 22:58:14 +07:00
Sergey M․
0e2d626ddd [jsinterp] Fix typo and cleanup regexes (closes #13134) 2017-05-18 22:57:38 +07:00
Sergey M․
9221d5d7a8 [ChangeLog] Fix typo 2017-05-18 22:37:32 +07:00
Tithen-Firion
96820c1c6b [cbsinteractive] extract formats with CBSIE 2017-04-27 20:23:52 +02:00
Tithen-Firion
e095109da1 [cbsinteractive] update test cases 2017-04-27 15:40:17 +02:00
Tithen-Firion
d68afc5bc9 [cbsinteractive] fix extractor 2017-04-27 15:27:01 +02:00
101 changed files with 2248 additions and 1141 deletions

View File

@@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.18** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.05.18 [debug] youtube-dl version 2017.07.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@@ -212,3 +212,11 @@ Xiao Di Guan
Thomas Winant Thomas Winant
Daniel Twardowski Daniel Twardowski
Jeremie Jarosh Jeremie Jarosh
Gerard Rovira
Marvin Ewald
Frédéric Bournival
Timendum
gritstub
Adam Voss
Mike Fährmann
Jan Kundrát

194
ChangeLog
View File

@@ -1,3 +1,195 @@
version 2017.07.02
Core
* [extractor/common] Improve _json_ld
Extractors
+ [thisoldhouse] Add more fallbacks for video id
* [thisoldhouse] Fix video id extraction (#13540, #13541)
* [xfileshare] Extend format regular expression (#13536)
* [ted] Fix extraction (#13535)
+ [tastytrade] Add support for tastytrade.com (#13521)
* [dplayit] Relax video id regular expression (#13524)
+ [generic] Extract more generic metadata (#13527)
+ [bbccouk] Capture and output error message (#13501, #13518)
* [cbsnews] Relax video info regular expression (#13284, #13503)
+ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
* [soundcloud] Switch to https for API requests (#13502)
* [pandatv] Switch to https for API and download URLs
+ [pandatv] Add support for https URLs (#13491)
+ [niconico] Support sp subdomain (#13494)
version 2017.06.25
Core
+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
* [YoutubeDL] Skip malformed formats for better extraction robustness
Extractors
+ [wsj] Add support for barrons.com (#13470)
+ [ign] Add another video id pattern (#13328)
+ [raiplay:live] Add support for live streams (#13414)
+ [redbulltv] Add support for live videos and segments (#13486)
+ [onetpl] Add support for videos embedded via pulsembed (#13482)
* [ooyala] Make more robust
* [ooyala] Skip empty format URLs (#13471, #13476)
* [hgtv.com:show] Fix typo
version 2017.06.23
Core
* [adobepass] Fix extraction on older python 2.6
Extractors
* [youtube] Adapt to new automatic captions rendition (#13467)
* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
* [drtuber] Fix formats extraction (#12058)
* [youporn] Fix upload date extraction
* [youporn] Improve formats extraction
* [youporn] Fix title extraction (#13456)
* [googledrive] Fix formats sorting (#13443)
* [watchindianporn] Fix extraction (#13411, #13415)
+ [vimeo] Add fallback mp4 extension for original format
+ [ruv] Add support for ruv.is (#13396)
* [viu] Fix extraction on older python 2.6
* [pandora.tv] Fix upload_date extraction (#12846)
+ [asiancrush] Add support for asiancrush.com (#13420)
version 2017.06.18
Core
* [downloader/common] Use utils.shell_quote for debug command line
* [utils] Use compat_shlex_quote in shell_quote
* [postprocessor/execafterdownload] Encode command line (#13407)
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
in --metadata-from-title (#13408)
* [extractor/common] Fix json dumping with --geo-bypass
+ [extractor/common] Improve jwplayer subtitles extraction
+ [extractor/common] Improve jwplayer formats extraction (#13379)
Extractors
* [polskieradio] Fix extraction (#13392)
+ [xfileshare] Add support for fastvideo.me (#13385)
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
* [4tube] Fix extraction (#13381, #13382)
+ [disney] Add support for disneychannel.de (#13383)
* [npo] Improve URL regular expression (#13376)
+ [corus] Add support for showcase.ca
+ [corus] Add support for history.ca (#13359)
version 2017.06.12
Core
* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
+ [compat] Introduce compat_HTMLParseError
* [utils] Improve unified_timestamp
* [extractor/generic] Ensure format id is unicode string
* [extractor/common] Return unicode string from _match_id
+ [YoutubeDL] Sanitize more fields (#13313)
Extractors
+ [xfileshare] Add support for rapidvideo.tv (#13348)
* [xfileshare] Modernize and pass Referer
+ [rutv] Add support for testplayer.vgtrk.com (#13347)
+ [newgrounds] Extract more metadata (#13232)
+ [newgrounds:playlist] Add support for playlists (#10611)
* [newgrounds] Improve formats and uploader extraction (#13346)
* [msn] Fix formats extraction
* [turbo] Ensure format id is string
* [sexu] Ensure height is int
* [jove] Ensure comment count is int
* [golem] Ensure format id is string
* [gfycat] Ensure filesize is int
* [foxgay] Ensure height is int
* [flickr] Ensure format id is string
* [sohu] Fix numeric fields
* [safari] Improve authentication detection (#13319)
* [liveleak] Ensure height is int (#13313)
* [streamango] Make title optional (#13292)
* [rtlnl] Improve URL regular expression (#13295)
* [tvplayer] Fix extraction (#13291)
version 2017.06.05
Core
* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
Extractors
+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
* [pornhub:playlist] Fix extraction (#13281)
- [godtv] Remove extractor (#13175)
* [safari] Fix typo (#13252)
* [youtube] Improve chapters extraction (#13247)
* [1tv] Lower preference for HTTP formats (#13246)
* [francetv] Relax URL regular expression
* [drbonanza] Fix extraction (#13231)
* [packtpub] Fix authentication (#13240)
version 2017.05.29
Extractors
* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
(#13211)
* [xhamster] Fix uploader and like/dislike count extraction (#13216)
+ [xhamster] Extract categories (#11728)
+ [abcnews] Add support for embed URLs (#12851)
* [gaskrank] Fix extraction (#12493)
* [medialaan] Fix videos with missing videoUrl (#12774)
* [dvtv] Fix playlist support
+ [dvtv] Add support for DASH and HLS formats (#13063)
+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
* [cbsinteractive] Relax URL regular expression (#13213)
* [adn] Fix formats extraction
+ [youku] Extract more metadata (#10433)
* [cbsnews] Fix extraction (#13205)
version 2017.05.26
Core
+ [utils] strip_jsonp() can recognize more patterns
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
Extractors
+ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
+ [bbc] Add support for authentication
* [tudou] Merge into youku extractor (#12214)
* [youku:show] Fix extraction
* [youku] Fix extraction (#13191)
* [udemy] Fix extraction for outputs' format entries without URL (#13192)
* [vimeo] Fix formats' sorting (#13189)
* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
version 2017.05.23
Core
+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
+ [adobepass] Add support for Bright House Networks (#13149)
Extractors
+ [streamcz] Add support for subtitles (#13174)
* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
* [toggle] Relax URL regular expression (#13172)
* [toypics] Fix extraction (#13077)
* [njpwworld] Fix extraction (#13162, #13169)
+ [hitbox] Add support for smashcast.tv (#13154)
* [mitele] Update app key regular expression (#13158)
version 2017.05.18.1
Core
* [jsinterp] Fix typo and cleanup regular expressions (#13134)
version 2017.05.18 version 2017.05.18
Core Core
@@ -24,7 +216,7 @@ Core
+ [postprocessor/metadatafromtitle] Add support regular expression syntax for + [postprocessor/metadatafromtitle] Add support regular expression syntax for
--metadata-from-title (#13065) --metadata-from-title (#13065)
Extractor Extractors
+ [mediaset] Add support for video.mediaset.it (#12708, #12964) + [mediaset] Add support for video.mediaset.it (#12708, #12964)
* [orf:radio] Fix extraction (#11643, #12926) * [orf:radio] Fix extraction (#11643, #12926)
* [aljazeera] Extend URL regular expression (#13053) * [aljazeera] Extend URL regular expression (#13053)

View File

@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*.pyc' \ --exclude '*.pyc' \
--exclude '*.pyo' \ --exclude '*.pyo' \
--exclude '*~' \ --exclude '*~' \
--exclude '__pycache' \ --exclude '__pycache__' \
--exclude '.git' \ --exclude '.git' \
--exclude 'testdata' \ --exclude 'testdata' \
--exclude 'docs/_build' \ --exclude 'docs/_build' \

View File

@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--max-views COUNT Do not download any videos with more than --max-views COUNT Do not download any videos with more than
COUNT views COUNT views
--match-filter FILTER Generic video filter. Specify any key (see --match-filter FILTER Generic video filter. Specify any key (see
help for -o for a list of available keys) the "OUTPUT TEMPLATE" for a list of
to match if the key is present, !key to available keys) to match if the key is
check if the key is not present, key > present, !key to check if the key is not
NUMBER (like "comment_count > 12", also present, key > NUMBER (like "comment_count
works with >=, <, <=, !=, =) to compare > 12", also works with >=, <, <=, !=, =) to
against a number, key = 'LITERAL' (like compare against a number, key = 'LITERAL'
"uploader = 'Mike Smith'", also works with (like "uploader = 'Mike Smith'", also works
!=) to match against a string literal and & with !=) to match against a string literal
to require multiple matches. Values which and & to require multiple matches. Values
are not known are excluded unless you put a which are not known are excluded unless you
question mark (?) after the operator. For put a question mark (?) after the operator.
example, to only match videos that have For example, to only match videos that have
been liked more than 100 times and disliked been liked more than 100 times and disliked
less than 50 times (or the dislike less than 50 times (or the dislike
functionality is not available at the given functionality is not available at the given
@@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--get-filename Simulate, quiet but print output filename --get-filename Simulate, quiet but print output filename
--get-format Simulate, quiet but print output format --get-format Simulate, quiet but print output format
-j, --dump-json Simulate, quiet but print JSON information. -j, --dump-json Simulate, quiet but print JSON information.
See --output for a description of available See the "OUTPUT TEMPLATE" for a description
keys. of available keys.
-J, --dump-single-json Simulate, quiet but print JSON information -J, --dump-single-json Simulate, quiet but print JSON information
for each command-line argument. If the URL for each command-line argument. If the URL
refers to a playlist, dump the whole refers to a playlist, dump the whole
@@ -474,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
``` ```
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration). To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
On Windows you may also need to setup the `%HOME%` environment variable manually. On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
```
set HOME=%USERPROFILE%
```
# OUTPUT TEMPLATE # OUTPUT TEMPLATE
@@ -532,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
- `playlist_id` (string): Playlist identifier - `playlist_id` (string): Playlist identifier
- `playlist_title` (string): Playlist title - `playlist_title` (string): Playlist title
Available for the video that belongs to some logical chapter or section: Available for the video that belongs to some logical chapter or section:
- `chapter` (string): Name or title of the chapter the video belongs to - `chapter` (string): Name or title of the chapter the video belongs to
- `chapter_number` (numeric): Number of the chapter the video belongs to - `chapter_number` (numeric): Number of the chapter the video belongs to
- `chapter_id` (string): Id of the chapter the video belongs to - `chapter_id` (string): Id of the chapter the video belongs to
Available for the video that is an episode of some series or programme: Available for the video that is an episode of some series or programme:
- `series` (string): Title of the series or programme the video episode belongs to - `series` (string): Title of the series or programme the video episode belongs to
- `season` (string): Title of the season the video episode belongs to - `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to - `season_number` (numeric): Number of the season the video episode belongs to
@@ -548,6 +552,7 @@ Available for the video that is an episode of some series or programme:
- `episode_id` (string): Id of the video episode - `episode_id` (string): Id of the video episode
Available for the media that is a track or a part of a music album: Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track - `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc - `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track - `track_id` (string): Id of the track
@@ -649,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
- `acodec`: Name of the audio codec in use - `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use - `vcodec`: Name of the video codec in use
- `container`: Name of the container format - `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`) - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format - `format_id`: A short description of the format
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.

View File

@@ -8,7 +8,7 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md') README_FILE = os.path.join(ROOT_DIR, 'README.md')
PREFIX = '''%YOUTUBE-DL(1) PREFIX = r'''%YOUTUBE-DL(1)
# NAME # NAME

View File

@@ -67,6 +67,8 @@
- **arte.tv:info** - **arte.tv:info**
- **arte.tv:magazine** - **arte.tv:magazine**
- **arte.tv:playlist** - **arte.tv:playlist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer** - **AtresPlayer**
- **ATTTechChannel** - **ATTTechChannel**
- **ATVAt** - **ATVAt**
@@ -87,13 +89,13 @@
- **bambuser:channel** - **bambuser:channel**
- **Bandcamp** - **Bandcamp**
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:weekly**
- **bangumi.bilibili.com**: BiliBili番剧 - **bangumi.bilibili.com**: BiliBili番剧
- **bbc**: BBC - **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer - **bbc.co.uk**: BBC iPlayer
- **bbc.co.uk:article**: BBC articles - **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist** - **bbc.co.uk:playlist**
- **Beam:live**
- **Beatport** - **Beatport**
- **Beeg** - **Beeg**
- **BehindKink** - **BehindKink**
@@ -311,7 +313,6 @@
- **Go** - **Go**
- **Go90** - **Go90**
- **GodTube** - **GodTube**
- **GodTV**
- **Golem** - **Golem**
- **GoogleDrive** - **GoogleDrive**
- **Goshgay** - **Goshgay**
@@ -453,6 +454,8 @@
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:stream** - **mixcloud:stream**
- **mixcloud:user** - **mixcloud:user**
- **Mixer:live**
- **Mixer:vod**
- **MLB** - **MLB**
- **Mnet** - **Mnet**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
@@ -511,6 +514,7 @@
- **netease:song**: 网易云音乐 - **netease:song**: 网易云音乐
- **Netzkino** - **Netzkino**
- **Newgrounds** - **Newgrounds**
- **NewgroundsPlaylist**
- **Newstube** - **Newstube**
- **NextMedia**: 蘋果日報 - **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextMediaActionNews**: 蘋果日報 - 動新聞
@@ -640,6 +644,7 @@
- **RadioJavan** - **RadioJavan**
- **Rai** - **Rai**
- **RaiPlay** - **RaiPlay**
- **RaiPlayLive**
- **RBMARadio** - **RBMARadio**
- **RDS**: RDS.ca - **RDS**: RDS.ca
- **RedBullTV** - **RedBullTV**
@@ -684,6 +689,7 @@
- **rutube:person**: Rutube person videos - **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU - **RUTV**: RUTV.RU
- **Ruutu** - **Ruutu**
- **Ruv**
- **safari**: safaribooksonline.com online video - **safari**: safaribooksonline.com online video
- **safari:api** - **safari:api**
- **safari:course**: safaribooksonline.com online courses - **safari:course**: safaribooksonline.com online courses
@@ -762,6 +768,7 @@
- **Tagesschau** - **Tagesschau**
- **tagesschau:player** - **tagesschau:player**
- **Tass** - **Tass**
- **TastyTrade**
- **TBS** - **TBS**
- **TDSLifeway** - **TDSLifeway**
- **teachertube**: teachertube.com videos - **teachertube**: teachertube.com videos
@@ -803,16 +810,13 @@
- **ToonGoggles** - **ToonGoggles**
- **Tosh**: Tosh.0 - **Tosh**: Tosh.0
- **tou.tv** - **tou.tv**
- **Toypics**: Toypics user profile - **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile - **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken) - **TrailerAddict** (Currently broken)
- **Trilulilu** - **Trilulilu**
- **TruTV** - **TruTV**
- **Tube8** - **Tube8**
- **TubiTv** - **TubiTv**
- **tudou**
- **tudou:album**
- **tudou:playlist**
- **Tumblr** - **Tumblr**
- **tunein:clip** - **tunein:clip**
- **tunein:program** - **tunein:program**
@@ -976,7 +980,7 @@
- **WSJArticle** - **WSJArticle**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑 - **xiami:album**: 虾米音乐 - 专辑

View File

@@ -340,6 +340,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
def test_determine_ext(self): def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -678,6 +679,14 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'}) self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('window.cb && cb({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -907,6 +916,8 @@ class TestUtil(unittest.TestCase):
supports_outside_bmp = False supports_outside_bmp = False
if supports_outside_bmp: if supports_outside_bmp:
self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'}) self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
# Malformed HTML should not break attributes extraction on older Python
self.assertEqual(extract_attributes('<mal"formed/>'), {})
def test_clean_html(self): def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\nb'), 'a: b')

View File

@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
'title': '3 - Из серпов луны...[Iz serpov luny]', 'title': '3 - Из серпов луны...[Iz serpov luny]',
}] }]
), ),
(
# https://www.youtube.com/watch?v=xZW70zEasOk
# time point more than duration
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
283,
[]
),
] ]
def test_youtube_chapters(self): def test_youtube_chapters(self):

View File

@@ -58,6 +58,7 @@ from .utils import (
format_bytes, format_bytes,
formatSeconds, formatSeconds,
GeoRestrictedError, GeoRestrictedError,
int_or_none,
ISO3166Utils, ISO3166Utils,
locked_file, locked_file,
make_HTTPS_handler, make_HTTPS_handler,
@@ -302,6 +303,17 @@ class YoutubeDL(object):
postprocessor. postprocessor.
""" """
_NUMERIC_FIELDS = set((
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
'timestamp', 'upload_year', 'upload_month', 'upload_day',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
'playlist_index',
))
params = None params = None
_ies = [] _ies = []
_pps = [] _pps = []
@@ -498,24 +510,25 @@ class YoutubeDL(object):
def to_console_title(self, message): def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): if compat_os_name == 'nt':
# c_wchar_p() might not be necessary if `message` is if ctypes.windll.kernel32.GetConsoleWindow():
# already of type unicode() # c_wchar_p() might not be necessary if `message` is
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) # already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ: elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file) self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self): def save_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack # Save the title on stack
self._write_string('\033[22;0t', self._screen_file) self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self): def restore_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack # Restore the title from stack
self._write_string('\033[23;0t', self._screen_file) self._write_string('\033[23;0t', self._screen_file)
@@ -638,22 +651,11 @@ class YoutubeDL(object):
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl) outtmpl)
NUMERIC_FIELDS = set((
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
'timestamp', 'upload_year', 'upload_month', 'upload_day',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
'playlist_index',
))
# Missing numeric fields used together with integer presentation types # Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since # in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output # string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type. # template for missing fields to meet string presentation type.
for numeric_field in NUMERIC_FIELDS: for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict: if numeric_field not in template_dict:
# As of [1] format syntax is: # As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
@@ -1344,9 +1346,28 @@ class YoutubeDL(object):
if 'title' not in info_dict: if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result') raise ExtractorError('Missing "title" field in extractor result')
if not isinstance(info_dict['id'], compat_str): def report_force_conversion(field, field_not, conversion):
self.report_warning('"id" field is not a string - forcing string conversion') self.report_warning(
info_dict['id'] = compat_str(info_dict['id']) '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
% (field, field_not, conversion))
def sanitize_string_field(info, string_field):
field = info.get(string_field)
if field is None or isinstance(field, compat_str):
return
report_force_conversion(string_field, 'a string', 'string')
info[string_field] = compat_str(field)
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
field = info.get(numeric_field)
if field is None or isinstance(field, compat_numeric_types):
continue
report_force_conversion(numeric_field, 'numeric', 'int')
info[numeric_field] = int_or_none(field)
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
if 'playlist' not in info_dict: if 'playlist' not in info_dict:
# It isn't part of a playlist # It isn't part of a playlist
@@ -1427,15 +1448,25 @@ class YoutubeDL(object):
if not formats: if not formats:
raise ExtractorError('No video formats found!') raise ExtractorError('No video formats found!')
def is_wellformed(f):
url = f.get('url')
valid_url = url and isinstance(url, compat_str)
if not valid_url:
self.report_warning(
'"url" field is missing or empty - skipping format, '
'there is an error in extractor')
return valid_url
# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
formats_dict = {} formats_dict = {}
# We check that all the formats have the format and format_id fields # We check that all the formats have the format and format_id fields
for i, format in enumerate(formats): for i, format in enumerate(formats):
if 'url' not in format: sanitize_string_field(format, 'format_id')
raise ExtractorError('Missing "url" key in result (index %d)' % i) sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url']) format['url'] = sanitize_url(format['url'])
if format.get('format_id') is None: if format.get('format_id') is None:
format['format_id'] = compat_str(i) format['format_id'] = compat_str(i)
else: else:

View File

@@ -2322,6 +2322,19 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
from HTMLParser import HTMLParser as compat_HTMLParser from HTMLParser import HTMLParser as compat_HTMLParser
try: # Python 2
from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python <3.4
try:
from html.parser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python >3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling
class compat_HTMLParseError(Exception):
pass
try: try:
from subprocess import DEVNULL from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -2604,14 +2617,22 @@ except ImportError: # Python 2
parsed_result[name] = [value] parsed_result[name] = [value]
return parsed_result return parsed_result
try:
from shlex import quote as compat_shlex_quote compat_os_name = os._name if os.name == 'java' else os.name
except ImportError: # Python < 3.3
if compat_os_name == 'nt':
def compat_shlex_quote(s): def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s): return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
return s else:
else: try:
return "'" + s.replace("'", "'\"'\"'") + "'" from shlex import quote as compat_shlex_quote
except ImportError: # Python < 3.3
def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
try: try:
@@ -2636,9 +2657,6 @@ def compat_ord(c):
return ord(c) return ord(c)
compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
compat_getenv = os.getenv compat_getenv = os.getenv
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser
@@ -2882,6 +2900,7 @@ else:
__all__ = [ __all__ = [
'compat_HTMLParseError',
'compat_HTMLParser', 'compat_HTMLParser',
'compat_HTTPError', 'compat_HTTPError',
'compat_basestring', 'compat_basestring',

View File

@@ -8,10 +8,11 @@ import random
from ..compat import compat_os_name from ..compat import compat_os_name
from ..utils import ( from ..utils import (
decodeArgument,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
decodeArgument,
format_bytes, format_bytes,
shell_quote,
timeconvert, timeconvert,
) )
@@ -381,10 +382,5 @@ class FileDownloader(object):
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(str_args[0])
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen('[debug] %s command line: %s' % ( self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args))) exe, shell_quote(str_args)))

View File

@@ -212,6 +212,11 @@ class FFmpegFD(ExternalFD):
args = [ffpp.executable, '-y'] args = [ffpp.executable, '-y']
for log_level in ('quiet', 'verbose'):
if self.params.get(log_level, False):
args += ['-loglevel', log_level]
break
seekable = info_dict.get('_seekable') seekable = info_dict.get('_seekable')
if seekable is not None: if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server # setting -seekable prevents ffmpeg from guessing if the server

View File

@@ -12,7 +12,15 @@ from ..compat import compat_urlparse
class AbcNewsVideoIE(AMPIE): class AbcNewsVideoIE(AMPIE):
IE_NAME = 'abcnews:video' IE_NAME = 'abcnews:video'
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
abcnews\.go\.com/
(?:
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
video/embed\?.*?\bid=
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
@@ -29,6 +37,9 @@ class AbcNewsVideoIE(AMPIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://abcnews.go.com/video/embed?id=46979033',
'only_matching': True,
}, { }, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True, 'only_matching': True,

View File

@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4', 'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers', 'title': 'East Bay museum celebrates vintage synthesizers',
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075, 'timestamp': 1421123075,
'upload_date': '20150113', 'upload_date': '20150113',

View File

@@ -15,6 +15,7 @@ from ..utils import (
intlist_to_bytes, intlist_to_bytes,
srt_subtitles_timecode, srt_subtitles_timecode,
strip_or_none, strip_or_none,
urljoin,
) )
@@ -31,25 +32,28 @@ class ADNIE(InfoExtractor):
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
} }
} }
_BASE_URL = 'http://animedigitalnetwork.fr'
def _get_subtitles(self, sub_path, video_id): def _get_subtitles(self, sub_path, video_id):
if not sub_path: if not sub_path:
return None return None
enc_subtitles = self._download_webpage( enc_subtitles = self._download_webpage(
'http://animedigitalnetwork.fr/' + sub_path, urljoin(self._BASE_URL, sub_path),
video_id, fatal=False) video_id, fatal=False, headers={
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
})
if not enc_subtitles: if not enc_subtitles:
return None return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'), bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
)) ))
subtitles_json = self._parse_json( subtitles_json = self._parse_json(
dec_subtitles[:-compat_ord(dec_subtitles[-1])], dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
None, fatal=False) None, fatal=False)
if not subtitles_json: if not subtitles_json:
return None return None
@@ -103,9 +107,16 @@ class ADNIE(InfoExtractor):
metas = options.get('metas') or {} metas = options.get('metas') or {}
title = metas.get('title') or video_info['title'] title = metas.get('title') or video_info['title']
links = player_config.get('links') or {} links = player_config.get('links') or {}
if not links:
links_url = player_config['linksurl']
links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id)
links = links_data.get('links') or {}
formats = [] formats = []
for format_id, qualities in links.items(): for format_id, qualities in links.items():
if not isinstance(qualities, dict):
continue
for load_balancer_url in qualities.values(): for load_balancer_url in qualities.values():
load_balancer_data = self._download_json( load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {} load_balancer_url, video_id, fatal=False) or {}

View File

@@ -6,12 +6,16 @@ import time
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_kwargs,
compat_urlparse,
)
from ..utils import ( from ..utils import (
unescapeHTML, unescapeHTML,
urlencode_postdata, urlencode_postdata,
unified_timestamp, unified_timestamp,
ExtractorError, ExtractorError,
NO_DEFAULT,
) )
@@ -21,6 +25,11 @@ MSO_INFO = {
'username_field': 'username', 'username_field': 'username',
'password_field': 'password', 'password_field': 'password',
}, },
'ATTOTT': {
'name': 'DIRECTV NOW',
'username_field': 'email',
'password_field': 'loginpassword',
},
'Rogers': { 'Rogers': {
'name': 'Rogers', 'name': 'Rogers',
'username_field': 'UserName', 'username_field': 'UserName',
@@ -36,6 +45,11 @@ MSO_INFO = {
'username_field': 'Ecom_User_ID', 'username_field': 'Ecom_User_ID',
'password_field': 'Ecom_Password', 'password_field': 'Ecom_Password',
}, },
'Brighthouse': {
'name': 'Bright House Networks | Spectrum',
'username_field': 'j_username',
'password_field': 'j_password',
},
'Charter_Direct': { 'Charter_Direct': {
'name': 'Charter Spectrum', 'name': 'Charter Spectrum',
'username_field': 'IDToken1', 'username_field': 'IDToken1',
@@ -1308,11 +1322,14 @@ class AdobePassIE(InfoExtractor):
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd' _MVPD_CACHE = 'ap-mvpd'
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
headers = kwargs.get('headers', {}) headers = kwargs.get('headers', {})
headers.update(self.geo_verification_headers()) headers.update(self.geo_verification_headers())
kwargs['headers'] = headers kwargs['headers'] = headers
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) return super(AdobePassIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
@staticmethod @staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating): def _get_mvpd_resource(provider_id, title, guid, rating):
@@ -1356,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
def extract_redirect_url(html, url=None, fatal=False):
# TODO: eliminate code duplication with generic extractor and move
# redirection code into _download_webpage_handle
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
redirect_url = self._search_regex(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
html, 'meta refresh redirect',
default=NO_DEFAULT if fatal else None, fatal=fatal)
if not redirect_url:
return None
if url:
redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
return redirect_url
mvpd_headers = { mvpd_headers = {
'ap_42': 'anonymous', 'ap_42': 'anonymous',
'ap_11': 'Linux i686', 'ap_11': 'Linux i686',
@@ -1405,16 +1437,15 @@ class AdobePassIE(InfoExtractor):
if '<form name="signin"' in provider_redirect_page: if '<form name="signin"' in provider_redirect_page:
provider_login_page_res = provider_redirect_page_res provider_login_page_res = provider_redirect_page_res
elif 'http-equiv="refresh"' in provider_redirect_page: elif 'http-equiv="refresh"' in provider_redirect_page:
oauth_redirect_url = self._html_search_regex( oauth_redirect_url = extract_redirect_url(
r'content="0;\s*url=([^\'"]+)', provider_redirect_page, fatal=True)
provider_redirect_page, 'meta refresh redirect')
provider_login_page_res = self._download_webpage_handle( provider_login_page_res = self._download_webpage_handle(
oauth_redirect_url, video_id, oauth_redirect_url, video_id,
'Downloading Provider Login Page') self._DOWNLOADING_LOGIN_PAGE)
else: else:
provider_login_page_res = post_form( provider_login_page_res = post_form(
provider_redirect_page_res, provider_redirect_page_res,
'Downloading Provider Login Page') self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form( mvpd_confirm_page_res = post_form(
provider_login_page_res, 'Logging in', { provider_login_page_res, 'Logging in', {
@@ -1461,8 +1492,17 @@ class AdobePassIE(InfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded' 'Content-Type': 'application/x-www-form-urlencoded'
}) })
else: else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
provider_redirect_page, urlh = provider_redirect_page_res
provider_refresh_redirect_url = extract_redirect_url(
provider_redirect_page, url=urlh.geturl())
if provider_refresh_redirect_url:
provider_redirect_page_res = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
'Downloading Provider Redirect Page (meta refresh)')
provider_login_page_res = post_form( provider_login_page_res = post_form(
provider_redirect_page_res, 'Downloading Provider Login Page') provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
mso_info.get('username_field', 'username'): username, mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password, mso_info.get('password_field', 'password'): password,

View File

@@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
remove_end,
urlencode_postdata,
)
class AsianCrushIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
_TESTS = [{
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:3db14e9186197857e7063522cb89a805',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
data=urlencode_postdata({
'postid': video_id,
'action': 'get_channel_kaltura_vars',
}))
entry_id = data['entry_id']
return self.url_result(
'kaltura:%s:%s' % (data['partner_id'], entry_id),
ie=KalturaIE.ie_key(), video_id=entry_id,
video_title=data.get('vid_label'))
class AsianCrushPlaylistIE(InfoExtractor):
    """Extractor for AsianCrush series pages (playlists of episodes)."""
    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
    _TEST = {
        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
        'info_dict': {
            'id': '12481',
            'title': 'Scholar Who Walks the Night',
            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
        },
        'playlist_count': 20,
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = []

        # Collect links to individual episodes; only anchors carrying the
        # 'clearfix' class are real episode entries on the series page.
        for mobj in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                webpage):
            attrs = extract_attributes(mobj.group(0))
            if attrs.get('class') == 'clearfix':
                entries.append(self.url_result(
                    mobj.group('url'), ie=AsianCrushIE.ie_key()))

        # Bug fix: the original pattern used `[^>]` (exactly one character)
        # between `<h1` and the id attribute, so any heading with more than
        # one character of attributes before id="movieTitle" (e.g. a class
        # attribute) never matched; `[^>]*` accepts any attribute prefix.
        title = remove_end(
            self._html_search_regex(
                r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None) or self._og_search_title(
                webpage, default=None) or self._html_search_meta(
                'twitter:title', webpage, 'title',
                default=None) or self._search_regex(
                r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
            ' | AsianCrush')

        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False)

        return self.playlist_result(entries, playlist_id, title, description)

View File

@@ -14,14 +14,16 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
KNOWN_EXTENSIONS,
parse_filesize, parse_filesize,
unescapeHTML, unescapeHTML,
update_url_query, update_url_query,
unified_strdate,
) )
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439', 'md5': 'c557841d5e50261777a6585648adf439',
@@ -155,7 +157,7 @@ class BandcampIE(InfoExtractor):
class BandcampAlbumIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album' IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -222,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
'playlist_count': 2, 'playlist_count': 2,
}] }]
@classmethod
def suitable(cls, url):
return (False
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
else super(BandcampAlbumIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('subdomain') uploader_id = mobj.group('subdomain')
@@ -250,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
'title': title, 'title': title,
'entries': entries, 'entries': entries,
} }
class BandcampWeeklyIE(InfoExtractor):
    """Extractor for Bandcamp Weekly radio-show episodes."""
    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://bandcamp.com/?show=224',
        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
        'info_dict': {
            'id': '224',
            'ext': 'opus',
            'title': 'BC Weekly April 4th 2017 - Magic Moments',
            'description': 'md5:5d48150916e8e02d030623a48512c874',
            'duration': 5829.77,
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_number': 208,
            'episode_id': '224',
        }
    }, {
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # All show metadata lives in an HTML-escaped JSON blob attribute.
        blob = self._parse_json(
            self._search_regex(
                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
                'blob', group='blob'),
            video_id, transform_source=unescapeHTML)

        show = blob['bcw_show']

        # This is desired because any invalid show id redirects to `bandcamp.com`
        # which happens to expose the latest Bandcamp Weekly episode.
        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)

        formats = []
        for stream_id, stream_url in show['audio_stream'].items():
            if not isinstance(stream_url, compat_str):
                continue
            # Guess the container from the stream key (e.g. 'opus-lo').
            ext = next(
                (known_ext for known_ext in KNOWN_EXTENSIONS
                 if known_ext in stream_id), None)
            formats.append({
                'format_id': stream_id,
                'url': stream_url,
                'ext': ext,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        title = show.get('audio_title') or 'Bandcamp Weekly'
        subtitle = show.get('subtitle')
        if subtitle:
            title = '%s - %s' % (title, subtitle)

        # Look the current show up in the episode sequence to recover its
        # ordinal number within the series.
        episode_number = None
        seq = blob.get('bcw_seq')
        if seq and isinstance(seq, list):
            for entry in seq:
                if isinstance(entry, dict) and int_or_none(entry.get('id')) == show_id:
                    episode_number = int_or_none(entry.get('episode_number'))
                    break

        return {
            'id': video_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': show.get('subtitle'),
            'episode_number': episode_number,
            'episode_id': compat_str(video_id),
            'formats': formats
        }

View File

@@ -6,14 +6,18 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
dict_get, dict_get,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
get_element_by_class,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
try_get, try_get,
unescapeHTML, unescapeHTML,
urlencode_postdata,
urljoin,
) )
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
@@ -32,12 +36,15 @@ class BBCCoUkIE(InfoExtractor):
(?: (?:
programmes/(?!articles/)| programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)| iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/clips[/#]| music/(?:clips|audiovideo/popular)[/#]|
radio/player/ radio/player/
) )
(?P<id>%s)(?!/(?:episodes|broadcasts|clips)) (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX ''' % _ID_REGEX
_LOGIN_URL = 'https://account.bbc.com/signin'
_NETRC_MACHINE = 'bbc'
_MEDIASELECTOR_URLS = [ _MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails # Provides HQ HLS streams with even better quality that pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g. # with geolocation in some cases when it's even not geo restricted at all (e.g.
@@ -222,11 +229,46 @@ class BBCCoUkIE(InfoExtractor):
}, { }, {
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf', 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
'only_matching': True, 'only_matching': True,
} }, {
] 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
def _login(self):
    """Sign in to a BBC account with credentials from .netrc/options.

    Silently returns when no credentials are configured; raises
    ExtractorError (with the site's own message when available) on a
    failed login.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    form = self._hidden_inputs(signin_page)
    form['username'] = username
    form['password'] = password

    # The form may post to a different URL; fall back to the signin URL.
    post_url = urljoin(self._LOGIN_URL, self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url'))

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form),
        headers={'Referer': self._LOGIN_URL})

    # Landing back on the signin page means the login failed; surface the
    # site's error message when one is present.
    if self._LOGIN_URL in urlh.geturl():
        error = clean_html(get_element_by_class('form-message', response))
        if error:
            raise ExtractorError(
                'Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
def _real_initialize(self):
    # Attempt account login before any extraction; no-op when no
    # credentials are configured (see _login).
    self._login()
class MediaSelectionError(Exception): class MediaSelectionError(Exception):
def __init__(self, id): def __init__(self, id):
self.id = id self.id = id
@@ -483,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page') webpage = self._download_webpage(url, group_id, 'Downloading video page')
error = self._search_regex(
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
webpage, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
programme_id = None programme_id = None
duration = None duration = None

View File

@@ -6,18 +6,33 @@ from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
compat_str, compat_str,
float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
try_get, try_get,
urljoin,
) )
class BeamProLiveIE(InfoExtractor): class BeamProBaseIE(InfoExtractor):
IE_NAME = 'Beam:live' _API_BASE = 'https://mixer.com/api/v1'
_VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
_RATINGS = {'family': 0, 'teen': 13, '18+': 18} _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
def _extract_channel_info(self, chan):
    """Return uploader/age-limit metadata shared by live and VOD extractors."""
    uploader = chan.get('token') or try_get(
        chan, lambda x: x['user']['username'], compat_str)
    user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
    return {
        'uploader': uploader,
        'uploader_id': compat_str(user_id) if user_id else None,
        'age_limit': self._RATINGS.get(chan.get('audience')),
    }
class BeamProLiveIE(BeamProBaseIE):
IE_NAME = 'Mixer:live'
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://www.beam.pro/niterhayven', 'url': 'http://mixer.com/niterhayven',
'info_dict': { 'info_dict': {
'id': '261562', 'id': '261562',
'ext': 'mp4', 'ext': 'mp4',
@@ -38,11 +53,17 @@ class BeamProLiveIE(InfoExtractor):
}, },
} }
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
@classmethod
def suitable(cls, url):
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
channel_name = self._match_id(url) channel_name = self._match_id(url)
chan = self._download_json( chan = self._download_json(
'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name) '%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
if chan.get('online') is False: if chan.get('online') is False:
raise ExtractorError( raise ExtractorError(
@@ -50,24 +71,118 @@ class BeamProLiveIE(InfoExtractor):
channel_id = chan['id'] channel_id = chan['id']
def manifest_url(kind):
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id, manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
channel_name, ext='mp4', m3u8_id='hls', fatal=False) fatal=False)
formats.extend(self._extract_smil_formats(
manifest_url('smil'), channel_name, fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) info = {
return {
'id': compat_str(chan.get('id') or channel_name), 'id': compat_str(chan.get('id') or channel_name),
'title': self._live_title(chan.get('name') or channel_name), 'title': self._live_title(chan.get('name') or channel_name),
'description': clean_html(chan.get('description')), 'description': clean_html(chan.get('description')),
'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str), 'thumbnail': try_get(
chan, lambda x: x['thumbnail']['url'], compat_str),
'timestamp': parse_iso8601(chan.get('updatedAt')), 'timestamp': parse_iso8601(chan.get('updatedAt')),
'uploader': chan.get('token') or try_get(
chan, lambda x: x['user']['username'], compat_str),
'uploader_id': compat_str(user_id) if user_id else None,
'age_limit': self._RATINGS.get(chan.get('audience')),
'is_live': True, 'is_live': True,
'view_count': int_or_none(chan.get('viewersTotal')), 'view_count': int_or_none(chan.get('viewersTotal')),
'formats': formats, 'formats': formats,
} }
info.update(self._extract_channel_info(chan))
return info
class BeamProVodIE(BeamProBaseIE):
    """Extractor for Mixer (formerly Beam) recorded VODs."""
    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format dict for one VOD entry ('hls' or 'raw').

        Returns a one-element list (or [] when no base URL is present) so
        callers can extend() unconditionally.
        """
        base_url = vod.get('baseUrl')
        if not base_url:
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Callers only pass 'hls' or 'raw'; anything else is a bug.
            assert False

        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(base_url, filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        for vod in vod_info['vods']:
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info

View File

@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
'description': '如果你是神明并且能够让妄想成为现实。那你会进行怎么样的妄想是淫靡的世界独裁社会毁灭性的制裁还是……2015年涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', 'description': '如果你是神明并且能够让妄想成为现实。那你会进行怎么样的妄想是淫靡的世界独裁社会毁灭性的制裁还是……2015年涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
}, },
'skip': 'Geo-restricted to China', 'skip': 'Geo-restricted to China',
}, {
# Title with double quotes
'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': {
'id': '8903802',
'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
'uploader': '阿滴英文',
'uploader_id': '65880958',
'timestamp': 1488382620,
'upload_date': '20170301',
},
'params': {
'skip_download': True, # Test metadata only
},
}] }]
_APP_KEY = '84956560bc028eb7' _APP_KEY = '84956560bc028eb7'
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
'formats': formats, 'formats': formats,
}) })
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None)) r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))

View File

@@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
continue continue
entries.append(self.url_result(video['url'])) entries.append(self.url_result(video['url']))
facebook_url = FacebookIE._extract_url(webpage) facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_url: entries.extend([
entries.append(self.url_result(facebook_url)) self.url_result(facebook_url)
for facebook_url in facebook_urls])
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@@ -49,13 +49,13 @@ class CBSIE(CBSBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_video_info(self, content_id): def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
items_data = self._download_xml( items_data = self._download_xml(
'http://can.cbs.com/thunder/player/videoPlayerService.php', 'http://can.cbs.com/thunder/player/videoPlayerService.php',
content_id, query={'partner': 'cbs', 'contentId': content_id}) content_id, query={'partner': site, 'contentId': content_id})
video_data = xpath_element(items_data, './/item') video_data = xpath_element(items_data, './/item')
title = xpath_text(video_data, 'videoTitle', 'title', True) title = xpath_text(video_data, 'videoTitle', 'title', True)
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
tp_release_url = 'http://link.theplatform.com/s/' + tp_path tp_release_url = 'http://link.theplatform.com/s/' + tp_path
asset_types = [] asset_types = []

View File

@@ -3,17 +3,18 @@ from __future__ import unicode_literals
import re import re
from .theplatform import ThePlatformIE from .cbs import CBSIE
from ..utils import int_or_none from ..utils import int_or_none
class CBSInteractiveIE(ThePlatformIE): class CBSInteractiveIE(CBSIE):
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)' _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
'info_dict': { 'info_dict': {
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60', 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
'ext': 'flv', 'display_id': 'hands-on-with-microsofts-windows-8-1-update',
'ext': 'mp4',
'title': 'Hands-on with Microsoft Windows 8.1 Update', 'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861', 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
@@ -22,13 +23,19 @@ class CBSInteractiveIE(ThePlatformIE):
'timestamp': 1396479627, 'timestamp': 1396479627,
'upload_date': '20140402', 'upload_date': '20140402',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
'info_dict': { 'info_dict': {
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba', 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
'ext': 'flv', 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
'ext': 'mp4',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole', 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda', 'uploader': 'Ashley Esqueda',
'duration': 1482, 'duration': 1482,
@@ -38,23 +45,28 @@ class CBSInteractiveIE(ThePlatformIE):
}, { }, {
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/', 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
'info_dict': { 'info_dict': {
'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0', 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Video: Keeping Android smartphones and tablets secure', 'title': 'Video: Keeping Android smartphones and tablets secure',
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.', 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0', 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
'uploader': 'Adrian Kingsley-Hughes', 'uploader': 'Adrian Kingsley-Hughes',
'timestamp': 1448961720, 'duration': 731,
'upload_date': '20151201', 'timestamp': 1449129925,
'upload_date': '20151203',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} },
}, {
'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
'only_matching': True,
}] }]
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
MPX_ACCOUNTS = { MPX_ACCOUNTS = {
'cnet': 2288573011, 'cnet': 2198311517,
'zdnet': 2387448114, 'zdnet': 2387448114,
} }
@@ -68,7 +80,8 @@ class CBSInteractiveIE(ThePlatformIE):
data = self._parse_json(data_json, display_id) data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0] vdata = data.get('video') or data['videos'][0]
video_id = vdata['id'] video_id = vdata['mpxRefId']
title = vdata['title'] title = vdata['title']
author = vdata.get('author') author = vdata.get('author')
if author: if author:
@@ -78,20 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
uploader = None uploader = None
uploader_id = None uploader_id = None
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId']) info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
formats, subtitles = [], {}
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
release_url = self.TP_RELEASE_URL_TEMPLATE % vid
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats)
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
info.update({ info.update({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
@@ -99,7 +99,5 @@ class CBSInteractiveIE(ThePlatformIE):
'duration': int_or_none(vdata.get('duration')), 'duration': int_or_none(vdata.get('duration')),
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'subtitles': subtitles,
'formats': formats,
}) })
return info return info

View File

@@ -15,19 +15,23 @@ class CBSNewsIE(CBSIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/', # 60 minutes
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
'info_dict': { 'info_dict': {
'id': 'tesla-and-spacex-elon-musks-industrial-empire', 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
'ext': 'flv', 'ext': 'mp4',
'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire', 'title': 'Artificial Intelligence',
'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg', 'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
'duration': 791, 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1606,
'uploader': 'CBSI-NEW',
'timestamp': 1498431900,
'upload_date': '20170625',
}, },
'params': { 'params': {
# rtmp download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Subscribers only',
}, },
{ {
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
@@ -52,6 +56,22 @@ class CBSNewsIE(CBSIE):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# 48 hours
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
'info_dict': {
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
'ext': 'mp4',
'title': 'Cold as Ice',
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
'upload_date': '20170604',
'timestamp': 1496538000,
'uploader': 'CBSI-NEW',
},
'params': {
'skip_download': True,
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -60,12 +80,18 @@ class CBSNewsIE(CBSIE):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(self._html_search_regex( video_info = self._parse_json(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'', r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
webpage, 'video JSON info'), video_id) webpage, 'video JSON info', default='{}'), video_id, fatal=False)
item = video_info['item'] if 'item' in video_info else video_info if video_info:
guid = item['mpxRefId'] item = video_info['item'] if 'item' in video_info else video_info
return self._extract_video_info(guid) else:
state = self._parse_json(self._search_regex(
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
'playlist JSON info', group='json'), video_id)['state']
item = state['playlist'][state['pid']]
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
class CBSNewsLiveVideoIE(InfoExtractor): class CBSNewsLiveVideoIE(InfoExtractor):

View File

@@ -376,7 +376,7 @@ class InfoExtractor(object):
cls._VALID_URL_RE = re.compile(cls._VALID_URL) cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url) m = cls._VALID_URL_RE.match(url)
assert m assert m
return m.group('id') return compat_str(m.group('id'))
@classmethod @classmethod
def working(cls): def working(cls):
@@ -420,7 +420,7 @@ class InfoExtractor(object):
if country_code: if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self._downloader.to_stdout( self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.' '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country_code.upper())) % (self._x_forwarded_for_ip, country_code.upper()))
@@ -1002,17 +1002,17 @@ class InfoExtractor(object):
item_type = e.get('@type') item_type = e.get('@type')
if expected_type is not None and expected_type != item_type: if expected_type is not None and expected_type != item_type:
return info return info
if item_type == 'TVEpisode': if item_type in ('TVEpisode', 'Episode'):
info.update({ info.update({
'episode': unescapeHTML(e.get('name')), 'episode': unescapeHTML(e.get('name')),
'episode_number': int_or_none(e.get('episodeNumber')), 'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
}) })
part_of_season = e.get('partOfSeason') part_of_season = e.get('partOfSeason')
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name')) info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article': elif item_type == 'Article':
info.update({ info.update({
@@ -1022,10 +1022,10 @@ class InfoExtractor(object):
}) })
elif item_type == 'VideoObject': elif item_type == 'VideoObject':
extract_video_object(e) extract_video_object(e)
elif item_type == 'WebPage': continue
video = e.get('video') video = e.get('video')
if isinstance(video, dict) and video.get('@type') == 'VideoObject': if isinstance(video, dict) and video.get('@type') == 'VideoObject':
extract_video_object(video) extract_video_object(video)
break break
return dict((k, v) for k, v in info.items() if v is not None) return dict((k, v) for k, v in info.items() if v is not None)
@@ -2299,6 +2299,8 @@ class InfoExtractor(object):
tracks = video_data.get('tracks') tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list): if tracks and isinstance(tracks, list):
for track in tracks: for track in tracks:
if not isinstance(track, dict):
continue
if track.get('kind') != 'captions': if track.get('kind') != 'captions':
continue continue
track_url = urljoin(base_url, track.get('file')) track_url = urljoin(base_url, track.get('file'))
@@ -2328,6 +2330,8 @@ class InfoExtractor(object):
urls = [] urls = []
formats = [] formats = []
for source in jwplayer_sources_data: for source in jwplayer_sources_data:
if not isinstance(source, dict):
continue
source_url = self._proto_relative_url(source.get('file')) source_url = self._proto_relative_url(source.get('file'))
if not source_url: if not source_url:
continue continue

View File

@@ -8,7 +8,16 @@ from ..utils import int_or_none
class CorusIE(ThePlatformFeedIE): class CorusIE(ThePlatformFeedIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?P<domain>
(?:globaltv|etcanada)\.com|
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
)
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
'md5': '05dcbca777bf1e58c2acbb57168ad3a6', 'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
@@ -27,6 +36,12 @@ class CorusIE(ThePlatformFeedIE):
}, { }, {
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/', 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
'only_matching': True,
}, {
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
'only_matching': True,
}] }]
_TP_FEEDS = { _TP_FEEDS = {
@@ -50,6 +65,14 @@ class CorusIE(ThePlatformFeedIE):
'feed_id': '5tUJLgV2YNJ5', 'feed_id': '5tUJLgV2YNJ5',
'account_id': 2414427935, 'account_id': 2414427935,
}, },
'history': {
'feed_id': 'tQFx_TyyEq4J',
'account_id': 2369613659,
},
'showcase': {
'feed_id': '9H6qyshBZU3E',
'account_id': 2414426607,
},
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -15,7 +15,7 @@ from ..utils import (
class DisneyIE(InfoExtractor): class DisneyIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))''' https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
_TESTS = [{ _TESTS = [{
# Disney.EmbedVideo # Disney.EmbedVideo
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
@@ -68,6 +68,9 @@ class DisneyIE(InfoExtractor):
}, { }, {
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
'only_matching': True,
}, { }, {
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
'only_matching': True, 'only_matching': True,

View File

@@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
info_url = self._search_regex( info_url = self._search_regex(
r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
webpage, 'video id') webpage, 'video id')
title = remove_end(self._og_search_title(webpage), ' | Dplay') title = remove_end(self._og_search_title(webpage), ' | Dplay')

View File

@@ -1,135 +1,59 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, js_to_json,
parse_iso8601, parse_duration,
unescapeHTML,
) )
class DRBonanzaIE(InfoExtractor): class DRBonanzaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)' _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
_TEST = {
_TESTS = [{ 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
'info_dict': { 'info_dict': {
'id': '65517', 'id': '40312',
'display_id': 'matador---0824-komme-fremmede-',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Talkshowet - Leonard Cohen', 'title': 'MATADOR - 08:24. "Komme fremmede".',
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca', 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
'timestamp': 1295537932, 'duration': 4613,
'upload_date': '20110120',
'duration': 3664,
}, },
'params': { }
'skip_download': True, # requires rtmp
},
}, {
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
'md5': '6dfe039417e76795fb783c52da3de11d',
'info_dict': {
'id': '59410',
'ext': 'mp3',
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
'description': 'md5:501e5a195749480552e214fbbed16c4e',
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
'timestamp': 1223274900,
'upload_date': '20081006',
'duration': 7369,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
url_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, url_id) video_id, display_id = mobj.group('id', 'display_id')
if url_id: webpage = self._download_webpage(url, display_id)
info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
else:
# Just fetch the first video on that page
info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
asset_id = str(info['AssetId']) info = self._parse_html5_media_entries(
title = info['Title'].rstrip(' \'\"-,.:;!?') url, webpage, display_id, m3u8_id='hls',
duration = int_or_none(info.get('Duration'), scale=1000) m3u8_entry_protocol='m3u8_native')[0]
# First published online. "FirstPublished" contains the date for original airing. self._sort_formats(info['formats'])
timestamp = parse_iso8601(
re.sub(r'\.\d+$', '', info['Created']))
def parse_filename_info(url): asset = self._parse_json(
match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) self._search_regex(
if match: r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
return { display_id, transform_source=js_to_json)
'width': int(match.group('width')),
'height': int(match.group('height')),
'vbr': int(match.group('bitrate')),
'ext': match.group('ext')
}
match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
if match:
return {
'vbr': int(match.group('bitrate')),
'ext': match.group(2)
}
return {}
video_types = ['VideoHigh', 'VideoMid', 'VideoLow'] title = unescapeHTML(asset['AssetTitle']).strip()
preferencemap = {
'VideoHigh': -1,
'VideoMid': -2,
'VideoLow': -3,
'Audio': -4,
}
formats = [] def extract(field):
for file in info['Files']: return self._search_regex(
if info['Type'] == 'Video': r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
if file['Type'] in video_types: webpage, field, default=None)
format = parse_filename_info(file['Location'])
format.update({
'url': file['Location'],
'format_id': file['Type'].replace('Video', ''),
'preference': preferencemap.get(file['Type'], -10),
})
if format['url'].startswith('rtmp'):
rtmp_url = format['url']
format['rtmp_live'] = True # --resume does not work
if '/bonanza/' in rtmp_url:
format['play_path'] = rtmp_url.split('/bonanza/')[1]
formats.append(format)
elif file['Type'] == 'Thumb':
thumbnail = file['Location']
elif info['Type'] == 'Audio':
if file['Type'] == 'Audio':
format = parse_filename_info(file['Location'])
format.update({
'url': file['Location'],
'format_id': file['Type'],
'vcodec': 'none',
})
formats.append(format)
elif file['Type'] == 'Thumb':
thumbnail = file['Location']
description = '%s\n%s\n%s\n' % ( info.update({
info['Description'], info['Actors'], info['Colophon']) 'id': asset.get('AssetId') or video_id,
self._sort_formats(formats)
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
display_id = re.sub(r'-+', '-', display_id)
return {
'id': asset_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'formats': formats, 'description': extract('Programinfo'),
'description': description, 'duration': parse_duration(extract('Tid')),
'thumbnail': thumbnail, 'thumbnail': asset.get('AssetImageUrl'),
'timestamp': timestamp, })
'duration': duration, return info
}

View File

@@ -44,8 +44,23 @@ class DrTuberIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id) 'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex( video_data = self._download_json(
r'<source src="([^"]+)"', webpage, 'video URL') 'http://www.drtuber.com/player_config_json/', video_id, query={
'vid': video_id,
'embed': 0,
'aid': 0,
'domain_id': 0,
})
formats = []
for format_id, video_url in video_data['files'].items():
if video_url:
formats.append({
'format_id': format_id,
'quality': 2 if format_id == 'hq' else 1,
'url': video_url
})
self._sort_formats(formats)
title = self._html_search_regex( title = self._html_search_regex(
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
@@ -75,7 +90,7 @@ class DrTuberIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url, 'formats': formats,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'like_count': like_count, 'like_count': like_count,

View File

@@ -5,9 +5,12 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
js_to_json, determine_ext,
unescapeHTML,
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
mimetype2ext,
unescapeHTML,
) )
@@ -24,14 +27,7 @@ class DVTVIE(InfoExtractor):
'id': 'dc0768de855511e49e4b0025900fea04', 'id': 'dc0768de855511e49e4b0025900fea04',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
} 'duration': 1484,
}, {
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
'md5': '6388f1941b48537dbd28791f712af8bf',
'info_dict': {
'id': '72c02230849211e49f60002590604f2e',
'ext': 'mp4',
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
} }
}, { }, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
@@ -44,55 +40,100 @@ class DVTVIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'b0b40906854d11e4bdad0025900fea04', 'id': 'b0b40906854d11e4bdad0025900fea04',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne' 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
'description': 'md5:0916925dea8e30fe84222582280b47a0',
'timestamp': 1418760010,
'upload_date': '20141216',
} }
}, { }, {
'md5': '5f7652a08b05009c1292317b449ffea2', 'md5': '5f7652a08b05009c1292317b449ffea2',
'info_dict': { 'info_dict': {
'id': '420ad9ec854a11e4bdad0025900fea04', 'id': '420ad9ec854a11e4bdad0025900fea04',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka' 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
'timestamp': 1418760010,
'upload_date': '20141216',
} }
}, { }, {
'md5': '498eb9dfa97169f409126c617e2a3d64', 'md5': '498eb9dfa97169f409126c617e2a3d64',
'info_dict': { 'info_dict': {
'id': '95d35580846a11e4b6d20025900fea04', 'id': '95d35580846a11e4b6d20025900fea04',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?' 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
'description': 'md5:889fe610a70fee5511dc3326a089188e',
'timestamp': 1418760010,
'upload_date': '20141216',
} }
}, { }, {
'md5': 'b8dc6b744844032dab6ba3781a7274b9', 'md5': 'b8dc6b744844032dab6ba3781a7274b9',
'info_dict': { 'info_dict': {
'id': '6fe14d66853511e4833a0025900fea04', 'id': '6fe14d66853511e4833a0025900fea04',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády' 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
'timestamp': 1418760010,
'upload_date': '20141216',
} }
}], }],
}, {
'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
'md5': 'f8efe9656017da948369aa099788c8ea',
'info_dict': {
'id': '3c496fec365911e7a6500025900fea04',
'ext': 'mp4',
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
'duration': 1103,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True, 'only_matching': True,
}] }]
def _parse_video_metadata(self, js, video_id): def _parse_video_metadata(self, js, video_id):
metadata = self._parse_json(js, video_id, transform_source=js_to_json) data = self._parse_json(js, video_id, transform_source=js_to_json)
title = unescapeHTML(data['title'])
formats = [] formats = []
for video in metadata['sources']: for video in data['sources']:
ext = video['type'][6:] video_url = video.get('file')
formats.append({ if not video_url:
'url': video['file'], continue
'ext': ext, video_type = video.get('type')
'format_id': '%s-%s' % (ext, video['label']), ext = determine_ext(video_url, mimetype2ext(video_type))
'height': int(video['label'].rstrip('p')), if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
'fps': 25, formats.extend(self._extract_m3u8_formats(
}) video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif video_type == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
label = video.get('label')
height = self._search_regex(
r'^(\d+)[pP]', label or '', 'height', default=None)
format_id = ['http']
for f in (ext, label):
if f:
format_id.append(f)
formats.append({
'url': video_url,
'format_id': '-'.join(format_id),
'height': int_or_none(height),
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': metadata['mediaid'], 'id': data.get('mediaid') or video_id,
'title': unescapeHTML(metadata['title']), 'title': title,
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'), 'description': data.get('description'),
'thumbnail': data.get('image'),
'duration': int_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('pubtime')),
'formats': formats 'formats': formats
} }
@@ -103,7 +144,7 @@ class DVTVIE(InfoExtractor):
# single video # single video
item = self._search_regex( item = self._search_regex(
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});", r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None, fatal=False) webpage, 'video', default=None, fatal=False)
if item: if item:
@@ -113,6 +154,8 @@ class DVTVIE(InfoExtractor):
items = re.findall( items = re.findall(
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
webpage) webpage)
if not items:
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
if items: if items:
return { return {

View File

@@ -71,6 +71,10 @@ from .arte import (
TheOperaPlatformIE, TheOperaPlatformIE,
ArteTVPlaylistIE, ArteTVPlaylistIE,
) )
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
)
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE from .atvat import ATVAtIE
@@ -90,7 +94,7 @@ from .azmedien import (
) )
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,
BBCCoUkArticleIE, BBCCoUkArticleIE,
@@ -98,7 +102,10 @@ from .bbc import (
BBCCoUkPlaylistIE, BBCCoUkPlaylistIE,
BBCIE, BBCIE,
) )
from .beampro import BeamProLiveIE from .beampro import (
BeamProLiveIE,
BeamProVodIE,
)
from .beeg import BeegIE from .beeg import BeegIE
from .behindkink import BehindKinkIE from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE from .bellmedia import BellMediaIE
@@ -389,7 +396,6 @@ from .globo import (
from .go import GoIE from .go import GoIE
from .go90 import Go90IE from .go90 import Go90IE
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .godtv import GodTVIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
@@ -634,7 +640,10 @@ from .neteasemusic import (
NetEaseMusicProgramIE, NetEaseMusicProgramIE,
NetEaseMusicDjRadioIE, NetEaseMusicDjRadioIE,
) )
from .newgrounds import NewgroundsIE from .newgrounds import (
NewgroundsIE,
NewgroundsPlaylistIE,
)
from .newstube import NewstubeIE from .newstube import NewstubeIE
from .nextmedia import ( from .nextmedia import (
NextMediaIE, NextMediaIE,
@@ -815,6 +824,7 @@ from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE from .radiofrance import RadioFranceIE
from .rai import ( from .rai import (
RaiPlayIE, RaiPlayIE,
RaiPlayLiveIE,
RaiIE, RaiIE,
) )
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
@@ -866,6 +876,7 @@ from .rutube import (
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .ruutu import RuutuIE from .ruutu import RuutuIE
from .ruv import RuvIE
from .sandia import SandiaIE from .sandia import SandiaIE
from .safari import ( from .safari import (
SafariIE, SafariIE,
@@ -962,6 +973,7 @@ from .tagesschau import (
TagesschauIE, TagesschauIE,
) )
from .tass import TassIE from .tass import TassIE
from .tastytrade import TastyTradeIE
from .tbs import TBSIE from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE from .tdslifeway import TDSLifewayIE
from .teachertube import ( from .teachertube import (
@@ -1019,11 +1031,6 @@ from .trilulilu import TriluliluIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubitv import TubiTvIE from .tubitv import TubiTvIE
from .tudou import (
TudouIE,
TudouPlaylistIE,
TudouAlbumIE,
)
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import ( from .tunein import (
TuneInClipIE, TuneInClipIE,

View File

@@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor):
}] }]
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_urls(webpage):
mobj = re.search( urls = []
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) for mobj in re.finditer(
if mobj is not None: r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
return mobj.group('url') webpage):
urls.append(mobj.group('url'))
# Facebook API embed # Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player # see https://developers.facebook.com/docs/plugins/embedded-video-player
mobj = re.search(r'''(?x)<div[^>]+ for mobj in re.finditer(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage) data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
if mobj is not None: urls.append(mobj.group('url'))
return mobj.group('url') return urls
def _login(self): def _login(self):
(useremail, password) = self._get_login_info() (useremail, password) = self._get_login_info()

View File

@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
'format_id': f.get('name'), 'format_id': f.get('name'),
'tbr': tbr, 'tbr': tbr,
'source_preference': quality(f.get('name')), 'source_preference': quality(f.get('name')),
# quality metadata of http formats may be incorrect
'preference': -1,
}) })
# m3u8 URL format is reverse engineered from [1] (search for # m3u8 URL format is reverse engineered from [1] (search for
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)

View File

@@ -1,7 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):
formats = [] formats = []
for stream in streams['stream']: for stream in streams['stream']:
stream_type = str(stream.get('type')) stream_type = compat_str(stream.get('type'))
formats.append({ formats.append({
'format_id': stream_type, 'format_id': stream_type,
'url': stream['_content'], 'url': stream['_content'],

View File

@@ -85,11 +85,11 @@ class FourTubeIE(InfoExtractor):
media_id = params[0] media_id = params[0]
sources = ['%s' % p for p in params[2]] sources = ['%s' % p for p in params[2]]
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format( token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
media_id, '+'.join(sources)) media_id, '+'.join(sources))
headers = { headers = {
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
b'Origin': b'http://www.4tube.com', b'Origin': b'https://www.4tube.com',
} }
token_req = sanitized_Request(token_url, b'{}', headers) token_req = sanitized_Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id) tokens = self._download_json(token_req, video_id)

View File

@@ -5,6 +5,7 @@ import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_id, get_element_by_id,
int_or_none,
remove_end, remove_end,
) )
@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):
formats = [{ formats = [{
'url': source, 'url': source,
'height': resolution, 'height': int_or_none(resolution),
} for source, resolution in zip( } for source, resolution in zip(
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]

View File

@@ -112,7 +112,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
class FranceTVIE(FranceTVBaseInfoExtractor): class FranceTVIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)+(?P<id>[^/]+)\.html' _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
@@ -157,6 +157,9 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
}, { }, {
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html', 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.france.tv/142749-rouge-sang.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -6,62 +6,52 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
js_to_json,
unified_strdate, unified_strdate,
) )
class GaskrankIE(InfoExtractor): class GaskrankIE(InfoExtractor):
"""InfoExtractor for gaskrank.tv""" _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?' _TESTS = [{
_TESTS = [ 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
{ 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm', 'info_dict': {
'md5': '1ae88dbac97887d85ebd1157a95fc4f9', 'id': '201601/26955',
'info_dict': { 'ext': 'mp4',
'id': '201601/26955', 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*', 'categories': ['motorrad-fun'],
'thumbnail': r're:^https?://.*\.jpg$', 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
'categories': ['motorrad-fun'], 'uploader_id': 'Bikefun',
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden', 'upload_date': '20170110',
'uploader_id': 'Bikefun', 'uploader_url': None,
'upload_date': '20170110',
'uploader_url': None,
}
},
{
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
'info_dict': {
'id': '201106/15920',
'ext': 'mp4',
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
'thumbnail': r're:^https?://.*\.jpg$',
'categories': ['racing'],
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
'uploader_id': 'IOM',
'upload_date': '20160506',
'uploader_url': 'www.iomtt.com',
}
} }
] }, {
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
'info_dict': {
'id': '201106/15920',
'ext': 'mp4',
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
'thumbnail': r're:^https?://.*\.jpg$',
'categories': ['racing'],
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
'uploader_id': 'IOM',
'upload_date': '20170523',
'uploader_url': 'www.iomtt.com',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
"""extract information from gaskrank.tv"""
def fix_json(code):
"""Removes trailing comma in json: {{},} --> {{}}"""
return re.sub(r',\s*}', r'}', js_to_json(code))
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
title = self._og_search_title(
webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True)
categories = [re.match(self._VALID_URL, url).group('categories')] categories = [re.match(self._VALID_URL, url).group('categories')]
title = self._search_regex(
r'movieName\s*:\s*\'([^\']*)\'',
webpage, 'title')
thumbnail = self._search_regex(
r'poster\s*:\s*\'([^\']*)\'',
webpage, 'thumbnail', default=None)
mobj = re.search( mobj = re.search(
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])', r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
@@ -89,29 +79,14 @@ class GaskrankIE(InfoExtractor):
if average_rating: if average_rating:
average_rating = float_or_none(average_rating.replace(',', '.')) average_rating = float_or_none(average_rating.replace(',', '.'))
playlist = self._parse_json(
self._search_regex(
r'playlist\s*:\s*\[([^\]]*)\]',
webpage, 'playlist', default='{}'),
display_id, transform_source=fix_json, fatal=False)
video_id = self._search_regex( video_id = self._search_regex(
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4', r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
playlist.get('0').get('src'), 'video id') webpage, 'video id', default=display_id)
formats = [] entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
for key in playlist: entry.update({
formats.append({
'url': playlist[key]['src'],
'format_id': key,
'quality': playlist[key].get('quality')})
self._sort_formats(formats, field_preference=['format_id'])
return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats,
'thumbnail': thumbnail,
'categories': categories, 'categories': categories,
'display_id': display_id, 'display_id': display_id,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
@@ -120,4 +95,7 @@ class GaskrankIE(InfoExtractor):
'tags': tags, 'tags': tags,
'view_count': view_count, 'view_count': view_count,
'average_rating': average_rating, 'average_rating': average_rating,
} })
self._sort_formats(entry['formats'])
return entry

View File

@@ -10,6 +10,7 @@ from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urlparse, compat_urlparse,
compat_xml_parse_error, compat_xml_parse_error,
@@ -1521,6 +1522,21 @@ class GenericIE(InfoExtractor):
'title': 'Facebook video #599637780109885', 'title': 'Facebook video #599637780109885',
}, },
}, },
# Facebook <iframe> embed, plugin video
{
'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
'info_dict': {
'id': '1754168231264132',
'ext': 'mp4',
'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
'uploader': 'Tariq Ramadan (official)',
'timestamp': 1496758379,
'upload_date': '20170606',
},
'params': {
'skip_download': True,
},
},
# Facebook API embed # Facebook API embed
{ {
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/', 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
@@ -1907,14 +1923,14 @@ class GenericIE(InfoExtractor):
content_type = head_response.headers.get('Content-Type', '').lower() content_type = head_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m: if m:
format_id = m.group('format_id') format_id = compat_str(m.group('format_id'))
if format_id.endswith('mpegurl'): if format_id.endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4') formats = self._extract_m3u8_formats(url, video_id, 'mp4')
elif format_id == 'f4m': elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id) formats = self._extract_f4m_formats(url, video_id)
else: else:
formats = [{ formats = [{
'format_id': m.group('format_id'), 'format_id': format_id,
'url': url, 'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None 'vcodec': 'none' if m.group('type') == 'audio' else None
}] }]
@@ -2032,6 +2048,13 @@ class GenericIE(InfoExtractor):
video_description = self._og_search_description(webpage, default=None) video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None) video_thumbnail = self._og_search_thumbnail(webpage, default=None)
info_dict.update({
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'age_limit': age_limit,
})
# Look for Brightcove Legacy Studio embeds # Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls: if bc_urls:
@@ -2221,9 +2244,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
facebook_url = FacebookIE._extract_url(webpage) facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_url is not None: if facebook_urls:
return self.url_result(facebook_url, 'Facebook') return self.playlist_from_matches(facebook_urls, video_id, video_title)
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
@@ -2668,18 +2691,26 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
if v is not None:
merged[k] = v
for k, v in dict2.items():
if v is None:
continue
if (k not in merged or
(isinstance(v, compat_str) and v and
isinstance(merged[k], compat_str) and
not merged[k])):
merged[k] = v
return merged
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')
if json_ld.get('url'): if json_ld.get('url'):
info_dict.update({ return merge_dicts(json_ld, info_dict)
'title': video_title or info_dict['title'],
'description': video_description,
'thumbnail': video_thumbnail,
'age_limit': age_limit
})
info_dict.update(json_ld)
return info_dict
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
@@ -2697,9 +2728,7 @@ class GenericIE(InfoExtractor):
if jwplayer_data: if jwplayer_data:
info = self._parse_jwplayer_data( info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url) jwplayer_data, video_id, require_title=False, base_url=url)
if not info.get('title'): return merge_dicts(info, info_dict)
info['title'] = video_title
return info
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):

View File

@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
video_url = gfy.get('%sUrl' % format_id) video_url = gfy.get('%sUrl' % format_id)
if not video_url: if not video_url:
continue continue
filesize = gfy.get('%sSize' % format_id) filesize = int_or_none(gfy.get('%sSize' % format_id))
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,

View File

@@ -1,66 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import js_to_json
class GodTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
'info_dict': {
'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
'ext': 'mp4',
'title': 'Randy Needham',
'duration': 3615.08,
},
'params': {
'skip_download': True,
}
}, {
'url': 'http://god.tv/playlist/bible-study',
'info_dict': {
'id': 'bible-study',
},
'playlist_mincount': 37,
}, {
'url': 'http://god.tv/node/15097',
'only_matching': True,
}, {
'url': 'http://god.tv/live/africa',
'only_matching': True,
}, {
'url': 'http://god.tv/liveevents',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
settings = self._parse_json(
self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'settings', default='{}'),
display_id, transform_source=js_to_json, fatal=False)
ooyala_id = None
if settings:
playlist = settings.get('playlist')
if playlist and isinstance(playlist, list):
entries = [
OoyalaIE._build_url_result(video['content_id'])
for video in playlist if video.get('content_id')]
if entries:
return self.playlist_result(entries, display_id)
ooyala_id = settings.get('ooyala', {}).get('content_id')
if not ooyala_id:
ooyala_id = self._search_regex(
r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
webpage, 'ooyala id', group='id')
return OoyalaIE._build_url_result(ooyala_id)

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
continue continue
formats.append({ formats.append({
'format_id': e.tag, 'format_id': compat_str(e.tag),
'url': compat_urlparse.urljoin(self._PREFIX, url), 'url': compat_urlparse.urljoin(self._PREFIX, url),
'height': self._int(e.get('height'), 'height'), 'height': self._int(e.get('height'), 'height'),
'width': self._int(e.get('width'), 'width'), 'width': self._int(e.get('width'), 'width'),

View File

@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
resolutions = {}
for fmt in fmt_list:
mobj = re.search(
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
if mobj:
resolutions[mobj.group('format_id')] = (
int(mobj.group('width')), int(mobj.group('height')))
formats = [] formats = []
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map): for fmt_stream in fmt_stream_map:
fmt_id, fmt_url = fmt_stream.split('|') fmt_stream_split = fmt_stream.split('|')
resolution = fmt.split('/')[1] if len(fmt_stream_split) < 2:
width, height = resolution.split('x') continue
formats.append({ format_id, format_url = fmt_stream_split[:2]
'url': lowercase_escape(fmt_url), f = {
'format_id': fmt_id, 'url': lowercase_escape(format_url),
'resolution': resolution, 'format_id': format_id,
'width': int_or_none(width), 'ext': self._FORMATS_EXT[format_id],
'height': int_or_none(height), }
'ext': self._FORMATS_EXT[fmt_id], resolution = resolutions.get(format_id)
}) if resolution:
f.update({
'width': resolution[0],
'height': resolution[0],
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@@ -7,14 +7,19 @@ from .common import InfoExtractor
class HGTVComShowIE(InfoExtractor): class HGTVComShowIE(InfoExtractor):
IE_NAME = 'hgtv.com:show' IE_NAME = 'hgtv.com:show'
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos', # data-module="video"
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
'info_dict': { 'info_dict': {
'id': 'flip-or-flop-full-episodes-videos', 'id': 'flip-or-flop-full-episodes-season-4-videos',
'title': 'Flip or Flop Full Episodes', 'title': 'Flip or Flop Full Episodes',
}, },
'playlist_mincount': 15, 'playlist_mincount': 15,
} }, {
# data-deferred-module="video"
'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@@ -23,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
config = self._parse_json( config = self._parse_json(
self._search_regex( self._search_regex(
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script', r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
webpage, 'video config'), webpage, 'video config'),
display_id)['channels'][0] display_id)['channels'][0]

View File

@@ -16,8 +16,8 @@ from ..utils import (
class HitboxIE(InfoExtractor): class HitboxIE(InfoExtractor):
IE_NAME = 'hitbox' IE_NAME = 'hitbox'
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.hitbox.tv/video/203213', 'url': 'http://www.hitbox.tv/video/203213',
'info_dict': { 'info_dict': {
'id': '203213', 'id': '203213',
@@ -38,13 +38,15 @@ class HitboxIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
'only_matching': True,
}]
def _extract_metadata(self, url, video_id): def _extract_metadata(self, url, video_id):
thumb_base = 'https://edge.sf.hitbox.tv' thumb_base = 'https://edge.sf.hitbox.tv'
metadata = self._download_json( metadata = self._download_json(
'%s/%s' % (url, video_id), video_id, '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
'Downloading metadata JSON')
date = 'media_live_since' date = 'media_live_since'
media_type = 'livestream' media_type = 'livestream'
@@ -63,14 +65,15 @@ class HitboxIE(InfoExtractor):
views = int_or_none(video_meta.get('media_views')) views = int_or_none(video_meta.get('media_views'))
timestamp = parse_iso8601(video_meta.get(date), ' ') timestamp = parse_iso8601(video_meta.get(date), ' ')
categories = [video_meta.get('category_name')] categories = [video_meta.get('category_name')]
thumbs = [ thumbs = [{
{'url': thumb_base + video_meta.get('media_thumbnail'), 'url': thumb_base + video_meta.get('media_thumbnail'),
'width': 320, 'width': 320,
'height': 180}, 'height': 180
{'url': thumb_base + video_meta.get('media_thumbnail_large'), }, {
'width': 768, 'url': thumb_base + video_meta.get('media_thumbnail_large'),
'height': 432}, 'width': 768,
] 'height': 432
}]
return { return {
'id': video_id, 'id': video_id,
@@ -90,7 +93,7 @@ class HitboxIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
player_config = self._download_json( player_config = self._download_json(
'https://www.hitbox.tv/api/player/config/video/%s' % video_id, 'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
video_id, 'Downloading video JSON') video_id, 'Downloading video JSON')
formats = [] formats = []
@@ -121,8 +124,7 @@ class HitboxIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
metadata = self._extract_metadata( metadata = self._extract_metadata(
'https://www.hitbox.tv/api/media/video', 'https://www.smashcast.tv/api/media/video', video_id)
video_id)
metadata['formats'] = formats metadata['formats'] = formats
return metadata return metadata
@@ -130,8 +132,8 @@ class HitboxIE(InfoExtractor):
class HitboxLiveIE(HitboxIE): class HitboxLiveIE(HitboxIE):
IE_NAME = 'hitbox:live' IE_NAME = 'hitbox:live'
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://www.hitbox.tv/dimak', 'url': 'http://www.hitbox.tv/dimak',
'info_dict': { 'info_dict': {
'id': 'dimak', 'id': 'dimak',
@@ -146,13 +148,20 @@ class HitboxLiveIE(HitboxIE):
# live # live
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'https://www.smashcast.tv/dimak',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
player_config = self._download_json( player_config = self._download_json(
'https://www.hitbox.tv/api/player/config/live/%s' % video_id, 'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
video_id) video_id)
formats = [] formats = []
@@ -197,8 +206,7 @@ class HitboxLiveIE(HitboxIE):
self._sort_formats(formats) self._sort_formats(formats)
metadata = self._extract_metadata( metadata = self._extract_metadata(
'https://www.hitbox.tv/api/media/live', 'https://www.smashcast.tv/api/media/live', video_id)
video_id)
metadata['formats'] = formats metadata['formats'] = formats
metadata['is_live'] = True metadata['is_live'] = True
metadata['title'] = self._live_title(metadata.get('title')) metadata['title'] = self._live_title(metadata.get('title'))

View File

@@ -89,6 +89,11 @@ class IGNIE(InfoExtractor):
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds', 'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True, 'only_matching': True,
}, },
{
# videoId pattern
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
'only_matching': True,
},
] ]
def _find_video_id(self, webpage): def _find_video_id(self, webpage):
@@ -98,6 +103,8 @@ class IGNIE(InfoExtractor):
r'data-video-id="(.+?)"', r'data-video-id="(.+?)"',
r'<object id="vid_(.+?)"', r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
] ]
return self._search_regex(res_id, webpage, 'video id', default=None) return self._search_regex(res_id, webpage, 'video id', default=None)

View File

@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
webpage, 'description', fatal=False) webpage, 'description', fatal=False)
publish_date = unified_strdate(self._html_search_meta( publish_date = unified_strdate(self._html_search_meta(
'citation_publication_date', webpage, 'publish date', fatal=False)) 'citation_publication_date', webpage, 'publish date', fatal=False))
comment_count = self._html_search_regex( comment_count = int(self._html_search_regex(
r'<meta name="num_comments" content="(\d+) Comments?"', r'<meta name="num_comments" content="(\d+) Comments?"',
webpage, 'comment count', fatal=False) webpage, 'comment count', fatal=False))
return { return {
'id': video_id, 'id': video_id,

View File

@@ -115,8 +115,9 @@ class LiveLeakIE(InfoExtractor):
for a_format in info_dict['formats']: for a_format in info_dict['formats']:
if not a_format.get('height'): if not a_format.get('height'):
a_format['height'] = self._search_regex( a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label', default=None) r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
self._sort_formats(info_dict['formats']) self._sort_formats(info_dict['formats'])

View File

@@ -17,7 +17,7 @@ from ..utils import (
class MedialaanIE(InfoExtractor): class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)? (?:www\.|nieuws\.)?
(?: (?:
(?P<site_id>vtm|q2|vtmkzoom)\.be/ (?P<site_id>vtm|q2|vtmkzoom)\.be/
(?: (?:
@@ -85,6 +85,22 @@ class MedialaanIE(InfoExtractor):
# clip # clip
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio', 'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True, 'only_matching': True,
}, {
# http/s redirect
'url': 'https://vtmkzoom.be/video?aid=45724',
'info_dict': {
'id': '257136373657000',
'ext': 'mp4',
'title': 'K3 Dansstudio Ushuaia afl.6',
},
'params': {
'skip_download': True,
},
'skip': 'Requires account credentials',
}, {
# nieuws.vtm.be
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
'only_matching': True,
}] }]
def _real_initialize(self): def _real_initialize(self):
@@ -146,6 +162,8 @@ class MedialaanIE(InfoExtractor):
video_id, transform_source=lambda s: '[%s]' % s, fatal=False) video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player: if player:
video = player[-1] video = player[-1]
if video['videoUrl'] in ('http', 'https'):
return self.url_result(video['url'], MedialaanIE.ie_key())
info = { info = {
'id': video_id, 'id': video_id,
'url': video['videoUrl'], 'url': video['videoUrl'],

View File

@@ -136,11 +136,9 @@ class MiTeleIE(InfoExtractor):
video_id, 'Downloading gigya script') video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key # Get a appKey/uuid for getting the session key
appKey_var = self._search_regex(
r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
gigya_sc, 'appKey variable')
appKey = self._search_regex( appKey = self._search_regex(
r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey') r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
gigya_sc, 'appKey')
session_json = self._download_json( session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session', 'https://appgrid-api.cloud.accedo.tv/session',

View File

@@ -68,10 +68,6 @@ class MSNIE(InfoExtractor):
format_url = file_.get('url') format_url = file_.get('url')
if not format_url: if not format_url:
continue continue
ext = determine_ext(format_url)
if ext == 'ism':
formats.extend(self._extract_ism_formats(
format_url + '/Manifest', display_id, 'mss', fatal=False))
if 'm3u8' in format_url: if 'm3u8' in format_url:
# m3u8_native should not be used here until # m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed # https://github.com/rg3/youtube-dl/issues/9913 is fixed
@@ -79,6 +75,9 @@ class MSNIE(InfoExtractor):
format_url, display_id, 'mp4', format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
elif determine_ext(format_url) == 'ism':
formats.extend(self._extract_ism_formats(
format_url + '/Manifest', display_id, 'mss', fatal=False))
else: else:
formats.append({ formats.append({
'url': format_url, 'url': format_url,

View File

@@ -1,6 +1,15 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
parse_duration,
parse_filesize,
unified_timestamp,
)
class NewgroundsIE(InfoExtractor): class NewgroundsIE(InfoExtractor):
@@ -13,7 +22,10 @@ class NewgroundsIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'B7 - BusMode', 'title': 'B7 - BusMode',
'uploader': 'Burn7', 'uploader': 'Burn7',
} 'timestamp': 1378878540,
'upload_date': '20130911',
'duration': 143,
},
}, { }, {
'url': 'https://www.newgrounds.com/portal/view/673111', 'url': 'https://www.newgrounds.com/portal/view/673111',
'md5': '3394735822aab2478c31b1004fe5e5bc', 'md5': '3394735822aab2478c31b1004fe5e5bc',
@@ -22,25 +34,133 @@ class NewgroundsIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dancin', 'title': 'Dancin',
'uploader': 'Squirrelman82', 'uploader': 'Squirrelman82',
'timestamp': 1460256780,
'upload_date': '20160410',
},
}, {
# source format unavailable, additional mp4 formats
'url': 'http://www.newgrounds.com/portal/view/689400',
'info_dict': {
'id': '689400',
'ext': 'mp4',
'title': 'ZTV News Episode 8',
'uploader': 'BennettTheSage',
'timestamp': 1487965140,
'upload_date': '20170224',
},
'params': {
'skip_download': True,
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_id = self._match_id(url) media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id) webpage = self._download_webpage(url, media_id)
title = self._html_search_regex( title = self._html_search_regex(
r'<title>([^>]+)</title>', webpage, 'title') r'<title>([^>]+)</title>', webpage, 'title')
uploader = self._html_search_regex( media_url = self._parse_json(self._search_regex(
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False) r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
music_url = self._parse_json(self._search_regex( formats = [{
r'"url":("[^"]+"),', webpage, ''), media_id) 'url': media_url,
'format_id': 'source',
'quality': 1,
}]
max_resolution = int_or_none(self._search_regex(
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
default=None))
if max_resolution:
url_base = media_url.rpartition('.')[0]
for resolution in (360, 720, 1080):
if resolution > max_resolution:
break
formats.append({
'url': '%s.%dp.mp4' % (url_base, resolution),
'format_id': '%dp' % resolution,
'height': resolution,
})
self._check_formats(formats, media_id)
self._sort_formats(formats)
uploader = self._search_regex(
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
fatal=False)
timestamp = unified_timestamp(self._search_regex(
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
default=None))
duration = parse_duration(self._search_regex(
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
default=None))
filesize_approx = parse_filesize(self._html_search_regex(
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
default=None))
if len(formats) == 1:
formats[0]['filesize_approx'] = filesize_approx
if '<dd>Song' in webpage:
formats[0]['vcodec'] = 'none'
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'url': music_url,
'uploader': uploader, 'uploader': uploader,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
} }
class NewgroundsPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.newgrounds.com/collection/cats',
'info_dict': {
'id': 'cats',
'title': 'Cats',
},
'playlist_mincount': 46,
}, {
'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
'info_dict': {
'id': 'ZONE-SAMA',
'title': 'Portal Search: ZONE-SAMA',
},
'playlist_mincount': 47,
}, {
'url': 'http://www.newgrounds.com/audio/search/title/cats',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
title = self._search_regex(
r'<title>([^>]+)</title>', webpage, 'title', default=None)
# cut left menu
webpage = self._search_regex(
r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
webpage, 'wide column', default=webpage)
entries = []
for a, path, media_id in re.findall(
r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
webpage):
a_class = extract_attributes(a).get('class')
if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
continue
entries.append(
self.url_result(
'https://www.newgrounds.com/%s' % path,
ie=NewgroundsIE.ie_key(), video_id=media_id))
return self.playlist_result(entries, playlist_id, title)

View File

@@ -83,9 +83,12 @@ class NiconicoIE(InfoExtractor):
'uploader_id': '312', 'uploader_id': '312',
}, },
'skip': 'The viewing period of the video you were searching for has expired.', 'skip': 'The viewing period of the video you were searching for has expired.',
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
}] }]
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
def _real_initialize(self): def _real_initialize(self):

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
extract_attributes,
get_element_by_class, get_element_by_class,
urlencode_postdata, urlencode_postdata,
) )
@@ -56,17 +57,24 @@ class NJPWWorldIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
formats = [] formats = []
for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage): for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
player_url = compat_urlparse.urljoin(url, player_url) player = extract_attributes(mobj.group(0))
player_path = player.get('href')
if not player_path:
continue
kind = self._search_regex(
r'(low|high)$', player.get('class') or '', 'kind',
default='low')
player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage( player_page = self._download_webpage(
player_url, video_id, note='Downloading player page') player_url, video_id, note='Downloading player page')
entries = self._parse_html5_media_entries( entries = self._parse_html5_media_entries(
player_url, player_page, video_id, m3u8_id='hls-%s' % kind, player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
m3u8_entry_protocol='m3u8_native', m3u8_entry_protocol='m3u8_native')
preference=2 if 'hq' in kind else 1) kind_formats = entries[0]['formats']
formats.extend(entries[0]['formats']) for f in kind_formats:
f['quality'] = 2 if kind == 'high' else 1
formats.extend(kind_formats)
self._sort_formats(formats) self._sort_formats(formats)

View File

@@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE):
https?:// https?://
(?:www\.)? (?:www\.)?
(?: (?:
npo\.nl/(?!live|radio)(?:[^/]+/){2}| npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
ntr\.nl/(?:[^/]+/){2,}| ntr\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__| omroepwnl\.nl/video/fragment/[^/]+__|
zapp\.nl/[^/]+/[^/]+/ zapp\.nl/[^/]+/[^/]+/
@@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE):
# live stream # live stream
'url': 'npo:LI_NL1_4188102', 'url': 'npo:LI_NL1_4188102',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -11,6 +11,7 @@ from ..utils import (
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
js_to_json, js_to_json,
NO_DEFAULT,
parse_iso8601, parse_iso8601,
remove_start, remove_start,
strip_or_none, strip_or_none,
@@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor):
'upload_date': '20170214', 'upload_date': '20170214',
'timestamp': 1487078046, 'timestamp': 1487078046,
}, },
}, {
# embedded via pulsembed
'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
'info_dict': {
'id': '501235.965429946',
'ext': 'mp4',
'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
'upload_date': '20170622',
'timestamp': 1498159955,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
'only_matching': True, 'only_matching': True,
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _search_mvp_id(self, webpage, default=NO_DEFAULT):
return self._search_regex(
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
default=default)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mvp_id = self._search_regex( mvp_id = self._search_mvp_id(webpage, default=None)
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
if not mvp_id:
pulsembed_url = self._search_regex(
r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
webpage, 'pulsembed url', group='url')
webpage = self._download_webpage(
pulsembed_url, video_id, 'Downloading pulsembed webpage')
mvp_id = self._search_mvp_id(webpage)
return self.url_result( return self.url_result(
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)

View File

@@ -3,12 +3,14 @@ import re
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none,
float_or_none,
ExtractorError,
unsmuggle_url,
determine_ext, determine_ext,
ExtractorError,
float_or_none,
int_or_none,
try_get,
unsmuggle_url,
) )
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_urllib_parse_urlencode
@@ -39,13 +41,15 @@ class OoyalaBaseIE(InfoExtractor):
formats = [] formats = []
if cur_auth_data['authorized']: if cur_auth_data['authorized']:
for stream in cur_auth_data['streams']: for stream in cur_auth_data['streams']:
s_url = base64.b64decode( url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
stream['url']['data'].encode('ascii')).decode('utf-8') if not url_data:
if s_url in urls: continue
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
if not s_url or s_url in urls:
continue continue
urls.append(s_url) urls.append(s_url)
ext = determine_ext(s_url, None) ext = determine_ext(s_url, None)
delivery_type = stream['delivery_type'] delivery_type = stream.get('delivery_type')
if delivery_type == 'hls' or ext == 'm3u8': if delivery_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
@@ -65,7 +69,7 @@ class OoyalaBaseIE(InfoExtractor):
else: else:
formats.append({ formats.append({
'url': s_url, 'url': s_url,
'ext': ext or stream.get('delivery_type'), 'ext': ext or delivery_type,
'vcodec': stream.get('video_codec'), 'vcodec': stream.get('video_codec'),
'format_id': delivery_type, 'format_id': delivery_type,
'width': int_or_none(stream.get('width')), 'width': int_or_none(stream.get('width')),
@@ -136,6 +140,11 @@ class OoyalaIE(OoyalaBaseIE):
'title': 'Divide Tool Path.mp4', 'title': 'Divide Tool Path.mp4',
'duration': 204.405, 'duration': 204.405,
} }
},
{
# empty stream['url']['data']
'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
'only_matching': True,
} }
] ]

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -14,7 +15,6 @@ from ..utils import (
strip_or_none, strip_or_none,
unified_timestamp, unified_timestamp,
urljoin, urljoin,
urlencode_postdata,
) )
@@ -45,22 +45,15 @@ class PacktPubIE(PacktPubBaseIE):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
return return
webpage = self._download_webpage(self._PACKT_BASE, None)
login_form = self._form_hidden_inputs(
'packt-user-login-form', webpage)
login_form.update({
'email': username,
'password': password,
})
self._download_webpage(
self._PACKT_BASE, None, 'Logging in as %s' % username,
data=urlencode_postdata(login_form))
try: try:
self._TOKEN = self._download_json( self._TOKEN = self._download_json(
'%s/users/tokens/sessions' % self._MAPT_REST, None, self._MAPT_REST + '/users/tokens', None,
'Downloading Authorization Token')['data']['token'] 'Downloading Authorization Token', data=json.dumps({
'email': username,
'password': password,
}).encode())['data']['access']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404): if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
message = self._parse_json(e.cause.read().decode(), None)['message'] message = self._parse_json(e.cause.read().decode(), None)['message']
raise ExtractorError(message, expected=True) raise ExtractorError(message, expected=True)
raise raise
@@ -83,7 +76,7 @@ class PacktPubIE(PacktPubBaseIE):
headers = {} headers = {}
if self._TOKEN: if self._TOKEN:
headers['Authorization'] = self._TOKEN headers['Authorization'] = 'Bearer ' + self._TOKEN
video = self._download_json( video = self._download_json(
'%s/users/me/products/%s/chapters/%s/sections/%s' '%s/users/me/products/%s/chapters/%s/sections/%s'
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id, % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,

View File

@@ -10,13 +10,13 @@ from ..utils import (
class PandaTVIE(InfoExtractor): class PandaTVIE(InfoExtractor):
IE_DESC = '熊猫TV' IE_DESC = '熊猫TV'
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.panda.tv/10091', 'url': 'http://www.panda.tv/66666',
'info_dict': { 'info_dict': {
'id': '10091', 'id': '66666',
'title': 're:.+', 'title': 're:.+',
'uploader': '囚徒', 'uploader': '刘杀鸡',
'ext': 'flv', 'ext': 'flv',
'is_live': True, 'is_live': True,
}, },
@@ -24,13 +24,16 @@ class PandaTVIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Live stream is offline', 'skip': 'Live stream is offline',
} }, {
'url': 'https://www.panda.tv/66666',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
config = self._download_json( config = self._download_json(
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id) 'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0) error_code = config.get('errno', 0)
if error_code is not 0: if error_code is not 0:
@@ -74,7 +77,7 @@ class PandaTVIE(InfoExtractor):
continue continue
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
formats.append({ formats.append({
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s' 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext), % (pl, plflag1, room_key, live_panda, suffix[quality], ext),
'format_id': '%s-%s' % (k, ext), 'format_id': '%s-%s' % (k, ext),
'quality': quality, 'quality': quality,

View File

@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
IE_NAME = 'pandora.tv' IE_NAME = 'pandora.tv'
IE_DESC = '판도라TV' IE_DESC = '판도라TV'
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
_TEST = { _TESTS = [{
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
'info_dict': { 'info_dict': {
'id': '53294230', 'id': '53294230',
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
} }
} }, {
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
'info_dict': {
'id': '54721744',
'ext': 'flv',
'title': '[HD] JAPAN COUNTDOWN 170423',
'description': '[HD] JAPAN COUNTDOWN 170423',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1704.9,
'upload_date': '20170423',
'uploader': 'GOGO_UCC',
'uploader_id': 'gogoucc',
'view_count': int,
'like_count': int,
},
'params': {
# Test metadata only
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
'description': info.get('body'), 'description': info.get('body'),
'thumbnail': info.get('thumbnail') or info.get('poster'), 'thumbnail': info.get('thumbnail') or info.get('poster'),
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
'uploader': info.get('nickname'), 'uploader': info.get('nickname'),
'uploader_id': info.get('upload_userid'), 'uploader_id': info.get('upload_userid'),
'view_count': str_to_int(info.get('hit')), 'view_count': str_to_int(info.get('hit')),

View File

@@ -65,7 +65,7 @@ class PolskieRadioIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
content = self._search_regex( content = self._search_regex(
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>', r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
webpage, 'content') webpage, 'content')
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._html_search_regex(

View File

@@ -252,11 +252,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
playlist_id) 'playlist', default='{}'),
playlist_id, fatal=False)
title = playlist.get('title') or self._search_regex(
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist.get('title'), playlist.get('description')) entries, playlist_id, title, playlist.get('description'))
class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubPlaylistIE(PornHubPlaylistBaseIE):
@@ -296,6 +299,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
break break
raise
page_entries = self._extract_entries(webpage) page_entries = self._extract_entries(webpage)
if not page_entries: if not page_entries:
break break

View File

@@ -191,11 +191,12 @@ class RaiPlayIE(RaiBaseIE):
info = { info = {
'id': video_id, 'id': video_id,
'title': title, 'title': self._live_title(title) if relinker_info.get(
'is_live') else title,
'alt_title': media.get('subtitle'), 'alt_title': media.get('subtitle'),
'description': media.get('description'), 'description': media.get('description'),
'uploader': media.get('channel'), 'uploader': strip_or_none(media.get('channel')),
'creator': media.get('editor'), 'creator': strip_or_none(media.get('editor')),
'duration': parse_duration(video.get('duration')), 'duration': parse_duration(video.get('duration')),
'timestamp': timestamp, 'timestamp': timestamp,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
@@ -208,10 +209,46 @@ class RaiPlayIE(RaiBaseIE):
} }
info.update(relinker_info) info.update(relinker_info)
return info return info
class RaiPlayLiveIE(RaiBaseIE):
    """Extractor for live streams on raiplay.it (e.g. Rai News 24).

    The live page is addressed by a human-readable slug; the actual
    content UUID is embedded in the page markup. This extractor only
    resolves slug -> UUID and hands off to RaiPlayIE via a transparent
    URL result.
    """
    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'

    _TEST = {
        'url': 'http://www.raiplay.it/dirette/rainews24',
        'info_dict': {
            'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
            'display_id': 'rainews24',
            'ext': 'mp4',
            'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:6eca31500550f9376819f174e5644754',
            'uploader': 'Rai News 24',
            'creator': 'Rai News 24',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The page carries the stream's UUID in a data-uniquename
        # attribute of the form "ContentItem-<uuid>".
        content_id = self._search_regex(
            r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
            page, 'content id')

        return {
            '_type': 'url_transparent',
            'ie_key': RaiPlayIE.ie_key(),
            'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % content_id,
            'id': content_id,
            'display_id': slug,
        }
class RaiIE(RaiBaseIE): class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{ _TESTS = [{

View File

@@ -13,7 +13,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor): class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)' _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
_TESTS = [{ _TESTS = [{
# film # film
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
@@ -42,6 +42,22 @@ class RedBullTVIE(InfoExtractor):
'season_number': 2, 'season_number': 2,
'episode_number': 4, 'episode_number': 4,
}, },
'params': {
'skip_download': True,
},
}, {
# segment
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
'info_dict': {
'id': 'AP-1QSAQJ6V52111',
'ext': 'mp4',
'title': 'Semi Finals - Vans Park Series Pro Tour',
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
'duration': 11791.991,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion', 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
'only_matching': True, 'only_matching': True,
@@ -82,7 +98,8 @@ class RedBullTVIE(InfoExtractor):
title = info['title'].strip() title = info['title'].strip()
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
video['url'], video_id, 'mp4', 'm3u8_native') video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

View File

@@ -15,7 +15,7 @@ class RtlNlIE(InfoExtractor):
https?://(?:www\.)? https?://(?:www\.)?
(?: (?:
rtlxl\.nl/[^\#]*\#!/[^/]+/| rtlxl\.nl/[^\#]*\#!/[^/]+/|
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid= rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
) )
(?P<id>[0-9a-f-]+)''' (?P<id>[0-9a-f-]+)'''
@@ -70,6 +70,9 @@ class RtlNlIE(InfoExtractor):
}, { }, {
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -13,11 +13,15 @@ from ..utils import (
class RUTVIE(InfoExtractor): class RUTVIE(InfoExtractor):
IE_DESC = 'RUTV.RU' IE_DESC = 'RUTV.RU'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://player\.(?:rutv\.ru|vgtrk\.com)/ https?://
(?P<path>flash\d+v/container\.swf\?id= (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
|iframe/(?P<type>swf|video|live)/id/ (?P<path>
|index/iframe/cast_id/) flash\d+v/container\.swf\?id=|
(?P<id>\d+)''' iframe/(?P<type>swf|video|live)/id/|
index/iframe/cast_id/
)
(?P<id>\d+)
'''
_TESTS = [ _TESTS = [
{ {
@@ -99,17 +103,21 @@ class RUTVIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
'only_matching': True,
},
] ]
@classmethod @classmethod
def _extract_url(cls, webpage): def _extract_url(cls, webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
mobj = re.search( mobj = re.search(
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')

101
youtube_dl/extractor/ruv.py Normal file
View File

@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
unified_timestamp,
)
class RuvIE(InfoExtractor):
    """Extractor for ruv.is (the Icelandic national broadcaster).

    Supports both VOD entries served over HLS (m3u8) and plain MP3
    radio recordings.
    """
    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'

    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        title = self._og_search_title(page)

        # JS assignments of the form: video.<field> = "<value>"
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', page, 'video URL', group='url')

        # Prefer the numeric node id linked from the page; fall back to
        # the URL slug when the <link> tag is absent.
        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            page, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            # Unknown container: pass the URL through untouched.
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(page, default=None)
        thumbnail = self._og_search_thumbnail(
            page, default=None) or self._search_regex(
            FIELD_RE % 'poster', page, 'thumbnail', fatal=False)
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', page, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }

View File

@@ -16,7 +16,6 @@ from ..utils import (
class SafariBaseIE(InfoExtractor): class SafariBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
_NETRC_MACHINE = 'safari' _NETRC_MACHINE = 'safari'
_API_BASE = 'https://www.safaribooksonline.com/api/v1' _API_BASE = 'https://www.safaribooksonline.com/api/v1'
@@ -28,10 +27,6 @@ class SafariBaseIE(InfoExtractor):
self._login() self._login()
def _login(self): def _login(self):
# We only need to log in once for courses or individual videos
if self.LOGGED_IN:
return
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
return return
@@ -39,11 +34,17 @@ class SafariBaseIE(InfoExtractor):
headers = std_headers.copy() headers = std_headers.copy()
if 'Referer' not in headers: if 'Referer' not in headers:
headers['Referer'] = self._LOGIN_URL headers['Referer'] = self._LOGIN_URL
login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
login_page = self._download_webpage( login_page = self._download_webpage(
login_page_request, None, self._LOGIN_URL, None, 'Downloading login form', headers=headers)
'Downloading login form')
def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'href=["\']/accounts/logout/', r'>Sign Out<'))
if is_logged(login_page):
self.LOGGED_IN = True
return
csrf = self._html_search_regex( csrf = self._html_search_regex(
r"name='csrfmiddlewaretoken'\s+value='([^']+)'", r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
@@ -62,14 +63,12 @@ class SafariBaseIE(InfoExtractor):
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, 'Logging in as %s' % username) request, None, 'Logging in as %s' % username)
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: if not is_logged(login_page):
raise ExtractorError( raise ExtractorError(
'Login failed; make sure your credentials are correct and try again.', 'Login failed; make sure your credentials are correct and try again.',
expected=True) expected=True)
SafariBaseIE.LOGGED_IN = True self.LOGGED_IN = True
self.to_screen('Login successful')
class SafariIE(SafariBaseIE): class SafariIE(SafariBaseIE):

View File

@@ -32,8 +32,9 @@ class SexuIE(InfoExtractor):
formats = [{ formats = [{
'url': source['file'].replace('\\', ''), 'url': source['file'].replace('\\', ''),
'format_id': source.get('label'), 'format_id': source.get('label'),
'height': self._search_regex( 'height': int(self._search_regex(
r'^(\d+)[pP]', source.get('label', ''), 'height', default=None), r'^(\d+)[pP]', source.get('label', ''), 'height',
default=None)),
} for source in sources if source.get('file')] } for source in sources if source.get('file')]
self._sort_formats(formats) self._sort_formats(formats)

View File

@@ -8,7 +8,11 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
) )
from ..utils import ExtractorError from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
class SohuIE(InfoExtractor): class SohuIE(InfoExtractor):
@@ -169,10 +173,11 @@ class SohuIE(InfoExtractor):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,
'filesize': data['clipsBytes'][i], 'filesize': int_or_none(
'width': data['width'], try_get(data, lambda x: x['clipsBytes'][i])),
'height': data['height'], 'width': int_or_none(data.get('width')),
'fps': data['fps'], 'height': int_or_none(data.get('height')),
'fps': int_or_none(data.get('fps')),
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@@ -136,7 +136,7 @@ class SoundcloudIE(InfoExtractor):
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
@@ -174,7 +174,7 @@ class SoundcloudIE(InfoExtractor):
# We have to retrieve the url # We have to retrieve the url
format_dict = self._download_json( format_dict = self._download_json(
'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id, 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
track_id, 'Downloading track url', query={ track_id, 'Downloading track url', query={
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
'secret_token': secret_token, 'secret_token': secret_token,
@@ -236,7 +236,7 @@ class SoundcloudIE(InfoExtractor):
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
if track_id is not None: if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id full_title = track_id
token = mobj.group('secret_token') token = mobj.group('secret_token')
if token: if token:
@@ -261,7 +261,7 @@ class SoundcloudIE(InfoExtractor):
self.report_resolve(full_title) self.report_resolve(full_title)
url = 'http://soundcloud.com/%s' % resolve_title url = 'https://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url) info_json_url = self._resolv_url(url)
info = self._download_json(info_json_url, full_title, 'Downloading info JSON') info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
@@ -290,7 +290,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
'id': '2284613', 'id': '2284613',
'title': 'The Royal Concept EP', 'title': 'The Royal Concept EP',
}, },
'playlist_mincount': 6, 'playlist_mincount': 5,
}, { }, {
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
'only_matching': True, 'only_matching': True,
@@ -304,7 +304,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
# extract simple title (uploader + slug of song title) # extract simple title (uploader + slug of song title)
slug_title = mobj.group('slug_title') slug_title = mobj.group('slug_title')
full_title = '%s/sets/%s' % (uploader, slug_title) full_title = '%s/sets/%s' % (uploader, slug_title)
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
token = mobj.group('token') token = mobj.group('token')
if token: if token:
@@ -380,7 +380,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
'url': 'https://soundcloud.com/grynpyret/spotlight', 'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': { 'info_dict': {
'id': '7098329', 'id': '7098329',
'title': 'GRYNPYRET (Spotlight)', 'title': 'Grynpyret (Spotlight)',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}] }]
@@ -410,7 +410,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user') uploader = mobj.group('user')
url = 'http://soundcloud.com/%s/' % uploader url = 'https://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url) resolv_url = self._resolv_url(url)
user = self._download_json( user = self._download_json(
resolv_url, uploader, 'Downloading user info') resolv_url, uploader, 'Downloading user info')
@@ -473,7 +473,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist' IE_NAME = 'soundcloud:playlist'
_TESTS = [{ _TESTS = [{
'url': 'http://api.soundcloud.com/playlists/4110309', 'url': 'https://api.soundcloud.com/playlists/4110309',
'info_dict': { 'info_dict': {
'id': '4110309', 'id': '4110309',
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',

View File

@@ -21,6 +21,17 @@ class StreamangoIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '20170315_150006.mp4', 'title': '20170315_150006.mp4',
} }
}, {
# no og:title
'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
'info_dict': {
'id': 'foqebrpftarclpob',
'ext': 'mp4',
'title': 'foqebrpftarclpob',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
'only_matching': True, 'only_matching': True,
@@ -31,7 +42,7 @@ class StreamangoIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage) title = self._og_search_title(webpage, default=video_id)
formats = [] formats = []
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage): for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):

View File

@@ -26,7 +26,7 @@ class StreamCZIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
'md5': '6d3ca61a8d0633c9c542b92fcb936b0c', 'md5': '934bb6a6d220d99c010783c9719960d5',
'info_dict': { 'info_dict': {
'id': '765767', 'id': '765767',
'ext': 'mp4', 'ext': 'mp4',
@@ -37,7 +37,7 @@ class StreamCZIE(InfoExtractor):
}, },
}, { }, {
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
'md5': 'e54a254fb8b871968fd8403255f28589', 'md5': '849a88c1e1ca47d41403c2ba5e59e261',
'info_dict': { 'info_dict': {
'id': '10002447', 'id': '10002447',
'ext': 'mp4', 'ext': 'mp4',
@@ -85,6 +85,14 @@ class StreamCZIE(InfoExtractor):
else: else:
title = data['name'] title = data['name']
subtitles = {}
srt_url = data.get('subtitles_srt')
if srt_url:
subtitles['cs'] = [{
'ext': 'srt',
'url': srt_url,
}]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -93,4 +101,5 @@ class StreamCZIE(InfoExtractor):
'description': data.get('web_site_text'), 'description': data.get('web_site_text'),
'duration': int_or_none(data.get('duration')), 'duration': int_or_none(data.get('duration')),
'view_count': int_or_none(data.get('views')), 'view_count': int_or_none(data.get('views')),
'subtitles': subtitles,
} }

View File

@@ -0,0 +1,43 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class TastyTradeIE(InfoExtractor):
    """Extractor for tastytrade.com show episodes.

    Episodes are hosted on Ooyala; this extractor locates the Ooyala
    embed code on the page, enriches it with JSON-LD metadata when
    available, and delegates playback to OoyalaIE.
    """
    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
        'info_dict': {
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
            'ext': 'mp4',
            'title': 'A History of Teaming',
            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
            'duration': 422.255,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The Ooyala embed code lives in a data-media-id attribute.
        embed_code = self._search_regex(
            r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
            page, 'ooyala code', group='code')

        # JSON-LD metadata is optional; fall back to an empty dict.
        result = self._search_json_ld(page, display_id, fatal=False)
        result.update({
            '_type': 'url_transparent',
            'ie_key': OoyalaIE.ie_key(),
            'url': 'ooyala:%s' % embed_code,
            'display_id': display_id,
        })
        return result

View File

@@ -6,7 +6,10 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import int_or_none from ..utils import (
int_or_none,
try_get,
)
class TEDIE(InfoExtractor): class TEDIE(InfoExtractor):
@@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):
} }
def _extract_info(self, webpage): def _extract_info(self, webpage):
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', info_json = self._search_regex(
webpage, 'info json') r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
webpage, 'info json')
return json.loads(info_json) return json.loads(info_json)
def _real_extract(self, url): def _real_extract(self, url):
@@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):
webpage = self._download_webpage(url, name, webpage = self._download_webpage(url, name,
'Downloading playlist webpage') 'Downloading playlist webpage')
info = self._extract_info(webpage) info = self._extract_info(webpage)
playlist_info = info['playlist']
playlist_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['playlist'],
dict) or info['playlist']
playlist_entries = [ playlist_entries = [
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key()) self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
for talk in info['talks'] for talk in try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'],
dict) or info['talks']
] ]
return self.playlist_result( return self.playlist_result(
playlist_entries, playlist_entries,
@@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):
def _talk_info(self, url, video_name): def _talk_info(self, url, video_name):
webpage = self._download_webpage(url, video_name) webpage = self._download_webpage(url, video_name)
self.report_extraction(video_name)
talk_info = self._extract_info(webpage)['talks'][0] info = self._extract_info(webpage)
talk_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
dict) or info['talks'][0]
title = talk_info['title'].strip()
external = talk_info.get('external') external = talk_info.get('external')
if external: if external:
@@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):
'url': ext_url or external['uri'], 'url': ext_url or external['uri'],
} }
native_downloads = try_get(
talk_info, lambda x: x['downloads']['nativeDownloads'],
dict) or talk_info['nativeDownloads']
formats = [{ formats = [{
'url': format_url, 'url': format_url,
'format_id': format_id, 'format_id': format_id,
'format': format_id, 'format': format_id,
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None] } for (format_id, format_url) in native_downloads.items() if format_url is not None]
if formats: if formats:
for f in formats: for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id']) finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo: if finfo:
f.update(finfo) f.update(finfo)
player_talk = talk_info['player_talks'][0]
resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None http_url = None
for format_id, resources in talk_info['resources'].items(): for format_id, resources in resources_.items():
if format_id == 'h264': if format_id == 'h264':
for resource in resources: for resource in resources:
h264_url = resource.get('file') h264_url = resource.get('file')
@@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):
video_id = compat_str(talk_info['id']) video_id = compat_str(talk_info['id'])
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
thumbnail = 'http://' + thumbnail
return { return {
'id': video_id, 'id': video_id,
'title': talk_info['title'].strip(), 'title': title,
'uploader': talk_info['speaker'], 'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
'thumbnail': thumbnail, 'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info), 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats, 'formats': formats,

View File

@@ -2,13 +2,15 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import try_get
class ThisOldHouseIE(InfoExtractor): class ThisOldHouseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
'md5': '946f05bbaa12a33f9ae35580d2dfcfe3', 'md5': '568acf9ca25a639f0c4ff905826b662f',
'info_dict': { 'info_dict': {
'id': '2REGtUDQ', 'id': '2REGtUDQ',
'ext': 'mp4', 'ext': 'mp4',
@@ -28,8 +30,15 @@ class ThisOldHouseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex( video_id = self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'drupal settings'), display_id) r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
video_id = drupal_settings['jwplatform']['video_id'] webpage, 'video id', default=None, group='id')
if not video_id:
drupal_settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings'), display_id)
video_id = try_get(
drupal_settings, lambda x: x['jwplatform']['video_id'],
compat_str) or list(drupal_settings['comScore'])[0]
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)

View File

@@ -17,7 +17,7 @@ from ..utils import (
class ToggleIE(InfoExtractor): class ToggleIE(InfoExtractor):
IE_NAME = 'toggle' IE_NAME = 'toggle'
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)' _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115', 'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': { 'info_dict': {
@@ -73,6 +73,12 @@ class ToggleIE(InfoExtractor):
}, { }, {
'url': 'http://video.toggle.sg/en/movies/seven-days/321936', 'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
'only_matching': True,
}] }]
_FORMAT_PREFERENCES = { _FORMAT_PREFERENCES = {

View File

@@ -6,42 +6,48 @@ import re
class ToypicsIE(InfoExtractor): class ToypicsIE(InfoExtractor):
IE_DESC = 'Toypics user profile' IE_DESC = 'Toypics video'
_VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
'md5': '16e806ad6d6f58079d210fe30985e08b', 'md5': '16e806ad6d6f58079d210fe30985e08b',
'info_dict': { 'info_dict': {
'id': '514', 'id': '514',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Chance-Bulge\'d, 2', 'title': "Chance-Bulge'd, 2",
'age_limit': 18, 'age_limit': 18,
'uploader': 'kidsune', 'uploader': 'kidsune',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL') formats = self._parse_html5_media_entries(
title = self._html_search_regex( url, webpage, video_id)[0]['formats']
r'<title>Toypics - ([^<]+)</title>', page, 'title') title = self._html_search_regex([
username = self._html_search_regex( r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
r'toypics.net/([^/"]+)" class="user-name">', page, 'username') r'<title>([^<]+) - Toypics</title>',
], webpage, 'title')
uploader = self._html_search_regex(
r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
fatal=False)
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'formats': formats,
'title': title, 'title': title,
'uploader': username, 'uploader': uploader,
'age_limit': 18, 'age_limit': 18,
} }
class ToypicsUserIE(InfoExtractor): class ToypicsUserIE(InfoExtractor):
IE_DESC = 'Toypics user profile' IE_DESC = 'Toypics user profile'
_VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])' _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://videos.toypics.net/Mikey', 'url': 'http://videos.toypics.net/Mikey',
'info_dict': { 'info_dict': {
@@ -51,8 +57,7 @@ class ToypicsUserIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) username = self._match_id(url)
username = mobj.group('username')
profile_page = self._download_webpage( profile_page = self._download_webpage(
url, username, note='Retrieving profile page') url, username, note='Retrieving profile page')
@@ -71,7 +76,7 @@ class ToypicsUserIE(InfoExtractor):
note='Downloading page %d/%d' % (n, page_count)) note='Downloading page %d/%d' % (n, page_count))
urls.extend( urls.extend(
re.findall( re.findall(
r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">', r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
lpage)) lpage))
return { return {

View File

@@ -3,138 +3,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
InAdvancePagedList,
float_or_none,
unescapeHTML,
)
class TudouIE(InfoExtractor):
IE_NAME = 'tudou'
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
_TESTS = [{
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
'md5': '140a49ed444bd22f93330985d8475fcb',
'info_dict': {
'id': '159448201',
'ext': 'f4v',
'title': '卡马乔国足开大脚长传冲吊集锦',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1372113489000,
'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
'duration': 289.04,
'view_count': int,
'filesize': int,
}
}, {
'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
'info_dict': {
'id': '117049447',
'ext': 'f4v',
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1349207518000,
'description': 'md5:294612423894260f2dcd5c6c04fe248b',
'duration': 5478.33,
'view_count': int,
'filesize': int,
}
}]
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
# Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
# 0001, 0002 and 4001 are not included as they indicate temporary issues
TVC_ERRORS = {
'0003': 'The video is deleted or does not exist',
'1001': 'This video is unavailable due to licensing issues',
'1002': 'This video is unavailable as it\'s under review',
'1003': 'This video is unavailable as it\'s under review',
'3001': 'Password required',
'5001': 'This video is available in Mainland China only due to licensing issues',
'7001': 'This video is unavailable',
'8001': 'This video is unavailable due to licensing issues',
}
def _url_for_id(self, video_id, quality=None):
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
if quality:
info_url += '&hd' + quality
xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
error = xml_data.attrib.get('error')
if error is not None:
raise ExtractorError('Tudou said: %s' % error, expected=True)
final_url = xml_data.text
return final_url
def _real_extract(self, url):
video_id = self._match_id(url)
item_data = self._download_json(
'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
youku_vcode = item_data.get('vcode')
if youku_vcode:
return self.url_result('youku:' + youku_vcode, ie='Youku')
if not item_data.get('itemSegs'):
tvc_code = item_data.get('tvcCode')
if tvc_code:
err_msg = self.TVC_ERRORS.get(tvc_code)
if err_msg:
raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
raise ExtractorError('Unxpected error returned from Tudou')
title = unescapeHTML(item_data['kw'])
description = item_data.get('desc')
thumbnail_url = item_data.get('pic')
view_count = int_or_none(item_data.get('playTimes'))
timestamp = int_or_none(item_data.get('pt'))
segments = self._parse_json(item_data['itemSegs'], video_id)
# It looks like the keys are the arguments that have to be passed as
# the hd field in the request url, we pick the higher
# Also, filter non-number qualities (see issue #3643).
quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
key=lambda k: int(k))[-1]
parts = segments[quality]
len_parts = len(parts)
if len_parts > 1:
self.to_screen('%s: found %s parts' % (video_id, len_parts))
def part_func(partnum):
part = parts[partnum]
part_id = part['k']
final_url = self._url_for_id(part_id, quality)
ext = (final_url.split('?')[0]).split('.')[-1]
return [{
'id': '%s' % part_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'description': description,
'view_count': view_count,
'timestamp': timestamp,
'duration': float_or_none(part.get('seconds'), 1000),
'filesize': int_or_none(part.get('size')),
'http_headers': {
'Referer': self._PLAYER_URL,
},
}]
entries = InAdvancePagedList(part_func, len_parts, 1)
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': title,
}
class TudouPlaylistIE(InfoExtractor): class TudouPlaylistIE(InfoExtractor):

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -49,7 +50,7 @@ class TurboIE(InfoExtractor):
for child in item: for child in item:
m = re.search(r'url_video_(?P<quality>.+)', child.tag) m = re.search(r'url_video_(?P<quality>.+)', child.tag)
if m: if m:
quality = m.group('quality') quality = compat_str(m.group('quality'))
formats.append({ formats.append({
'format_id': quality, 'format_id': quality,
'url': child.text, 'url': child.text,

View File

@@ -48,7 +48,7 @@ class TVPlayerIE(InfoExtractor):
'https://tvplayer.com/watch/context', display_id, 'https://tvplayer.com/watch/context', display_id,
'Downloading JSON context', query={ 'Downloading JSON context', query={
'resource': resource_id, 'resource': resource_id,
'nonce': token, 'gen': token,
}) })
validate = context['validate'] validate = context['validate']

View File

@@ -52,6 +52,10 @@ class UdemyIE(InfoExtractor):
# new URL schema # new URL schema
'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906', 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
'only_matching': True, 'only_matching': True,
}, {
# no url in outputs format entry
'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
'only_matching': True,
}] }]
def _extract_course_info(self, webpage, video_id): def _extract_course_info(self, webpage, video_id):
@@ -219,7 +223,7 @@ class UdemyIE(InfoExtractor):
def extract_output_format(src, f_id): def extract_output_format(src, f_id):
return { return {
'url': src['url'], 'url': src.get('url'),
'format_id': '%sp' % (src.get('height') or f_id), 'format_id': '%sp' % (src.get('height') or f_id),
'width': int_or_none(src.get('width')), 'width': int_or_none(src.get('width')),
'height': int_or_none(src.get('height')), 'height': int_or_none(src.get('height')),

View File

@@ -151,10 +151,16 @@ class VimeoBaseInfoExtractor(InfoExtractor):
else: else:
mpd_manifest_urls = [(format_id, manifest_url)] mpd_manifest_urls = [(format_id, manifest_url)]
for f_id, m_url in mpd_manifest_urls: for f_id, m_url in mpd_manifest_urls:
formats.extend(self._extract_mpd_formats( mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id, m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name, 'Downloading %s MPD information' % cdn_name,
fatal=False)) fatal=False)
for f in mpd_formats:
if f.get('vcodec') == 'none':
f['preference'] = -50
elif f.get('acodec') == 'none':
f['preference'] = -40
formats.extend(mpd_formats)
subtitles = {} subtitles = {}
text_tracks = config['request'].get('text_tracks') text_tracks = config['request'].get('text_tracks')
@@ -609,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original') source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, '%s video' % source_name): if self._is_valid_url(download_url, video_id, '%s video' % source_name):
ext = source_file.get('extension', determine_ext(download_url)).lower() ext = (try_get(
source_file, lambda x: x['extension'],
compat_str) or determine_ext(
download_url, None) or 'mp4').lower()
formats.append({ formats.append({
'url': download_url, 'url': download_url,
'ext': ext, 'ext': ext,

View File

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_kwargs,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
headers.update(kwargs.get('headers', {})) headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers kwargs['headers'] = headers
response = self._download_json( response = self._download_json(
'https://www.viu.com/api/' + path, *args, **kwargs)['response'] 'https://www.viu.com/api/' + path, *args,
**compat_kwargs(kwargs))['response']
if response.get('status') != 'success': if response.get('status') != 'success':
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True) self.IE_NAME, response['message']), expected=True)

View File

@@ -4,11 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import parse_duration
unified_strdate,
parse_duration,
int_or_none,
)
class WatchIndianPornIE(InfoExtractor): class WatchIndianPornIE(InfoExtractor):
@@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'LoveJay',
'upload_date': '20160428',
'duration': 226, 'duration': 226,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'age_limit': 18, 'age_limit': 18,
} }
@@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_url = self._html_search_regex( info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
r"url: escape\('([^']+)'\)", webpage, 'url')
title = self._html_search_regex( title = self._html_search_regex((
r'<h2 class="he2"><span>(.*?)</span>', r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
webpage, 'title') r'<h4>(.+?)</h4>'
thumbnail = self._html_search_regex( ), webpage, 'title')
r'<span id="container"><img\s+src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'class="aupa">\s*(.*?)</a>',
webpage, 'uploader')
upload_date = unified_strdate(self._html_search_regex(
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', r'Time:\s*<strong>\s*(.+?)\s*</strong>',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex( view_count = int(self._search_regex(
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
webpage, 'comment count', fatal=False))
categories = re.findall( categories = re.findall(
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
webpage) webpage)
return { info_dict.update({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url,
'http_headers': { 'http_headers': {
'Referer': url, 'Referer': url,
}, },
'title': title, 'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count,
'categories': categories, 'categories': categories,
'age_limit': 18, 'age_limit': 18,
} })
return info_dict

View File

@@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=| https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
https?://(?:www\.)?wsj\.com/video/[^/]+/| https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
wsj: wsj:
) )
(?P<id>[a-fA-F0-9-]{36}) (?P<id>[a-fA-F0-9-]{36})
@@ -35,6 +35,9 @@ class WSJIE(InfoExtractor):
}, { }, {
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html', 'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -10,7 +10,6 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
NO_DEFAULT, NO_DEFAULT,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
) )
@@ -30,6 +29,8 @@ class XFileShareIE(InfoExtractor):
(r'vidabc\.com', 'Vid ABC'), (r'vidabc\.com', 'Vid ABC'),
(r'vidbom\.com', 'VidBom'), (r'vidbom\.com', 'VidBom'),
(r'vidlo\.us', 'vidlo'), (r'vidlo\.us', 'vidlo'),
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
(r'fastvideo\.me', 'FastVideo.me'),
) )
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
@@ -109,6 +110,12 @@ class XFileShareIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
'only_matching': True,
}, {
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
'only_matching': True
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -130,12 +137,12 @@ class XFileShareIE(InfoExtractor):
if countdown: if countdown:
self._sleep(countdown, video_id) self._sleep(countdown, video_id)
post = urlencode_postdata(fields) webpage = self._download_webpage(
url, video_id, 'Downloading video page',
req = sanitized_Request(url, post) data=urlencode_postdata(fields), headers={
req.add_header('Content-type', 'application/x-www-form-urlencoded') 'Referer': url,
'Content-type': 'application/x-www-form-urlencoded',
webpage = self._download_webpage(req, video_id, 'Downloading video page') })
title = (self._search_regex( title = (self._search_regex(
(r'style="z-index: [0-9]+;">([^<]+)</span>', (r'style="z-index: [0-9]+;">([^<]+)</span>',
@@ -150,7 +157,7 @@ class XFileShareIE(InfoExtractor):
def extract_formats(default=NO_DEFAULT): def extract_formats(default=NO_DEFAULT):
urls = [] urls = []
for regex in ( for regex in (
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1', r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)', r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'): r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):

View File

@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
dict_get, dict_get,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011', 'uploader': 'Ruseful2011',
'duration': 893, 'duration': 893,
'age_limit': 18, 'age_limit': 18,
'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
}, },
}, { }, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400', 'uploader': 'jojo747400',
'duration': 200, 'duration': 200,
'age_limit': 18, 'age_limit': 18,
'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree', 'uploader': 'parejafree',
'duration': 72, 'duration': 72,
'age_limit': 18, 'age_limit': 18,
'categories': ['Amateur', 'Blowjobs'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@@ -104,7 +108,7 @@ class XHamsterIE(InfoExtractor):
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>', r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
webpage, 'uploader', default='anonymous') webpage, 'uploader', default='anonymous')
thumbnail = self._search_regex( thumbnail = self._search_regex(
@@ -120,7 +124,7 @@ class XHamsterIE(InfoExtractor):
r'content=["\']User(?:View|Play)s:(\d+)', r'content=["\']User(?:View|Play)s:(\d+)',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage) mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None) (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage) mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
categories_html = self._search_regex(
r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
'categories', default=None)
categories = [clean_html(category) for category in re.findall(
r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(dislike_count), 'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count), 'comment_count': int_or_none(comment_count),
'age_limit': age_limit, 'age_limit': age_limit,
'categories': categories,
'formats': formats, 'formats': formats,
} }

View File

@@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import itertools import itertools
import random import random
import re import re
@@ -9,15 +8,13 @@ import string
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
get_element_by_attribute, get_element_by_class,
try_get, js_to_json,
str_or_none,
strip_jsonp,
urljoin,
) )
@@ -26,7 +23,9 @@ class YoukuIE(InfoExtractor):
IE_DESC = '优酷' IE_DESC = '优酷'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| https?://(
(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
video\.tudou\.com/v/)|
youku:) youku:)
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
''' '''
@@ -35,9 +34,15 @@ class YoukuIE(InfoExtractor):
# MD5 is unstable # MD5 is unstable
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
'info_dict': { 'info_dict': {
'id': 'XMTc1ODE5Njcy_part1', 'id': 'XMTc1ODE5Njcy',
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
'ext': 'flv' 'ext': 'mp4',
'duration': 74.73,
'thumbnail': r're:^https?://.*',
'uploader': '。躲猫猫、',
'uploader_id': '36017967',
'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
'tags': list,
} }
}, { }, {
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
@@ -46,25 +51,42 @@ class YoukuIE(InfoExtractor):
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
'info_dict': { 'info_dict': {
'id': 'XODgxNjg1Mzk2', 'id': 'XODgxNjg1Mzk2',
'ext': 'mp4',
'title': '武媚娘传奇 85', 'title': '武媚娘传奇 85',
'duration': 1999.61,
'thumbnail': r're:^https?://.*',
'uploader': '疯狂豆花',
'uploader_id': '62583473',
'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
'tags': list,
}, },
'playlist_count': 11,
'skip': 'Available in China only',
}, { }, {
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
'info_dict': { 'info_dict': {
'id': 'XMTI1OTczNDM5Mg', 'id': 'XMTI1OTczNDM5Mg',
'ext': 'mp4',
'title': '花千骨 04', 'title': '花千骨 04',
'duration': 2363,
'thumbnail': r're:^https?://.*',
'uploader': '放剧场-花千骨',
'uploader_id': '772849359',
'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
'tags': list,
}, },
'playlist_count': 13,
}, { }, {
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html', 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
'note': 'Video protected with password', 'note': 'Video protected with password',
'info_dict': { 'info_dict': {
'id': 'XNjA1NzA2Njgw', 'id': 'XNjA1NzA2Njgw',
'ext': 'mp4',
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起', 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
'duration': 7264.5,
'thumbnail': r're:^https?://.*',
'uploader': 'FoxJin1006',
'uploader_id': '322014285',
'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
'tags': list,
}, },
'playlist_count': 19,
'params': { 'params': {
'videopassword': '100600', 'videopassword': '100600',
}, },
@@ -73,130 +95,38 @@ class YoukuIE(InfoExtractor):
'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html', 'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
'info_dict': { 'info_dict': {
'id': 'XOTUxMzg4NDMy', 'id': 'XOTUxMzg4NDMy',
'ext': 'mp4',
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft', 'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
'duration': 702.08,
'thumbnail': r're:^https?://.*',
'uploader': '明月庄主moon',
'uploader_id': '38465621',
'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
'tags': list,
}, },
'playlist_count': 6, }, {
'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
'info_dict': {
'id': 'XMjIyNzAzMTQ4NA',
'ext': 'mp4',
'title': '卡马乔国足开大脚长传冲吊集锦',
'duration': 289,
'thumbnail': r're:^https?://.*',
'uploader': '阿卜杜拉之星',
'uploader_id': '2382249',
'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
'tags': list,
},
}, {
'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
'only_matching': True,
}] }]
def construct_video_urls(self, data):
# get sid, token
def yk_t(s1, s2):
ls = list(range(256))
t = 0
for i in range(256):
t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
ls[i], ls[t] = ls[t], ls[i]
s = bytearray()
x, y = 0, 0
for i in range(len(s2)):
y = (y + 1) % 256
x = (x + ls[y]) % 256
ls[x], ls[y] = ls[y], ls[x]
s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
return bytes(s)
sid, token = yk_t(
b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
).decode('ascii').split('_')
# get oip
oip = data['security']['ip']
fileid_dict = {}
for stream in data['stream']:
if stream.get('channel_type') == 'tail':
continue
format = stream.get('stream_type')
fileid = try_get(
stream, lambda x: x['segs'][0]['fileid'],
compat_str) or stream['stream_fileid']
fileid_dict[format] = fileid
def get_fileid(format, n):
number = hex(int(str(n), 10))[2:].upper()
if len(number) == 1:
number = '0' + number
streamfileids = fileid_dict[format]
fileid = streamfileids[0:8] + number + streamfileids[10:]
return fileid
# get ep
def generate_ep(format, n):
fileid = get_fileid(format, n)
ep_t = yk_t(
b'bf7e5f01',
('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
)
ep = base64.b64encode(ep_t).decode('ascii')
return ep
# generate video_urls
video_urls_dict = {}
for stream in data['stream']:
if stream.get('channel_type') == 'tail':
continue
format = stream.get('stream_type')
video_urls = []
for dt in stream['segs']:
n = str(stream['segs'].index(dt))
param = {
'K': dt['key'],
'hd': self.get_hd(format),
'myp': 0,
'ypp': 0,
'ctype': 12,
'ev': 1,
'token': token,
'oip': oip,
'ep': generate_ep(format, n)
}
video_url = \
'http://k.youku.com/player/getFlvPath/' + \
'sid/' + sid + \
'_00' + \
'/st/' + self.parse_ext_l(format) + \
'/fileid/' + get_fileid(format, n) + '?' + \
compat_urllib_parse_urlencode(param)
video_urls.append(video_url)
video_urls_dict[format] = video_urls
return video_urls_dict
@staticmethod @staticmethod
def get_ysuid(): def get_ysuid():
return '%d%s' % (int(time.time()), ''.join([ return '%d%s' % (int(time.time()), ''.join([
random.choice(string.ascii_letters) for i in range(3)])) random.choice(string.ascii_letters) for i in range(3)]))
def get_hd(self, fm):
hd_id_dict = {
'3gp': '0',
'3gphd': '1',
'flv': '0',
'flvhd': '0',
'mp4': '1',
'mp4hd': '1',
'mp4hd2': '1',
'mp4hd3': '1',
'hd2': '2',
'hd3': '3',
}
return hd_id_dict[fm]
def parse_ext_l(self, fm):
ext_dict = {
'3gp': 'flv',
'3gphd': 'mp4',
'flv': 'flv',
'flvhd': 'flv',
'mp4': 'mp4',
'mp4hd': 'mp4',
'mp4hd2': 'flv',
'mp4hd3': 'flv',
'hd2': 'flv',
'hd3': 'flv',
}
return ext_dict[fm]
def get_format_name(self, fm): def get_format_name(self, fm):
_dict = { _dict = {
'3gp': 'h6', '3gp': 'h6',
@@ -210,32 +140,40 @@ class YoukuIE(InfoExtractor):
'hd2': 'h2', 'hd2': 'h2',
'hd3': 'h1', 'hd3': 'h1',
} }
return _dict[fm] return _dict.get(fm)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
self._set_cookie('youku.com', '__ysuid', self.get_ysuid()) self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
def retrieve_data(req_url, note): _, urlh = self._download_webpage_handle(
headers = { 'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
'Referer': req_url, # The etag header is '"foobar"'; let's remove the double quotes
} cna = urlh.headers['etag'][1:-1]
headers.update(self.geo_verification_headers())
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
return raw_data['data']
video_password = self._downloader.params.get('videopassword')
# request basic data # request basic data
basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id basic_data_params = {
if video_password: 'vid': video_id,
basic_data_url += '&pwd=%s' % video_password 'ccode': '0402' if 'tudou.com' in url else '0401',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,
}
data = retrieve_data(basic_data_url, 'Downloading JSON metadata') video_password = self._downloader.params.get('videopassword')
if video_password:
basic_data_params['password'] = video_password
headers = {
'Referer': url,
}
headers.update(self.geo_verification_headers())
data = self._download_json(
'https://ups.youku.com/ups/get.json', video_id,
'Downloading JSON metadata',
query=basic_data_params, headers=headers)['data']
error = data.get('error') error = data.get('error')
if error: if error:
@@ -253,86 +191,87 @@ class YoukuIE(InfoExtractor):
raise ExtractorError(msg) raise ExtractorError(msg)
# get video title # get video title
title = data['video']['title'] video_data = data['video']
title = video_data['title']
# generate video_urls_dict formats = [{
video_urls_dict = self.construct_video_urls(data) 'url': stream['m3u8_url'],
'format_id': self.get_format_name(stream.get('stream_type')),
# construct info 'ext': 'mp4',
entries = [{ 'protocol': 'm3u8_native',
'id': '%s_part%d' % (video_id, i + 1), 'filesize': int(stream.get('size')),
'title': title, 'width': stream.get('width'),
'formats': [], 'height': stream.get('height'),
# some formats are not available for all parts, we have to detect } for stream in data['stream'] if stream.get('channel_type') != 'tail']
# which one has all self._sort_formats(formats)
} for i in range(max(len(v.get('segs')) for v in data['stream']))]
for stream in data['stream']:
if stream.get('channel_type') == 'tail':
continue
fm = stream.get('stream_type')
video_urls = video_urls_dict[fm]
for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
entry['formats'].append({
'url': video_url,
'format_id': self.get_format_name(fm),
'ext': self.parse_ext_l(fm),
'filesize': int(seg['size']),
'width': stream.get('width'),
'height': stream.get('height'),
})
return { return {
'_type': 'multi_video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'entries': entries, 'formats': formats,
'duration': video_data.get('seconds'),
'thumbnail': video_data.get('logo'),
'uploader': video_data.get('username'),
'uploader_id': str_or_none(video_data.get('userid')),
'uploader_url': data.get('uploader', {}).get('homepage'),
'tags': video_data.get('tags'),
} }
class YoukuShowIE(InfoExtractor): class YoukuShowIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html' _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
IE_NAME = 'youku:show' IE_NAME = 'youku:show'
_TEST = { _TEST = {
'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html', 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
'info_dict': { 'info_dict': {
'id': 'zc7c670be07ff11e48b3f', 'id': 'zc7c670be07ff11e48b3f',
'title': '花千骨 未删减版', 'title': '花千骨 未删减版',
'description': 'md5:578d4f2145ae3f9128d9d4d863312910', 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
}, },
'playlist_count': 50, 'playlist_count': 50,
} }
_PAGE_SIZE = 40 _PAGE_SIZE = 40
def _find_videos_in_page(self, webpage):
videos = re.findall(
r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
return [
self.url_result(video_url, YoukuIE.ie_key(), title)
for video_url, title in videos]
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id) webpage = self._download_webpage(url, show_id)
entries = self._find_videos_in_page(webpage) entries = []
page_config = self._parse_json(self._search_regex(
playlist_title = self._html_search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False) show_id, transform_source=js_to_json)
detail_div = get_element_by_attribute('class', 'detail', webpage) or '' for idx in itertools.count(0):
playlist_description = self._html_search_regex( if idx == 0:
r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>', playlist_data_url = 'http://list.youku.com/show/module'
detail_div, 'playlist description', fatal=False) query = {'id': page_config['showid'], 'tab': 'point'}
else:
for idx in itertools.count(1): playlist_data_url = 'http://list.youku.com/show/point'
episodes_page = self._download_webpage( query = {
'http://www.youku.com/show_episode/id_%s.html' % show_id, 'id': page_config['showid'],
show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)}, 'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
note='Downloading episodes page %d' % idx) }
new_entries = self._find_videos_in_page(episodes_page) query['callback'] = 'cb'
playlist_data = self._download_json(
playlist_data_url, show_id, query=query,
note='Downloading playlist data page %d' % (idx + 1),
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
video_urls = re.findall(
r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
playlist_data)
new_entries = [
self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
for video_url in video_urls]
entries.extend(new_entries) entries.extend(new_entries)
if len(new_entries) < self._PAGE_SIZE: if len(new_entries) < self._PAGE_SIZE:
break break
return self.playlist_result(entries, show_id, playlist_title, playlist_description) desc = self._html_search_meta('description', webpage, fatal=False)
playlist_title = desc.split(',')[0] if desc else None
detail_li = get_element_by_class('p-intro', webpage)
playlist_description = get_element_by_class(
'intro-more', detail_li) if detail_li else None
return self.playlist_result(
entries, show_id, playlist_title, playlist_description)

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request, sanitized_Request,
@@ -26,7 +27,7 @@ class YouPornIE(InfoExtractor):
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?', 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ask Dan And Jennifer', 'uploader': 'Ask Dan And Jennifer',
'upload_date': '20101221', 'upload_date': '20101217',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
@@ -45,7 +46,7 @@ class YouPornIE(InfoExtractor):
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Unknown', 'uploader': 'Unknown',
'upload_date': '20111125', 'upload_date': '20110418',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
@@ -68,28 +69,46 @@ class YouPornIE(InfoExtractor):
webpage = self._download_webpage(request, display_id) webpage = self._download_webpage(request, display_id)
title = self._search_regex( title = self._search_regex(
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1', [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'], r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
webpage, 'title', group='title') webpage, 'title', group='title',
default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True)
links = [] links = []
# Main source
definitions = self._parse_json(
self._search_regex(
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
'media definitions', default='[]'),
video_id, fatal=False)
if definitions:
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = definition.get('videoUrl')
if isinstance(video_url, compat_str) and video_url:
links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link)
# Fallback #2 (unavailable as at 22.06.2017)
sources = self._search_regex( sources = self._search_regex(
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None) r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
if sources: if sources:
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources): for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
links.append(link) links.append(link)
# Fallback #1 # Fallback #3 (unavailable as at 22.06.2017)
for _, link in re.findall( for _, link in re.findall(
r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage): r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
links.append(link) links.append(link)
# Fallback #2, this also contains extra low quality 180p format # Fallback #4, encrypted links (unavailable as at 22.06.2017)
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link)
# Fallback #3, encrypted links
for _, encrypted_link in re.findall( for _, encrypted_link in re.findall(
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage): r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8')) links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
@@ -124,7 +143,8 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex( upload_date = unified_strdate(self._html_search_regex(
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>', [r'Date\s+[Aa]dded:\s*<span>([^<]+)',
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)

View File

@@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sub_lang_list[sub_lang] = sub_formats sub_lang_list[sub_lang] = sub_formats
return sub_lang_list return sub_lang_list
def make_captions(sub_url, sub_langs):
parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
caption_qs = compat_parse_qs(parsed_sub_url.query)
captions = {}
for sub_lang in sub_langs:
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
caption_qs.update({
'tlang': [sub_lang],
'fmt': [ext],
})
sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
query=compat_urllib_parse_urlencode(caption_qs, True)))
sub_formats.append({
'url': sub_url,
'ext': ext,
})
captions[sub_lang] = sub_formats
return captions
# New captions format as of 22.06.2017
player_response = args.get('player_response')
if player_response and isinstance(player_response, compat_str):
player_response = self._parse_json(
player_response, video_id, fatal=False)
if player_response:
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
base_url = renderer['captionTracks'][0]['baseUrl']
sub_lang_list = []
for lang in renderer['translationLanguages']:
lang_code = lang.get('languageCode')
if lang_code:
sub_lang_list.append(lang_code)
return make_captions(base_url, sub_lang_list)
# Some videos don't provide ttsurl but rather caption_tracks and # Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA) # caption_translation_languages (e.g. 20LmZk1hakA)
# Does not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks'] caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages'] caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
parsed_caption_url = compat_urllib_parse_urlparse(caption_url) sub_lang_list = []
caption_qs = compat_parse_qs(parsed_caption_url.query)
sub_lang_list = {}
for lang in caption_translation_languages.split(','): for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang)) lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0] sub_lang = lang_qs.get('lc', [None])[0]
if not sub_lang: if sub_lang:
continue sub_lang_list.append(sub_lang)
sub_formats = [] return make_captions(caption_url, sub_lang_list)
for ext in self._SUBTITLE_FORMATS:
caption_qs.update({
'tlang': [sub_lang],
'fmt': [ext],
})
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
query=compat_urllib_parse_urlencode(caption_qs, True)))
sub_formats.append({
'url': sub_url,
'ext': ext,
})
sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# An extractor error can be raise by the download process if there are # An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles # no automatic captions but there are subtitles
except (KeyError, ExtractorError): except (KeyError, IndexError, ExtractorError):
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
@@ -1353,10 +1373,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
start_time = parse_duration(time_point) start_time = parse_duration(time_point)
if start_time is None: if start_time is None:
continue continue
if start_time > duration:
break
end_time = (duration if next_num == len(chapter_lines) end_time = (duration if next_num == len(chapter_lines)
else parse_duration(chapter_lines[next_num][1])) else parse_duration(chapter_lines[next_num][1]))
if end_time is None: if end_time is None:
continue continue
if end_time > duration:
end_time = duration
if start_time > end_time:
break
chapter_title = re.sub( chapter_title = re.sub(
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-') r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
chapter_title = re.sub(r'\s+', ' ', chapter_title) chapter_title = re.sub(r'\s+', ' ', chapter_title)
@@ -1435,6 +1461,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
age_gate = False age_gate = False
video_info = None video_info = None
sts = None
# Try looking directly into the video webpage # Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config: if ytplayer_config:
@@ -1451,6 +1478,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1: if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True is_live = True
sts = ytplayer_config.get('sts')
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd # We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags # URL that points to a DASH manifest with possibly different itag set (some itags
@@ -1459,17 +1487,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# The general idea is to take a union of itags of both DASH manifests (for example # The general idea is to take a union of itags of both DASH manifests (for example
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093) # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id) self.report_video_info_webpage_download(video_id)
for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']: for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
video_info_url = ( query = {
'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 'video_id': video_id,
% (proto, video_id, el_type)) 'ps': 'default',
'eurl': '',
'gl': 'US',
'hl': 'en',
}
if el:
query['el'] = el
if sts:
query['sts'] = sts
video_info_webpage = self._download_webpage( video_info_webpage = self._download_webpage(
video_info_url, '%s://www.youtube.com/get_video_info' % proto,
video_id, note=False, video_id, note=False,
errnote='unable to download video info webpage') errnote='unable to download video info webpage',
fatal=False, query=query)
if not video_info_webpage:
continue
get_video_info = compat_parse_qs(video_info_webpage) get_video_info = compat_parse_qs(video_info_webpage)
if get_video_info.get('use_cipher_signature') != ['True']: add_dash_mpd(get_video_info)
add_dash_mpd(get_video_info)
if not video_info: if not video_info:
video_info = get_video_info video_info = get_video_info
if 'token' in get_video_info: if 'token' in get_video_info:
@@ -1703,12 +1741,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
format_id = url_data['itag'][0] format_id = url_data['itag'][0]
url = url_data['url'][0] url = url_data['url'][0]
if 'sig' in url_data: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json = self._search_regex( jsplayer_url_json = self._search_regex(
ASSETS_RE, ASSETS_RE,
embed_webpage if age_gate else video_webpage, embed_webpage if age_gate else video_webpage,
@@ -1729,6 +1763,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_webpage, 'age gate player URL') video_webpage, 'age gate player URL')
player_url = json.loads(player_url_json) player_url = json.loads(player_url_json)
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'): if self._downloader.params.get('verbose'):
if player_url is None: if player_url is None:
player_version = 'unknown' player_version = 'unknown'

View File

@@ -214,15 +214,18 @@ class JSInterpreter(object):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {} obj = {}
obj_m = re.search( obj_m = re.search(
(r'(?<!this\.)%s\s*=\s*\{' % re.escape(objname)) + r'''(?x)
r'\s*(?P<fields>(%s\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' + (?<!this\.)%s\s*=\s*{\s*
r'\}\s*;' % _FUNC_NAME_RE, (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
self.code) self.code)
fields = obj_m.group('fields') fields = obj_m.group('fields')
# Currently, it only supports function definitions # Currently, it only supports function definitions
fields_m = re.finditer( fields_m = re.finditer(
r'(?P<key>%s)\s*:\s*function' r'''(?x)
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}' % _FUNC_NAME_RE, (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
''' % _FUNC_NAME_RE,
fields) fields)
for f in fields_m: for f in fields_m:
argnames = f.group('args').split(',') argnames = f.group('args').split(',')

View File

@@ -310,7 +310,7 @@ def parseOpts(overrideArguments=None):
metavar='FILTER', dest='match_filter', default=None, metavar='FILTER', dest='match_filter', default=None,
help=( help=(
'Generic video filter. ' 'Generic video filter. '
'Specify any key (see help for -o for a list of available keys) to ' 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
'match if the key is present, ' 'match if the key is present, '
'!key to check if the key is not present, ' '!key to check if the key is not present, '
'key > NUMBER (like "comment_count > 12", also works with ' 'key > NUMBER (like "comment_count > 12", also works with '
@@ -618,7 +618,7 @@ def parseOpts(overrideArguments=None):
verbosity.add_option( verbosity.add_option(
'-j', '--dump-json', '-j', '--dump-json',
action='store_true', dest='dumpjson', default=False, action='store_true', dest='dumpjson', default=False,
help='Simulate, quiet but print JSON information. See --output for a description of available keys.') help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
verbosity.add_option( verbosity.add_option(
'-J', '--dump-single-json', '-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False, action='store_true', dest='dump_single_json', default=False,

View File

@@ -4,7 +4,10 @@ import subprocess
from .common import PostProcessor from .common import PostProcessor
from ..compat import compat_shlex_quote from ..compat import compat_shlex_quote
from ..utils import PostProcessingError from ..utils import (
encodeArgument,
PostProcessingError,
)
class ExecAfterDownloadPP(PostProcessor): class ExecAfterDownloadPP(PostProcessor):
@@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
self._downloader.to_screen('[exec] Executing command: %s' % cmd) self._downloader.to_screen('[exec] Executing command: %s' % cmd)
retCode = subprocess.call(cmd, shell=True) retCode = subprocess.call(encodeArgument(cmd), shell=True)
if retCode != 0: if retCode != 0:
raise PostProcessingError( raise PostProcessingError(
'Command returned error code %d' % retCode) 'Command returned error code %d' % retCode)

View File

@@ -444,7 +444,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
chapters = info.get('chapters', []) chapters = info.get('chapters', [])
if chapters: if chapters:
metadata_filename = encodeFilename(replace_extension(filename, 'meta')) metadata_filename = replace_extension(filename, 'meta')
with io.open(metadata_filename, 'wt', encoding='utf-8') as f: with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
def ffmpeg_escape(text): def ffmpeg_escape(text):
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text) return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)

View File

@@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
title = info['title'] title = info['title']
match = re.match(self._titleregex, title) match = re.match(self._titleregex, title)
if match is None: if match is None:
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) self._downloader.to_screen(
'[fromtitle] Could not interpret title of video as "%s"'
% self._titleformat)
return [], info return [], info
for attribute, value in match.groupdict().items(): for attribute, value in match.groupdict().items():
value = match.group(attribute)
info[attribute] = value info[attribute] = value
self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) self._downloader.to_screen(
'[fromtitle] parsed %s: %s'
% (attribute, value if value is not None else 'NA'))
return [], info return [], info

View File

@@ -22,7 +22,6 @@ import locale
import math import math
import operator import operator
import os import os
import pipes
import platform import platform
import random import random
import re import re
@@ -36,6 +35,7 @@ import xml.etree.ElementTree
import zlib import zlib
from .compat import ( from .compat import (
compat_HTMLParseError,
compat_HTMLParser, compat_HTMLParser,
compat_basestring, compat_basestring,
compat_chr, compat_chr,
@@ -409,8 +409,12 @@ def extract_attributes(html_element):
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
""" """
parser = HTMLAttributeParser() parser = HTMLAttributeParser()
parser.feed(html_element) try:
parser.close() parser.feed(html_element)
parser.close()
# Older Python may throw HTMLParseError in case of malformed HTML
except compat_HTMLParseError:
pass
return parser.attrs return parser.attrs
@@ -932,14 +936,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
except zlib.error: except zlib.error:
return zlib.decompress(data) return zlib.decompress(data)
@staticmethod
def addinfourl_wrapper(stream, headers, url, code):
if hasattr(compat_urllib_request.addinfourl, 'getcode'):
return compat_urllib_request.addinfourl(stream, headers, url, code)
ret = compat_urllib_request.addinfourl(stream, headers, url)
ret.code = code
return ret
def http_request(self, req): def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# always respected by websites, some tend to give out URLs with non percent-encoded # always respected by websites, some tend to give out URLs with non percent-encoded
@@ -991,13 +987,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
break break
else: else:
raise original_ioerror raise original_ioerror
resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code) resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg resp.msg = old_resp.msg
del resp.headers['Content-encoding'] del resp.headers['Content-encoding']
# deflate # deflate
if resp.headers.get('Content-encoding', '') == 'deflate': if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read())) gz = io.BytesIO(self.deflate(resp.read()))
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg resp.msg = old_resp.msg
del resp.headers['Content-encoding'] del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
@@ -1187,7 +1183,7 @@ def unified_timestamp(date_str, day_first=True):
if date_str is None: if date_str is None:
return None return None
date_str = date_str.replace(',', ' ') date_str = re.sub(r'[,|]', '', date_str)
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str) timezone, date_str = extract_timezone(date_str)
@@ -1538,7 +1534,7 @@ def shell_quote(args):
if isinstance(a, bytes): if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename' # We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding) a = a.decode(encoding)
quoted_args.append(pipes.quote(a)) quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args) return ' '.join(quoted_args)
@@ -2211,7 +2207,12 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub( return re.sub(
r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) r'''(?sx)^
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
(?:\s*&&\s*(?P=func_name))?
\s*\(\s*(?P<callback_data>.*)\);?
\s*?(?://[^\n]*)*$''',
r'\g<callback_data>', code)
def js_to_json(code): def js_to_json(code):

Some files were not shown because too many files have changed in this diff Show More