Compare commits
189 Commits
2017.05.18
...
2017.07.02
Author | SHA1 | Date | |
---|---|---|---|
![]() |
b6c9fe4162 | ||
![]() |
4d9ba27bba | ||
![]() |
50ae3f646e | ||
![]() |
99a7e76240 | ||
![]() |
a3a6d01a96 | ||
![]() |
02d61a65e2 | ||
![]() |
9b35297be1 | ||
![]() |
4917478803 | ||
![]() |
54faac2235 | ||
![]() |
c69701c6ab | ||
![]() |
d4f8ce6e91 | ||
![]() |
b311b0ead2 | ||
![]() |
72d256c434 | ||
![]() |
b2ed954fc6 | ||
![]() |
a919ca0ad6 | ||
![]() |
88d6b7c2bd | ||
![]() |
fd1c5fba6b | ||
![]() |
0646e34c7d | ||
![]() |
bf2dc9cc6e | ||
![]() |
f1c051009b | ||
![]() |
33ffb645a6 | ||
![]() |
35544690e4 | ||
![]() |
136503e302 | ||
![]() |
4a87de72df | ||
![]() |
a7ce8f16c4 | ||
![]() |
a5aea53fc8 | ||
![]() |
0c7a631b61 | ||
![]() |
fd9ee4de8c | ||
![]() |
5744cf6c03 | ||
![]() |
9c48b5a193 | ||
![]() |
449c665776 | ||
![]() |
23aec3d623 | ||
![]() |
27449ad894 | ||
![]() |
bd65f18153 | ||
![]() |
73af5cc817 | ||
![]() |
b5f523ed62 | ||
![]() |
4f4dd8d797 | ||
![]() |
4cb18ab1b9 | ||
![]() |
ac7409eec5 | ||
![]() |
170719414d | ||
![]() |
38dad4737f | ||
![]() |
ddbb4c5c3e | ||
![]() |
fa3ea7223a | ||
![]() |
0f4a5a73e7 | ||
![]() |
18166bb8e8 | ||
![]() |
d4893e764b | ||
![]() |
97b6e30113 | ||
![]() |
9be9ec5980 | ||
![]() |
048b55804d | ||
![]() |
6ce79d7ac0 | ||
![]() |
1641ca402d | ||
![]() |
85cbcede5b | ||
![]() |
a1de83e5f0 | ||
![]() |
fee00b3884 | ||
![]() |
2d2132ac6e | ||
![]() |
cc2ffe5afe | ||
![]() |
560050669b | ||
![]() |
eaa006d1bd | ||
![]() |
a6f29820c6 | ||
![]() |
1433734c35 | ||
![]() |
aefce8e6dc | ||
![]() |
8b6ac49ecc | ||
![]() |
b08e235f09 | ||
![]() |
be80986ed9 | ||
![]() |
473e87064b | ||
![]() |
4f90d2aeac | ||
![]() |
b230fefc3c | ||
![]() |
96a2daa1ee | ||
![]() |
0ea6efbb7a | ||
![]() |
6a9cb29509 | ||
![]() |
ca27037171 | ||
![]() |
0bf4b71b75 | ||
![]() |
5215f45327 | ||
![]() |
0a268c6e11 | ||
![]() |
7dd5415cd0 | ||
![]() |
b5dc33daa9 | ||
![]() |
97fa1f8dc4 | ||
![]() |
b081f53b08 | ||
![]() |
cb1e6d8985 | ||
![]() |
9932ac5c58 | ||
![]() |
bf87c36c93 | ||
![]() |
b4a3d461e4 | ||
![]() |
72b409559c | ||
![]() |
534863e057 | ||
![]() |
16bc958287 | ||
![]() |
624bd0104c | ||
![]() |
28a4d6cce8 | ||
![]() |
2ae2ffda5e | ||
![]() |
70e7967202 | ||
![]() |
6e999fbc12 | ||
![]() |
7409af9eb3 | ||
![]() |
4e3637034c | ||
![]() |
1afd0b0da7 | ||
![]() |
7515830422 | ||
![]() |
f5521ea209 | ||
![]() |
34646967ba | ||
![]() |
e4d2e76d8e | ||
![]() |
87f5646937 | ||
![]() |
cc69a3de1b | ||
![]() |
15aeeb1188 | ||
![]() |
1693bebe4d | ||
![]() |
4244a13a1d | ||
![]() |
931adf8cc1 | ||
![]() |
c996943418 | ||
![]() |
76e6378358 | ||
![]() |
a355b57f58 | ||
![]() |
1508da30c2 | ||
![]() |
eb703e5380 | ||
![]() |
0a3924e746 | ||
![]() |
e1db730d86 | ||
![]() |
537191826f | ||
![]() |
130880ba48 | ||
![]() |
f8ba3fda4d | ||
![]() |
e1b90cc3db | ||
![]() |
43e6579558 | ||
![]() |
6d923aab35 | ||
![]() |
62bafabc09 | ||
![]() |
9edcdac90c | ||
![]() |
cd138d8bd4 | ||
![]() |
cd750b731c | ||
![]() |
4bede0d8f5 | ||
![]() |
f129c3f349 | ||
![]() |
39d4c1be4d | ||
![]() |
f7a747ce59 | ||
![]() |
4489d41816 | ||
![]() |
87b5184a0d | ||
![]() |
c56ad5c975 | ||
![]() |
6b7ce85cdc | ||
![]() |
d10d0e3cf8 | ||
![]() |
941ea38ef5 | ||
![]() |
99bea8d298 | ||
![]() |
a49eccdfa7 | ||
![]() |
a846173d93 | ||
![]() |
78e210dea5 | ||
![]() |
8555204274 | ||
![]() |
164fcbfeb7 | ||
![]() |
bc22df29c4 | ||
![]() |
7e688d2f6a | ||
![]() |
5a6d1da442 | ||
![]() |
703751add4 | ||
![]() |
4050be78e5 | ||
![]() |
4d9fc40100 | ||
![]() |
765522345f | ||
![]() |
6bceb36b99 | ||
![]() |
1e0d65f0bd | ||
![]() |
03327bc9a6 | ||
![]() |
b407d8533d | ||
![]() |
20e2c9de04 | ||
![]() |
d16c0121b9 | ||
![]() |
7f4c3a7439 | ||
![]() |
28dbde9cc3 | ||
![]() |
cc304ce588 | ||
![]() |
98a0618941 | ||
![]() |
fd545fc6d1 | ||
![]() |
97067db2ae | ||
![]() |
c130f0a37b | ||
![]() |
d3d4ba7f24 | ||
![]() |
5552c9eb0f | ||
![]() |
59ed87cbd9 | ||
![]() |
b7f8749304 | ||
![]() |
5192ee17e7 | ||
![]() |
e834f04400 | ||
![]() |
884d09f330 | ||
![]() |
9e35298f97 | ||
![]() |
0551f1b07b | ||
![]() |
de53511201 | ||
![]() |
2570e85167 | ||
![]() |
9dc5ab041f | ||
![]() |
01f3c8e290 | ||
![]() |
06c1b3ce07 | ||
![]() |
0b75e42dfb | ||
![]() |
a609e61a90 | ||
![]() |
afdb387cd8 | ||
![]() |
dc4e4f90a2 | ||
![]() |
fdc20f87a6 | ||
![]() |
35a2d221a3 | ||
![]() |
daa4e9ff90 | ||
![]() |
2ca29f1aaf | ||
![]() |
77d682da9d | ||
![]() |
8fffac6927 | ||
![]() |
5f6fbcea08 | ||
![]() |
00cb0faca8 | ||
![]() |
bfdf6fcc66 | ||
![]() |
bcaa1dd060 | ||
![]() |
0e2d626ddd | ||
![]() |
9221d5d7a8 | ||
![]() |
96820c1c6b | ||
![]() |
e095109da1 | ||
![]() |
d68afc5bc9 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.18**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.05.18
|
[debug] youtube-dl version 2017.07.02
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
8
AUTHORS
8
AUTHORS
@@ -212,3 +212,11 @@ Xiao Di Guan
|
|||||||
Thomas Winant
|
Thomas Winant
|
||||||
Daniel Twardowski
|
Daniel Twardowski
|
||||||
Jeremie Jarosh
|
Jeremie Jarosh
|
||||||
|
Gerard Rovira
|
||||||
|
Marvin Ewald
|
||||||
|
Frédéric Bournival
|
||||||
|
Timendum
|
||||||
|
gritstub
|
||||||
|
Adam Voss
|
||||||
|
Mike Fährmann
|
||||||
|
Jan Kundrát
|
||||||
|
194
ChangeLog
194
ChangeLog
@@ -1,3 +1,195 @@
|
|||||||
|
version 2017.07.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve _json_ld
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [thisoldhouse] Add more fallbacks for video id
|
||||||
|
* [thisoldhouse] Fix video id extraction (#13540, #13541)
|
||||||
|
* [xfileshare] Extend format regular expression (#13536)
|
||||||
|
* [ted] Fix extraction (#13535)
|
||||||
|
+ [tastytrade] Add support for tastytrade.com (#13521)
|
||||||
|
* [dplayit] Relax video id regular expression (#13524)
|
||||||
|
+ [generic] Extract more generic metadata (#13527)
|
||||||
|
+ [bbccouk] Capture and output error message (#13501, #13518)
|
||||||
|
* [cbsnews] Relax video info regular expression (#13284, #13503)
|
||||||
|
+ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
|
||||||
|
* [soundcloud] Switch to https for API requests (#13502)
|
||||||
|
* [pandatv] Switch to https for API and download URLs
|
||||||
|
+ [pandatv] Add support for https URLs (#13491)
|
||||||
|
+ [niconico] Support sp subdomain (#13494)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.25
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
|
||||||
|
* [YoutubeDL] Skip malformed formats for better extraction robustness
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [wsj] Add support for barrons.com (#13470)
|
||||||
|
+ [ign] Add another video id pattern (#13328)
|
||||||
|
+ [raiplay:live] Add support for live streams (#13414)
|
||||||
|
+ [redbulltv] Add support for live videos and segments (#13486)
|
||||||
|
+ [onetpl] Add support for videos embedded via pulsembed (#13482)
|
||||||
|
* [ooyala] Make more robust
|
||||||
|
* [ooyala] Skip empty format URLs (#13471, #13476)
|
||||||
|
* [hgtv.com:show] Fix typo
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [adobepass] Fix extraction on older python 2.6
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Adapt to new automatic captions rendition (#13467)
|
||||||
|
* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
|
||||||
|
* [drtuber] Fix formats extraction (#12058)
|
||||||
|
* [youporn] Fix upload date extraction
|
||||||
|
* [youporn] Improve formats extraction
|
||||||
|
* [youporn] Fix title extraction (#13456)
|
||||||
|
* [googledrive] Fix formats sorting (#13443)
|
||||||
|
* [watchindianporn] Fix extraction (#13411, #13415)
|
||||||
|
+ [vimeo] Add fallback mp4 extension for original format
|
||||||
|
+ [ruv] Add support for ruv.is (#13396)
|
||||||
|
* [viu] Fix extraction on older python 2.6
|
||||||
|
* [pandora.tv] Fix upload_date extraction (#12846)
|
||||||
|
+ [asiancrush] Add support for asiancrush.com (#13420)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/common] Use utils.shell_quote for debug command line
|
||||||
|
* [utils] Use compat_shlex_quote in shell_quote
|
||||||
|
* [postprocessor/execafterdownload] Encode command line (#13407)
|
||||||
|
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
|
||||||
|
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
|
||||||
|
in --metadata-from-title (#13408)
|
||||||
|
* [extractor/common] Fix json dumping with --geo-bypass
|
||||||
|
+ [extractor/common] Improve jwplayer subtitles extraction
|
||||||
|
+ [extractor/common] Improve jwplayer formats extraction (#13379)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [polskieradio] Fix extraction (#13392)
|
||||||
|
+ [xfileshare] Add support for fastvideo.me (#13385)
|
||||||
|
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
|
||||||
|
* [4tube] Fix extraction (#13381, #13382)
|
||||||
|
+ [disney] Add support for disneychannel.de (#13383)
|
||||||
|
* [npo] Improve URL regular expression (#13376)
|
||||||
|
+ [corus] Add support for showcase.ca
|
||||||
|
+ [corus] Add support for history.ca (#13359)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.12
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
|
||||||
|
+ [compat] Introduce compat_HTMLParseError
|
||||||
|
* [utils] Improve unified_timestamp
|
||||||
|
* [extractor/generic] Ensure format id is unicode string
|
||||||
|
* [extractor/common] Return unicode string from _match_id
|
||||||
|
+ [YoutubeDL] Sanitize more fields (#13313)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [xfileshare] Add support for rapidvideo.tv (#13348)
|
||||||
|
* [xfileshare] Modernize and pass Referer
|
||||||
|
+ [rutv] Add support for testplayer.vgtrk.com (#13347)
|
||||||
|
+ [newgrounds] Extract more metadata (#13232)
|
||||||
|
+ [newgrounds:playlist] Add support for playlists (#10611)
|
||||||
|
* [newgrounds] Improve formats and uploader extraction (#13346)
|
||||||
|
* [msn] Fix formats extraction
|
||||||
|
* [turbo] Ensure format id is string
|
||||||
|
* [sexu] Ensure height is int
|
||||||
|
* [jove] Ensure comment count is int
|
||||||
|
* [golem] Ensure format id is string
|
||||||
|
* [gfycat] Ensure filesize is int
|
||||||
|
* [foxgay] Ensure height is int
|
||||||
|
* [flickr] Ensure format id is string
|
||||||
|
* [sohu] Fix numeric fields
|
||||||
|
* [safari] Improve authentication detection (#13319)
|
||||||
|
* [liveleak] Ensure height is int (#13313)
|
||||||
|
* [streamango] Make title optional (#13292)
|
||||||
|
* [rtlnl] Improve URL regular expression (#13295)
|
||||||
|
* [tvplayer] Fix extraction (#13291)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.05
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
|
||||||
|
* [pornhub:playlist] Fix extraction (#13281)
|
||||||
|
- [godtv] Remove extractor (#13175)
|
||||||
|
* [safari] Fix typo (#13252)
|
||||||
|
* [youtube] Improve chapters extraction (#13247)
|
||||||
|
* [1tv] Lower preference for HTTP formats (#13246)
|
||||||
|
* [francetv] Relax URL regular expression
|
||||||
|
* [drbonanza] Fix extraction (#13231)
|
||||||
|
* [packtpub] Fix authentication (#13240)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.29
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
|
||||||
|
(#13211)
|
||||||
|
* [xhamster] Fix uploader and like/dislike count extraction (#13216)
|
||||||
|
+ [xhamster] Extract categories (#11728)
|
||||||
|
+ [abcnews] Add support for embed URLs (#12851)
|
||||||
|
* [gaskrank] Fix extraction (#12493)
|
||||||
|
* [medialaan] Fix videos with missing videoUrl (#12774)
|
||||||
|
* [dvtv] Fix playlist support
|
||||||
|
+ [dvtv] Add support for DASH and HLS formats (#3063)
|
||||||
|
+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
|
||||||
|
* [cbsinteractive] Relax URL regular expression (#13213)
|
||||||
|
* [adn] Fix formats extraction
|
||||||
|
+ [youku] Extract more metadata (#10433)
|
||||||
|
* [cbsnews] Fix extraction (#13205)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.26
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] strip_jsonp() can recognize more patterns
|
||||||
|
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
|
||||||
|
+ [bbc] Add support for authentication
|
||||||
|
* [tudou] Merge into youku extractor (#12214)
|
||||||
|
* [youku:show] Fix extraction
|
||||||
|
* [youku] Fix extraction (#13191)
|
||||||
|
* [udemy] Fix extraction for outputs' format entries without URL (#13192)
|
||||||
|
* [vimeo] Fix formats' sorting (#13189)
|
||||||
|
* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
|
||||||
|
+ [adobepass] Add support for Bright House Networks (#13149)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [streamcz] Add support for subtitles (#13174)
|
||||||
|
* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
|
||||||
|
* [toggle] Relax URL regular expression (#13172)
|
||||||
|
* [toypics] Fix extraction (#13077)
|
||||||
|
* [njpwworld] Fix extraction (#13162, #13169)
|
||||||
|
+ [hitbox] Add support for smashcast.tv (#13154)
|
||||||
|
* [mitele] Update app key regular expression (#13158)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.18.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [jsinterp] Fix typo and cleanup regular expressions (#13134)
|
||||||
|
|
||||||
|
|
||||||
version 2017.05.18
|
version 2017.05.18
|
||||||
|
|
||||||
Core
|
Core
|
||||||
@@ -24,7 +216,7 @@ Core
|
|||||||
+ [postprocessor/metadatafromtitle] Add support regular expression syntax for
|
+ [postprocessor/metadatafromtitle] Add support regular expression syntax for
|
||||||
--metadata-from-title (#13065)
|
--metadata-from-title (#13065)
|
||||||
|
|
||||||
Extractor
|
Extractors
|
||||||
+ [mediaset] Add support for video.mediaset.it (#12708, #12964)
|
+ [mediaset] Add support for video.mediaset.it (#12708, #12964)
|
||||||
* [orf:radio] Fix extraction (#11643, #12926)
|
* [orf:radio] Fix extraction (#11643, #12926)
|
||||||
* [aljazeera] Extend URL regular expression (#13053)
|
* [aljazeera] Extend URL regular expression (#13053)
|
||||||
|
2
Makefile
2
Makefile
@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
--exclude '*.pyc' \
|
--exclude '*.pyc' \
|
||||||
--exclude '*.pyo' \
|
--exclude '*.pyo' \
|
||||||
--exclude '*~' \
|
--exclude '*~' \
|
||||||
--exclude '__pycache' \
|
--exclude '__pycache__' \
|
||||||
--exclude '.git' \
|
--exclude '.git' \
|
||||||
--exclude 'testdata' \
|
--exclude 'testdata' \
|
||||||
--exclude 'docs/_build' \
|
--exclude 'docs/_build' \
|
||||||
|
39
README.md
39
README.md
@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--max-views COUNT Do not download any videos with more than
|
--max-views COUNT Do not download any videos with more than
|
||||||
COUNT views
|
COUNT views
|
||||||
--match-filter FILTER Generic video filter. Specify any key (see
|
--match-filter FILTER Generic video filter. Specify any key (see
|
||||||
help for -o for a list of available keys)
|
the "OUTPUT TEMPLATE" for a list of
|
||||||
to match if the key is present, !key to
|
available keys) to match if the key is
|
||||||
check if the key is not present, key >
|
present, !key to check if the key is not
|
||||||
NUMBER (like "comment_count > 12", also
|
present, key > NUMBER (like "comment_count
|
||||||
works with >=, <, <=, !=, =) to compare
|
> 12", also works with >=, <, <=, !=, =) to
|
||||||
against a number, key = 'LITERAL' (like
|
compare against a number, key = 'LITERAL'
|
||||||
"uploader = 'Mike Smith'", also works with
|
(like "uploader = 'Mike Smith'", also works
|
||||||
!=) to match against a string literal and &
|
with !=) to match against a string literal
|
||||||
to require multiple matches. Values which
|
and & to require multiple matches. Values
|
||||||
are not known are excluded unless you put a
|
which are not known are excluded unless you
|
||||||
question mark (?) after the operator. For
|
put a question mark (?) after the operator.
|
||||||
example, to only match videos that have
|
For example, to only match videos that have
|
||||||
been liked more than 100 times and disliked
|
been liked more than 100 times and disliked
|
||||||
less than 50 times (or the dislike
|
less than 50 times (or the dislike
|
||||||
functionality is not available at the given
|
functionality is not available at the given
|
||||||
@@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--get-filename Simulate, quiet but print output filename
|
--get-filename Simulate, quiet but print output filename
|
||||||
--get-format Simulate, quiet but print output format
|
--get-format Simulate, quiet but print output format
|
||||||
-j, --dump-json Simulate, quiet but print JSON information.
|
-j, --dump-json Simulate, quiet but print JSON information.
|
||||||
See --output for a description of available
|
See the "OUTPUT TEMPLATE" for a description
|
||||||
keys.
|
of available keys.
|
||||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||||
for each command-line argument. If the URL
|
for each command-line argument. If the URL
|
||||||
refers to a playlist, dump the whole
|
refers to a playlist, dump the whole
|
||||||
@@ -474,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
|
|||||||
```
|
```
|
||||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||||
|
|
||||||
On Windows you may also need to setup the `%HOME%` environment variable manually.
|
On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
|
||||||
|
```
|
||||||
|
set HOME=%USERPROFILE%
|
||||||
|
```
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
@@ -532,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `playlist_id` (string): Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
- `playlist_title` (string): Playlist title
|
- `playlist_title` (string): Playlist title
|
||||||
|
|
||||||
|
|
||||||
Available for the video that belongs to some logical chapter or section:
|
Available for the video that belongs to some logical chapter or section:
|
||||||
|
|
||||||
- `chapter` (string): Name or title of the chapter the video belongs to
|
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||||
|
|
||||||
Available for the video that is an episode of some series or programme:
|
Available for the video that is an episode of some series or programme:
|
||||||
|
|
||||||
- `series` (string): Title of the series or programme the video episode belongs to
|
- `series` (string): Title of the series or programme the video episode belongs to
|
||||||
- `season` (string): Title of the season the video episode belongs to
|
- `season` (string): Title of the season the video episode belongs to
|
||||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||||
@@ -548,6 +552,7 @@ Available for the video that is an episode of some series or programme:
|
|||||||
- `episode_id` (string): Id of the video episode
|
- `episode_id` (string): Id of the video episode
|
||||||
|
|
||||||
Available for the media that is a track or a part of a music album:
|
Available for the media that is a track or a part of a music album:
|
||||||
|
|
||||||
- `track` (string): Title of the track
|
- `track` (string): Title of the track
|
||||||
- `track_number` (numeric): Number of the track within an album or a disc
|
- `track_number` (numeric): Number of the track within an album or a disc
|
||||||
- `track_id` (string): Id of the track
|
- `track_id` (string): Id of the track
|
||||||
@@ -649,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
|||||||
- `acodec`: Name of the audio codec in use
|
- `acodec`: Name of the audio codec in use
|
||||||
- `vcodec`: Name of the video codec in use
|
- `vcodec`: Name of the video codec in use
|
||||||
- `container`: Name of the container format
|
- `container`: Name of the container format
|
||||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||||
- `format_id`: A short description of the format
|
- `format_id`: A short description of the format
|
||||||
|
|
||||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||||
|
@@ -8,7 +8,7 @@ import re
|
|||||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||||
|
|
||||||
PREFIX = '''%YOUTUBE-DL(1)
|
PREFIX = r'''%YOUTUBE-DL(1)
|
||||||
|
|
||||||
# NAME
|
# NAME
|
||||||
|
|
||||||
|
@@ -67,6 +67,8 @@
|
|||||||
- **arte.tv:info**
|
- **arte.tv:info**
|
||||||
- **arte.tv:magazine**
|
- **arte.tv:magazine**
|
||||||
- **arte.tv:playlist**
|
- **arte.tv:playlist**
|
||||||
|
- **AsianCrush**
|
||||||
|
- **AsianCrushPlaylist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
- **ATTTechChannel**
|
- **ATTTechChannel**
|
||||||
- **ATVAt**
|
- **ATVAt**
|
||||||
@@ -87,13 +89,13 @@
|
|||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
|
- **Bandcamp:weekly**
|
||||||
- **bangumi.bilibili.com**: BiliBili番剧
|
- **bangumi.bilibili.com**: BiliBili番剧
|
||||||
- **bbc**: BBC
|
- **bbc**: BBC
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
- **bbc.co.uk:article**: BBC articles
|
- **bbc.co.uk:article**: BBC articles
|
||||||
- **bbc.co.uk:iplayer:playlist**
|
- **bbc.co.uk:iplayer:playlist**
|
||||||
- **bbc.co.uk:playlist**
|
- **bbc.co.uk:playlist**
|
||||||
- **Beam:live**
|
|
||||||
- **Beatport**
|
- **Beatport**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
@@ -311,7 +313,6 @@
|
|||||||
- **Go**
|
- **Go**
|
||||||
- **Go90**
|
- **Go90**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GodTV**
|
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
@@ -453,6 +454,8 @@
|
|||||||
- **mixcloud:playlist**
|
- **mixcloud:playlist**
|
||||||
- **mixcloud:stream**
|
- **mixcloud:stream**
|
||||||
- **mixcloud:user**
|
- **mixcloud:user**
|
||||||
|
- **Mixer:live**
|
||||||
|
- **Mixer:vod**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
- **Mnet**
|
- **Mnet**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
@@ -511,6 +514,7 @@
|
|||||||
- **netease:song**: 网易云音乐
|
- **netease:song**: 网易云音乐
|
||||||
- **Netzkino**
|
- **Netzkino**
|
||||||
- **Newgrounds**
|
- **Newgrounds**
|
||||||
|
- **NewgroundsPlaylist**
|
||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**: 蘋果日報
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
@@ -640,6 +644,7 @@
|
|||||||
- **RadioJavan**
|
- **RadioJavan**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
- **RaiPlay**
|
- **RaiPlay**
|
||||||
|
- **RaiPlayLive**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
- **RDS**: RDS.ca
|
- **RDS**: RDS.ca
|
||||||
- **RedBullTV**
|
- **RedBullTV**
|
||||||
@@ -684,6 +689,7 @@
|
|||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
- **Ruutu**
|
- **Ruutu**
|
||||||
|
- **Ruv**
|
||||||
- **safari**: safaribooksonline.com online video
|
- **safari**: safaribooksonline.com online video
|
||||||
- **safari:api**
|
- **safari:api**
|
||||||
- **safari:course**: safaribooksonline.com online courses
|
- **safari:course**: safaribooksonline.com online courses
|
||||||
@@ -762,6 +768,7 @@
|
|||||||
- **Tagesschau**
|
- **Tagesschau**
|
||||||
- **tagesschau:player**
|
- **tagesschau:player**
|
||||||
- **Tass**
|
- **Tass**
|
||||||
|
- **TastyTrade**
|
||||||
- **TBS**
|
- **TBS**
|
||||||
- **TDSLifeway**
|
- **TDSLifeway**
|
||||||
- **teachertube**: teachertube.com videos
|
- **teachertube**: teachertube.com videos
|
||||||
@@ -803,16 +810,13 @@
|
|||||||
- **ToonGoggles**
|
- **ToonGoggles**
|
||||||
- **Tosh**: Tosh.0
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics video
|
||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
- **TruTV**
|
- **TruTV**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
- **tudou**
|
|
||||||
- **tudou:album**
|
|
||||||
- **tudou:playlist**
|
|
||||||
- **Tumblr**
|
- **Tumblr**
|
||||||
- **tunein:clip**
|
- **tunein:clip**
|
||||||
- **tunein:program**
|
- **tunein:program**
|
||||||
@@ -976,7 +980,7 @@
|
|||||||
- **WSJArticle**
|
- **WSJArticle**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
|
@@ -340,6 +340,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||||
|
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||||
|
|
||||||
def test_determine_ext(self):
|
def test_determine_ext(self):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||||
@@ -678,6 +679,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
d = json.loads(stripped)
|
d = json.loads(stripped)
|
||||||
self.assertEqual(d, {'status': 'success'})
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
|
stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
|
stripped = strip_jsonp('window.cb && cb({"status": "success"});')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
def test_uppercase_escape(self):
|
def test_uppercase_escape(self):
|
||||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||||
@@ -907,6 +916,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
supports_outside_bmp = False
|
supports_outside_bmp = False
|
||||||
if supports_outside_bmp:
|
if supports_outside_bmp:
|
||||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||||
|
# Malformed HTML should not break attributes extraction on older Python
|
||||||
|
self.assertEqual(extract_attributes('<mal"formed/>'), {})
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
|
@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
|
|||||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||||
}]
|
}]
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
# https://www.youtube.com/watch?v=xZW70zEasOk
|
||||||
|
# time point more than duration
|
||||||
|
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
|
||||||
|
283,
|
||||||
|
[]
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
def test_youtube_chapters(self):
|
def test_youtube_chapters(self):
|
||||||
|
@@ -58,6 +58,7 @@ from .utils import (
|
|||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
@@ -302,6 +303,17 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_NUMERIC_FIELDS = set((
|
||||||
|
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||||
|
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||||
|
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||||
|
'average_rating', 'comment_count', 'age_limit',
|
||||||
|
'start_time', 'end_time',
|
||||||
|
'chapter_number', 'season_number', 'episode_number',
|
||||||
|
'track_number', 'disc_number', 'release_year',
|
||||||
|
'playlist_index',
|
||||||
|
))
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
_ies = []
|
_ies = []
|
||||||
_pps = []
|
_pps = []
|
||||||
@@ -498,24 +510,25 @@ class YoutubeDL(object):
|
|||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
if compat_os_name == 'nt':
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||||
# already of type unicode()
|
# c_wchar_p() might not be necessary if `message` is
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
# already of type unicode()
|
||||||
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
elif 'TERM' in os.environ:
|
elif 'TERM' in os.environ:
|
||||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
def save_console_title(self):
|
def save_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Save the title on stack
|
# Save the title on stack
|
||||||
self._write_string('\033[22;0t', self._screen_file)
|
self._write_string('\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
def restore_console_title(self):
|
def restore_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Restore the title from stack
|
# Restore the title from stack
|
||||||
self._write_string('\033[23;0t', self._screen_file)
|
self._write_string('\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
@@ -638,22 +651,11 @@ class YoutubeDL(object):
|
|||||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||||
outtmpl)
|
outtmpl)
|
||||||
|
|
||||||
NUMERIC_FIELDS = set((
|
|
||||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
|
||||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
|
||||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
|
||||||
'average_rating', 'comment_count', 'age_limit',
|
|
||||||
'start_time', 'end_time',
|
|
||||||
'chapter_number', 'season_number', 'episode_number',
|
|
||||||
'track_number', 'disc_number', 'release_year',
|
|
||||||
'playlist_index',
|
|
||||||
))
|
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string 'NA' is returned for missing fields. We will patch output
|
||||||
# template for missing fields to meet string presentation type.
|
# template for missing fields to meet string presentation type.
|
||||||
for numeric_field in NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
@@ -1344,9 +1346,28 @@ class YoutubeDL(object):
|
|||||||
if 'title' not in info_dict:
|
if 'title' not in info_dict:
|
||||||
raise ExtractorError('Missing "title" field in extractor result')
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
if not isinstance(info_dict['id'], compat_str):
|
def report_force_conversion(field, field_not, conversion):
|
||||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
self.report_warning(
|
||||||
info_dict['id'] = compat_str(info_dict['id'])
|
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||||
|
% (field, field_not, conversion))
|
||||||
|
|
||||||
|
def sanitize_string_field(info, string_field):
|
||||||
|
field = info.get(string_field)
|
||||||
|
if field is None or isinstance(field, compat_str):
|
||||||
|
return
|
||||||
|
report_force_conversion(string_field, 'a string', 'string')
|
||||||
|
info[string_field] = compat_str(field)
|
||||||
|
|
||||||
|
def sanitize_numeric_fields(info):
|
||||||
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
|
field = info.get(numeric_field)
|
||||||
|
if field is None or isinstance(field, compat_numeric_types):
|
||||||
|
continue
|
||||||
|
report_force_conversion(numeric_field, 'numeric', 'int')
|
||||||
|
info[numeric_field] = int_or_none(field)
|
||||||
|
|
||||||
|
sanitize_string_field(info_dict, 'id')
|
||||||
|
sanitize_numeric_fields(info_dict)
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
@@ -1427,15 +1448,25 @@ class YoutubeDL(object):
|
|||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('No video formats found!')
|
raise ExtractorError('No video formats found!')
|
||||||
|
|
||||||
|
def is_wellformed(f):
|
||||||
|
url = f.get('url')
|
||||||
|
valid_url = url and isinstance(url, compat_str)
|
||||||
|
if not valid_url:
|
||||||
|
self.report_warning(
|
||||||
|
'"url" field is missing or empty - skipping format, '
|
||||||
|
'there is an error in extractor')
|
||||||
|
return valid_url
|
||||||
|
|
||||||
|
# Filter out malformed formats for better extraction robustness
|
||||||
|
formats = list(filter(is_wellformed, formats))
|
||||||
|
|
||||||
formats_dict = {}
|
formats_dict = {}
|
||||||
|
|
||||||
# We check that all the formats have the format and format_id fields
|
# We check that all the formats have the format and format_id fields
|
||||||
for i, format in enumerate(formats):
|
for i, format in enumerate(formats):
|
||||||
if 'url' not in format:
|
sanitize_string_field(format, 'format_id')
|
||||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
sanitize_numeric_fields(format)
|
||||||
|
|
||||||
format['url'] = sanitize_url(format['url'])
|
format['url'] = sanitize_url(format['url'])
|
||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
else:
|
else:
|
||||||
|
@@ -2322,6 +2322,19 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||||
|
|
||||||
|
try: # Python 2
|
||||||
|
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||||
|
except ImportError: # Python <3.4
|
||||||
|
try:
|
||||||
|
from html.parser import HTMLParseError as compat_HTMLParseError
|
||||||
|
except ImportError: # Python >3.4
|
||||||
|
|
||||||
|
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||||
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
|
# and uniform cross-version exception handling
|
||||||
|
class compat_HTMLParseError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||||
@@ -2604,14 +2617,22 @@ except ImportError: # Python 2
|
|||||||
parsed_result[name] = [value]
|
parsed_result[name] = [value]
|
||||||
return parsed_result
|
return parsed_result
|
||||||
|
|
||||||
try:
|
|
||||||
from shlex import quote as compat_shlex_quote
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
except ImportError: # Python < 3.3
|
|
||||||
|
|
||||||
|
if compat_os_name == 'nt':
|
||||||
def compat_shlex_quote(s):
|
def compat_shlex_quote(s):
|
||||||
if re.match(r'^[-_\w./]+$', s):
|
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||||
return s
|
else:
|
||||||
else:
|
try:
|
||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
from shlex import quote as compat_shlex_quote
|
||||||
|
except ImportError: # Python < 3.3
|
||||||
|
def compat_shlex_quote(s):
|
||||||
|
if re.match(r'^[-_\w./]+$', s):
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -2636,9 +2657,6 @@ def compat_ord(c):
|
|||||||
return ord(c)
|
return ord(c)
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
@@ -2882,6 +2900,7 @@ else:
|
|||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'compat_HTMLParseError',
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
|
@@ -8,10 +8,11 @@ import random
|
|||||||
|
|
||||||
from ..compat import compat_os_name
|
from ..compat import compat_os_name
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
decodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
decodeArgument,
|
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
shell_quote,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -381,10 +382,5 @@ class FileDownloader(object):
|
|||||||
if exe is None:
|
if exe is None:
|
||||||
exe = os.path.basename(str_args[0])
|
exe = os.path.basename(str_args[0])
|
||||||
|
|
||||||
try:
|
|
||||||
import pipes
|
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
|
||||||
except ImportError:
|
|
||||||
shell_quote = repr
|
|
||||||
self.to_screen('[debug] %s command line: %s' % (
|
self.to_screen('[debug] %s command line: %s' % (
|
||||||
exe, shell_quote(str_args)))
|
exe, shell_quote(str_args)))
|
||||||
|
@@ -212,6 +212,11 @@ class FFmpegFD(ExternalFD):
|
|||||||
|
|
||||||
args = [ffpp.executable, '-y']
|
args = [ffpp.executable, '-y']
|
||||||
|
|
||||||
|
for log_level in ('quiet', 'verbose'):
|
||||||
|
if self.params.get(log_level, False):
|
||||||
|
args += ['-loglevel', log_level]
|
||||||
|
break
|
||||||
|
|
||||||
seekable = info_dict.get('_seekable')
|
seekable = info_dict.get('_seekable')
|
||||||
if seekable is not None:
|
if seekable is not None:
|
||||||
# setting -seekable prevents ffmpeg from guessing if the server
|
# setting -seekable prevents ffmpeg from guessing if the server
|
||||||
|
@@ -12,7 +12,15 @@ from ..compat import compat_urlparse
|
|||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
IE_NAME = 'abcnews:video'
|
IE_NAME = 'abcnews:video'
|
||||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
abcnews\.go\.com/
|
||||||
|
(?:
|
||||||
|
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
|
video/embed\?.*?\bid=
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||||
@@ -29,6 +37,9 @@ class AbcNewsVideoIE(AMPIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/video/embed?id=46979033',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
|
|||||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1421123075,
|
'timestamp': 1421123075,
|
||||||
'upload_date': '20150113',
|
'upload_date': '20150113',
|
||||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,25 +32,28 @@ class ADNIE(InfoExtractor):
|
|||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
if not sub_path:
|
if not sub_path:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
'http://animedigitalnetwork.fr/' + sub_path,
|
urljoin(self._BASE_URL, sub_path),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False, headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
||||||
|
})
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
|
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
|
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||||
None, fatal=False)
|
None, fatal=False)
|
||||||
if not subtitles_json:
|
if not subtitles_json:
|
||||||
return None
|
return None
|
||||||
@@ -103,9 +107,16 @@ class ADNIE(InfoExtractor):
|
|||||||
metas = options.get('metas') or {}
|
metas = options.get('metas') or {}
|
||||||
title = metas.get('title') or video_info['title']
|
title = metas.get('title') or video_info['title']
|
||||||
links = player_config.get('links') or {}
|
links = player_config.get('links') or {}
|
||||||
|
if not links:
|
||||||
|
links_url = player_config['linksurl']
|
||||||
|
links_data = self._download_json(urljoin(
|
||||||
|
self._BASE_URL, links_url), video_id)
|
||||||
|
links = links_data.get('links') or {}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, qualities in links.items():
|
for format_id, qualities in links.items():
|
||||||
|
if not isinstance(qualities, dict):
|
||||||
|
continue
|
||||||
for load_balancer_url in qualities.values():
|
for load_balancer_url in qualities.values():
|
||||||
load_balancer_data = self._download_json(
|
load_balancer_data = self._download_json(
|
||||||
load_balancer_url, video_id, fatal=False) or {}
|
load_balancer_url, video_id, fatal=False) or {}
|
||||||
|
@@ -6,12 +6,16 @@ import time
|
|||||||
import xml.etree.ElementTree as etree
|
import xml.etree.ElementTree as etree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_kwargs,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
NO_DEFAULT,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -21,6 +25,11 @@ MSO_INFO = {
|
|||||||
'username_field': 'username',
|
'username_field': 'username',
|
||||||
'password_field': 'password',
|
'password_field': 'password',
|
||||||
},
|
},
|
||||||
|
'ATTOTT': {
|
||||||
|
'name': 'DIRECTV NOW',
|
||||||
|
'username_field': 'email',
|
||||||
|
'password_field': 'loginpassword',
|
||||||
|
},
|
||||||
'Rogers': {
|
'Rogers': {
|
||||||
'name': 'Rogers',
|
'name': 'Rogers',
|
||||||
'username_field': 'UserName',
|
'username_field': 'UserName',
|
||||||
@@ -36,6 +45,11 @@ MSO_INFO = {
|
|||||||
'username_field': 'Ecom_User_ID',
|
'username_field': 'Ecom_User_ID',
|
||||||
'password_field': 'Ecom_Password',
|
'password_field': 'Ecom_Password',
|
||||||
},
|
},
|
||||||
|
'Brighthouse': {
|
||||||
|
'name': 'Bright House Networks | Spectrum',
|
||||||
|
'username_field': 'j_username',
|
||||||
|
'password_field': 'j_password',
|
||||||
|
},
|
||||||
'Charter_Direct': {
|
'Charter_Direct': {
|
||||||
'name': 'Charter Spectrum',
|
'name': 'Charter Spectrum',
|
||||||
'username_field': 'IDToken1',
|
'username_field': 'IDToken1',
|
||||||
@@ -1308,11 +1322,14 @@ class AdobePassIE(InfoExtractor):
|
|||||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||||
_MVPD_CACHE = 'ap-mvpd'
|
_MVPD_CACHE = 'ap-mvpd'
|
||||||
|
|
||||||
|
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
headers = kwargs.get('headers', {})
|
headers = kwargs.get('headers', {})
|
||||||
headers.update(self.geo_verification_headers())
|
headers.update(self.geo_verification_headers())
|
||||||
kwargs['headers'] = headers
|
kwargs['headers'] = headers
|
||||||
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(AdobePassIE, self)._download_webpage_handle(
|
||||||
|
*args, **compat_kwargs(kwargs))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||||
@@ -1356,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
|
|||||||
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
|
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
|
||||||
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
|
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
|
||||||
|
|
||||||
|
def extract_redirect_url(html, url=None, fatal=False):
|
||||||
|
# TODO: eliminate code duplication with generic extractor and move
|
||||||
|
# redirection code into _download_webpage_handle
|
||||||
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
|
redirect_url = self._search_regex(
|
||||||
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||||
|
html, 'meta refresh redirect',
|
||||||
|
default=NO_DEFAULT if fatal else None, fatal=fatal)
|
||||||
|
if not redirect_url:
|
||||||
|
return None
|
||||||
|
if url:
|
||||||
|
redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
|
||||||
|
return redirect_url
|
||||||
|
|
||||||
mvpd_headers = {
|
mvpd_headers = {
|
||||||
'ap_42': 'anonymous',
|
'ap_42': 'anonymous',
|
||||||
'ap_11': 'Linux i686',
|
'ap_11': 'Linux i686',
|
||||||
@@ -1405,16 +1437,15 @@ class AdobePassIE(InfoExtractor):
|
|||||||
if '<form name="signin"' in provider_redirect_page:
|
if '<form name="signin"' in provider_redirect_page:
|
||||||
provider_login_page_res = provider_redirect_page_res
|
provider_login_page_res = provider_redirect_page_res
|
||||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||||
oauth_redirect_url = self._html_search_regex(
|
oauth_redirect_url = extract_redirect_url(
|
||||||
r'content="0;\s*url=([^\'"]+)',
|
provider_redirect_page, fatal=True)
|
||||||
provider_redirect_page, 'meta refresh redirect')
|
|
||||||
provider_login_page_res = self._download_webpage_handle(
|
provider_login_page_res = self._download_webpage_handle(
|
||||||
oauth_redirect_url, video_id,
|
oauth_redirect_url, video_id,
|
||||||
'Downloading Provider Login Page')
|
self._DOWNLOADING_LOGIN_PAGE)
|
||||||
else:
|
else:
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res,
|
provider_redirect_page_res,
|
||||||
'Downloading Provider Login Page')
|
self._DOWNLOADING_LOGIN_PAGE)
|
||||||
|
|
||||||
mvpd_confirm_page_res = post_form(
|
mvpd_confirm_page_res = post_form(
|
||||||
provider_login_page_res, 'Logging in', {
|
provider_login_page_res, 'Logging in', {
|
||||||
@@ -1461,8 +1492,17 @@ class AdobePassIE(InfoExtractor):
|
|||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||||
|
# based redirect that should be followed.
|
||||||
|
provider_redirect_page, urlh = provider_redirect_page_res
|
||||||
|
provider_refresh_redirect_url = extract_redirect_url(
|
||||||
|
provider_redirect_page, url=urlh.geturl())
|
||||||
|
if provider_refresh_redirect_url:
|
||||||
|
provider_redirect_page_res = self._download_webpage_handle(
|
||||||
|
provider_refresh_redirect_url, video_id,
|
||||||
|
'Downloading Provider Redirect Page (meta refresh)')
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||||
mso_info.get('username_field', 'username'): username,
|
mso_info.get('username_field', 'username'): username,
|
||||||
mso_info.get('password_field', 'password'): password,
|
mso_info.get('password_field', 'password'): password,
|
||||||
|
93
youtube_dl/extractor/asiancrush.py
Normal file
93
youtube_dl/extractor/asiancrush.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .kaltura import KalturaIE
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
remove_end,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AsianCrushIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||||
|
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_y4tmjm5r',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Women Who Flirt',
|
||||||
|
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||||
|
'timestamp': 1496936429,
|
||||||
|
'upload_date': '20170608',
|
||||||
|
'uploader_id': 'craig@crifkin.com',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'postid': video_id,
|
||||||
|
'action': 'get_channel_kaltura_vars',
|
||||||
|
}))
|
||||||
|
|
||||||
|
entry_id = data['entry_id']
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||||
|
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||||
|
video_title=data.get('vid_label'))
|
||||||
|
|
||||||
|
|
||||||
|
class AsianCrushPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12481',
|
||||||
|
'title': 'Scholar Who Walks the Night',
|
||||||
|
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||||
|
},
|
||||||
|
'playlist_count': 20,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||||
|
webpage):
|
||||||
|
attrs = extract_attributes(mobj.group(0))
|
||||||
|
if attrs.get('class') == 'clearfix':
|
||||||
|
entries.append(self.url_result(
|
||||||
|
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||||
|
|
||||||
|
title = remove_end(
|
||||||
|
self._html_search_regex(
|
||||||
|
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||||
|
'title', default=None) or self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title',
|
||||||
|
default=None) or self._search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||||
|
' | AsianCrush')
|
||||||
|
|
||||||
|
description = self._og_search_description(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:description', webpage, 'description', fatal=False)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, title, description)
|
@@ -14,14 +14,16 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
KNOWN_EXTENSIONS,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
@@ -155,7 +157,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
@@ -222,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return (False
|
||||||
|
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||||
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader_id = mobj.group('subdomain')
|
uploader_id = mobj.group('subdomain')
|
||||||
@@ -250,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BandcampWeeklyIE(InfoExtractor):
|
||||||
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://bandcamp.com/?show=224',
|
||||||
|
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '224',
|
||||||
|
'ext': 'opus',
|
||||||
|
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||||
|
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||||
|
'duration': 5829.77,
|
||||||
|
'release_date': '20170404',
|
||||||
|
'series': 'Bandcamp Weekly',
|
||||||
|
'episode': 'Magic Moments',
|
||||||
|
'episode_number': 208,
|
||||||
|
'episode_id': '224',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
blob = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
||||||
|
'blob', group='blob'),
|
||||||
|
video_id, transform_source=unescapeHTML)
|
||||||
|
|
||||||
|
show = blob['bcw_show']
|
||||||
|
|
||||||
|
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||||
|
# which happens to expose the latest Bandcamp Weekly episode.
|
||||||
|
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
|
if not isinstance(format_url, compat_str):
|
||||||
|
continue
|
||||||
|
for known_ext in KNOWN_EXTENSIONS:
|
||||||
|
if known_ext in format_id:
|
||||||
|
ext = known_ext
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
ext = None
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': format_url,
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||||
|
subtitle = show.get('subtitle')
|
||||||
|
if subtitle:
|
||||||
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
|
episode_number = None
|
||||||
|
seq = blob.get('bcw_seq')
|
||||||
|
|
||||||
|
if seq and isinstance(seq, list):
|
||||||
|
try:
|
||||||
|
episode_number = next(
|
||||||
|
int_or_none(e.get('episode_number'))
|
||||||
|
for e in seq
|
||||||
|
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
|
'is_live': False,
|
||||||
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
|
'series': 'Bandcamp Weekly',
|
||||||
|
'episode': show.get('subtitle'),
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'episode_id': compat_str(video_id),
|
||||||
|
'formats': formats
|
||||||
|
}
|
||||||
|
@@ -6,14 +6,18 @@ import itertools
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
@@ -32,12 +36,15 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
programmes/(?!articles/)|
|
programmes/(?!articles/)|
|
||||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||||
music/clips[/#]|
|
music/(?:clips|audiovideo/popular)[/#]|
|
||||||
radio/player/
|
radio/player/
|
||||||
)
|
)
|
||||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
''' % _ID_REGEX
|
''' % _ID_REGEX
|
||||||
|
|
||||||
|
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||||
|
_NETRC_MACHINE = 'bbc'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIASELECTOR_URLS = [
|
||||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||||
@@ -222,11 +229,46 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}, {
|
||||||
]
|
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Downloading signin page')
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
})
|
||||||
|
|
||||||
|
post_url = urljoin(self._LOGIN_URL, self._search_regex(
|
||||||
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||||
|
'post url', default=self._LOGIN_URL, group='url'))
|
||||||
|
|
||||||
|
response, urlh = self._download_webpage_handle(
|
||||||
|
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||||
|
headers={'Referer': self._LOGIN_URL})
|
||||||
|
|
||||||
|
if self._LOGIN_URL in urlh.geturl():
|
||||||
|
error = clean_html(get_element_by_class('form-message', response))
|
||||||
|
if error:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s' % error, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
class MediaSelectionError(Exception):
|
class MediaSelectionError(Exception):
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
@@ -483,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
|
|
||||||
|
error = self._search_regex(
|
||||||
|
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||||
|
webpage, 'error', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
programme_id = None
|
programme_id = None
|
||||||
duration = None
|
duration = None
|
||||||
|
|
||||||
|
@@ -6,18 +6,33 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BeamProLiveIE(InfoExtractor):
|
class BeamProBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'Beam:live'
|
_API_BASE = 'https://mixer.com/api/v1'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
|
|
||||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||||
|
|
||||||
|
def _extract_channel_info(self, chan):
|
||||||
|
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||||
|
return {
|
||||||
|
'uploader': chan.get('token') or try_get(
|
||||||
|
chan, lambda x: x['user']['username'], compat_str),
|
||||||
|
'uploader_id': compat_str(user_id) if user_id else None,
|
||||||
|
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BeamProLiveIE(BeamProBaseIE):
|
||||||
|
IE_NAME = 'Mixer:live'
|
||||||
|
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.beam.pro/niterhayven',
|
'url': 'http://mixer.com/niterhayven',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '261562',
|
'id': '261562',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -38,11 +53,17 @@ class BeamProLiveIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_name = self._match_id(url)
|
channel_name = self._match_id(url)
|
||||||
|
|
||||||
chan = self._download_json(
|
chan = self._download_json(
|
||||||
'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
|
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||||
|
|
||||||
if chan.get('online') is False:
|
if chan.get('online') is False:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -50,24 +71,118 @@ class BeamProLiveIE(InfoExtractor):
|
|||||||
|
|
||||||
channel_id = chan['id']
|
channel_id = chan['id']
|
||||||
|
|
||||||
|
def manifest_url(kind):
|
||||||
|
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
|
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||||
channel_name, ext='mp4', m3u8_id='hls', fatal=False)
|
fatal=False)
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
manifest_url('smil'), channel_name, fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
info = {
|
||||||
|
|
||||||
return {
|
|
||||||
'id': compat_str(chan.get('id') or channel_name),
|
'id': compat_str(chan.get('id') or channel_name),
|
||||||
'title': self._live_title(chan.get('name') or channel_name),
|
'title': self._live_title(chan.get('name') or channel_name),
|
||||||
'description': clean_html(chan.get('description')),
|
'description': clean_html(chan.get('description')),
|
||||||
'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
|
'thumbnail': try_get(
|
||||||
|
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||||
'uploader': chan.get('token') or try_get(
|
|
||||||
chan, lambda x: x['user']['username'], compat_str),
|
|
||||||
'uploader_id': compat_str(user_id) if user_id else None,
|
|
||||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
info.update(self._extract_channel_info(chan))
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class BeamProVodIE(BeamProBaseIE):
|
||||||
|
IE_NAME = 'Mixer:vod'
|
||||||
|
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||||
|
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2259830',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'willow8714\'s Channel',
|
||||||
|
'duration': 6828.15,
|
||||||
|
'thumbnail': r're:https://.*source\.png$',
|
||||||
|
'timestamp': 1494046474,
|
||||||
|
'upload_date': '20170506',
|
||||||
|
'uploader': 'willow8714',
|
||||||
|
'uploader_id': '6085379',
|
||||||
|
'age_limit': 13,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_format(vod, vod_type):
|
||||||
|
if not vod.get('baseUrl'):
|
||||||
|
return []
|
||||||
|
|
||||||
|
if vod_type == 'hls':
|
||||||
|
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
||||||
|
elif vod_type == 'raw':
|
||||||
|
filename, protocol = 'source.mp4', 'https'
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
||||||
|
|
||||||
|
format_id = [vod_type]
|
||||||
|
if isinstance(data.get('Height'), compat_str):
|
||||||
|
format_id.append('%sp' % data['Height'])
|
||||||
|
|
||||||
|
return [{
|
||||||
|
'url': urljoin(vod['baseUrl'], filename),
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'protocol': protocol,
|
||||||
|
'width': int_or_none(data.get('Width')),
|
||||||
|
'height': int_or_none(data.get('Height')),
|
||||||
|
'fps': int_or_none(data.get('Fps')),
|
||||||
|
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
vod_id = self._match_id(url)
|
||||||
|
|
||||||
|
vod_info = self._download_json(
|
||||||
|
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
||||||
|
|
||||||
|
state = vod_info.get('state')
|
||||||
|
if state != 'AVAILABLE':
|
||||||
|
raise ExtractorError(
|
||||||
|
'VOD %s is not available (state: %s)' % (vod_id, state),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnail_url = None
|
||||||
|
|
||||||
|
for vod in vod_info['vods']:
|
||||||
|
vod_type = vod.get('format')
|
||||||
|
if vod_type in ('hls', 'raw'):
|
||||||
|
formats.extend(self._extract_format(vod, vod_type))
|
||||||
|
elif vod_type == 'thumbnail':
|
||||||
|
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': vod_id,
|
||||||
|
'title': vod_info.get('name') or vod_id,
|
||||||
|
'duration': float_or_none(vod_info.get('duration')),
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
||||||
|
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
||||||
|
|
||||||
|
return info
|
||||||
|
@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to China',
|
'skip': 'Geo-restricted to China',
|
||||||
|
}, {
|
||||||
|
# Title with double quotes
|
||||||
|
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8903802',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||||
|
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||||
|
'uploader': '阿滴英文',
|
||||||
|
'uploader_id': '65880958',
|
||||||
|
'timestamp': 1488382620,
|
||||||
|
'upload_date': '20170301',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # Test metadata only
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = '84956560bc028eb7'
|
_APP_KEY = '84956560bc028eb7'
|
||||||
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||||
|
@@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
entries.append(self.url_result(video['url']))
|
entries.append(self.url_result(video['url']))
|
||||||
|
|
||||||
facebook_url = FacebookIE._extract_url(webpage)
|
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||||
if facebook_url:
|
entries.extend([
|
||||||
entries.append(self.url_result(facebook_url))
|
self.url_result(facebook_url)
|
||||||
|
for facebook_url in facebook_urls])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
@@ -49,13 +49,13 @@ class CBSIE(CBSBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video_info(self, content_id):
|
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||||
items_data = self._download_xml(
|
items_data = self._download_xml(
|
||||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
content_id, query={'partner': site, 'contentId': content_id})
|
||||||
video_data = xpath_element(items_data, './/item')
|
video_data = xpath_element(items_data, './/item')
|
||||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||||
|
|
||||||
asset_types = []
|
asset_types = []
|
||||||
|
@@ -3,17 +3,18 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .cbs import CBSIE
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class CBSInteractiveIE(ThePlatformIE):
|
class CBSInteractiveIE(CBSIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
|
||||||
'ext': 'flv',
|
'display_id': 'hands-on-with-microsofts-windows-8-1-update',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||||
@@ -22,13 +23,19 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
'timestamp': 1396479627,
|
'timestamp': 1396479627,
|
||||||
'upload_date': '20140402',
|
'upload_date': '20140402',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||||
|
'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
|
'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
|
||||||
'ext': 'flv',
|
'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
||||||
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
|
'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
|
||||||
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||||
'uploader': 'Ashley Esqueda',
|
'uploader': 'Ashley Esqueda',
|
||||||
'duration': 1482,
|
'duration': 1482,
|
||||||
@@ -38,23 +45,28 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
|
'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
|
||||||
|
'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video: Keeping Android smartphones and tablets secure',
|
'title': 'Video: Keeping Android smartphones and tablets secure',
|
||||||
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
||||||
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
||||||
'uploader': 'Adrian Kingsley-Hughes',
|
'uploader': 'Adrian Kingsley-Hughes',
|
||||||
'timestamp': 1448961720,
|
'duration': 731,
|
||||||
'upload_date': '20151201',
|
'timestamp': 1449129925,
|
||||||
|
'upload_date': '20151203',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
|
|
||||||
MPX_ACCOUNTS = {
|
MPX_ACCOUNTS = {
|
||||||
'cnet': 2288573011,
|
'cnet': 2198311517,
|
||||||
'zdnet': 2387448114,
|
'zdnet': 2387448114,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,7 +80,8 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
data = self._parse_json(data_json, display_id)
|
data = self._parse_json(data_json, display_id)
|
||||||
vdata = data.get('video') or data['videos'][0]
|
vdata = data.get('video') or data['videos'][0]
|
||||||
|
|
||||||
video_id = vdata['id']
|
video_id = vdata['mpxRefId']
|
||||||
|
|
||||||
title = vdata['title']
|
title = vdata['title']
|
||||||
author = vdata.get('author')
|
author = vdata.get('author')
|
||||||
if author:
|
if author:
|
||||||
@@ -78,20 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
uploader = None
|
uploader = None
|
||||||
uploader_id = None
|
uploader_id = None
|
||||||
|
|
||||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
|
||||||
formats, subtitles = [], {}
|
|
||||||
for (fkey, vid) in vdata['files'].items():
|
|
||||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
|
||||||
continue
|
|
||||||
release_url = self.TP_RELEASE_URL_TEMPLATE % vid
|
|
||||||
if fkey == 'hds':
|
|
||||||
release_url += '&manifest=f4m'
|
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
|
||||||
formats.extend(tp_formats)
|
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
|
||||||
info.update({
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@@ -99,7 +99,5 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
'duration': int_or_none(vdata.get('duration')),
|
'duration': int_or_none(vdata.get('duration')),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
@@ -15,19 +15,23 @@ class CBSNewsIE(CBSIE):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
|
# 60 minutes
|
||||||
|
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tesla-and-spacex-elon-musks-industrial-empire',
|
'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
|
'title': 'Artificial Intelligence',
|
||||||
'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
|
'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
|
||||||
'duration': 791,
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1606,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
|
'timestamp': 1498431900,
|
||||||
|
'upload_date': '20170625',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Subscribers only',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||||
@@ -52,6 +56,22 @@ class CBSNewsIE(CBSIE):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# 48 hours
|
||||||
|
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cold as Ice',
|
||||||
|
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
|
||||||
|
'upload_date': '20170604',
|
||||||
|
'timestamp': 1496538000,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -60,12 +80,18 @@ class CBSNewsIE(CBSIE):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_info = self._parse_json(self._html_search_regex(
|
video_info = self._parse_json(self._html_search_regex(
|
||||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||||
webpage, 'video JSON info'), video_id)
|
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
item = video_info['item'] if 'item' in video_info else video_info
|
if video_info:
|
||||||
guid = item['mpxRefId']
|
item = video_info['item'] if 'item' in video_info else video_info
|
||||||
return self._extract_video_info(guid)
|
else:
|
||||||
|
state = self._parse_json(self._search_regex(
|
||||||
|
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
|
||||||
|
'playlist JSON info', group='json'), video_id)['state']
|
||||||
|
item = state['playlist'][state['pid']]
|
||||||
|
|
||||||
|
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||||
|
@@ -376,7 +376,7 @@ class InfoExtractor(object):
|
|||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
m = cls._VALID_URL_RE.match(url)
|
m = cls._VALID_URL_RE.match(url)
|
||||||
assert m
|
assert m
|
||||||
return m.group('id')
|
return compat_str(m.group('id'))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def working(cls):
|
def working(cls):
|
||||||
@@ -420,7 +420,7 @@ class InfoExtractor(object):
|
|||||||
if country_code:
|
if country_code:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_stdout(
|
self._downloader.to_screen(
|
||||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||||
|
|
||||||
@@ -1002,17 +1002,17 @@ class InfoExtractor(object):
|
|||||||
item_type = e.get('@type')
|
item_type = e.get('@type')
|
||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
return info
|
return info
|
||||||
if item_type == 'TVEpisode':
|
if item_type in ('TVEpisode', 'Episode'):
|
||||||
info.update({
|
info.update({
|
||||||
'episode': unescapeHTML(e.get('name')),
|
'episode': unescapeHTML(e.get('name')),
|
||||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
})
|
})
|
||||||
part_of_season = e.get('partOfSeason')
|
part_of_season = e.get('partOfSeason')
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
elif item_type == 'Article':
|
elif item_type == 'Article':
|
||||||
info.update({
|
info.update({
|
||||||
@@ -1022,10 +1022,10 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
elif item_type == 'WebPage':
|
continue
|
||||||
video = e.get('video')
|
video = e.get('video')
|
||||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||||
extract_video_object(video)
|
extract_video_object(video)
|
||||||
break
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@@ -2299,6 +2299,8 @@ class InfoExtractor(object):
|
|||||||
tracks = video_data.get('tracks')
|
tracks = video_data.get('tracks')
|
||||||
if tracks and isinstance(tracks, list):
|
if tracks and isinstance(tracks, list):
|
||||||
for track in tracks:
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
if track.get('kind') != 'captions':
|
if track.get('kind') != 'captions':
|
||||||
continue
|
continue
|
||||||
track_url = urljoin(base_url, track.get('file'))
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
@@ -2328,6 +2330,8 @@ class InfoExtractor(object):
|
|||||||
urls = []
|
urls = []
|
||||||
formats = []
|
formats = []
|
||||||
for source in jwplayer_sources_data:
|
for source in jwplayer_sources_data:
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
continue
|
||||||
source_url = self._proto_relative_url(source.get('file'))
|
source_url = self._proto_relative_url(source.get('file'))
|
||||||
if not source_url:
|
if not source_url:
|
||||||
continue
|
continue
|
||||||
|
@@ -8,7 +8,16 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class CorusIE(ThePlatformFeedIE):
|
class CorusIE(ThePlatformFeedIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:globaltv|etcanada)\.com|
|
||||||
|
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
|
||||||
|
)
|
||||||
|
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||||
@@ -27,6 +36,12 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_TP_FEEDS = {
|
_TP_FEEDS = {
|
||||||
@@ -50,6 +65,14 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
'feed_id': '5tUJLgV2YNJ5',
|
'feed_id': '5tUJLgV2YNJ5',
|
||||||
'account_id': 2414427935,
|
'account_id': 2414427935,
|
||||||
},
|
},
|
||||||
|
'history': {
|
||||||
|
'feed_id': 'tQFx_TyyEq4J',
|
||||||
|
'account_id': 2369613659,
|
||||||
|
},
|
||||||
|
'showcase': {
|
||||||
|
'feed_id': '9H6qyshBZU3E',
|
||||||
|
'account_id': 2414426607,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class DisneyIE(InfoExtractor):
|
class DisneyIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Disney.EmbedVideo
|
# Disney.EmbedVideo
|
||||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||||
@@ -68,6 +68,9 @@ class DisneyIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
info_url = self._search_regex(
|
info_url = self._search_regex(
|
||||||
r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
||||||
|
@@ -1,135 +1,59 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
js_to_json,
|
||||||
parse_iso8601,
|
parse_duration,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DRBonanzaIE(InfoExtractor):
|
class DRBonanzaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
_TESTS = [{
|
'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
|
||||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '65517',
|
'id': '40312',
|
||||||
|
'display_id': 'matador---0824-komme-fremmede-',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Talkshowet - Leonard Cohen',
|
'title': 'MATADOR - 08:24. "Komme fremmede".',
|
||||||
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
|
'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
|
||||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||||
'timestamp': 1295537932,
|
'duration': 4613,
|
||||||
'upload_date': '20110120',
|
|
||||||
'duration': 3664,
|
|
||||||
},
|
},
|
||||||
'params': {
|
}
|
||||||
'skip_download': True, # requires rtmp
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
|
||||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '59410',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
|
|
||||||
'description': 'md5:501e5a195749480552e214fbbed16c4e',
|
|
||||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
|
||||||
'timestamp': 1223274900,
|
|
||||||
'upload_date': '20081006',
|
|
||||||
'duration': 7369,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
webpage = self._download_webpage(url, url_id)
|
video_id, display_id = mobj.group('id', 'display_id')
|
||||||
|
|
||||||
if url_id:
|
webpage = self._download_webpage(url, display_id)
|
||||||
info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
|
|
||||||
else:
|
|
||||||
# Just fetch the first video on that page
|
|
||||||
info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
|
|
||||||
|
|
||||||
asset_id = str(info['AssetId'])
|
info = self._parse_html5_media_entries(
|
||||||
title = info['Title'].rstrip(' \'\"-,.:;!?')
|
url, webpage, display_id, m3u8_id='hls',
|
||||||
duration = int_or_none(info.get('Duration'), scale=1000)
|
m3u8_entry_protocol='m3u8_native')[0]
|
||||||
# First published online. "FirstPublished" contains the date for original airing.
|
self._sort_formats(info['formats'])
|
||||||
timestamp = parse_iso8601(
|
|
||||||
re.sub(r'\.\d+$', '', info['Created']))
|
|
||||||
|
|
||||||
def parse_filename_info(url):
|
asset = self._parse_json(
|
||||||
match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
self._search_regex(
|
||||||
if match:
|
r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
|
||||||
return {
|
display_id, transform_source=js_to_json)
|
||||||
'width': int(match.group('width')),
|
|
||||||
'height': int(match.group('height')),
|
|
||||||
'vbr': int(match.group('bitrate')),
|
|
||||||
'ext': match.group('ext')
|
|
||||||
}
|
|
||||||
match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
|
||||||
if match:
|
|
||||||
return {
|
|
||||||
'vbr': int(match.group('bitrate')),
|
|
||||||
'ext': match.group(2)
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
|
title = unescapeHTML(asset['AssetTitle']).strip()
|
||||||
preferencemap = {
|
|
||||||
'VideoHigh': -1,
|
|
||||||
'VideoMid': -2,
|
|
||||||
'VideoLow': -3,
|
|
||||||
'Audio': -4,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
def extract(field):
|
||||||
for file in info['Files']:
|
return self._search_regex(
|
||||||
if info['Type'] == 'Video':
|
r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
|
||||||
if file['Type'] in video_types:
|
webpage, field, default=None)
|
||||||
format = parse_filename_info(file['Location'])
|
|
||||||
format.update({
|
|
||||||
'url': file['Location'],
|
|
||||||
'format_id': file['Type'].replace('Video', ''),
|
|
||||||
'preference': preferencemap.get(file['Type'], -10),
|
|
||||||
})
|
|
||||||
if format['url'].startswith('rtmp'):
|
|
||||||
rtmp_url = format['url']
|
|
||||||
format['rtmp_live'] = True # --resume does not work
|
|
||||||
if '/bonanza/' in rtmp_url:
|
|
||||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
|
||||||
formats.append(format)
|
|
||||||
elif file['Type'] == 'Thumb':
|
|
||||||
thumbnail = file['Location']
|
|
||||||
elif info['Type'] == 'Audio':
|
|
||||||
if file['Type'] == 'Audio':
|
|
||||||
format = parse_filename_info(file['Location'])
|
|
||||||
format.update({
|
|
||||||
'url': file['Location'],
|
|
||||||
'format_id': file['Type'],
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.append(format)
|
|
||||||
elif file['Type'] == 'Thumb':
|
|
||||||
thumbnail = file['Location']
|
|
||||||
|
|
||||||
description = '%s\n%s\n%s\n' % (
|
info.update({
|
||||||
info['Description'], info['Actors'], info['Colophon'])
|
'id': asset.get('AssetId') or video_id,
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
|
||||||
display_id = re.sub(r'-+', '-', display_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': asset_id,
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'description': extract('Programinfo'),
|
||||||
'description': description,
|
'duration': parse_duration(extract('Tid')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': asset.get('AssetImageUrl'),
|
||||||
'timestamp': timestamp,
|
})
|
||||||
'duration': duration,
|
return info
|
||||||
}
|
|
||||||
|
@@ -44,8 +44,23 @@ class DrTuberIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_data = self._download_json(
|
||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
'http://www.drtuber.com/player_config_json/', video_id, query={
|
||||||
|
'vid': video_id,
|
||||||
|
'embed': 0,
|
||||||
|
'aid': 0,
|
||||||
|
'domain_id': 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, video_url in video_data['files'].items():
|
||||||
|
if video_url:
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': 2 if format_id == 'hq' else 1,
|
||||||
|
'url': video_url
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||||
@@ -75,7 +90,7 @@ class DrTuberIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
|
@@ -5,9 +5,12 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
determine_ext,
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -24,14 +27,7 @@ class DVTVIE(InfoExtractor):
|
|||||||
'id': 'dc0768de855511e49e4b0025900fea04',
|
'id': 'dc0768de855511e49e4b0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
||||||
}
|
'duration': 1484,
|
||||||
}, {
|
|
||||||
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
|
|
||||||
'md5': '6388f1941b48537dbd28791f712af8bf',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '72c02230849211e49f60002590604f2e',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||||
@@ -44,55 +40,100 @@ class DVTVIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b0b40906854d11e4bdad0025900fea04',
|
'id': 'b0b40906854d11e4bdad0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
|
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
|
||||||
|
'description': 'md5:0916925dea8e30fe84222582280b47a0',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '5f7652a08b05009c1292317b449ffea2',
|
'md5': '5f7652a08b05009c1292317b449ffea2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '420ad9ec854a11e4bdad0025900fea04',
|
'id': '420ad9ec854a11e4bdad0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
|
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
|
||||||
|
'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95d35580846a11e4b6d20025900fea04',
|
'id': '95d35580846a11e4b6d20025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
|
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
|
||||||
|
'description': 'md5:889fe610a70fee5511dc3326a089188e',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6fe14d66853511e4833a0025900fea04',
|
'id': '6fe14d66853511e4833a0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
|
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
|
||||||
|
'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
|
||||||
|
'md5': 'f8efe9656017da948369aa099788c8ea',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3c496fec365911e7a6500025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
|
||||||
|
'duration': 1103,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_video_metadata(self, js, video_id):
|
def _parse_video_metadata(self, js, video_id):
|
||||||
metadata = self._parse_json(js, video_id, transform_source=js_to_json)
|
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
title = unescapeHTML(data['title'])
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video in metadata['sources']:
|
for video in data['sources']:
|
||||||
ext = video['type'][6:]
|
video_url = video.get('file')
|
||||||
formats.append({
|
if not video_url:
|
||||||
'url': video['file'],
|
continue
|
||||||
'ext': ext,
|
video_type = video.get('type')
|
||||||
'format_id': '%s-%s' % (ext, video['label']),
|
ext = determine_ext(video_url, mimetype2ext(video_type))
|
||||||
'height': int(video['label'].rstrip('p')),
|
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
|
||||||
'fps': 25,
|
formats.extend(self._extract_m3u8_formats(
|
||||||
})
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif video_type == 'application/dash+xml' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
label = video.get('label')
|
||||||
|
height = self._search_regex(
|
||||||
|
r'^(\d+)[pP]', label or '', 'height', default=None)
|
||||||
|
format_id = ['http']
|
||||||
|
for f in (ext, label):
|
||||||
|
if f:
|
||||||
|
format_id.append(f)
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'height': int_or_none(height),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': metadata['mediaid'],
|
'id': data.get('mediaid') or video_id,
|
||||||
'title': unescapeHTML(metadata['title']),
|
'title': title,
|
||||||
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
|
'description': data.get('description'),
|
||||||
|
'thumbnail': data.get('image'),
|
||||||
|
'duration': int_or_none(data.get('duration')),
|
||||||
|
'timestamp': int_or_none(data.get('pubtime')),
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +144,7 @@ class DVTVIE(InfoExtractor):
|
|||||||
|
|
||||||
# single video
|
# single video
|
||||||
item = self._search_regex(
|
item = self._search_regex(
|
||||||
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
|
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
|
||||||
webpage, 'video', default=None, fatal=False)
|
webpage, 'video', default=None, fatal=False)
|
||||||
|
|
||||||
if item:
|
if item:
|
||||||
@@ -113,6 +154,8 @@ class DVTVIE(InfoExtractor):
|
|||||||
items = re.findall(
|
items = re.findall(
|
||||||
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
||||||
webpage)
|
webpage)
|
||||||
|
if not items:
|
||||||
|
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
|
||||||
|
|
||||||
if items:
|
if items:
|
||||||
return {
|
return {
|
||||||
|
@@ -71,6 +71,10 @@ from .arte import (
|
|||||||
TheOperaPlatformIE,
|
TheOperaPlatformIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
from .asiancrush import (
|
||||||
|
AsianCrushIE,
|
||||||
|
AsianCrushPlaylistIE,
|
||||||
|
)
|
||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
from .atvat import ATVAtIE
|
from .atvat import ATVAtIE
|
||||||
@@ -90,7 +94,7 @@ from .azmedien import (
|
|||||||
)
|
)
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
from .bbc import (
|
from .bbc import (
|
||||||
BBCCoUkIE,
|
BBCCoUkIE,
|
||||||
BBCCoUkArticleIE,
|
BBCCoUkArticleIE,
|
||||||
@@ -98,7 +102,10 @@ from .bbc import (
|
|||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
from .beampro import BeamProLiveIE
|
from .beampro import (
|
||||||
|
BeamProLiveIE,
|
||||||
|
BeamProVodIE,
|
||||||
|
)
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .bellmedia import BellMediaIE
|
from .bellmedia import BellMediaIE
|
||||||
@@ -389,7 +396,6 @@ from .globo import (
|
|||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
from .go90 import Go90IE
|
from .go90 import Go90IE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .godtv import GodTVIE
|
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
@@ -634,7 +640,10 @@ from .neteasemusic import (
|
|||||||
NetEaseMusicProgramIE,
|
NetEaseMusicProgramIE,
|
||||||
NetEaseMusicDjRadioIE,
|
NetEaseMusicDjRadioIE,
|
||||||
)
|
)
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import (
|
||||||
|
NewgroundsIE,
|
||||||
|
NewgroundsPlaylistIE,
|
||||||
|
)
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
NextMediaIE,
|
NextMediaIE,
|
||||||
@@ -815,6 +824,7 @@ from .radiobremen import RadioBremenIE
|
|||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiPlayIE,
|
RaiPlayIE,
|
||||||
|
RaiPlayLiveIE,
|
||||||
RaiIE,
|
RaiIE,
|
||||||
)
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@@ -866,6 +876,7 @@ from .rutube import (
|
|||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
|
from .ruv import RuvIE
|
||||||
from .sandia import SandiaIE
|
from .sandia import SandiaIE
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
@@ -962,6 +973,7 @@ from .tagesschau import (
|
|||||||
TagesschauIE,
|
TagesschauIE,
|
||||||
)
|
)
|
||||||
from .tass import TassIE
|
from .tass import TassIE
|
||||||
|
from .tastytrade import TastyTradeIE
|
||||||
from .tbs import TBSIE
|
from .tbs import TBSIE
|
||||||
from .tdslifeway import TDSLifewayIE
|
from .tdslifeway import TDSLifewayIE
|
||||||
from .teachertube import (
|
from .teachertube import (
|
||||||
@@ -1019,11 +1031,6 @@ from .trilulilu import TriluliluIE
|
|||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tubitv import TubiTvIE
|
from .tubitv import TubiTvIE
|
||||||
from .tudou import (
|
|
||||||
TudouIE,
|
|
||||||
TudouPlaylistIE,
|
|
||||||
TudouAlbumIE,
|
|
||||||
)
|
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tunein import (
|
from .tunein import (
|
||||||
TuneInClipIE,
|
TuneInClipIE,
|
||||||
|
@@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_urls(webpage):
|
||||||
mobj = re.search(
|
urls = []
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
for mobj in re.finditer(
|
||||||
if mobj is not None:
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||||
return mobj.group('url')
|
webpage):
|
||||||
|
urls.append(mobj.group('url'))
|
||||||
# Facebook API embed
|
# Facebook API embed
|
||||||
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
||||||
mobj = re.search(r'''(?x)<div[^>]+
|
for mobj in re.finditer(r'''(?x)<div[^>]+
|
||||||
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
||||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
|
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
|
||||||
if mobj is not None:
|
urls.append(mobj.group('url'))
|
||||||
return mobj.group('url')
|
return urls
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(useremail, password) = self._get_login_info()
|
(useremail, password) = self._get_login_info()
|
||||||
|
@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
|
|||||||
'format_id': f.get('name'),
|
'format_id': f.get('name'),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'source_preference': quality(f.get('name')),
|
'source_preference': quality(f.get('name')),
|
||||||
|
# quality metadata of http formats may be incorrect
|
||||||
|
'preference': -1,
|
||||||
})
|
})
|
||||||
# m3u8 URL format is reverse engineered from [1] (search for
|
# m3u8 URL format is reverse engineered from [1] (search for
|
||||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||||
|
@@ -1,7 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_urlencode,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for stream in streams['stream']:
|
for stream in streams['stream']:
|
||||||
stream_type = str(stream.get('type'))
|
stream_type = compat_str(stream.get('type'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': stream_type,
|
'format_id': stream_type,
|
||||||
'url': stream['_content'],
|
'url': stream['_content'],
|
||||||
|
@@ -85,11 +85,11 @@ class FourTubeIE(InfoExtractor):
|
|||||||
media_id = params[0]
|
media_id = params[0]
|
||||||
sources = ['%s' % p for p in params[2]]
|
sources = ['%s' % p for p in params[2]]
|
||||||
|
|
||||||
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
|
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||||
media_id, '+'.join(sources))
|
media_id, '+'.join(sources))
|
||||||
headers = {
|
headers = {
|
||||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||||
b'Origin': b'http://www.4tube.com',
|
b'Origin': b'https://www.4tube.com',
|
||||||
}
|
}
|
||||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||||
tokens = self._download_json(token_req, video_id)
|
tokens = self._download_json(token_req, video_id)
|
||||||
|
@@ -5,6 +5,7 @@ import itertools
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
int_or_none,
|
||||||
remove_end,
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': source,
|
'url': source,
|
||||||
'height': resolution,
|
'height': int_or_none(resolution),
|
||||||
} for source, resolution in zip(
|
} for source, resolution in zip(
|
||||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||||
|
|
||||||
|
@@ -112,7 +112,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)+(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||||
@@ -157,6 +157,9 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
|
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -6,62 +6,52 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GaskrankIE(InfoExtractor):
|
class GaskrankIE(InfoExtractor):
|
||||||
"""InfoExtractor for gaskrank.tv"""
|
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
|
||||||
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
|
_TESTS = [{
|
||||||
_TESTS = [
|
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
||||||
{
|
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
||||||
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
'info_dict': {
|
||||||
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
'id': '201601/26955',
|
||||||
'info_dict': {
|
'ext': 'mp4',
|
||||||
'id': '201601/26955',
|
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
|
||||||
'ext': 'mp4',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
|
'categories': ['motorrad-fun'],
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
||||||
'categories': ['motorrad-fun'],
|
'uploader_id': 'Bikefun',
|
||||||
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
'upload_date': '20170110',
|
||||||
'uploader_id': 'Bikefun',
|
'uploader_url': None,
|
||||||
'upload_date': '20170110',
|
|
||||||
'uploader_url': None,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
|
||||||
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '201106/15920',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'categories': ['racing'],
|
|
||||||
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
|
||||||
'uploader_id': 'IOM',
|
|
||||||
'upload_date': '20160506',
|
|
||||||
'uploader_url': 'www.iomtt.com',
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
}, {
|
||||||
|
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
||||||
|
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201106/15920',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'categories': ['racing'],
|
||||||
|
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
||||||
|
'uploader_id': 'IOM',
|
||||||
|
'upload_date': '20170523',
|
||||||
|
'uploader_url': 'www.iomtt.com',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
"""extract information from gaskrank.tv"""
|
|
||||||
def fix_json(code):
|
|
||||||
"""Removes trailing comma in json: {{},} --> {{}}"""
|
|
||||||
return re.sub(r',\s*}', r'}', js_to_json(code))
|
|
||||||
|
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'title', webpage, fatal=True)
|
||||||
|
|
||||||
categories = [re.match(self._VALID_URL, url).group('categories')]
|
categories = [re.match(self._VALID_URL, url).group('categories')]
|
||||||
title = self._search_regex(
|
|
||||||
r'movieName\s*:\s*\'([^\']*)\'',
|
|
||||||
webpage, 'title')
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'poster\s*:\s*\'([^\']*)\'',
|
|
||||||
webpage, 'thumbnail', default=None)
|
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
||||||
@@ -89,29 +79,14 @@ class GaskrankIE(InfoExtractor):
|
|||||||
if average_rating:
|
if average_rating:
|
||||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||||
|
|
||||||
playlist = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'playlist\s*:\s*\[([^\]]*)\]',
|
|
||||||
webpage, 'playlist', default='{}'),
|
|
||||||
display_id, transform_source=fix_json, fatal=False)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
||||||
playlist.get('0').get('src'), 'video id')
|
webpage, 'video id', default=display_id)
|
||||||
|
|
||||||
formats = []
|
entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
for key in playlist:
|
entry.update({
|
||||||
formats.append({
|
|
||||||
'url': playlist[key]['src'],
|
|
||||||
'format_id': key,
|
|
||||||
'quality': playlist[key].get('quality')})
|
|
||||||
self._sort_formats(formats, field_preference=['format_id'])
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
@@ -120,4 +95,7 @@ class GaskrankIE(InfoExtractor):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'average_rating': average_rating,
|
'average_rating': average_rating,
|
||||||
}
|
})
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
|
||||||
|
return entry
|
||||||
|
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
|||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
@@ -1521,6 +1522,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Facebook video #599637780109885',
|
'title': 'Facebook video #599637780109885',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Facebook <iframe> embed, plugin video
|
||||||
|
{
|
||||||
|
'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1754168231264132',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
|
||||||
|
'uploader': 'Tariq Ramadan (official)',
|
||||||
|
'timestamp': 1496758379,
|
||||||
|
'upload_date': '20170606',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# Facebook API embed
|
# Facebook API embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
||||||
@@ -1907,14 +1923,14 @@ class GenericIE(InfoExtractor):
|
|||||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
format_id = m.group('format_id')
|
format_id = compat_str(m.group('format_id'))
|
||||||
if format_id.endswith('mpegurl'):
|
if format_id.endswith('mpegurl'):
|
||||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
elif format_id == 'f4m':
|
elif format_id == 'f4m':
|
||||||
formats = self._extract_f4m_formats(url, video_id)
|
formats = self._extract_f4m_formats(url, video_id)
|
||||||
else:
|
else:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': m.group('format_id'),
|
'format_id': format_id,
|
||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
}]
|
}]
|
||||||
@@ -2032,6 +2048,13 @@ class GenericIE(InfoExtractor):
|
|||||||
video_description = self._og_search_description(webpage, default=None)
|
video_description = self._og_search_description(webpage, default=None)
|
||||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'title': video_title,
|
||||||
|
'description': video_description,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
})
|
||||||
|
|
||||||
# Look for Brightcove Legacy Studio embeds
|
# Look for Brightcove Legacy Studio embeds
|
||||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
@@ -2221,9 +2244,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded Facebook player
|
# Look for embedded Facebook player
|
||||||
facebook_url = FacebookIE._extract_url(webpage)
|
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||||
if facebook_url is not None:
|
if facebook_urls:
|
||||||
return self.url_result(facebook_url, 'Facebook')
|
return self.playlist_from_matches(facebook_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for embedded VK player
|
# Look for embedded VK player
|
||||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||||
@@ -2668,18 +2691,26 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||||
|
|
||||||
|
def merge_dicts(dict1, dict2):
|
||||||
|
merged = {}
|
||||||
|
for k, v in dict1.items():
|
||||||
|
if v is not None:
|
||||||
|
merged[k] = v
|
||||||
|
for k, v in dict2.items():
|
||||||
|
if v is None:
|
||||||
|
continue
|
||||||
|
if (k not in merged or
|
||||||
|
(isinstance(v, compat_str) and v and
|
||||||
|
isinstance(merged[k], compat_str) and
|
||||||
|
not merged[k])):
|
||||||
|
merged[k] = v
|
||||||
|
return merged
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
if json_ld.get('url'):
|
if json_ld.get('url'):
|
||||||
info_dict.update({
|
return merge_dicts(json_ld, info_dict)
|
||||||
'title': video_title or info_dict['title'],
|
|
||||||
'description': video_description,
|
|
||||||
'thumbnail': video_thumbnail,
|
|
||||||
'age_limit': age_limit
|
|
||||||
})
|
|
||||||
info_dict.update(json_ld)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
@@ -2697,9 +2728,7 @@ class GenericIE(InfoExtractor):
|
|||||||
if jwplayer_data:
|
if jwplayer_data:
|
||||||
info = self._parse_jwplayer_data(
|
info = self._parse_jwplayer_data(
|
||||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
if not info.get('title'):
|
return merge_dicts(info, info_dict)
|
||||||
info['title'] = video_title
|
|
||||||
return info
|
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
|
@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
|
|||||||
video_url = gfy.get('%sUrl' % format_id)
|
video_url = gfy.get('%sUrl' % format_id)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
filesize = gfy.get('%sSize' % format_id)
|
filesize = int_or_none(gfy.get('%sSize' % format_id))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@@ -1,66 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
from ..utils import js_to_json
|
|
||||||
|
|
||||||
|
|
||||||
class GodTVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Randy Needham',
|
|
||||||
'duration': 3615.08,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/playlist/bible-study',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'bible-study',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 37,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/node/15097',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/live/africa',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/liveevents',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
settings = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'settings', default='{}'),
|
|
||||||
display_id, transform_source=js_to_json, fatal=False)
|
|
||||||
|
|
||||||
ooyala_id = None
|
|
||||||
|
|
||||||
if settings:
|
|
||||||
playlist = settings.get('playlist')
|
|
||||||
if playlist and isinstance(playlist, list):
|
|
||||||
entries = [
|
|
||||||
OoyalaIE._build_url_result(video['content_id'])
|
|
||||||
for video in playlist if video.get('content_id')]
|
|
||||||
if entries:
|
|
||||||
return self.playlist_result(entries, display_id)
|
|
||||||
ooyala_id = settings.get('ooyala', {}).get('content_id')
|
|
||||||
|
|
||||||
if not ooyala_id:
|
|
||||||
ooyala_id = self._search_regex(
|
|
||||||
r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
|
|
||||||
webpage, 'ooyala id', group='id')
|
|
||||||
|
|
||||||
return OoyalaIE._build_url_result(ooyala_id)
|
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': e.tag,
|
'format_id': compat_str(e.tag),
|
||||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
'height': self._int(e.get('height'), 'height'),
|
'height': self._int(e.get('height'), 'height'),
|
||||||
'width': self._int(e.get('width'), 'width'),
|
'width': self._int(e.get('width'), 'width'),
|
||||||
|
@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
||||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||||
|
|
||||||
|
resolutions = {}
|
||||||
|
for fmt in fmt_list:
|
||||||
|
mobj = re.search(
|
||||||
|
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||||
|
if mobj:
|
||||||
|
resolutions[mobj.group('format_id')] = (
|
||||||
|
int(mobj.group('width')), int(mobj.group('height')))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
|
for fmt_stream in fmt_stream_map:
|
||||||
fmt_id, fmt_url = fmt_stream.split('|')
|
fmt_stream_split = fmt_stream.split('|')
|
||||||
resolution = fmt.split('/')[1]
|
if len(fmt_stream_split) < 2:
|
||||||
width, height = resolution.split('x')
|
continue
|
||||||
formats.append({
|
format_id, format_url = fmt_stream_split[:2]
|
||||||
'url': lowercase_escape(fmt_url),
|
f = {
|
||||||
'format_id': fmt_id,
|
'url': lowercase_escape(format_url),
|
||||||
'resolution': resolution,
|
'format_id': format_id,
|
||||||
'width': int_or_none(width),
|
'ext': self._FORMATS_EXT[format_id],
|
||||||
'height': int_or_none(height),
|
}
|
||||||
'ext': self._FORMATS_EXT[fmt_id],
|
resolution = resolutions.get(format_id)
|
||||||
})
|
if resolution:
|
||||||
|
f.update({
|
||||||
|
'width': resolution[0],
|
||||||
|
'height': resolution[0],
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -7,14 +7,19 @@ from .common import InfoExtractor
|
|||||||
class HGTVComShowIE(InfoExtractor):
|
class HGTVComShowIE(InfoExtractor):
|
||||||
IE_NAME = 'hgtv.com:show'
|
IE_NAME = 'hgtv.com:show'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
|
# data-module="video"
|
||||||
|
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'flip-or-flop-full-episodes-videos',
|
'id': 'flip-or-flop-full-episodes-season-4-videos',
|
||||||
'title': 'Flip or Flop Full Episodes',
|
'title': 'Flip or Flop Full Episodes',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 15,
|
'playlist_mincount': 15,
|
||||||
}
|
}, {
|
||||||
|
# data-deferred-module="video"
|
||||||
|
'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
@@ -23,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
|
|||||||
|
|
||||||
config = self._parse_json(
|
config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||||
webpage, 'video config'),
|
webpage, 'video config'),
|
||||||
display_id)['channels'][0]
|
display_id)['channels'][0]
|
||||||
|
|
||||||
|
@@ -16,8 +16,8 @@ from ..utils import (
|
|||||||
|
|
||||||
class HitboxIE(InfoExtractor):
|
class HitboxIE(InfoExtractor):
|
||||||
IE_NAME = 'hitbox'
|
IE_NAME = 'hitbox'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hitbox.tv/video/203213',
|
'url': 'http://www.hitbox.tv/video/203213',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '203213',
|
'id': '203213',
|
||||||
@@ -38,13 +38,15 @@ class HitboxIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _extract_metadata(self, url, video_id):
|
def _extract_metadata(self, url, video_id):
|
||||||
thumb_base = 'https://edge.sf.hitbox.tv'
|
thumb_base = 'https://edge.sf.hitbox.tv'
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
'%s/%s' % (url, video_id), video_id,
|
'%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
|
||||||
'Downloading metadata JSON')
|
|
||||||
|
|
||||||
date = 'media_live_since'
|
date = 'media_live_since'
|
||||||
media_type = 'livestream'
|
media_type = 'livestream'
|
||||||
@@ -63,14 +65,15 @@ class HitboxIE(InfoExtractor):
|
|||||||
views = int_or_none(video_meta.get('media_views'))
|
views = int_or_none(video_meta.get('media_views'))
|
||||||
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
||||||
categories = [video_meta.get('category_name')]
|
categories = [video_meta.get('category_name')]
|
||||||
thumbs = [
|
thumbs = [{
|
||||||
{'url': thumb_base + video_meta.get('media_thumbnail'),
|
'url': thumb_base + video_meta.get('media_thumbnail'),
|
||||||
'width': 320,
|
'width': 320,
|
||||||
'height': 180},
|
'height': 180
|
||||||
{'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
}, {
|
||||||
'width': 768,
|
'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
||||||
'height': 432},
|
'width': 768,
|
||||||
]
|
'height': 432
|
||||||
|
}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -90,7 +93,7 @@ class HitboxIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
player_config = self._download_json(
|
player_config = self._download_json(
|
||||||
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@@ -121,8 +124,7 @@ class HitboxIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
metadata = self._extract_metadata(
|
metadata = self._extract_metadata(
|
||||||
'https://www.hitbox.tv/api/media/video',
|
'https://www.smashcast.tv/api/media/video', video_id)
|
||||||
video_id)
|
|
||||||
metadata['formats'] = formats
|
metadata['formats'] = formats
|
||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
@@ -130,8 +132,8 @@ class HitboxIE(InfoExtractor):
|
|||||||
|
|
||||||
class HitboxLiveIE(HitboxIE):
|
class HitboxLiveIE(HitboxIE):
|
||||||
IE_NAME = 'hitbox:live'
|
IE_NAME = 'hitbox:live'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hitbox.tv/dimak',
|
'url': 'http://www.hitbox.tv/dimak',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dimak',
|
'id': 'dimak',
|
||||||
@@ -146,13 +148,20 @@ class HitboxLiveIE(HitboxIE):
|
|||||||
# live
|
# live
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.smashcast.tv/dimak',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
player_config = self._download_json(
|
player_config = self._download_json(
|
||||||
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
|
'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@@ -197,8 +206,7 @@ class HitboxLiveIE(HitboxIE):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
metadata = self._extract_metadata(
|
metadata = self._extract_metadata(
|
||||||
'https://www.hitbox.tv/api/media/live',
|
'https://www.smashcast.tv/api/media/live', video_id)
|
||||||
video_id)
|
|
||||||
metadata['formats'] = formats
|
metadata['formats'] = formats
|
||||||
metadata['is_live'] = True
|
metadata['is_live'] = True
|
||||||
metadata['title'] = self._live_title(metadata.get('title'))
|
metadata['title'] = self._live_title(metadata.get('title'))
|
||||||
|
@@ -89,6 +89,11 @@ class IGNIE(InfoExtractor):
|
|||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# videoId pattern
|
||||||
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
@@ -98,6 +103,8 @@ class IGNIE(InfoExtractor):
|
|||||||
r'data-video-id="(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<object id="vid_(.+?)"',
|
r'<object id="vid_(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
|
r'videoId"\s*:\s*"(.+?)"',
|
||||||
|
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
|
||||||
]
|
]
|
||||||
return self._search_regex(res_id, webpage, 'video id', default=None)
|
return self._search_regex(res_id, webpage, 'video id', default=None)
|
||||||
|
|
||||||
|
@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
|
|||||||
webpage, 'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
publish_date = unified_strdate(self._html_search_meta(
|
publish_date = unified_strdate(self._html_search_meta(
|
||||||
'citation_publication_date', webpage, 'publish date', fatal=False))
|
'citation_publication_date', webpage, 'publish date', fatal=False))
|
||||||
comment_count = self._html_search_regex(
|
comment_count = int(self._html_search_regex(
|
||||||
r'<meta name="num_comments" content="(\d+) Comments?"',
|
r'<meta name="num_comments" content="(\d+) Comments?"',
|
||||||
webpage, 'comment count', fatal=False)
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -115,8 +115,9 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
|
|
||||||
for a_format in info_dict['formats']:
|
for a_format in info_dict['formats']:
|
||||||
if not a_format.get('height'):
|
if not a_format.get('height'):
|
||||||
a_format['height'] = self._search_regex(
|
a_format['height'] = int_or_none(self._search_regex(
|
||||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label', default=None)
|
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||||
|
default=None))
|
||||||
|
|
||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
|
|
||||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
class MedialaanIE(InfoExtractor):
|
class MedialaanIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.|nieuws\.)?
|
||||||
(?:
|
(?:
|
||||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||||
(?:
|
(?:
|
||||||
@@ -85,6 +85,22 @@ class MedialaanIE(InfoExtractor):
|
|||||||
# clip
|
# clip
|
||||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# http/s redirect
|
||||||
|
'url': 'https://vtmkzoom.be/video?aid=45724',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '257136373657000',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'K3 Dansstudio Ushuaia afl.6',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Requires account credentials',
|
||||||
|
}, {
|
||||||
|
# nieuws.vtm.be
|
||||||
|
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@@ -146,6 +162,8 @@ class MedialaanIE(InfoExtractor):
|
|||||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||||
if player:
|
if player:
|
||||||
video = player[-1]
|
video = player[-1]
|
||||||
|
if video['videoUrl'] in ('http', 'https'):
|
||||||
|
return self.url_result(video['url'], MedialaanIE.ie_key())
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video['videoUrl'],
|
'url': video['videoUrl'],
|
||||||
|
@@ -136,11 +136,9 @@ class MiTeleIE(InfoExtractor):
|
|||||||
video_id, 'Downloading gigya script')
|
video_id, 'Downloading gigya script')
|
||||||
|
|
||||||
# Get a appKey/uuid for getting the session key
|
# Get a appKey/uuid for getting the session key
|
||||||
appKey_var = self._search_regex(
|
|
||||||
r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
|
|
||||||
gigya_sc, 'appKey variable')
|
|
||||||
appKey = self._search_regex(
|
appKey = self._search_regex(
|
||||||
r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
|
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
||||||
|
gigya_sc, 'appKey')
|
||||||
|
|
||||||
session_json = self._download_json(
|
session_json = self._download_json(
|
||||||
'https://appgrid-api.cloud.accedo.tv/session',
|
'https://appgrid-api.cloud.accedo.tv/session',
|
||||||
|
@@ -68,10 +68,6 @@ class MSNIE(InfoExtractor):
|
|||||||
format_url = file_.get('url')
|
format_url = file_.get('url')
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if ext == 'ism':
|
|
||||||
formats.extend(self._extract_ism_formats(
|
|
||||||
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
|
||||||
if 'm3u8' in format_url:
|
if 'm3u8' in format_url:
|
||||||
# m3u8_native should not be used here until
|
# m3u8_native should not be used here until
|
||||||
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
||||||
@@ -79,6 +75,9 @@ class MSNIE(InfoExtractor):
|
|||||||
format_url, display_id, 'mp4',
|
format_url, display_id, 'mp4',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
|
elif determine_ext(format_url) == 'ism':
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@@ -1,6 +1,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_filesize,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsIE(InfoExtractor):
|
class NewgroundsIE(InfoExtractor):
|
||||||
@@ -13,7 +22,10 @@ class NewgroundsIE(InfoExtractor):
|
|||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'B7 - BusMode',
|
'title': 'B7 - BusMode',
|
||||||
'uploader': 'Burn7',
|
'uploader': 'Burn7',
|
||||||
}
|
'timestamp': 1378878540,
|
||||||
|
'upload_date': '20130911',
|
||||||
|
'duration': 143,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||||
@@ -22,25 +34,133 @@ class NewgroundsIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dancin',
|
'title': 'Dancin',
|
||||||
'uploader': 'Squirrelman82',
|
'uploader': 'Squirrelman82',
|
||||||
|
'timestamp': 1460256780,
|
||||||
|
'upload_date': '20160410',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# source format unavailable, additional mp4 formats
|
||||||
|
'url': 'http://www.newgrounds.com/portal/view/689400',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '689400',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ZTV News Episode 8',
|
||||||
|
'uploader': 'BennettTheSage',
|
||||||
|
'timestamp': 1487965140,
|
||||||
|
'upload_date': '20170224',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
media_id = self._match_id(url)
|
media_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, media_id)
|
webpage = self._download_webpage(url, media_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>([^>]+)</title>', webpage, 'title')
|
r'<title>([^>]+)</title>', webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
media_url = self._parse_json(self._search_regex(
|
||||||
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
|
r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
|
||||||
|
|
||||||
music_url = self._parse_json(self._search_regex(
|
formats = [{
|
||||||
r'"url":("[^"]+"),', webpage, ''), media_id)
|
'url': media_url,
|
||||||
|
'format_id': 'source',
|
||||||
|
'quality': 1,
|
||||||
|
}]
|
||||||
|
|
||||||
|
max_resolution = int_or_none(self._search_regex(
|
||||||
|
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||||
|
default=None))
|
||||||
|
if max_resolution:
|
||||||
|
url_base = media_url.rpartition('.')[0]
|
||||||
|
for resolution in (360, 720, 1080):
|
||||||
|
if resolution > max_resolution:
|
||||||
|
break
|
||||||
|
formats.append({
|
||||||
|
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||||
|
'format_id': '%dp' % resolution,
|
||||||
|
'height': resolution,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._check_formats(formats, media_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
uploader = self._search_regex(
|
||||||
|
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
timestamp = unified_timestamp(self._search_regex(
|
||||||
|
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
|
||||||
|
default=None))
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
|
||||||
|
default=None))
|
||||||
|
|
||||||
|
filesize_approx = parse_filesize(self._html_search_regex(
|
||||||
|
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
|
||||||
|
default=None))
|
||||||
|
if len(formats) == 1:
|
||||||
|
formats[0]['filesize_approx'] = filesize_approx
|
||||||
|
|
||||||
|
if '<dd>Song' in webpage:
|
||||||
|
formats[0]['vcodec'] = 'none'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': music_url,
|
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NewgroundsPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.newgrounds.com/collection/cats',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cats',
|
||||||
|
'title': 'Cats',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 46,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ZONE-SAMA',
|
||||||
|
'title': 'Portal Search: ZONE-SAMA',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 47,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.newgrounds.com/audio/search/title/cats',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<title>([^>]+)</title>', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
# cut left menu
|
||||||
|
webpage = self._search_regex(
|
||||||
|
r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
|
||||||
|
webpage, 'wide column', default=webpage)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for a, path, media_id in re.findall(
|
||||||
|
r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
|
||||||
|
webpage):
|
||||||
|
a_class = extract_attributes(a).get('class')
|
||||||
|
if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
|
||||||
|
continue
|
||||||
|
entries.append(
|
||||||
|
self.url_result(
|
||||||
|
'https://www.newgrounds.com/%s' % path,
|
||||||
|
ie=NewgroundsIE.ie_key(), video_id=media_id))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
@@ -83,9 +83,12 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'uploader_id': '312',
|
'uploader_id': '312',
|
||||||
},
|
},
|
||||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||||
|
}, {
|
||||||
|
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
@@ -6,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
@@ -56,17 +57,24 @@ class NJPWWorldIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
|
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
||||||
player_url = compat_urlparse.urljoin(url, player_url)
|
player = extract_attributes(mobj.group(0))
|
||||||
|
player_path = player.get('href')
|
||||||
|
if not player_path:
|
||||||
|
continue
|
||||||
|
kind = self._search_regex(
|
||||||
|
r'(low|high)$', player.get('class') or '', 'kind',
|
||||||
|
default='low')
|
||||||
|
player_url = compat_urlparse.urljoin(url, player_path)
|
||||||
player_page = self._download_webpage(
|
player_page = self._download_webpage(
|
||||||
player_url, video_id, note='Downloading player page')
|
player_url, video_id, note='Downloading player page')
|
||||||
|
|
||||||
entries = self._parse_html5_media_entries(
|
entries = self._parse_html5_media_entries(
|
||||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
||||||
m3u8_entry_protocol='m3u8_native',
|
m3u8_entry_protocol='m3u8_native')
|
||||||
preference=2 if 'hq' in kind else 1)
|
kind_formats = entries[0]['formats']
|
||||||
formats.extend(entries[0]['formats'])
|
for f in kind_formats:
|
||||||
|
f['quality'] = 2 if kind == 'high' else 1
|
||||||
|
formats.extend(kind_formats)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
|
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||||
ntr\.nl/(?:[^/]+/){2,}|
|
ntr\.nl/(?:[^/]+/){2,}|
|
||||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||||
zapp\.nl/[^/]+/[^/]+/
|
zapp\.nl/[^/]+/[^/]+/
|
||||||
@@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE):
|
|||||||
# live stream
|
# live stream
|
||||||
'url': 'npo:LI_NL1_4188102',
|
'url': 'npo:LI_NL1_4188102',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
NO_DEFAULT,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
remove_start,
|
remove_start,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
@@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor):
|
|||||||
'upload_date': '20170214',
|
'upload_date': '20170214',
|
||||||
'timestamp': 1487078046,
|
'timestamp': 1487078046,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# embedded via pulsembed
|
||||||
|
'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '501235.965429946',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
|
||||||
|
'upload_date': '20170622',
|
||||||
|
'timestamp': 1498159955,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _search_mvp_id(self, webpage, default=NO_DEFAULT):
|
||||||
|
return self._search_regex(
|
||||||
|
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
|
||||||
|
default=default)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
mvp_id = self._search_regex(
|
mvp_id = self._search_mvp_id(webpage, default=None)
|
||||||
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
|
|
||||||
|
if not mvp_id:
|
||||||
|
pulsembed_url = self._search_regex(
|
||||||
|
r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
|
||||||
|
webpage, 'pulsembed url', group='url')
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
pulsembed_url, video_id, 'Downloading pulsembed webpage')
|
||||||
|
mvp_id = self._search_mvp_id(webpage)
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||||
|
@@ -3,12 +3,14 @@ import re
|
|||||||
import base64
|
import base64
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
ExtractorError,
|
|
||||||
unsmuggle_url,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
|
|
||||||
@@ -39,13 +41,15 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
if cur_auth_data['authorized']:
|
if cur_auth_data['authorized']:
|
||||||
for stream in cur_auth_data['streams']:
|
for stream in cur_auth_data['streams']:
|
||||||
s_url = base64.b64decode(
|
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
if not url_data:
|
||||||
if s_url in urls:
|
continue
|
||||||
|
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||||
|
if not s_url or s_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.append(s_url)
|
urls.append(s_url)
|
||||||
ext = determine_ext(s_url, None)
|
ext = determine_ext(s_url, None)
|
||||||
delivery_type = stream['delivery_type']
|
delivery_type = stream.get('delivery_type')
|
||||||
if delivery_type == 'hls' or ext == 'm3u8':
|
if delivery_type == 'hls' or ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
|
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
|
||||||
@@ -65,7 +69,7 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': s_url,
|
'url': s_url,
|
||||||
'ext': ext or stream.get('delivery_type'),
|
'ext': ext or delivery_type,
|
||||||
'vcodec': stream.get('video_codec'),
|
'vcodec': stream.get('video_codec'),
|
||||||
'format_id': delivery_type,
|
'format_id': delivery_type,
|
||||||
'width': int_or_none(stream.get('width')),
|
'width': int_or_none(stream.get('width')),
|
||||||
@@ -136,6 +140,11 @@ class OoyalaIE(OoyalaBaseIE):
|
|||||||
'title': 'Divide Tool Path.mp4',
|
'title': 'Divide Tool Path.mp4',
|
||||||
'duration': 204.405,
|
'duration': 204.405,
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# empty stream['url']['data']
|
||||||
|
'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -14,7 +15,6 @@ from ..utils import (
|
|||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urljoin,
|
urljoin,
|
||||||
urlencode_postdata,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -45,22 +45,15 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
webpage = self._download_webpage(self._PACKT_BASE, None)
|
|
||||||
login_form = self._form_hidden_inputs(
|
|
||||||
'packt-user-login-form', webpage)
|
|
||||||
login_form.update({
|
|
||||||
'email': username,
|
|
||||||
'password': password,
|
|
||||||
})
|
|
||||||
self._download_webpage(
|
|
||||||
self._PACKT_BASE, None, 'Logging in as %s' % username,
|
|
||||||
data=urlencode_postdata(login_form))
|
|
||||||
try:
|
try:
|
||||||
self._TOKEN = self._download_json(
|
self._TOKEN = self._download_json(
|
||||||
'%s/users/tokens/sessions' % self._MAPT_REST, None,
|
self._MAPT_REST + '/users/tokens', None,
|
||||||
'Downloading Authorization Token')['data']['token']
|
'Downloading Authorization Token', data=json.dumps({
|
||||||
|
'email': username,
|
||||||
|
'password': password,
|
||||||
|
}).encode())['data']['access']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
|
||||||
message = self._parse_json(e.cause.read().decode(), None)['message']
|
message = self._parse_json(e.cause.read().decode(), None)['message']
|
||||||
raise ExtractorError(message, expected=True)
|
raise ExtractorError(message, expected=True)
|
||||||
raise
|
raise
|
||||||
@@ -83,7 +76,7 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
|
|
||||||
headers = {}
|
headers = {}
|
||||||
if self._TOKEN:
|
if self._TOKEN:
|
||||||
headers['Authorization'] = self._TOKEN
|
headers['Authorization'] = 'Bearer ' + self._TOKEN
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
||||||
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
||||||
|
@@ -10,13 +10,13 @@ from ..utils import (
|
|||||||
|
|
||||||
class PandaTVIE(InfoExtractor):
|
class PandaTVIE(InfoExtractor):
|
||||||
IE_DESC = '熊猫TV'
|
IE_DESC = '熊猫TV'
|
||||||
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.panda.tv/10091',
|
'url': 'http://www.panda.tv/66666',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10091',
|
'id': '66666',
|
||||||
'title': 're:.+',
|
'title': 're:.+',
|
||||||
'uploader': '囚徒',
|
'uploader': '刘杀鸡',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
@@ -24,13 +24,16 @@ class PandaTVIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Live stream is offline',
|
'skip': 'Live stream is offline',
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.panda.tv/66666',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
||||||
|
|
||||||
error_code = config.get('errno', 0)
|
error_code = config.get('errno', 0)
|
||||||
if error_code is not 0:
|
if error_code is not 0:
|
||||||
@@ -74,7 +77,7 @@ class PandaTVIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
||||||
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
||||||
'format_id': '%s-%s' % (k, ext),
|
'format_id': '%s-%s' % (k, ext),
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
|
@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
IE_NAME = 'pandora.tv'
|
IE_NAME = 'pandora.tv'
|
||||||
IE_DESC = '판도라TV'
|
IE_DESC = '판도라TV'
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53294230',
|
'id': '53294230',
|
||||||
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '54721744',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '[HD] JAPAN COUNTDOWN 170423',
|
||||||
|
'description': '[HD] JAPAN COUNTDOWN 170423',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1704.9,
|
||||||
|
'upload_date': '20170423',
|
||||||
|
'uploader': 'GOGO_UCC',
|
||||||
|
'uploader_id': 'gogoucc',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Test metadata only
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
'description': info.get('body'),
|
'description': info.get('body'),
|
||||||
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
||||||
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
||||||
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
|
'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||||
'uploader': info.get('nickname'),
|
'uploader': info.get('nickname'),
|
||||||
'uploader_id': info.get('upload_userid'),
|
'uploader_id': info.get('upload_userid'),
|
||||||
'view_count': str_to_int(info.get('hit')),
|
'view_count': str_to_int(info.get('hit')),
|
||||||
|
@@ -65,7 +65,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
content = self._search_regex(
|
content = self._search_regex(
|
||||||
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
|
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||||
webpage, 'content')
|
webpage, 'content')
|
||||||
|
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
|
@@ -252,11 +252,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
playlist = self._parse_json(
|
playlist = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||||
playlist_id)
|
'playlist', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
title = playlist.get('title') or self._search_regex(
|
||||||
|
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
entries, playlist_id, title, playlist.get('description'))
|
||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||||
@@ -296,6 +299,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
break
|
break
|
||||||
|
raise
|
||||||
page_entries = self._extract_entries(webpage)
|
page_entries = self._extract_entries(webpage)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
|
@@ -191,11 +191,12 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if relinker_info.get(
|
||||||
|
'is_live') else title,
|
||||||
'alt_title': media.get('subtitle'),
|
'alt_title': media.get('subtitle'),
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'uploader': media.get('channel'),
|
'uploader': strip_or_none(media.get('channel')),
|
||||||
'creator': media.get('editor'),
|
'creator': strip_or_none(media.get('editor')),
|
||||||
'duration': parse_duration(video.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
@@ -208,10 +209,46 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
info.update(relinker_info)
|
info.update(relinker_info)
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class RaiPlayLiveIE(RaiBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||||
|
'display_id': 'rainews24',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'md5:6eca31500550f9376819f174e5644754',
|
||||||
|
'uploader': 'Rai News 24',
|
||||||
|
'creator': 'Rai News 24',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
||||||
|
webpage, 'content id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': RaiPlayIE.ie_key(),
|
||||||
|
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class RedBullTVIE(InfoExtractor):
|
class RedBullTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
|
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# film
|
# film
|
||||||
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
||||||
@@ -42,6 +42,22 @@ class RedBullTVIE(InfoExtractor):
|
|||||||
'season_number': 2,
|
'season_number': 2,
|
||||||
'episode_number': 4,
|
'episode_number': 4,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# segment
|
||||||
|
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'AP-1QSAQJ6V52111',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Semi Finals - Vans Park Series Pro Tour',
|
||||||
|
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
|
||||||
|
'duration': 11791.991,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -82,7 +98,8 @@ class RedBullTVIE(InfoExtractor):
|
|||||||
title = info['title'].strip()
|
title = info['title'].strip()
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
video['url'], video_id, 'mp4', 'm3u8_native')
|
video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@@ -15,7 +15,7 @@ class RtlNlIE(InfoExtractor):
|
|||||||
https?://(?:www\.)?
|
https?://(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
||||||
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
|
rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
|
||||||
)
|
)
|
||||||
(?P<id>[0-9a-f-]+)'''
|
(?P<id>[0-9a-f-]+)'''
|
||||||
|
|
||||||
@@ -70,6 +70,9 @@ class RtlNlIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
|
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -13,11 +13,15 @@ from ..utils import (
|
|||||||
class RUTVIE(InfoExtractor):
|
class RUTVIE(InfoExtractor):
|
||||||
IE_DESC = 'RUTV.RU'
|
IE_DESC = 'RUTV.RU'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://player\.(?:rutv\.ru|vgtrk\.com)/
|
https?://
|
||||||
(?P<path>flash\d+v/container\.swf\?id=
|
(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
|
||||||
|iframe/(?P<type>swf|video|live)/id/
|
(?P<path>
|
||||||
|index/iframe/cast_id/)
|
flash\d+v/container\.swf\?id=|
|
||||||
(?P<id>\d+)'''
|
iframe/(?P<type>swf|video|live)/id/|
|
||||||
|
index/iframe/cast_id/
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -99,17 +103,21 @@ class RUTVIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_url(cls, webpage):
|
def _extract_url(cls, webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
|
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
101
youtube_dl/extractor/ruv.py
Normal file
101
youtube_dl/extractor/ruv.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RuvIE(InfoExtractor):
    """Extractor for ruv.is media pages (``/sarpurinn/...`` and ``/node/...``).

    The media URL and the optional poster image are read from JavaScript
    assignments of the form ``video.<field> = "<value>"`` embedded in the
    page; everything else comes from the page's meta tags.
    """

    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its media.

        The ``id`` is the numeric node id taken from a ``<link>`` pointing at
        ``https://www.ruv.is/node/<id>``; when no such link is present the
        display id matched from the URL is used instead.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        # Matches `video.<field> = "<value>"` with either quote style.
        # Group 1 is the quote character; the value is the named group `url`,
        # so every lookup using this pattern must request group='url'.
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', webpage, 'video URL', group='url')

        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            webpage, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            # Audio-only programme: flag it so it is not treated as video.
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(webpage, default=None)
        # Fix: without group='url' the fallback returned the first capture
        # group of FIELD_RE, i.e. the quote character, not the poster URL.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False,
            group='url')
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }
|
@@ -16,7 +16,6 @@ from ..utils import (
|
|||||||
|
|
||||||
class SafariBaseIE(InfoExtractor):
|
class SafariBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
|
||||||
_NETRC_MACHINE = 'safari'
|
_NETRC_MACHINE = 'safari'
|
||||||
|
|
||||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
||||||
@@ -28,10 +27,6 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
# We only need to log in once for courses or individual videos
|
|
||||||
if self.LOGGED_IN:
|
|
||||||
return
|
|
||||||
|
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
@@ -39,11 +34,17 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
headers = std_headers.copy()
|
headers = std_headers.copy()
|
||||||
if 'Referer' not in headers:
|
if 'Referer' not in headers:
|
||||||
headers['Referer'] = self._LOGIN_URL
|
headers['Referer'] = self._LOGIN_URL
|
||||||
login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
|
|
||||||
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
login_page_request, None,
|
self._LOGIN_URL, None, 'Downloading login form', headers=headers)
|
||||||
'Downloading login form')
|
|
||||||
|
def is_logged(webpage):
|
||||||
|
return any(re.search(p, webpage) for p in (
|
||||||
|
r'href=["\']/accounts/logout/', r'>Sign Out<'))
|
||||||
|
|
||||||
|
if is_logged(login_page):
|
||||||
|
self.LOGGED_IN = True
|
||||||
|
return
|
||||||
|
|
||||||
csrf = self._html_search_regex(
|
csrf = self._html_search_regex(
|
||||||
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
|
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
|
||||||
@@ -62,14 +63,12 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
request, None, 'Logging in as %s' % username)
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
if not is_logged(login_page):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Login failed; make sure your credentials are correct and try again.',
|
'Login failed; make sure your credentials are correct and try again.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
SafariBaseIE.LOGGED_IN = True
|
self.LOGGED_IN = True
|
||||||
|
|
||||||
self.to_screen('Login successful')
|
|
||||||
|
|
||||||
|
|
||||||
class SafariIE(SafariBaseIE):
|
class SafariIE(SafariBaseIE):
|
||||||
|
@@ -32,8 +32,9 @@ class SexuIE(InfoExtractor):
|
|||||||
formats = [{
|
formats = [{
|
||||||
'url': source['file'].replace('\\', ''),
|
'url': source['file'].replace('\\', ''),
|
||||||
'format_id': source.get('label'),
|
'format_id': source.get('label'),
|
||||||
'height': self._search_regex(
|
'height': int(self._search_regex(
|
||||||
r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
|
r'^(\d+)[pP]', source.get('label', ''), 'height',
|
||||||
|
default=None)),
|
||||||
} for source in sources if source.get('file')]
|
} for source in sources if source.get('file')]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -8,7 +8,11 @@ from ..compat import (
|
|||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
)
|
)
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
@@ -169,10 +173,11 @@ class SohuIE(InfoExtractor):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'filesize': data['clipsBytes'][i],
|
'filesize': int_or_none(
|
||||||
'width': data['width'],
|
try_get(data, lambda x: x['clipsBytes'][i])),
|
||||||
'height': data['height'],
|
'width': int_or_none(data.get('width')),
|
||||||
'fps': data['fps'],
|
'height': int_or_none(data.get('height')),
|
||||||
|
'fps': int_or_none(data.get('fps')),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -136,7 +136,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolv_url(cls, url):
|
def _resolv_url(cls, url):
|
||||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||||
|
|
||||||
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
||||||
track_id = compat_str(info['id'])
|
track_id = compat_str(info['id'])
|
||||||
@@ -174,7 +174,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
# We have to retrieve the url
|
# We have to retrieve the url
|
||||||
format_dict = self._download_json(
|
format_dict = self._download_json(
|
||||||
'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||||
track_id, 'Downloading track url', query={
|
track_id, 'Downloading track url', query={
|
||||||
'client_id': self._CLIENT_ID,
|
'client_id': self._CLIENT_ID,
|
||||||
'secret_token': secret_token,
|
'secret_token': secret_token,
|
||||||
@@ -236,7 +236,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
track_id = mobj.group('track_id')
|
track_id = mobj.group('track_id')
|
||||||
|
|
||||||
if track_id is not None:
|
if track_id is not None:
|
||||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||||
full_title = track_id
|
full_title = track_id
|
||||||
token = mobj.group('secret_token')
|
token = mobj.group('secret_token')
|
||||||
if token:
|
if token:
|
||||||
@@ -261,7 +261,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_resolve(full_title)
|
self.report_resolve(full_title)
|
||||||
|
|
||||||
url = 'http://soundcloud.com/%s' % resolve_title
|
url = 'https://soundcloud.com/%s' % resolve_title
|
||||||
info_json_url = self._resolv_url(url)
|
info_json_url = self._resolv_url(url)
|
||||||
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
||||||
|
|
||||||
@@ -290,7 +290,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
|||||||
'id': '2284613',
|
'id': '2284613',
|
||||||
'title': 'The Royal Concept EP',
|
'title': 'The Royal Concept EP',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 5,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
|
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -304,7 +304,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
|||||||
# extract simple title (uploader + slug of song title)
|
# extract simple title (uploader + slug of song title)
|
||||||
slug_title = mobj.group('slug_title')
|
slug_title = mobj.group('slug_title')
|
||||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||||
|
|
||||||
token = mobj.group('token')
|
token = mobj.group('token')
|
||||||
if token:
|
if token:
|
||||||
@@ -380,7 +380,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
|
|||||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7098329',
|
'id': '7098329',
|
||||||
'title': 'GRYNPYRET (Spotlight)',
|
'title': 'Grynpyret (Spotlight)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 1,
|
'playlist_mincount': 1,
|
||||||
}]
|
}]
|
||||||
@@ -410,7 +410,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader = mobj.group('user')
|
uploader = mobj.group('user')
|
||||||
|
|
||||||
url = 'http://soundcloud.com/%s/' % uploader
|
url = 'https://soundcloud.com/%s/' % uploader
|
||||||
resolv_url = self._resolv_url(url)
|
resolv_url = self._resolv_url(url)
|
||||||
user = self._download_json(
|
user = self._download_json(
|
||||||
resolv_url, uploader, 'Downloading user info')
|
resolv_url, uploader, 'Downloading user info')
|
||||||
@@ -473,7 +473,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
|||||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||||
IE_NAME = 'soundcloud:playlist'
|
IE_NAME = 'soundcloud:playlist'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
'url': 'https://api.soundcloud.com/playlists/4110309',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4110309',
|
'id': '4110309',
|
||||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||||
|
@@ -21,6 +21,17 @@ class StreamangoIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '20170315_150006.mp4',
|
'title': '20170315_150006.mp4',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# no og:title
|
||||||
|
'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'foqebrpftarclpob',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'foqebrpftarclpob',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -31,7 +42,7 @@ class StreamangoIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage, default=video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
|
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
|
||||||
|
@@ -26,7 +26,7 @@ class StreamCZIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||||
'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
|
'md5': '934bb6a6d220d99c010783c9719960d5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '765767',
|
'id': '765767',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -37,7 +37,7 @@ class StreamCZIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
||||||
'md5': 'e54a254fb8b871968fd8403255f28589',
|
'md5': '849a88c1e1ca47d41403c2ba5e59e261',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10002447',
|
'id': '10002447',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -85,6 +85,14 @@ class StreamCZIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
title = data['name']
|
title = data['name']
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
srt_url = data.get('subtitles_srt')
|
||||||
|
if srt_url:
|
||||||
|
subtitles['cs'] = [{
|
||||||
|
'ext': 'srt',
|
||||||
|
'url': srt_url,
|
||||||
|
}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -93,4 +101,5 @@ class StreamCZIE(InfoExtractor):
|
|||||||
'description': data.get('web_site_text'),
|
'description': data.get('web_site_text'),
|
||||||
'duration': int_or_none(data.get('duration')),
|
'duration': int_or_none(data.get('duration')),
|
||||||
'view_count': int_or_none(data.get('views')),
|
'view_count': int_or_none(data.get('views')),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
43
youtube_dl/extractor/tastytrade.py
Normal file
43
youtube_dl/extractor/tastytrade.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .ooyala import OoyalaIE
|
||||||
|
|
||||||
|
|
||||||
|
class TastyTradeIE(InfoExtractor):
    """Extractor for tastytrade.com show episode pages.

    The page only embeds an Ooyala player, so extraction is delegated to
    OoyalaIE through a ``url_transparent`` result that carries whatever
    metadata the page's JSON-LD provides.
    """

    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
        'info_dict': {
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
            'ext': 'mp4',
            'title': 'A History of Teaming',
            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
            'duration': 422.255,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the embedded Ooyala video."""
        episode_slug = self._match_id(url)
        page = self._download_webpage(url, episode_slug)

        # The Ooyala embed code is the quoted value of `data-media-id`.
        embed_code = self._search_regex(
            r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
            page, 'ooyala code', group='code')

        # JSON-LD metadata is optional (fatal=False); the routing keys set
        # below take precedence over anything it supplies.
        result = self._search_json_ld(page, episode_slug, fatal=False)
        result['_type'] = 'url_transparent'
        result['ie_key'] = OoyalaIE.ie_key()
        result['url'] = 'ooyala:%s' % embed_code
        result['display_id'] = episode_slug
        return result
|
@@ -6,7 +6,10 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TEDIE(InfoExtractor):
|
class TEDIE(InfoExtractor):
|
||||||
@@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_info(self, webpage):
|
def _extract_info(self, webpage):
|
||||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
info_json = self._search_regex(
|
||||||
webpage, 'info json')
|
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
|
||||||
|
webpage, 'info json')
|
||||||
return json.loads(info_json)
|
return json.loads(info_json)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, name,
|
webpage = self._download_webpage(url, name,
|
||||||
'Downloading playlist webpage')
|
'Downloading playlist webpage')
|
||||||
info = self._extract_info(webpage)
|
info = self._extract_info(webpage)
|
||||||
playlist_info = info['playlist']
|
|
||||||
|
playlist_info = try_get(
|
||||||
|
info, lambda x: x['__INITIAL_DATA__']['playlist'],
|
||||||
|
dict) or info['playlist']
|
||||||
|
|
||||||
playlist_entries = [
|
playlist_entries = [
|
||||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||||
for talk in info['talks']
|
for talk in try_get(
|
||||||
|
info, lambda x: x['__INITIAL_DATA__']['talks'],
|
||||||
|
dict) or info['talks']
|
||||||
]
|
]
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
playlist_entries,
|
playlist_entries,
|
||||||
@@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):
|
|||||||
|
|
||||||
def _talk_info(self, url, video_name):
|
def _talk_info(self, url, video_name):
|
||||||
webpage = self._download_webpage(url, video_name)
|
webpage = self._download_webpage(url, video_name)
|
||||||
self.report_extraction(video_name)
|
|
||||||
|
|
||||||
talk_info = self._extract_info(webpage)['talks'][0]
|
info = self._extract_info(webpage)
|
||||||
|
|
||||||
|
talk_info = try_get(
|
||||||
|
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
||||||
|
dict) or info['talks'][0]
|
||||||
|
|
||||||
|
title = talk_info['title'].strip()
|
||||||
|
|
||||||
external = talk_info.get('external')
|
external = talk_info.get('external')
|
||||||
if external:
|
if external:
|
||||||
@@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):
|
|||||||
'url': ext_url or external['uri'],
|
'url': ext_url or external['uri'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
native_downloads = try_get(
|
||||||
|
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
||||||
|
dict) or talk_info['nativeDownloads']
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format': format_id,
|
'format': format_id,
|
||||||
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
|
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
|
||||||
if formats:
|
if formats:
|
||||||
for f in formats:
|
for f in formats:
|
||||||
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
||||||
if finfo:
|
if finfo:
|
||||||
f.update(finfo)
|
f.update(finfo)
|
||||||
|
|
||||||
|
player_talk = talk_info['player_talks'][0]
|
||||||
|
|
||||||
|
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||||
|
|
||||||
http_url = None
|
http_url = None
|
||||||
for format_id, resources in talk_info['resources'].items():
|
for format_id, resources in resources_.items():
|
||||||
if format_id == 'h264':
|
if format_id == 'h264':
|
||||||
for resource in resources:
|
for resource in resources:
|
||||||
h264_url = resource.get('file')
|
h264_url = resource.get('file')
|
||||||
@@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = compat_str(talk_info['id'])
|
video_id = compat_str(talk_info['id'])
|
||||||
|
|
||||||
thumbnail = talk_info['thumb']
|
|
||||||
if not thumbnail.startswith('http'):
|
|
||||||
thumbnail = 'http://' + thumbnail
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': talk_info['title'].strip(),
|
'title': title,
|
||||||
'uploader': talk_info['speaker'],
|
'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@@ -2,13 +2,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import try_get
|
||||||
|
|
||||||
|
|
||||||
class ThisOldHouseIE(InfoExtractor):
|
class ThisOldHouseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
|
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
|
||||||
'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
|
'md5': '568acf9ca25a639f0c4ff905826b662f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2REGtUDQ',
|
'id': '2REGtUDQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -28,8 +30,15 @@ class ThisOldHouseIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
drupal_settings = self._parse_json(self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
(r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
webpage, 'drupal settings'), display_id)
|
r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
|
||||||
video_id = drupal_settings['jwplatform']['video_id']
|
webpage, 'video id', default=None, group='id')
|
||||||
|
if not video_id:
|
||||||
|
drupal_settings = self._parse_json(self._search_regex(
|
||||||
|
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||||
|
webpage, 'drupal settings'), display_id)
|
||||||
|
video_id = try_get(
|
||||||
|
drupal_settings, lambda x: x['jwplatform']['video_id'],
|
||||||
|
compat_str) or list(drupal_settings['comScore'])[0]
|
||||||
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
|
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
|
||||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class ToggleIE(InfoExtractor):
|
class ToggleIE(InfoExtractor):
|
||||||
IE_NAME = 'toggle'
|
IE_NAME = 'toggle'
|
||||||
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -73,6 +73,12 @@ class ToggleIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
|
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMAT_PREFERENCES = {
|
_FORMAT_PREFERENCES = {
|
||||||
|
@@ -6,42 +6,48 @@ import re
|
|||||||
|
|
||||||
|
|
||||||
class ToypicsIE(InfoExtractor):
|
class ToypicsIE(InfoExtractor):
|
||||||
IE_DESC = 'Toypics user profile'
|
IE_DESC = 'Toypics video'
|
||||||
_VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
|
_VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
|
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
|
||||||
'md5': '16e806ad6d6f58079d210fe30985e08b',
|
'md5': '16e806ad6d6f58079d210fe30985e08b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '514',
|
'id': '514',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Chance-Bulge\'d, 2',
|
'title': "Chance-Bulge'd, 2",
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'uploader': 'kidsune',
|
'uploader': 'kidsune',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
page = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._html_search_regex(
|
|
||||||
r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
|
formats = self._parse_html5_media_entries(
|
||||||
title = self._html_search_regex(
|
url, webpage, video_id)[0]['formats']
|
||||||
r'<title>Toypics - ([^<]+)</title>', page, 'title')
|
title = self._html_search_regex([
|
||||||
username = self._html_search_regex(
|
r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
|
||||||
r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
|
r'<title>([^<]+) - Toypics</title>',
|
||||||
|
], webpage, 'title')
|
||||||
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'uploader': username,
|
'uploader': uploader,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ToypicsUserIE(InfoExtractor):
|
class ToypicsUserIE(InfoExtractor):
|
||||||
IE_DESC = 'Toypics user profile'
|
IE_DESC = 'Toypics user profile'
|
||||||
_VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
|
_VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://videos.toypics.net/Mikey',
|
'url': 'http://videos.toypics.net/Mikey',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -51,8 +57,7 @@ class ToypicsUserIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
username = self._match_id(url)
|
||||||
username = mobj.group('username')
|
|
||||||
|
|
||||||
profile_page = self._download_webpage(
|
profile_page = self._download_webpage(
|
||||||
url, username, note='Retrieving profile page')
|
url, username, note='Retrieving profile page')
|
||||||
@@ -71,7 +76,7 @@ class ToypicsUserIE(InfoExtractor):
|
|||||||
note='Downloading page %d/%d' % (n, page_count))
|
note='Downloading page %d/%d' % (n, page_count))
|
||||||
urls.extend(
|
urls.extend(
|
||||||
re.findall(
|
re.findall(
|
||||||
r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
|
r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
|
||||||
lpage))
|
lpage))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -3,138 +3,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
|
||||||
InAdvancePagedList,
|
|
||||||
float_or_none,
|
|
||||||
unescapeHTML,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TudouIE(InfoExtractor):
|
|
||||||
IE_NAME = 'tudou'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
|
||||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '159448201',
|
|
||||||
'ext': 'f4v',
|
|
||||||
'title': '卡马乔国足开大脚长传冲吊集锦',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'timestamp': 1372113489000,
|
|
||||||
'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
|
|
||||||
'duration': 289.04,
|
|
||||||
'view_count': int,
|
|
||||||
'filesize': int,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '117049447',
|
|
||||||
'ext': 'f4v',
|
|
||||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'timestamp': 1349207518000,
|
|
||||||
'description': 'md5:294612423894260f2dcd5c6c04fe248b',
|
|
||||||
'duration': 5478.33,
|
|
||||||
'view_count': int,
|
|
||||||
'filesize': int,
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
|
||||||
|
|
||||||
# Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
|
|
||||||
# 0001, 0002 and 4001 are not included as they indicate temporary issues
|
|
||||||
TVC_ERRORS = {
|
|
||||||
'0003': 'The video is deleted or does not exist',
|
|
||||||
'1001': 'This video is unavailable due to licensing issues',
|
|
||||||
'1002': 'This video is unavailable as it\'s under review',
|
|
||||||
'1003': 'This video is unavailable as it\'s under review',
|
|
||||||
'3001': 'Password required',
|
|
||||||
'5001': 'This video is available in Mainland China only due to licensing issues',
|
|
||||||
'7001': 'This video is unavailable',
|
|
||||||
'8001': 'This video is unavailable due to licensing issues',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _url_for_id(self, video_id, quality=None):
|
|
||||||
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
|
|
||||||
if quality:
|
|
||||||
info_url += '&hd' + quality
|
|
||||||
xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
|
|
||||||
error = xml_data.attrib.get('error')
|
|
||||||
if error is not None:
|
|
||||||
raise ExtractorError('Tudou said: %s' % error, expected=True)
|
|
||||||
final_url = xml_data.text
|
|
||||||
return final_url
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
item_data = self._download_json(
|
|
||||||
'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
|
|
||||||
|
|
||||||
youku_vcode = item_data.get('vcode')
|
|
||||||
if youku_vcode:
|
|
||||||
return self.url_result('youku:' + youku_vcode, ie='Youku')
|
|
||||||
|
|
||||||
if not item_data.get('itemSegs'):
|
|
||||||
tvc_code = item_data.get('tvcCode')
|
|
||||||
if tvc_code:
|
|
||||||
err_msg = self.TVC_ERRORS.get(tvc_code)
|
|
||||||
if err_msg:
|
|
||||||
raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
|
|
||||||
raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
|
|
||||||
raise ExtractorError('Unxpected error returned from Tudou')
|
|
||||||
|
|
||||||
title = unescapeHTML(item_data['kw'])
|
|
||||||
description = item_data.get('desc')
|
|
||||||
thumbnail_url = item_data.get('pic')
|
|
||||||
view_count = int_or_none(item_data.get('playTimes'))
|
|
||||||
timestamp = int_or_none(item_data.get('pt'))
|
|
||||||
|
|
||||||
segments = self._parse_json(item_data['itemSegs'], video_id)
|
|
||||||
# It looks like the keys are the arguments that have to be passed as
|
|
||||||
# the hd field in the request url, we pick the higher
|
|
||||||
# Also, filter non-number qualities (see issue #3643).
|
|
||||||
quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
|
|
||||||
key=lambda k: int(k))[-1]
|
|
||||||
parts = segments[quality]
|
|
||||||
len_parts = len(parts)
|
|
||||||
if len_parts > 1:
|
|
||||||
self.to_screen('%s: found %s parts' % (video_id, len_parts))
|
|
||||||
|
|
||||||
def part_func(partnum):
|
|
||||||
part = parts[partnum]
|
|
||||||
part_id = part['k']
|
|
||||||
final_url = self._url_for_id(part_id, quality)
|
|
||||||
ext = (final_url.split('?')[0]).split('.')[-1]
|
|
||||||
return [{
|
|
||||||
'id': '%s' % part_id,
|
|
||||||
'url': final_url,
|
|
||||||
'ext': ext,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail_url,
|
|
||||||
'description': description,
|
|
||||||
'view_count': view_count,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': float_or_none(part.get('seconds'), 1000),
|
|
||||||
'filesize': int_or_none(part.get('size')),
|
|
||||||
'http_headers': {
|
|
||||||
'Referer': self._PLAYER_URL,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
entries = InAdvancePagedList(part_func, len_parts, 1)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'multi_video',
|
|
||||||
'entries': entries,
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class TudouPlaylistIE(InfoExtractor):
|
class TudouPlaylistIE(InfoExtractor):
|
||||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -49,7 +50,7 @@ class TurboIE(InfoExtractor):
|
|||||||
for child in item:
|
for child in item:
|
||||||
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
|
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
|
||||||
if m:
|
if m:
|
||||||
quality = m.group('quality')
|
quality = compat_str(m.group('quality'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': quality,
|
'format_id': quality,
|
||||||
'url': child.text,
|
'url': child.text,
|
||||||
|
@@ -48,7 +48,7 @@ class TVPlayerIE(InfoExtractor):
|
|||||||
'https://tvplayer.com/watch/context', display_id,
|
'https://tvplayer.com/watch/context', display_id,
|
||||||
'Downloading JSON context', query={
|
'Downloading JSON context', query={
|
||||||
'resource': resource_id,
|
'resource': resource_id,
|
||||||
'nonce': token,
|
'gen': token,
|
||||||
})
|
})
|
||||||
|
|
||||||
validate = context['validate']
|
validate = context['validate']
|
||||||
|
@@ -52,6 +52,10 @@ class UdemyIE(InfoExtractor):
|
|||||||
# new URL schema
|
# new URL schema
|
||||||
'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
|
'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# no url in outputs format entry
|
||||||
|
'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_course_info(self, webpage, video_id):
|
def _extract_course_info(self, webpage, video_id):
|
||||||
@@ -219,7 +223,7 @@ class UdemyIE(InfoExtractor):
|
|||||||
|
|
||||||
def extract_output_format(src, f_id):
|
def extract_output_format(src, f_id):
|
||||||
return {
|
return {
|
||||||
'url': src['url'],
|
'url': src.get('url'),
|
||||||
'format_id': '%sp' % (src.get('height') or f_id),
|
'format_id': '%sp' % (src.get('height') or f_id),
|
||||||
'width': int_or_none(src.get('width')),
|
'width': int_or_none(src.get('width')),
|
||||||
'height': int_or_none(src.get('height')),
|
'height': int_or_none(src.get('height')),
|
||||||
|
@@ -151,10 +151,16 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
mpd_manifest_urls = [(format_id, manifest_url)]
|
mpd_manifest_urls = [(format_id, manifest_url)]
|
||||||
for f_id, m_url in mpd_manifest_urls:
|
for f_id, m_url in mpd_manifest_urls:
|
||||||
formats.extend(self._extract_mpd_formats(
|
mpd_formats = self._extract_mpd_formats(
|
||||||
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
|
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
|
||||||
'Downloading %s MPD information' % cdn_name,
|
'Downloading %s MPD information' % cdn_name,
|
||||||
fatal=False))
|
fatal=False)
|
||||||
|
for f in mpd_formats:
|
||||||
|
if f.get('vcodec') == 'none':
|
||||||
|
f['preference'] = -50
|
||||||
|
elif f.get('acodec') == 'none':
|
||||||
|
f['preference'] = -40
|
||||||
|
formats.extend(mpd_formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
text_tracks = config['request'].get('text_tracks')
|
text_tracks = config['request'].get('text_tracks')
|
||||||
@@ -609,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||||
source_name = source_file.get('public_name', 'Original')
|
source_name = source_file.get('public_name', 'Original')
|
||||||
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
||||||
ext = source_file.get('extension', determine_ext(download_url)).lower()
|
ext = (try_get(
|
||||||
|
source_file, lambda x: x['extension'],
|
||||||
|
compat_str) or determine_ext(
|
||||||
|
download_url, None) or 'mp4').lower()
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': download_url,
|
'url': download_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_kwargs,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
|
|||||||
headers.update(kwargs.get('headers', {}))
|
headers.update(kwargs.get('headers', {}))
|
||||||
kwargs['headers'] = headers
|
kwargs['headers'] = headers
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://www.viu.com/api/' + path, *args, **kwargs)['response']
|
'https://www.viu.com/api/' + path, *args,
|
||||||
|
**compat_kwargs(kwargs))['response']
|
||||||
if response.get('status') != 'success':
|
if response.get('status') != 'success':
|
||||||
raise ExtractorError('%s said: %s' % (
|
raise ExtractorError('%s said: %s' % (
|
||||||
self.IE_NAME, response['message']), expected=True)
|
self.IE_NAME, response['message']), expected=True)
|
||||||
|
@@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import parse_duration
|
||||||
unified_strdate,
|
|
||||||
parse_duration,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class WatchIndianPornIE(InfoExtractor):
|
class WatchIndianPornIE(InfoExtractor):
|
||||||
@@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
|
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'LoveJay',
|
|
||||||
'upload_date': '20160428',
|
|
||||||
'duration': 226,
|
'duration': 226,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
@@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
r"url: escape\('([^']+)'\)", webpage, 'url')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex((
|
||||||
r'<h2 class="he2"><span>(.*?)</span>',
|
r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
|
||||||
webpage, 'title')
|
r'<h4>(.+?)</h4>'
|
||||||
thumbnail = self._html_search_regex(
|
), webpage, 'title')
|
||||||
r'<span id="container"><img\s+src="([^"]+)"',
|
|
||||||
webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'class="aupa">\s*(.*?)</a>',
|
|
||||||
webpage, 'uploader')
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
|
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
|
r'Time:\s*<strong>\s*(.+?)\s*</strong>',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = int_or_none(self._search_regex(
|
view_count = int(self._search_regex(
|
||||||
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
comment_count = int_or_none(self._search_regex(
|
|
||||||
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
|
||||||
webpage, 'comment count', fatal=False))
|
|
||||||
|
|
||||||
categories = re.findall(
|
categories = re.findall(
|
||||||
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
|
r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
return {
|
info_dict.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_url,
|
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
},
|
},
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
@@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
||||||
https?://(?:www\.)?wsj\.com/video/[^/]+/|
|
https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
|
||||||
wsj:
|
wsj:
|
||||||
)
|
)
|
||||||
(?P<id>[a-fA-F0-9-]{36})
|
(?P<id>[a-fA-F0-9-]{36})
|
||||||
@@ -35,6 +35,9 @@ class WSJIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
|
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -10,7 +10,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,6 +29,8 @@ class XFileShareIE(InfoExtractor):
|
|||||||
(r'vidabc\.com', 'Vid ABC'),
|
(r'vidabc\.com', 'Vid ABC'),
|
||||||
(r'vidbom\.com', 'VidBom'),
|
(r'vidbom\.com', 'VidBom'),
|
||||||
(r'vidlo\.us', 'vidlo'),
|
(r'vidlo\.us', 'vidlo'),
|
||||||
|
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
|
||||||
|
(r'fastvideo\.me', 'FastVideo.me'),
|
||||||
)
|
)
|
||||||
|
|
||||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||||
@@ -109,6 +110,12 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -130,12 +137,12 @@ class XFileShareIE(InfoExtractor):
|
|||||||
if countdown:
|
if countdown:
|
||||||
self._sleep(countdown, video_id)
|
self._sleep(countdown, video_id)
|
||||||
|
|
||||||
post = urlencode_postdata(fields)
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, 'Downloading video page',
|
||||||
req = sanitized_Request(url, post)
|
data=urlencode_postdata(fields), headers={
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
'Referer': url,
|
||||||
|
'Content-type': 'application/x-www-form-urlencoded',
|
||||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
})
|
||||||
|
|
||||||
title = (self._search_regex(
|
title = (self._search_regex(
|
||||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||||
@@ -150,7 +157,7 @@ class XFileShareIE(InfoExtractor):
|
|||||||
def extract_formats(default=NO_DEFAULT):
|
def extract_formats(default=NO_DEFAULT):
|
||||||
urls = []
|
urls = []
|
||||||
for regex in (
|
for regex in (
|
||||||
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
||||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
||||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
||||||
|
@@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'uploader': 'Ruseful2011',
|
'uploader': 'Ruseful2011',
|
||||||
'duration': 893,
|
'duration': 893,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||||
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'uploader': 'jojo747400',
|
'uploader': 'jojo747400',
|
||||||
'duration': 200,
|
'duration': 200,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'uploader': 'parejafree',
|
'uploader': 'parejafree',
|
||||||
'duration': 72,
|
'duration': 72,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': ['Amateur', 'Blowjobs'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -104,7 +108,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>',
|
r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
|
||||||
webpage, 'uploader', default='anonymous')
|
webpage, 'uploader', default='anonymous')
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
@@ -120,7 +124,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
r'content=["\']User(?:View|Play)s:(\d+)',
|
r'content=["\']User(?:View|Play)s:(\d+)',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
|
mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
|
||||||
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
|
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
|
||||||
|
|
||||||
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
|
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
|
||||||
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
categories_html = self._search_regex(
|
||||||
|
r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
|
||||||
|
'categories', default=None)
|
||||||
|
categories = [clean_html(category) for category in re.findall(
|
||||||
|
r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'dislike_count': int_or_none(dislike_count),
|
'dislike_count': int_or_none(dislike_count),
|
||||||
'comment_count': int_or_none(comment_count),
|
'comment_count': int_or_none(comment_count),
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'categories': categories,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
|
||||||
import itertools
|
import itertools
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
@@ -9,15 +8,13 @@ import string
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_ord,
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
get_element_by_attribute,
|
get_element_by_class,
|
||||||
try_get,
|
js_to_json,
|
||||||
|
str_or_none,
|
||||||
|
strip_jsonp,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -26,7 +23,9 @@ class YoukuIE(InfoExtractor):
|
|||||||
IE_DESC = '优酷'
|
IE_DESC = '优酷'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
|
https?://(
|
||||||
|
(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
|
||||||
|
video\.tudou\.com/v/)|
|
||||||
youku:)
|
youku:)
|
||||||
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
|
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
|
||||||
'''
|
'''
|
||||||
@@ -35,9 +34,15 @@ class YoukuIE(InfoExtractor):
|
|||||||
# MD5 is unstable
|
# MD5 is unstable
|
||||||
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
|
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XMTc1ODE5Njcy_part1',
|
'id': 'XMTc1ODE5Njcy',
|
||||||
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
|
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
|
||||||
'ext': 'flv'
|
'ext': 'mp4',
|
||||||
|
'duration': 74.73,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': '。躲猫猫、',
|
||||||
|
'uploader_id': '36017967',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
|
||||||
|
'tags': list,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
||||||
@@ -46,25 +51,42 @@ class YoukuIE(InfoExtractor):
|
|||||||
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
|
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XODgxNjg1Mzk2',
|
'id': 'XODgxNjg1Mzk2',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '武媚娘传奇 85',
|
'title': '武媚娘传奇 85',
|
||||||
|
'duration': 1999.61,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': '疯狂豆花',
|
||||||
|
'uploader_id': '62583473',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
'playlist_count': 11,
|
|
||||||
'skip': 'Available in China only',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
|
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XMTI1OTczNDM5Mg',
|
'id': 'XMTI1OTczNDM5Mg',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '花千骨 04',
|
'title': '花千骨 04',
|
||||||
|
'duration': 2363,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': '放剧场-花千骨',
|
||||||
|
'uploader_id': '772849359',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
'playlist_count': 13,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
||||||
'note': 'Video protected with password',
|
'note': 'Video protected with password',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XNjA1NzA2Njgw',
|
'id': 'XNjA1NzA2Njgw',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
|
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
|
||||||
|
'duration': 7264.5,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': 'FoxJin1006',
|
||||||
|
'uploader_id': '322014285',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
'playlist_count': 19,
|
|
||||||
'params': {
|
'params': {
|
||||||
'videopassword': '100600',
|
'videopassword': '100600',
|
||||||
},
|
},
|
||||||
@@ -73,130 +95,38 @@ class YoukuIE(InfoExtractor):
|
|||||||
'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
|
'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XOTUxMzg4NDMy',
|
'id': 'XOTUxMzg4NDMy',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
|
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
|
||||||
|
'duration': 702.08,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': '明月庄主moon',
|
||||||
|
'uploader_id': '38465621',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
|
||||||
|
'tags': list,
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
}, {
|
||||||
|
'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'XMjIyNzAzMTQ4NA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '卡马乔国足开大脚长传冲吊集锦',
|
||||||
|
'duration': 289,
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'uploader': '阿卜杜拉之星',
|
||||||
|
'uploader_id': '2382249',
|
||||||
|
'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
|
||||||
|
'tags': list,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def construct_video_urls(self, data):
|
|
||||||
# get sid, token
|
|
||||||
def yk_t(s1, s2):
|
|
||||||
ls = list(range(256))
|
|
||||||
t = 0
|
|
||||||
for i in range(256):
|
|
||||||
t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
|
|
||||||
ls[i], ls[t] = ls[t], ls[i]
|
|
||||||
s = bytearray()
|
|
||||||
x, y = 0, 0
|
|
||||||
for i in range(len(s2)):
|
|
||||||
y = (y + 1) % 256
|
|
||||||
x = (x + ls[y]) % 256
|
|
||||||
ls[x], ls[y] = ls[y], ls[x]
|
|
||||||
s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
|
|
||||||
return bytes(s)
|
|
||||||
|
|
||||||
sid, token = yk_t(
|
|
||||||
b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
|
|
||||||
).decode('ascii').split('_')
|
|
||||||
|
|
||||||
# get oip
|
|
||||||
oip = data['security']['ip']
|
|
||||||
|
|
||||||
fileid_dict = {}
|
|
||||||
for stream in data['stream']:
|
|
||||||
if stream.get('channel_type') == 'tail':
|
|
||||||
continue
|
|
||||||
format = stream.get('stream_type')
|
|
||||||
fileid = try_get(
|
|
||||||
stream, lambda x: x['segs'][0]['fileid'],
|
|
||||||
compat_str) or stream['stream_fileid']
|
|
||||||
fileid_dict[format] = fileid
|
|
||||||
|
|
||||||
def get_fileid(format, n):
|
|
||||||
number = hex(int(str(n), 10))[2:].upper()
|
|
||||||
if len(number) == 1:
|
|
||||||
number = '0' + number
|
|
||||||
streamfileids = fileid_dict[format]
|
|
||||||
fileid = streamfileids[0:8] + number + streamfileids[10:]
|
|
||||||
return fileid
|
|
||||||
|
|
||||||
# get ep
|
|
||||||
def generate_ep(format, n):
|
|
||||||
fileid = get_fileid(format, n)
|
|
||||||
ep_t = yk_t(
|
|
||||||
b'bf7e5f01',
|
|
||||||
('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
|
|
||||||
)
|
|
||||||
ep = base64.b64encode(ep_t).decode('ascii')
|
|
||||||
return ep
|
|
||||||
|
|
||||||
# generate video_urls
|
|
||||||
video_urls_dict = {}
|
|
||||||
for stream in data['stream']:
|
|
||||||
if stream.get('channel_type') == 'tail':
|
|
||||||
continue
|
|
||||||
format = stream.get('stream_type')
|
|
||||||
video_urls = []
|
|
||||||
for dt in stream['segs']:
|
|
||||||
n = str(stream['segs'].index(dt))
|
|
||||||
param = {
|
|
||||||
'K': dt['key'],
|
|
||||||
'hd': self.get_hd(format),
|
|
||||||
'myp': 0,
|
|
||||||
'ypp': 0,
|
|
||||||
'ctype': 12,
|
|
||||||
'ev': 1,
|
|
||||||
'token': token,
|
|
||||||
'oip': oip,
|
|
||||||
'ep': generate_ep(format, n)
|
|
||||||
}
|
|
||||||
video_url = \
|
|
||||||
'http://k.youku.com/player/getFlvPath/' + \
|
|
||||||
'sid/' + sid + \
|
|
||||||
'_00' + \
|
|
||||||
'/st/' + self.parse_ext_l(format) + \
|
|
||||||
'/fileid/' + get_fileid(format, n) + '?' + \
|
|
||||||
compat_urllib_parse_urlencode(param)
|
|
||||||
video_urls.append(video_url)
|
|
||||||
video_urls_dict[format] = video_urls
|
|
||||||
|
|
||||||
return video_urls_dict
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_ysuid():
|
def get_ysuid():
|
||||||
return '%d%s' % (int(time.time()), ''.join([
|
return '%d%s' % (int(time.time()), ''.join([
|
||||||
random.choice(string.ascii_letters) for i in range(3)]))
|
random.choice(string.ascii_letters) for i in range(3)]))
|
||||||
|
|
||||||
def get_hd(self, fm):
|
|
||||||
hd_id_dict = {
|
|
||||||
'3gp': '0',
|
|
||||||
'3gphd': '1',
|
|
||||||
'flv': '0',
|
|
||||||
'flvhd': '0',
|
|
||||||
'mp4': '1',
|
|
||||||
'mp4hd': '1',
|
|
||||||
'mp4hd2': '1',
|
|
||||||
'mp4hd3': '1',
|
|
||||||
'hd2': '2',
|
|
||||||
'hd3': '3',
|
|
||||||
}
|
|
||||||
return hd_id_dict[fm]
|
|
||||||
|
|
||||||
def parse_ext_l(self, fm):
|
|
||||||
ext_dict = {
|
|
||||||
'3gp': 'flv',
|
|
||||||
'3gphd': 'mp4',
|
|
||||||
'flv': 'flv',
|
|
||||||
'flvhd': 'flv',
|
|
||||||
'mp4': 'mp4',
|
|
||||||
'mp4hd': 'mp4',
|
|
||||||
'mp4hd2': 'flv',
|
|
||||||
'mp4hd3': 'flv',
|
|
||||||
'hd2': 'flv',
|
|
||||||
'hd3': 'flv',
|
|
||||||
}
|
|
||||||
return ext_dict[fm]
|
|
||||||
|
|
||||||
def get_format_name(self, fm):
|
def get_format_name(self, fm):
|
||||||
_dict = {
|
_dict = {
|
||||||
'3gp': 'h6',
|
'3gp': 'h6',
|
||||||
@@ -210,32 +140,40 @@ class YoukuIE(InfoExtractor):
|
|||||||
'hd2': 'h2',
|
'hd2': 'h2',
|
||||||
'hd3': 'h1',
|
'hd3': 'h1',
|
||||||
}
|
}
|
||||||
return _dict[fm]
|
return _dict.get(fm)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
|
self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
|
||||||
|
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
|
||||||
|
|
||||||
def retrieve_data(req_url, note):
|
_, urlh = self._download_webpage_handle(
|
||||||
headers = {
|
'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
|
||||||
'Referer': req_url,
|
# The etag header is '"foobar"'; let's remove the double quotes
|
||||||
}
|
cna = urlh.headers['etag'][1:-1]
|
||||||
headers.update(self.geo_verification_headers())
|
|
||||||
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
|
|
||||||
|
|
||||||
raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
|
|
||||||
|
|
||||||
return raw_data['data']
|
|
||||||
|
|
||||||
video_password = self._downloader.params.get('videopassword')
|
|
||||||
|
|
||||||
# request basic data
|
# request basic data
|
||||||
basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id
|
basic_data_params = {
|
||||||
if video_password:
|
'vid': video_id,
|
||||||
basic_data_url += '&pwd=%s' % video_password
|
'ccode': '0402' if 'tudou.com' in url else '0401',
|
||||||
|
'client_ip': '192.168.1.1',
|
||||||
|
'utid': cna,
|
||||||
|
'client_ts': time.time() / 1000,
|
||||||
|
}
|
||||||
|
|
||||||
data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
|
video_password = self._downloader.params.get('videopassword')
|
||||||
|
if video_password:
|
||||||
|
basic_data_params['password'] = video_password
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Referer': url,
|
||||||
|
}
|
||||||
|
headers.update(self.geo_verification_headers())
|
||||||
|
data = self._download_json(
|
||||||
|
'https://ups.youku.com/ups/get.json', video_id,
|
||||||
|
'Downloading JSON metadata',
|
||||||
|
query=basic_data_params, headers=headers)['data']
|
||||||
|
|
||||||
error = data.get('error')
|
error = data.get('error')
|
||||||
if error:
|
if error:
|
||||||
@@ -253,86 +191,87 @@ class YoukuIE(InfoExtractor):
|
|||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
|
|
||||||
# get video title
|
# get video title
|
||||||
title = data['video']['title']
|
video_data = data['video']
|
||||||
|
title = video_data['title']
|
||||||
|
|
||||||
# generate video_urls_dict
|
formats = [{
|
||||||
video_urls_dict = self.construct_video_urls(data)
|
'url': stream['m3u8_url'],
|
||||||
|
'format_id': self.get_format_name(stream.get('stream_type')),
|
||||||
# construct info
|
'ext': 'mp4',
|
||||||
entries = [{
|
'protocol': 'm3u8_native',
|
||||||
'id': '%s_part%d' % (video_id, i + 1),
|
'filesize': int(stream.get('size')),
|
||||||
'title': title,
|
'width': stream.get('width'),
|
||||||
'formats': [],
|
'height': stream.get('height'),
|
||||||
# some formats are not available for all parts, we have to detect
|
} for stream in data['stream'] if stream.get('channel_type') != 'tail']
|
||||||
# which one has all
|
self._sort_formats(formats)
|
||||||
} for i in range(max(len(v.get('segs')) for v in data['stream']))]
|
|
||||||
for stream in data['stream']:
|
|
||||||
if stream.get('channel_type') == 'tail':
|
|
||||||
continue
|
|
||||||
fm = stream.get('stream_type')
|
|
||||||
video_urls = video_urls_dict[fm]
|
|
||||||
for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
|
|
||||||
entry['formats'].append({
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': self.get_format_name(fm),
|
|
||||||
'ext': self.parse_ext_l(fm),
|
|
||||||
'filesize': int(seg['size']),
|
|
||||||
'width': stream.get('width'),
|
|
||||||
'height': stream.get('height'),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'multi_video',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'formats': formats,
|
||||||
|
'duration': video_data.get('seconds'),
|
||||||
|
'thumbnail': video_data.get('logo'),
|
||||||
|
'uploader': video_data.get('username'),
|
||||||
|
'uploader_id': str_or_none(video_data.get('userid')),
|
||||||
|
'uploader_url': data.get('uploader', {}).get('homepage'),
|
||||||
|
'tags': video_data.get('tags'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoukuShowIE(InfoExtractor):
|
class YoukuShowIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
|
_VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
|
||||||
IE_NAME = 'youku:show'
|
IE_NAME = 'youku:show'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
|
'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'zc7c670be07ff11e48b3f',
|
'id': 'zc7c670be07ff11e48b3f',
|
||||||
'title': '花千骨 未删减版',
|
'title': '花千骨 未删减版',
|
||||||
'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
|
'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
|
||||||
},
|
},
|
||||||
'playlist_count': 50,
|
'playlist_count': 50,
|
||||||
}
|
}
|
||||||
|
|
||||||
_PAGE_SIZE = 40
|
_PAGE_SIZE = 40
|
||||||
|
|
||||||
def _find_videos_in_page(self, webpage):
|
|
||||||
videos = re.findall(
|
|
||||||
r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
|
|
||||||
return [
|
|
||||||
self.url_result(video_url, YoukuIE.ie_key(), title)
|
|
||||||
for video_url, title in videos]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, show_id)
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
entries = self._find_videos_in_page(webpage)
|
entries = []
|
||||||
|
page_config = self._parse_json(self._search_regex(
|
||||||
playlist_title = self._html_search_regex(
|
r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
|
||||||
r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
|
show_id, transform_source=js_to_json)
|
||||||
detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
|
for idx in itertools.count(0):
|
||||||
playlist_description = self._html_search_regex(
|
if idx == 0:
|
||||||
r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
|
playlist_data_url = 'http://list.youku.com/show/module'
|
||||||
detail_div, 'playlist description', fatal=False)
|
query = {'id': page_config['showid'], 'tab': 'point'}
|
||||||
|
else:
|
||||||
for idx in itertools.count(1):
|
playlist_data_url = 'http://list.youku.com/show/point'
|
||||||
episodes_page = self._download_webpage(
|
query = {
|
||||||
'http://www.youku.com/show_episode/id_%s.html' % show_id,
|
'id': page_config['showid'],
|
||||||
show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
|
'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
|
||||||
note='Downloading episodes page %d' % idx)
|
}
|
||||||
new_entries = self._find_videos_in_page(episodes_page)
|
query['callback'] = 'cb'
|
||||||
|
playlist_data = self._download_json(
|
||||||
|
playlist_data_url, show_id, query=query,
|
||||||
|
note='Downloading playlist data page %d' % (idx + 1),
|
||||||
|
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
|
||||||
|
video_urls = re.findall(
|
||||||
|
r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
|
||||||
|
playlist_data)
|
||||||
|
new_entries = [
|
||||||
|
self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
|
||||||
|
for video_url in video_urls]
|
||||||
entries.extend(new_entries)
|
entries.extend(new_entries)
|
||||||
if len(new_entries) < self._PAGE_SIZE:
|
if len(new_entries) < self._PAGE_SIZE:
|
||||||
break
|
break
|
||||||
|
|
||||||
return self.playlist_result(entries, show_id, playlist_title, playlist_description)
|
desc = self._html_search_meta('description', webpage, fatal=False)
|
||||||
|
playlist_title = desc.split(',')[0] if desc else None
|
||||||
|
detail_li = get_element_by_class('p-intro', webpage)
|
||||||
|
playlist_description = get_element_by_class(
|
||||||
|
'intro-more', detail_li) if detail_li else None
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, show_id, playlist_title, playlist_description)
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
@@ -26,7 +27,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Ask Dan And Jennifer',
|
'uploader': 'Ask Dan And Jennifer',
|
||||||
'upload_date': '20101221',
|
'upload_date': '20101217',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
@@ -45,7 +46,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Unknown',
|
'uploader': 'Unknown',
|
||||||
'upload_date': '20111125',
|
'upload_date': '20110418',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
@@ -68,28 +69,46 @@ class YouPornIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(request, display_id)
|
webpage = self._download_webpage(request, display_id)
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
|
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
|
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
|
||||||
webpage, 'title', group='title')
|
webpage, 'title', group='title',
|
||||||
|
default=None) or self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'title', webpage, fatal=True)
|
||||||
|
|
||||||
links = []
|
links = []
|
||||||
|
|
||||||
|
# Main source
|
||||||
|
definitions = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
||||||
|
'media definitions', default='[]'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if definitions:
|
||||||
|
for definition in definitions:
|
||||||
|
if not isinstance(definition, dict):
|
||||||
|
continue
|
||||||
|
video_url = definition.get('videoUrl')
|
||||||
|
if isinstance(video_url, compat_str) and video_url:
|
||||||
|
links.append(video_url)
|
||||||
|
|
||||||
|
# Fallback #1, this also contains extra low quality 180p format
|
||||||
|
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||||
|
links.append(link)
|
||||||
|
|
||||||
|
# Fallback #2 (unavailable as at 22.06.2017)
|
||||||
sources = self._search_regex(
|
sources = self._search_regex(
|
||||||
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
||||||
if sources:
|
if sources:
|
||||||
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
||||||
links.append(link)
|
links.append(link)
|
||||||
|
|
||||||
# Fallback #1
|
# Fallback #3 (unavailable as at 22.06.2017)
|
||||||
for _, link in re.findall(
|
for _, link in re.findall(
|
||||||
r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||||
links.append(link)
|
links.append(link)
|
||||||
|
|
||||||
# Fallback #2, this also contains extra low quality 180p format
|
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
|
||||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
|
||||||
links.append(link)
|
|
||||||
|
|
||||||
# Fallback #3, encrypted links
|
|
||||||
for _, encrypted_link in re.findall(
|
for _, encrypted_link in re.findall(
|
||||||
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
||||||
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
||||||
@@ -124,7 +143,8 @@ class YouPornIE(InfoExtractor):
|
|||||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>',
|
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||||
|
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
@@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
sub_lang_list[sub_lang] = sub_formats
|
sub_lang_list[sub_lang] = sub_formats
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
|
|
||||||
|
def make_captions(sub_url, sub_langs):
|
||||||
|
parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
|
||||||
|
caption_qs = compat_parse_qs(parsed_sub_url.query)
|
||||||
|
captions = {}
|
||||||
|
for sub_lang in sub_langs:
|
||||||
|
sub_formats = []
|
||||||
|
for ext in self._SUBTITLE_FORMATS:
|
||||||
|
caption_qs.update({
|
||||||
|
'tlang': [sub_lang],
|
||||||
|
'fmt': [ext],
|
||||||
|
})
|
||||||
|
sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
|
||||||
|
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
||||||
|
sub_formats.append({
|
||||||
|
'url': sub_url,
|
||||||
|
'ext': ext,
|
||||||
|
})
|
||||||
|
captions[sub_lang] = sub_formats
|
||||||
|
return captions
|
||||||
|
|
||||||
|
# New captions format as of 22.06.2017
|
||||||
|
player_response = args.get('player_response')
|
||||||
|
if player_response and isinstance(player_response, compat_str):
|
||||||
|
player_response = self._parse_json(
|
||||||
|
player_response, video_id, fatal=False)
|
||||||
|
if player_response:
|
||||||
|
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||||
|
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||||
|
sub_lang_list = []
|
||||||
|
for lang in renderer['translationLanguages']:
|
||||||
|
lang_code = lang.get('languageCode')
|
||||||
|
if lang_code:
|
||||||
|
sub_lang_list.append(lang_code)
|
||||||
|
return make_captions(base_url, sub_lang_list)
|
||||||
|
|
||||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||||
|
# Does not used anymore as of 22.06.2017
|
||||||
caption_tracks = args['caption_tracks']
|
caption_tracks = args['caption_tracks']
|
||||||
caption_translation_languages = args['caption_translation_languages']
|
caption_translation_languages = args['caption_translation_languages']
|
||||||
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
|
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
|
||||||
parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
|
sub_lang_list = []
|
||||||
caption_qs = compat_parse_qs(parsed_caption_url.query)
|
|
||||||
|
|
||||||
sub_lang_list = {}
|
|
||||||
for lang in caption_translation_languages.split(','):
|
for lang in caption_translation_languages.split(','):
|
||||||
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
|
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
|
||||||
sub_lang = lang_qs.get('lc', [None])[0]
|
sub_lang = lang_qs.get('lc', [None])[0]
|
||||||
if not sub_lang:
|
if sub_lang:
|
||||||
continue
|
sub_lang_list.append(sub_lang)
|
||||||
sub_formats = []
|
return make_captions(caption_url, sub_lang_list)
|
||||||
for ext in self._SUBTITLE_FORMATS:
|
|
||||||
caption_qs.update({
|
|
||||||
'tlang': [sub_lang],
|
|
||||||
'fmt': [ext],
|
|
||||||
})
|
|
||||||
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
|
|
||||||
query=compat_urllib_parse_urlencode(caption_qs, True)))
|
|
||||||
sub_formats.append({
|
|
||||||
'url': sub_url,
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
sub_lang_list[sub_lang] = sub_formats
|
|
||||||
return sub_lang_list
|
|
||||||
# An extractor error can be raise by the download process if there are
|
# An extractor error can be raise by the download process if there are
|
||||||
# no automatic captions but there are subtitles
|
# no automatic captions but there are subtitles
|
||||||
except (KeyError, ExtractorError):
|
except (KeyError, IndexError, ExtractorError):
|
||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -1353,10 +1373,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
start_time = parse_duration(time_point)
|
start_time = parse_duration(time_point)
|
||||||
if start_time is None:
|
if start_time is None:
|
||||||
continue
|
continue
|
||||||
|
if start_time > duration:
|
||||||
|
break
|
||||||
end_time = (duration if next_num == len(chapter_lines)
|
end_time = (duration if next_num == len(chapter_lines)
|
||||||
else parse_duration(chapter_lines[next_num][1]))
|
else parse_duration(chapter_lines[next_num][1]))
|
||||||
if end_time is None:
|
if end_time is None:
|
||||||
continue
|
continue
|
||||||
|
if end_time > duration:
|
||||||
|
end_time = duration
|
||||||
|
if start_time > end_time:
|
||||||
|
break
|
||||||
chapter_title = re.sub(
|
chapter_title = re.sub(
|
||||||
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
|
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
|
||||||
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
chapter_title = re.sub(r'\s+', ' ', chapter_title)
|
||||||
@@ -1435,6 +1461,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
age_gate = False
|
age_gate = False
|
||||||
video_info = None
|
video_info = None
|
||||||
|
sts = None
|
||||||
# Try looking directly into the video webpage
|
# Try looking directly into the video webpage
|
||||||
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
||||||
if ytplayer_config:
|
if ytplayer_config:
|
||||||
@@ -1451,6 +1478,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
||||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||||
is_live = True
|
is_live = True
|
||||||
|
sts = ytplayer_config.get('sts')
|
||||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
# We also try looking in get_video_info since it may contain different dashmpd
|
# We also try looking in get_video_info since it may contain different dashmpd
|
||||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||||
@@ -1459,17 +1487,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# The general idea is to take a union of itags of both DASH manifests (for example
|
# The general idea is to take a union of itags of both DASH manifests (for example
|
||||||
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
|
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
|
||||||
self.report_video_info_webpage_download(video_id)
|
self.report_video_info_webpage_download(video_id)
|
||||||
for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
|
||||||
video_info_url = (
|
query = {
|
||||||
'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
'video_id': video_id,
|
||||||
% (proto, video_id, el_type))
|
'ps': 'default',
|
||||||
|
'eurl': '',
|
||||||
|
'gl': 'US',
|
||||||
|
'hl': 'en',
|
||||||
|
}
|
||||||
|
if el:
|
||||||
|
query['el'] = el
|
||||||
|
if sts:
|
||||||
|
query['sts'] = sts
|
||||||
video_info_webpage = self._download_webpage(
|
video_info_webpage = self._download_webpage(
|
||||||
video_info_url,
|
'%s://www.youtube.com/get_video_info' % proto,
|
||||||
video_id, note=False,
|
video_id, note=False,
|
||||||
errnote='unable to download video info webpage')
|
errnote='unable to download video info webpage',
|
||||||
|
fatal=False, query=query)
|
||||||
|
if not video_info_webpage:
|
||||||
|
continue
|
||||||
get_video_info = compat_parse_qs(video_info_webpage)
|
get_video_info = compat_parse_qs(video_info_webpage)
|
||||||
if get_video_info.get('use_cipher_signature') != ['True']:
|
add_dash_mpd(get_video_info)
|
||||||
add_dash_mpd(get_video_info)
|
|
||||||
if not video_info:
|
if not video_info:
|
||||||
video_info = get_video_info
|
video_info = get_video_info
|
||||||
if 'token' in get_video_info:
|
if 'token' in get_video_info:
|
||||||
@@ -1703,12 +1741,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
format_id = url_data['itag'][0]
|
format_id = url_data['itag'][0]
|
||||||
url = url_data['url'][0]
|
url = url_data['url'][0]
|
||||||
|
|
||||||
if 'sig' in url_data:
|
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
url += '&signature=' + url_data['sig'][0]
|
|
||||||
elif 's' in url_data:
|
|
||||||
encrypted_sig = url_data['s'][0]
|
|
||||||
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
||||||
|
|
||||||
jsplayer_url_json = self._search_regex(
|
jsplayer_url_json = self._search_regex(
|
||||||
ASSETS_RE,
|
ASSETS_RE,
|
||||||
embed_webpage if age_gate else video_webpage,
|
embed_webpage if age_gate else video_webpage,
|
||||||
@@ -1729,6 +1763,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_webpage, 'age gate player URL')
|
video_webpage, 'age gate player URL')
|
||||||
player_url = json.loads(player_url_json)
|
player_url = json.loads(player_url_json)
|
||||||
|
|
||||||
|
if 'sig' in url_data:
|
||||||
|
url += '&signature=' + url_data['sig'][0]
|
||||||
|
elif 's' in url_data:
|
||||||
|
encrypted_sig = url_data['s'][0]
|
||||||
|
|
||||||
if self._downloader.params.get('verbose'):
|
if self._downloader.params.get('verbose'):
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
player_version = 'unknown'
|
player_version = 'unknown'
|
||||||
|
@@ -214,15 +214,18 @@ class JSInterpreter(object):
|
|||||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||||
obj = {}
|
obj = {}
|
||||||
obj_m = re.search(
|
obj_m = re.search(
|
||||||
(r'(?<!this\.)%s\s*=\s*\{' % re.escape(objname)) +
|
r'''(?x)
|
||||||
r'\s*(?P<fields>(%s\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
|
(?<!this\.)%s\s*=\s*{\s*
|
||||||
r'\}\s*;' % _FUNC_NAME_RE,
|
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
|
||||||
|
}\s*;
|
||||||
|
''' % (re.escape(objname), _FUNC_NAME_RE),
|
||||||
self.code)
|
self.code)
|
||||||
fields = obj_m.group('fields')
|
fields = obj_m.group('fields')
|
||||||
# Currently, it only supports function definitions
|
# Currently, it only supports function definitions
|
||||||
fields_m = re.finditer(
|
fields_m = re.finditer(
|
||||||
r'(?P<key>%s)\s*:\s*function'
|
r'''(?x)
|
||||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}' % _FUNC_NAME_RE,
|
(?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
|
||||||
|
''' % _FUNC_NAME_RE,
|
||||||
fields)
|
fields)
|
||||||
for f in fields_m:
|
for f in fields_m:
|
||||||
argnames = f.group('args').split(',')
|
argnames = f.group('args').split(',')
|
||||||
|
@@ -310,7 +310,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
metavar='FILTER', dest='match_filter', default=None,
|
metavar='FILTER', dest='match_filter', default=None,
|
||||||
help=(
|
help=(
|
||||||
'Generic video filter. '
|
'Generic video filter. '
|
||||||
'Specify any key (see help for -o for a list of available keys) to '
|
'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
|
||||||
'match if the key is present, '
|
'match if the key is present, '
|
||||||
'!key to check if the key is not present, '
|
'!key to check if the key is not present, '
|
||||||
'key > NUMBER (like "comment_count > 12", also works with '
|
'key > NUMBER (like "comment_count > 12", also works with '
|
||||||
@@ -618,7 +618,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'-j', '--dump-json',
|
'-j', '--dump-json',
|
||||||
action='store_true', dest='dumpjson', default=False,
|
action='store_true', dest='dumpjson', default=False,
|
||||||
help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
|
help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'-J', '--dump-single-json',
|
'-J', '--dump-single-json',
|
||||||
action='store_true', dest='dump_single_json', default=False,
|
action='store_true', dest='dump_single_json', default=False,
|
||||||
|
@@ -4,7 +4,10 @@ import subprocess
|
|||||||
|
|
||||||
from .common import PostProcessor
|
from .common import PostProcessor
|
||||||
from ..compat import compat_shlex_quote
|
from ..compat import compat_shlex_quote
|
||||||
from ..utils import PostProcessingError
|
from ..utils import (
|
||||||
|
encodeArgument,
|
||||||
|
PostProcessingError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ExecAfterDownloadPP(PostProcessor):
|
class ExecAfterDownloadPP(PostProcessor):
|
||||||
@@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
|
|||||||
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
|
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
|
||||||
|
|
||||||
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
|
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
|
||||||
retCode = subprocess.call(cmd, shell=True)
|
retCode = subprocess.call(encodeArgument(cmd), shell=True)
|
||||||
if retCode != 0:
|
if retCode != 0:
|
||||||
raise PostProcessingError(
|
raise PostProcessingError(
|
||||||
'Command returned error code %d' % retCode)
|
'Command returned error code %d' % retCode)
|
||||||
|
@@ -444,7 +444,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
chapters = info.get('chapters', [])
|
chapters = info.get('chapters', [])
|
||||||
if chapters:
|
if chapters:
|
||||||
metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
|
metadata_filename = replace_extension(filename, 'meta')
|
||||||
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
||||||
def ffmpeg_escape(text):
|
def ffmpeg_escape(text):
|
||||||
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
||||||
|
@@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
|
|||||||
title = info['title']
|
title = info['title']
|
||||||
match = re.match(self._titleregex, title)
|
match = re.match(self._titleregex, title)
|
||||||
if match is None:
|
if match is None:
|
||||||
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
|
self._downloader.to_screen(
|
||||||
|
'[fromtitle] Could not interpret title of video as "%s"'
|
||||||
|
% self._titleformat)
|
||||||
return [], info
|
return [], info
|
||||||
for attribute, value in match.groupdict().items():
|
for attribute, value in match.groupdict().items():
|
||||||
value = match.group(attribute)
|
|
||||||
info[attribute] = value
|
info[attribute] = value
|
||||||
self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
|
self._downloader.to_screen(
|
||||||
|
'[fromtitle] parsed %s: %s'
|
||||||
|
% (attribute, value if value is not None else 'NA'))
|
||||||
|
|
||||||
return [], info
|
return [], info
|
||||||
|
@@ -22,7 +22,6 @@ import locale
|
|||||||
import math
|
import math
|
||||||
import operator
|
import operator
|
||||||
import os
|
import os
|
||||||
import pipes
|
|
||||||
import platform
|
import platform
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
@@ -36,6 +35,7 @@ import xml.etree.ElementTree
|
|||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_HTMLParseError,
|
||||||
compat_HTMLParser,
|
compat_HTMLParser,
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
@@ -409,8 +409,12 @@ def extract_attributes(html_element):
|
|||||||
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
||||||
"""
|
"""
|
||||||
parser = HTMLAttributeParser()
|
parser = HTMLAttributeParser()
|
||||||
parser.feed(html_element)
|
try:
|
||||||
parser.close()
|
parser.feed(html_element)
|
||||||
|
parser.close()
|
||||||
|
# Older Python may throw HTMLParseError in case of malformed HTML
|
||||||
|
except compat_HTMLParseError:
|
||||||
|
pass
|
||||||
return parser.attrs
|
return parser.attrs
|
||||||
|
|
||||||
|
|
||||||
@@ -932,14 +936,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
except zlib.error:
|
except zlib.error:
|
||||||
return zlib.decompress(data)
|
return zlib.decompress(data)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def addinfourl_wrapper(stream, headers, url, code):
|
|
||||||
if hasattr(compat_urllib_request.addinfourl, 'getcode'):
|
|
||||||
return compat_urllib_request.addinfourl(stream, headers, url, code)
|
|
||||||
ret = compat_urllib_request.addinfourl(stream, headers, url)
|
|
||||||
ret.code = code
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||||
@@ -991,13 +987,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise original_ioerror
|
raise original_ioerror
|
||||||
resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
|
resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
del resp.headers['Content-encoding']
|
del resp.headers['Content-encoding']
|
||||||
# deflate
|
# deflate
|
||||||
if resp.headers.get('Content-encoding', '') == 'deflate':
|
if resp.headers.get('Content-encoding', '') == 'deflate':
|
||||||
gz = io.BytesIO(self.deflate(resp.read()))
|
gz = io.BytesIO(self.deflate(resp.read()))
|
||||||
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
del resp.headers['Content-encoding']
|
del resp.headers['Content-encoding']
|
||||||
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
|
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
|
||||||
@@ -1187,7 +1183,7 @@ def unified_timestamp(date_str, day_first=True):
|
|||||||
if date_str is None:
|
if date_str is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
date_str = date_str.replace(',', ' ')
|
date_str = re.sub(r'[,|]', '', date_str)
|
||||||
|
|
||||||
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||||
timezone, date_str = extract_timezone(date_str)
|
timezone, date_str = extract_timezone(date_str)
|
||||||
@@ -1538,7 +1534,7 @@ def shell_quote(args):
|
|||||||
if isinstance(a, bytes):
|
if isinstance(a, bytes):
|
||||||
# We may get a filename encoded with 'encodeFilename'
|
# We may get a filename encoded with 'encodeFilename'
|
||||||
a = a.decode(encoding)
|
a = a.decode(encoding)
|
||||||
quoted_args.append(pipes.quote(a))
|
quoted_args.append(compat_shlex_quote(a))
|
||||||
return ' '.join(quoted_args)
|
return ' '.join(quoted_args)
|
||||||
|
|
||||||
|
|
||||||
@@ -2211,7 +2207,12 @@ def parse_age_limit(s):
|
|||||||
|
|
||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
r'''(?sx)^
|
||||||
|
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
|
||||||
|
(?:\s*&&\s*(?P=func_name))?
|
||||||
|
\s*\(\s*(?P<callback_data>.*)\);?
|
||||||
|
\s*?(?://[^\n]*)*$''',
|
||||||
|
r'\g<callback_data>', code)
|
||||||
|
|
||||||
|
|
||||||
def js_to_json(code):
|
def js_to_json(code):
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user