Compare commits: 2017.05.07...2017.06.12
166 commits
SHA1: cb1e6d8985, 9932ac5c58, bf87c36c93, b4a3d461e4, 72b409559c, 534863e057,
16bc958287, 624bd0104c, 28a4d6cce8, 2ae2ffda5e, 70e7967202, 6e999fbc12,
7409af9eb3, 4e3637034c, 1afd0b0da7, 7515830422, f5521ea209, 34646967ba,
e4d2e76d8e, 87f5646937, cc69a3de1b, 15aeeb1188, 1693bebe4d, 4244a13a1d,
931adf8cc1, c996943418, 76e6378358, a355b57f58, 1508da30c2, eb703e5380,
0a3924e746, e1db730d86, 537191826f, 130880ba48, f8ba3fda4d, e1b90cc3db,
43e6579558, 6d923aab35, 62bafabc09, 9edcdac90c, cd138d8bd4, cd750b731c,
4bede0d8f5, f129c3f349, 39d4c1be4d, f7a747ce59, 4489d41816, 87b5184a0d,
c56ad5c975, 6b7ce85cdc, d10d0e3cf8, 941ea38ef5, 99bea8d298, a49eccdfa7,
a846173d93, 78e210dea5, 8555204274, 164fcbfeb7, bc22df29c4, 7e688d2f6a,
5a6d1da442, 703751add4, 4050be78e5, 4d9fc40100, 765522345f, 6bceb36b99,
1e0d65f0bd, 03327bc9a6, b407d8533d, 20e2c9de04, d16c0121b9, 7f4c3a7439,
28dbde9cc3, cc304ce588, 98a0618941, fd545fc6d1, 97067db2ae, c130f0a37b,
d3d4ba7f24, 5552c9eb0f, 59ed87cbd9, b7f8749304, 5192ee17e7, e834f04400,
884d09f330, 9e35298f97, 0551f1b07b, de53511201, 2570e85167, 9dc5ab041f,
01f3c8e290, 06c1b3ce07, 0b75e42dfb, a609e61a90, afdb387cd8, dc4e4f90a2,
fdc20f87a6, 35a2d221a3, daa4e9ff90, 2ca29f1aaf, 77d682da9d, 8fffac6927,
5f6fbcea08, 00cb0faca8, bfdf6fcc66, bcaa1dd060, 0e2d626ddd, 9221d5d7a8,
9d63e57d1f, 3bc1eea0d8, 7769f83701, 650bd94716, 36b226d48f, f2e2f0c777,
6f76679804, 7073015a23, 89fd03079b, 1c45b7a8a9, 60f5c9fb19, c360e641e9,
6f3c632c24, 09b866e171, 166d12b00c, 2b8e6a68f8, d105a7edc6, 5d29af3d15,
ca04de463d, 946826eec7, 76d5a36391, 56f9c77f0e, 0de136341a, 1339ecb2f8,
efe9316703, 851a01aed6, b845766597, fa26734e07, 12f01118b0, 7fc60f4ee9,
58bb440283, 7ad4362357, 6c52477f59, 116283ff64, 7274f3d0e9, 3166b1f0ac,
39ee263819, a7ed6b341c, cbd84b5817, 6d1ded7502, 5d0968f0af, 8d65880e24,
b972fb037b, 5996d21aea, afa0200bf0, e9137224b3, 804181dda9, 8fa17117df,
3b859145c2, 04c09f1961, bf82b87323, b6eb74e340, 3d40084b83, 52294cdda7,
2eeb588efe, 96820c1c6b, e095109da1, d68afc5bc9
.github/ISSUE_TEMPLATE.md (6 changes, vendored)

@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.07**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.05.07
+[debug] youtube-dl version 2017.06.12
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
AUTHORS (8 additions)

@@ -212,3 +212,11 @@ Xiao Di Guan
 Thomas Winant
 Daniel Twardowski
 Jeremie Jarosh
+Gerard Rovira
+Marvin Ewald
+Frédéric Bournival
+Timendum
+gritstub
+Adam Voss
+Mike Fährmann
+Jan Kundrát
ChangeLog (172 additions)

@@ -1,3 +1,175 @@
+version 2017.06.12
+
+Core
+* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
++ [compat] Introduce compat_HTMLParseError
+* [utils] Improve unified_timestamp
+* [extractor/generic] Ensure format id is unicode string
+* [extractor/common] Return unicode string from _match_id
++ [YoutubeDL] Sanitize more fields (#13313)
+
+Extractors
++ [xfileshare] Add support for rapidvideo.tv (#13348)
+* [xfileshare] Modernize and pass Referer
++ [rutv] Add support for testplayer.vgtrk.com (#13347)
++ [newgrounds] Extract more metadata (#13232)
++ [newgrounds:playlist] Add support for playlists (#10611)
+* [newgrounds] Improve formats and uploader extraction (#13346)
+* [msn] Fix formats extraction
+* [turbo] Ensure format id is string
+* [sexu] Ensure height is int
+* [jove] Ensure comment count is int
+* [golem] Ensure format id is string
+* [gfycat] Ensure filesize is int
+* [foxgay] Ensure height is int
+* [flickr] Ensure format id is string
+* [sohu] Fix numeric fields
+* [safari] Improve authentication detection (#13319)
+* [liveleak] Ensure height is int (#13313)
+* [streamango] Make title optional (#13292)
+* [rtlnl] Improve URL regular expression (#13295)
+* [tvplayer] Fix extraction (#13291)
+
+
+version 2017.06.05
+
+Core
+* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
+
+Extractors
++ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
+* [pornhub:playlist] Fix extraction (#13281)
+- [godtv] Remove extractor (#13175)
+* [safari] Fix typo (#13252)
+* [youtube] Improve chapters extraction (#13247)
+* [1tv] Lower preference for HTTP formats (#13246)
+* [francetv] Relax URL regular expression
+* [drbonanza] Fix extraction (#13231)
+* [packtpub] Fix authentication (#13240)
+
+
+version 2017.05.29
+
+Extractors
+* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
+  (#13211)
+* [xhamster] Fix uploader and like/dislike count extraction (#13216))
++ [xhamster] Extract categories (#11728)
++ [abcnews] Add support for embed URLs (#12851)
+* [gaskrank] Fix extraction (#12493)
+* [medialaan] Fix videos with missing videoUrl (#12774)
+* [dvtv] Fix playlist support
++ [dvtv] Add support for DASH and HLS formats (#3063)
++ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032))
+* [cbsinteractive] Relax URL regular expression (#13213)
+* [adn] Fix formats extraction
++ [youku] Extract more metadata (#10433)
+* [cbsnews] Fix extraction (#13205)
+
+
+version 2017.05.26
+
+Core
++ [utils] strip_jsonp() can recognize more patterns
+* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
+
+Extractors
++ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
++ [bbc] Add support for authentication
+* [tudou] Merge into youku extractor (#12214)
+* [youku:show] Fix extraction
+* [youku] Fix extraction (#13191)
+* [udemy] Fix extraction for outputs' format entries without URL (#13192)
+* [vimeo] Fix formats' sorting (#13189)
+* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
+
+
+version 2017.05.23
+
+Core
++ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
++ [adobepass] Add support for Bright House Networks (#13149)
+
+Extractors
++ [streamcz] Add support for subtitles (#13174)
+* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
+* [toggle] Relax URL regular expression (#13172)
+* [toypics] Fix extraction (#13077)
+* [njpwworld] Fix extraction (#13162, #13169)
++ [hitbox] Add support for smashcast.tv (#13154)
+* [mitele] Update app key regular expression (#13158)
+
+
+version 2017.05.18.1
+
+Core
+* [jsinterp] Fix typo and cleanup regular expressions (#13134)
+
+
+version 2017.05.18
+
+Core
++ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
+  #13126, #13128, #13129, #13130, #13131, #13132)
++ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
+  (#13088, #13092)
++ [utils] Recognize more audio codecs (#13081)
+
+Extractors
++ [vier] Extract more metadata (#12539)
+* [vier] Improve extraction (#12801)
+    + Add support for authentication
+    * Bypass authentication when no credentials provided
+    * Improve extraction robustness
+* [dailymail] Fix sources extraction (#13057)
+* [dailymotion] Extend URL regular expression (#13079)
+
+
+version 2017.05.14
+
+Core
++ [extractor/common] Respect Width and Height attributes in ISM manifests
++ [postprocessor/metadatafromtitle] Add support regular expression syntax for
+  --metadata-from-title (#13065)
+
+Extractors
++ [mediaset] Add support for video.mediaset.it (#12708, #12964)
+* [orf:radio] Fix extraction (#11643, #12926)
+* [aljazeera] Extend URL regular expression (#13053)
+* [imdb] Relax URL regular expression (#13056)
++ [francetv] Add support for mobile.france.tv (#13068)
++ [upskill] Add support for upskillcourses.com (#13043)
+* [thescene] Fix extraction (#13061)
+* [condenast] Improve embed support
+* [liveleak] Fix extraction (#12053)
++ [douyu] Support Douyu shows (#12228)
+* [myspace] Improve URL regular expression (#13040)
+* [adultswim] Use desktop platform in assets URL (#13041)
+
+
+version 2017.05.09
+
+Core
+* [YoutubeDL] Force --restrict-filenames when no locale is set on all python
+  versions (#13027)
+
+Extractors
+* [francetv] Adapt to site redesign (#13034)
++ [packtpub] Add support for authentication (#12622)
+* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
++ [cspan] Add support for brightcove live embeds (#13028)
+* [vrv] Extract DASH formats and subtitles
+* [funimation] Fix authentication (#13021)
+* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
+    + Add support for Adobe Pass authentication
+    + Add support for live streams
+    + Add support for show pages
+* [turner] Extract thumbnail, is_live and strip description
++ [nonktube] Add support for nonktube.com (#8647, #13024)
++ [nuevo] Pass headers to _extract_nuevo
+* [nbc] Improve extraction (#12364)
+
+
 version 2017.05.07
 
 Common
README.md (53 changes)

@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 --max-views COUNT Do not download any videos with more than COUNT views
---match-filter FILTER Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = 'LITERAL' (like "uploader = 'Mike Smith'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given
+--match-filter FILTER Generic video filter. Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = 'LITERAL' (like "uploader = 'Mike Smith'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given
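The filter syntax documented above can also be exercised from the embedding API. Below is a minimal sketch, not part of this diff: it feeds a `--match-filter` style string to `YoutubeDL` through `match_filter_func`, the helper the command line itself uses to turn the string into a callable; the example URL is only a placeholder test video.

```python
# Minimal sketch, assuming youtube-dl is importable; not taken from the diff.
from __future__ import unicode_literals

import youtube_dl
from youtube_dl.utils import match_filter_func

ydl_opts = {
    # Same syntax as --match-filter: keep videos liked more than 100 times
    # and disliked fewer than 50 times (or with unknown dislike count, '?').
    'match_filter': match_filter_func('like_count > 100 & dislike_count <? 50'),
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```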
@@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 --get-filename Simulate, quiet but print output filename
 --get-format Simulate, quiet but print output format
--j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
+-j, --dump-json Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.
 -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole
@@ -400,12 +400,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 --add-metadata Write metadata to the video file
---metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise"
+--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output. Regular expression with named capture groups may also be used. The parsed parameters replace existing values. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise". Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"
 --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
 --fixup POLICY Automatically correct known faults of the
@@ -472,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
 ```
 To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
 
-On Windows you may also need to setup the `%HOME%` environment variable manually.
+On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
+```
+set HOME=%USERPROFILE%
+```
 
 # OUTPUT TEMPLATE
 
@@ -530,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
 - `playlist_id` (string): Playlist identifier
 - `playlist_title` (string): Playlist title
 
 
 Available for the video that belongs to some logical chapter or section:
 
 - `chapter` (string): Name or title of the chapter the video belongs to
 - `chapter_number` (numeric): Number of the chapter the video belongs to
 - `chapter_id` (string): Id of the chapter the video belongs to
 
 Available for the video that is an episode of some series or programme:
 
 - `series` (string): Title of the series or programme the video episode belongs to
 - `season` (string): Title of the season the video episode belongs to
 - `season_number` (numeric): Number of the season the video episode belongs to
@@ -546,6 +552,7 @@ Available for the video that is an episode of some series or programme:
 - `episode_id` (string): Id of the video episode
 
 Available for the media that is a track or a part of a music album:
 
 - `track` (string): Title of the track
 - `track_number` (numeric): Number of the track within an album or a disc
 - `track_id` (string): Id of the track
@@ -647,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
 - `acodec`: Name of the audio codec in use
 - `vcodec`: Name of the video codec in use
 - `container`: Name of the container format
-- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
+- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
 - `format_id`: A short description of the format
 
 Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
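As a hedged illustration that is not part of this diff, the `protocol` field listed above can also drive format selection from the embedding API; the format string below follows the README's format-filtering syntax and the URL is only a placeholder.

```python
# Illustrative sketch only: option names are youtube-dl's documented
# embedding options, and the format filter uses the `protocol` field above.
from __future__ import unicode_literals

import youtube_dl

ydl_opts = {
    # Prefer a format delivered over native HLS, then any HLS, then the best.
    'format': 'best[protocol=m3u8_native]/best[protocol^=m3u8]/best',
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```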
@@ -87,13 +87,13 @@
|
|||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
|
- **Bandcamp:weekly**
|
||||||
- **bangumi.bilibili.com**: BiliBili番剧
|
- **bangumi.bilibili.com**: BiliBili番剧
|
||||||
- **bbc**: BBC
|
- **bbc**: BBC
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
- **bbc.co.uk:article**: BBC articles
|
- **bbc.co.uk:article**: BBC articles
|
||||||
- **bbc.co.uk:iplayer:playlist**
|
- **bbc.co.uk:iplayer:playlist**
|
||||||
- **bbc.co.uk:playlist**
|
- **bbc.co.uk:playlist**
|
||||||
- **Beam:live**
|
|
||||||
- **Beatport**
|
- **Beatport**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
@@ -216,6 +216,7 @@
|
|||||||
- **DiscoveryVR**
|
- **DiscoveryVR**
|
||||||
- **Disney**
|
- **Disney**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
|
- **DouyuShow**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
- **DPlayIt**
|
- **DPlayIt**
|
||||||
@@ -281,7 +282,8 @@
|
|||||||
- **france2.fr:generation-quoi**
|
- **france2.fr:generation-quoi**
|
||||||
- **FranceCulture**
|
- **FranceCulture**
|
||||||
- **FranceInter**
|
- **FranceInter**
|
||||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
- **FranceTV**
|
||||||
|
- **FranceTVEmbed**
|
||||||
- **francetvinfo.fr**
|
- **francetvinfo.fr**
|
||||||
- **Freesound**
|
- **Freesound**
|
||||||
- **freespeech.org**
|
- **freespeech.org**
|
||||||
@@ -309,7 +311,6 @@
|
|||||||
- **Go**
|
- **Go**
|
||||||
- **Go90**
|
- **Go90**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GodTV**
|
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
@@ -432,6 +433,7 @@
|
|||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **Medialaan**
|
- **Medialaan**
|
||||||
|
- **Mediaset**
|
||||||
- **Medici**
|
- **Medici**
|
||||||
- **Meipai**: 美拍
|
- **Meipai**: 美拍
|
||||||
- **MelonVOD**
|
- **MelonVOD**
|
||||||
@@ -450,6 +452,8 @@
|
|||||||
- **mixcloud:playlist**
|
- **mixcloud:playlist**
|
||||||
- **mixcloud:stream**
|
- **mixcloud:stream**
|
||||||
- **mixcloud:user**
|
- **mixcloud:user**
|
||||||
|
- **Mixer:live**
|
||||||
|
- **Mixer:vod**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
- **Mnet**
|
- **Mnet**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
@@ -508,6 +512,7 @@
|
|||||||
- **netease:song**: 网易云音乐
|
- **netease:song**: 网易云音乐
|
||||||
- **Netzkino**
|
- **Netzkino**
|
||||||
- **Newgrounds**
|
- **Newgrounds**
|
||||||
|
- **NewgroundsPlaylist**
|
||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**: 蘋果日報
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
@@ -530,6 +535,7 @@
|
|||||||
- **NJPWWorld**: 新日本プロレスワールド
|
- **NJPWWorld**: 新日本プロレスワールド
|
||||||
- **NobelPrize**
|
- **NobelPrize**
|
||||||
- **Noco**
|
- **Noco**
|
||||||
|
- **NonkTube**
|
||||||
- **Noovo**
|
- **Noovo**
|
||||||
- **Normalboots**
|
- **Normalboots**
|
||||||
- **NosVideo**
|
- **NosVideo**
|
||||||
@@ -602,7 +608,6 @@
|
|||||||
- **pluralsight**
|
- **pluralsight**
|
||||||
- **pluralsight:course**
|
- **pluralsight:course**
|
||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
- **Pokemon**
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
@@ -800,16 +805,13 @@
|
|||||||
- **ToonGoggles**
|
- **ToonGoggles**
|
||||||
- **Tosh**: Tosh.0
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics video
|
||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
- **TruTV**
|
- **TruTV**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
- **tudou**
|
|
||||||
- **tudou:album**
|
|
||||||
- **tudou:playlist**
|
|
||||||
- **Tumblr**
|
- **Tumblr**
|
||||||
- **tunein:clip**
|
- **tunein:clip**
|
||||||
- **tunein:program**
|
- **tunein:program**
|
||||||
@@ -860,6 +862,8 @@
|
|||||||
- **uol.com.br**
|
- **uol.com.br**
|
||||||
- **uplynk**
|
- **uplynk**
|
||||||
- **uplynk:preplay**
|
- **uplynk:preplay**
|
||||||
|
- **Upskill**
|
||||||
|
- **UpskillCourse**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
- **URPlay**
|
- **URPlay**
|
||||||
- **USANetwork**
|
- **USANetwork**
|
||||||
@@ -971,7 +975,7 @@
|
|||||||
- **WSJArticle**
|
- **WSJArticle**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
|
test/test_utils.py

@@ -340,6 +340,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
         self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
         self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+        self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -678,6 +679,14 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -907,6 +916,8 @@ class TestUtil(unittest.TestCase):
             supports_outside_bmp = False
         if supports_outside_bmp:
             self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})
 
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
test/test_youtube_chapters.py

@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
                 'title': '3 - Из серпов луны...[Iz serpov luny]',
             }]
         ),
+        (
+            # https://www.youtube.com/watch?v=xZW70zEasOk
+            # time point more than duration
+            '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
+            283,
+            []
+        ),
     ]
 
     def test_youtube_chapters(self):
@@ -58,6 +58,7 @@ from .utils import (
|
|||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
@@ -302,6 +303,17 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_NUMERIC_FIELDS = set((
|
||||||
|
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||||
|
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||||
|
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||||
|
'average_rating', 'comment_count', 'age_limit',
|
||||||
|
'start_time', 'end_time',
|
||||||
|
'chapter_number', 'season_number', 'episode_number',
|
||||||
|
'track_number', 'disc_number', 'release_year',
|
||||||
|
'playlist_index',
|
||||||
|
))
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
_ies = []
|
_ies = []
|
||||||
_pps = []
|
_pps = []
|
||||||
@@ -370,10 +382,10 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||||
not params.get('restrictfilenames', False)):
|
not params.get('restrictfilenames', False)):
|
||||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
# Unicode filesystem API will throw errors (#1474, #13027)
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Assuming --restrict-filenames since file system encoding '
|
'Assuming --restrict-filenames since file system encoding '
|
||||||
'cannot encode all characters. '
|
'cannot encode all characters. '
|
||||||
@@ -498,24 +510,25 @@ class YoutubeDL(object):
|
|||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
if compat_os_name == 'nt':
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||||
# already of type unicode()
|
# c_wchar_p() might not be necessary if `message` is
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
# already of type unicode()
|
||||||
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
elif 'TERM' in os.environ:
|
elif 'TERM' in os.environ:
|
||||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
def save_console_title(self):
|
def save_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Save the title on stack
|
# Save the title on stack
|
||||||
self._write_string('\033[22;0t', self._screen_file)
|
self._write_string('\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
def restore_console_title(self):
|
def restore_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Restore the title from stack
|
# Restore the title from stack
|
||||||
self._write_string('\033[23;0t', self._screen_file)
|
self._write_string('\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
@@ -638,22 +651,11 @@ class YoutubeDL(object):
|
|||||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||||
outtmpl)
|
outtmpl)
|
||||||
|
|
||||||
NUMERIC_FIELDS = set((
|
|
||||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
|
||||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
|
||||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
|
||||||
'average_rating', 'comment_count', 'age_limit',
|
|
||||||
'start_time', 'end_time',
|
|
||||||
'chapter_number', 'season_number', 'episode_number',
|
|
||||||
'track_number', 'disc_number', 'release_year',
|
|
||||||
'playlist_index',
|
|
||||||
))
|
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string 'NA' is returned for missing fields. We will patch output
|
||||||
# template for missing fields to meet string presentation type.
|
# template for missing fields to meet string presentation type.
|
||||||
for numeric_field in NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
@@ -1344,9 +1346,28 @@ class YoutubeDL(object):
|
|||||||
if 'title' not in info_dict:
|
if 'title' not in info_dict:
|
||||||
raise ExtractorError('Missing "title" field in extractor result')
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
if not isinstance(info_dict['id'], compat_str):
|
def report_force_conversion(field, field_not, conversion):
|
||||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
self.report_warning(
|
||||||
info_dict['id'] = compat_str(info_dict['id'])
|
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||||
|
% (field, field_not, conversion))
|
||||||
|
|
||||||
|
def sanitize_string_field(info, string_field):
|
||||||
|
field = info.get(string_field)
|
||||||
|
if field is None or isinstance(field, compat_str):
|
||||||
|
return
|
||||||
|
report_force_conversion(string_field, 'a string', 'string')
|
||||||
|
info[string_field] = compat_str(field)
|
||||||
|
|
||||||
|
def sanitize_numeric_fields(info):
|
||||||
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
|
field = info.get(numeric_field)
|
||||||
|
if field is None or isinstance(field, compat_numeric_types):
|
||||||
|
continue
|
||||||
|
report_force_conversion(numeric_field, 'numeric', 'int')
|
||||||
|
info[numeric_field] = int_or_none(field)
|
||||||
|
|
||||||
|
sanitize_string_field(info_dict, 'id')
|
||||||
|
sanitize_numeric_fields(info_dict)
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
@@ -1434,6 +1455,8 @@ class YoutubeDL(object):
|
|||||||
if 'url' not in format:
|
if 'url' not in format:
|
||||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||||
|
|
||||||
|
sanitize_string_field(format, 'format_id')
|
||||||
|
sanitize_numeric_fields(format)
|
||||||
format['url'] = sanitize_url(format['url'])
|
format['url'] = sanitize_url(format['url'])
|
||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
|
youtube_dl/compat.py

@@ -2322,6 +2322,19 @@ try:
 except ImportError:  # Python 2
     from HTMLParser import HTMLParser as compat_HTMLParser
 
+try:  # Python 2
+    from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError:  # Python <3.4
+    try:
+        from html.parser import HTMLParseError as compat_HTMLParseError
+    except ImportError:  # Python >3.4
+
+        # HTMLParseError has been deprecated in Python 3.3 and removed in
+        # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
+        # and uniform cross-version exceptiong handling
+        class compat_HTMLParseError(Exception):
+            pass
+
 try:
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
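Per the ChangeLog, this shim is consumed by `extract_attributes` in `youtube_dl/utils.py`, which is not shown in this excerpt. A hedged sketch of the catching pattern it enables follows; the helper name is hypothetical, not code from this commit range.

```python
# Sketch only: shows the cross-version catch that compat_HTMLParseError
# enables. parse_or_skip is a hypothetical helper; the real consumer is
# extract_attributes in youtube_dl/utils.py.
from youtube_dl.compat import compat_HTMLParseError


def parse_or_skip(parser, fragment):
    """Feed a fragment to an HTMLParser instance, ignoring malformed markup."""
    try:
        parser.feed(fragment)
        parser.close()
    except compat_HTMLParseError:
        # Python 2 may raise HTMLParseError on input such as '<mal"formed/>'
        # (see the new test above); on Python 3.5+ the dummy exception makes
        # this branch unreachable, so behaviour stays uniform across versions.
        pass
```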
youtube_dl/downloader/external.py

@@ -212,6 +212,11 @@ class FFmpegFD(ExternalFD):
 
         args = [ffpp.executable, '-y']
 
+        for log_level in ('quiet', 'verbose'):
+            if self.params.get(log_level, False):
+                args += ['-loglevel', log_level]
+                break
+
         seekable = info_dict.get('_seekable')
         if seekable is not None:
             # setting -seekable prevents ffmpeg from guessing if the server
|
@@ -12,7 +12,15 @@ from ..compat import compat_urlparse
|
|||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
IE_NAME = 'abcnews:video'
|
IE_NAME = 'abcnews:video'
|
||||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
abcnews\.go\.com/
|
||||||
|
(?:
|
||||||
|
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
|
video/embed\?.*?\bid=
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||||
@@ -29,6 +37,9 @@ class AbcNewsVideoIE(AMPIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/video/embed?id=46979033',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,25 +32,28 @@ class ADNIE(InfoExtractor):
|
|||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
if not sub_path:
|
if not sub_path:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
'http://animedigitalnetwork.fr/' + sub_path,
|
urljoin(self._BASE_URL, sub_path),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False, headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
||||||
|
})
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
|
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
|
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||||
None, fatal=False)
|
None, fatal=False)
|
||||||
if not subtitles_json:
|
if not subtitles_json:
|
||||||
return None
|
return None
|
||||||
@@ -103,9 +107,16 @@ class ADNIE(InfoExtractor):
|
|||||||
metas = options.get('metas') or {}
|
metas = options.get('metas') or {}
|
||||||
title = metas.get('title') or video_info['title']
|
title = metas.get('title') or video_info['title']
|
||||||
links = player_config.get('links') or {}
|
links = player_config.get('links') or {}
|
||||||
|
if not links:
|
||||||
|
links_url = player_config['linksurl']
|
||||||
|
links_data = self._download_json(urljoin(
|
||||||
|
self._BASE_URL, links_url), video_id)
|
||||||
|
links = links_data.get('links') or {}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, qualities in links.items():
|
for format_id, qualities in links.items():
|
||||||
|
if not isinstance(qualities, dict):
|
||||||
|
continue
|
||||||
for load_balancer_url in qualities.values():
|
for load_balancer_url in qualities.values():
|
||||||
load_balancer_data = self._download_json(
|
load_balancer_data = self._download_json(
|
||||||
load_balancer_url, video_id, fatal=False) or {}
|
load_balancer_url, video_id, fatal=False) or {}
|
||||||
|
youtube_dl/extractor/adobepass.py

@@ -36,6 +36,11 @@ MSO_INFO = {
         'username_field': 'Ecom_User_ID',
         'password_field': 'Ecom_Password',
     },
+    'Brighthouse': {
+        'name': 'Bright House Networks | Spectrum',
+        'username_field': 'j_username',
+        'password_field': 'j_password',
+    },
     'Charter_Direct': {
         'name': 'Charter Spectrum',
         'username_field': 'IDToken1',
@@ -5,91 +5,52 @@ import re
|
|||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AdultSwimIE(TurnerBaseIE):
|
class AdultSwimIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'md5': '247572debc75c7652f253c8daa51a14d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Rick and Morty - Pilot Part 1',
|
|
||||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Rick and Morty - Pilot Part 4',
|
|
||||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
|
||||||
},
|
|
||||||
},
|
|
||||||
],
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Rick and Morty - Pilot',
|
'title': 'Rick and Morty - Pilot',
|
||||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
|
||||||
},
|
'timestamp': 1493267400,
|
||||||
'skip': 'This video is only available for registered users',
|
'upload_date': '20170427',
|
||||||
}, {
|
|
||||||
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
|
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'American Dad - Putting Francine Out of Business',
|
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
|
||||||
},
|
|
||||||
}
|
|
||||||
],
|
|
||||||
'info_dict': {
|
|
||||||
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
|
||||||
'title': 'American Dad - Putting Francine Out of Business',
|
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
|
||||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
],
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
|
||||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
|
||||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||||
|
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
|
||||||
|
'upload_date': '20080124',
|
||||||
|
'timestamp': 1201150800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# heroMetadata.trailer
|
|
||||||
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Decker - Inside Decker: A New Hero',
|
'title': 'Decker - Inside Decker: A New Hero',
|
||||||
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
|
||||||
'duration': 249.008,
|
'timestamp': 1469480460,
|
||||||
|
'upload_date': '20160725',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE):
|
|||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
|
'url': 'http://www.adultswim.com/videos/attack-on-titan',
|
||||||
'info_dict': {
|
'info_dict': {
-            'id': 'eYiLsKVgQ6qTC6agD67Sig',
-            'title': 'Toonami - Friday, October 14th, 2016',
-            'description': 'md5:99892c96ffc85e159a428de85c30acde',
+            'id': 'b7A69dzfRzuaXIECdxW8XQ',
+            'title': 'Attack on Titan',
+            'description': 'md5:6c8e003ea0777b47013e894767f5e114',
+        },
+        'playlist_mincount': 12,
+    }, {
+        'url': 'http://www.adultswim.com/videos/streams/williams-stream',
+        'info_dict': {
+            'id': 'd8DEBj7QRfetLsRgFnGEyg',
+            'ext': 'mp4',
+            'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'description': 'original programming',
         },
-        'playlist': [{
-            'md5': '',
-            'info_dict': {
-                'id': 'eYiLsKVgQ6qTC6agD67Sig',
-                'ext': 'mp4',
-                'title': 'Toonami - Friday, October 14th, 2016',
-                'description': 'md5:99892c96ffc85e159a428de85c30acde',
-            },
-        }],
         'params': {
             # m3u8 download
             'skip_download': True,
         },
-        'expected_warnings': ['Unable to download f4m manifest'],
     }]

-    @staticmethod
-    def find_video_info(collection, slug):
-        for video in collection.get('videos'):
-            if video.get('slug') == slug:
-                return video
-
-    @staticmethod
-    def find_collection_by_linkURL(collections, linkURL):
-        for collection in collections:
-            if collection.get('linkURL') == linkURL:
-                return collection
-
-    @staticmethod
-    def find_collection_containing_video(collections, slug):
-        for collection in collections:
-            for video in collection.get('videos'):
-                if video.get('slug') == slug:
-                    return collection, video
-        return None, None
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_path = mobj.group('show_path')
-        episode_path = mobj.group('episode_path')
-        is_playlist = True if mobj.group('is_playlist') else False
+        show_path, episode_path = re.match(self._VALID_URL, url).groups()
+        display_id = episode_path or show_path
+        webpage = self._download_webpage(url, display_id)
+        initial_data = self._parse_json(self._search_regex(
+            r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
+            webpage, 'initial data'), display_id)

-        webpage = self._download_webpage(url, episode_path)
+        is_stream = show_path == 'streams'
+        if is_stream:
+            if not episode_path:
+                episode_path = 'live-stream'

-        # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrapped_data = self._parse_json(self._search_regex(
-            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
+            video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
+            video_id = video_data.get('stream')

-        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
-        # NOTE: We are only downloading one video (the current one) not the playlist
-        if is_playlist:
-            collections = bootstrapped_data['playlists']['collections']
-            collection = self.find_collection_by_linkURL(collections, show_path)
-            video_info = self.find_video_info(collection, episode_path)
-
-            show_title = video_info['showTitle']
-            segment_ids = [video_info['videoPlaybackID']]
+            if not video_id:
+                entries = []
+                for episode in video_data.get('archiveEpisodes', []):
+                    episode_url = episode.get('url')
+                    if not episode_url:
+                        continue
+                    entries.append(self.url_result(
+                        episode_url, 'AdultSwim', episode.get('id')))
+                return self.playlist_result(
+                    entries, video_data.get('id'), video_data.get('title'),
+                    strip_or_none(video_data.get('description')))
         else:
-            collections = bootstrapped_data['show']['collections']
-            collection, video_info = self.find_collection_containing_video(collections, episode_path)
-            # Video wasn't found in the collections, let's try `slugged_video`.
-            if video_info is None:
-                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
-                    video_info = bootstrapped_data['slugged_video']
-            if not video_info:
-                video_info = bootstrapped_data.get(
-                    'heroMetadata', {}).get('trailer', {}).get('video')
-            if not video_info:
-                video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
-            if not video_info:
-                raise ExtractorError('Unable to find video info')
-
-            show = bootstrapped_data['show']
-            show_title = show['title']
-            stream = video_info.get('stream')
-            if stream and stream.get('videoPlaybackID'):
-                segment_ids = [stream['videoPlaybackID']]
-            elif video_info.get('clips'):
-                segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
-            elif video_info.get('videoPlaybackID'):
-                segment_ids = [video_info['videoPlaybackID']]
-            elif video_info.get('id'):
-                segment_ids = [video_info['id']]
-            else:
-                if video_info.get('auth') is True:
-                    raise ExtractorError(
-                        'This video is only available via cable service provider subscription that'
-                        ' is not currently supported. You may want to use --cookies.', expected=True)
-                else:
-                    raise ExtractorError('Unable to find stream or clips')
-
-        episode_id = video_info['id']
-        episode_title = video_info['title']
-        episode_description = video_info.get('description')
-        episode_duration = int_or_none(video_info.get('duration'))
-        view_count = int_or_none(video_info.get('views'))
-
-        entries = []
-        for part_num, segment_id in enumerate(segment_ids):
-            segement_info = self._extract_cvp_info(
-                'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
-                segment_id, {
-                    'secure': {
-                        'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
-                        'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
-                    },
-                })
-            segment_title = '%s - %s' % (show_title, episode_title)
-            if len(segment_ids) > 1:
-                segment_title += ' Part %d' % (part_num + 1)
-            segement_info.update({
-                'id': segment_id,
-                'title': segment_title,
-                'description': episode_description,
-            })
-            entries.append(segement_info)
-
-        return {
-            '_type': 'playlist',
-            'id': episode_id,
-            'display_id': episode_path,
-            'entries': entries,
-            'title': '%s - %s' % (show_title, episode_title),
-            'description': episode_description,
-            'duration': episode_duration,
-            'view_count': view_count,
-        }
+            show_data = initial_data['show']
+
+            if not episode_path:
+                entries = []
+                for video in show_data.get('videos', []):
+                    slug = video.get('slug')
+                    if not slug:
+                        continue
+                    entries.append(self.url_result(
+                        'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+                        'AdultSwim', video.get('id')))
+                return self.playlist_result(
+                    entries, show_data.get('id'), show_data.get('title'),
+                    strip_or_none(show_data.get('metadata', {}).get('description')))
+
+            video_data = show_data['sluggedVideo']
+            video_id = video_data['id']
+
+        info = self._extract_cvp_info(
+            'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
+            video_id, {
+                'secure': {
+                    'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
+                    'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
+                },
+            }, {
+                'url': url,
+                'site_name': 'AdultSwim',
+                'auth_required': video_data.get('auth'),
+            })
+
+        info.update({
+            'id': video_id,
+            'display_id': display_id,
+            'description': info.get('description') or strip_or_none(video_data.get('description')),
+        })
+        if not is_stream:
+            info.update({
+                'duration': info.get('duration') or int_or_none(video_data.get('duration')),
+                'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
+                'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
+                'episode': info['title'],
+                'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
+            })
+
+            info['series'] = video_data.get('collection_title') or info.get('series')
+            if info['series'] and info['series'] != info['title']:
+                info['title'] = '%s - %s' % (info['series'], info['title'])
+
+        return info

@@ -4,9 +4,9 @@ from .common import InfoExtractor


 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
         'info_dict': {
             'id': '3792260579001',
@@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
         },
         'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
-    }
+    }, {
+        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+        'only_matching': True,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'

     def _real_extract(self, url):

@@ -14,14 +14,16 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    KNOWN_EXTENSIONS,
     parse_filesize,
     unescapeHTML,
     update_url_query,
+    unified_strdate,
 )


 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'md5': 'c557841d5e50261777a6585648adf439',
@@ -155,7 +157,7 @@ class BandcampIE(InfoExtractor):

 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'

     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -222,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
         'playlist_count': 2,
     }]

+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+                else super(BandcampAlbumIE, cls).suitable(url))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         uploader_id = mobj.group('subdomain')
@@ -250,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
+
+
+class BandcampWeeklyIE(InfoExtractor):
+    IE_NAME = 'Bandcamp:weekly'
+    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://bandcamp.com/?show=224',
+        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+        'info_dict': {
+            'id': '224',
+            'ext': 'opus',
+            'title': 'BC Weekly April 4th 2017 - Magic Moments',
+            'description': 'md5:5d48150916e8e02d030623a48512c874',
+            'duration': 5829.77,
+            'release_date': '20170404',
+            'series': 'Bandcamp Weekly',
+            'episode': 'Magic Moments',
+            'episode_number': 208,
+            'episode_id': '224',
+        }
+    }, {
+        'url': 'https://bandcamp.com/?blah/blah@&show=228',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        blob = self._parse_json(
+            self._search_regex(
+                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
+                'blob', group='blob'),
+            video_id, transform_source=unescapeHTML)
+
+        show = blob['bcw_show']
+
+        # This is desired because any invalid show id redirects to `bandcamp.com`
+        # which happens to expose the latest Bandcamp Weekly episode.
+        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
+
+        formats = []
+        for format_id, format_url in show['audio_stream'].items():
+            if not isinstance(format_url, compat_str):
+                continue
+            for known_ext in KNOWN_EXTENSIONS:
+                if known_ext in format_id:
+                    ext = known_ext
+                    break
+            else:
+                ext = None
+            formats.append({
+                'format_id': format_id,
+                'url': format_url,
+                'ext': ext,
+                'vcodec': 'none',
+            })
+        self._sort_formats(formats)
+
+        title = show.get('audio_title') or 'Bandcamp Weekly'
+        subtitle = show.get('subtitle')
+        if subtitle:
+            title += ' - %s' % subtitle
+
+        episode_number = None
+        seq = blob.get('bcw_seq')
+
+        if seq and isinstance(seq, list):
+            try:
+                episode_number = next(
+                    int_or_none(e.get('episode_number'))
+                    for e in seq
+                    if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
+            except StopIteration:
+                pass
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': show.get('desc') or show.get('short_desc'),
+            'duration': float_or_none(show.get('audio_duration')),
+            'is_live': False,
+            'release_date': unified_strdate(show.get('published_date')),
+            'series': 'Bandcamp Weekly',
+            'episode': show.get('subtitle'),
+            'episode_number': episode_number,
+            'episode_id': compat_str(video_id),
+            'formats': formats
+        }

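For context on the new Bandcamp Weekly extractor above, here is a minimal usage sketch against youtube-dl's public Python API; it is illustrative only, and the show URL is simply the one from the _TESTS entry in the diff.

# Illustrative sketch only - exercises BandcampWeeklyIE via the public API.
from youtube_dl import YoutubeDL

with YoutubeDL({'skip_download': True}) as ydl:
    # URL taken from the _TESTS entry in the diff above.
    info = ydl.extract_info('https://bandcamp.com/?show=224', download=False)
    print(info.get('title'), info.get('series'), info.get('episode_number'))
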
@@ -6,14 +6,18 @@ import itertools

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     float_or_none,
+    get_element_by_class,
     int_or_none,
     parse_duration,
     parse_iso8601,
     try_get,
     unescapeHTML,
+    urlencode_postdata,
+    urljoin,
 )
 from ..compat import (
     compat_etree_fromstring,
@@ -38,6 +42,9 @@ class BBCCoUkIE(InfoExtractor):
                             (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                         ''' % _ID_REGEX

+    _LOGIN_URL = 'https://account.bbc.com/signin'
+    _NETRC_MACHINE = 'bbc'
+
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
@@ -227,6 +234,39 @@ class BBCCoUkIE(InfoExtractor):

     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'

+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading signin page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        post_url = urljoin(self._LOGIN_URL, self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_URL, group='url'))
+
+        response, urlh = self._download_webpage_handle(
+            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+            headers={'Referer': self._LOGIN_URL})
+
+        if self._LOGIN_URL in urlh.geturl():
+            error = clean_html(get_element_by_class('form-message', response))
+            if error:
+                raise ExtractorError(
+                    'Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+    def _real_initialize(self):
+        self._login()
+
     class MediaSelectionError(Exception):
         def __init__(self, id):
             self.id = id

@@ -6,18 +6,33 @@ from ..utils import (
     ExtractorError,
     clean_html,
     compat_str,
+    float_or_none,
     int_or_none,
     parse_iso8601,
     try_get,
+    urljoin,
 )


-class BeamProLiveIE(InfoExtractor):
-    IE_NAME = 'Beam:live'
-    _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
+class BeamProBaseIE(InfoExtractor):
+    _API_BASE = 'https://mixer.com/api/v1'
     _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
+
+    def _extract_channel_info(self, chan):
+        user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
+        return {
+            'uploader': chan.get('token') or try_get(
+                chan, lambda x: x['user']['username'], compat_str),
+            'uploader_id': compat_str(user_id) if user_id else None,
+            'age_limit': self._RATINGS.get(chan.get('audience')),
+        }
+
+
+class BeamProLiveIE(BeamProBaseIE):
+    IE_NAME = 'Mixer:live'
+    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
     _TEST = {
-        'url': 'http://www.beam.pro/niterhayven',
+        'url': 'http://mixer.com/niterhayven',
         'info_dict': {
             'id': '261562',
             'ext': 'mp4',
@@ -38,11 +53,17 @@ class BeamProLiveIE(InfoExtractor):
         },
     }

+    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
+
+    @classmethod
+    def suitable(cls, url):
+        return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
+
     def _real_extract(self, url):
         channel_name = self._match_id(url)

         chan = self._download_json(
-            'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
+            '%s/channels/%s' % (self._API_BASE, channel_name), channel_name)

         if chan.get('online') is False:
             raise ExtractorError(
@@ -50,24 +71,118 @@ class BeamProLiveIE(InfoExtractor):

         channel_id = chan['id']

+        def manifest_url(kind):
+            return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
+
         formats = self._extract_m3u8_formats(
-            'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
-            channel_name, ext='mp4', m3u8_id='hls', fatal=False)
+            manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
+            fatal=False)
+        formats.extend(self._extract_smil_formats(
+            manifest_url('smil'), channel_name, fatal=False))
         self._sort_formats(formats)

-        user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
-
-        return {
+        info = {
             'id': compat_str(chan.get('id') or channel_name),
             'title': self._live_title(chan.get('name') or channel_name),
             'description': clean_html(chan.get('description')),
-            'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
+            'thumbnail': try_get(
+                chan, lambda x: x['thumbnail']['url'], compat_str),
             'timestamp': parse_iso8601(chan.get('updatedAt')),
-            'uploader': chan.get('token') or try_get(
-                chan, lambda x: x['user']['username'], compat_str),
-            'uploader_id': compat_str(user_id) if user_id else None,
-            'age_limit': self._RATINGS.get(chan.get('audience')),
             'is_live': True,
             'view_count': int_or_none(chan.get('viewersTotal')),
             'formats': formats,
         }
+        info.update(self._extract_channel_info(chan))
+
+        return info
+
+
+class BeamProVodIE(BeamProBaseIE):
+    IE_NAME = 'Mixer:vod'
+    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://mixer.com/willow8714?vod=2259830',
+        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
+        'info_dict': {
+            'id': '2259830',
+            'ext': 'mp4',
+            'title': 'willow8714\'s Channel',
+            'duration': 6828.15,
+            'thumbnail': r're:https://.*source\.png$',
+            'timestamp': 1494046474,
+            'upload_date': '20170506',
+            'uploader': 'willow8714',
+            'uploader_id': '6085379',
+            'age_limit': 13,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    @staticmethod
+    def _extract_format(vod, vod_type):
+        if not vod.get('baseUrl'):
+            return []
+
+        if vod_type == 'hls':
+            filename, protocol = 'manifest.m3u8', 'm3u8_native'
+        elif vod_type == 'raw':
+            filename, protocol = 'source.mp4', 'https'
+        else:
+            assert False
+
+        data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
+
+        format_id = [vod_type]
+        if isinstance(data.get('Height'), compat_str):
+            format_id.append('%sp' % data['Height'])
+
+        return [{
+            'url': urljoin(vod['baseUrl'], filename),
+            'format_id': '-'.join(format_id),
+            'ext': 'mp4',
+            'protocol': protocol,
+            'width': int_or_none(data.get('Width')),
+            'height': int_or_none(data.get('Height')),
+            'fps': int_or_none(data.get('Fps')),
+            'tbr': int_or_none(data.get('Bitrate'), 1000),
+        }]
+
+    def _real_extract(self, url):
+        vod_id = self._match_id(url)
+
+        vod_info = self._download_json(
+            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
+
+        state = vod_info.get('state')
+        if state != 'AVAILABLE':
+            raise ExtractorError(
+                'VOD %s is not available (state: %s)' % (vod_id, state),
+                expected=True)
+
+        formats = []
+        thumbnail_url = None
+
+        for vod in vod_info['vods']:
+            vod_type = vod.get('format')
+            if vod_type in ('hls', 'raw'):
+                formats.extend(self._extract_format(vod, vod_type))
+            elif vod_type == 'thumbnail':
+                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
+
+        self._sort_formats(formats)
+
+        info = {
+            'id': vod_id,
+            'title': vod_info.get('name') or vod_id,
+            'duration': float_or_none(vod_info.get('duration')),
+            'thumbnail': thumbnail_url,
+            'timestamp': parse_iso8601(vod_info.get('createdAt')),
+            'view_count': int_or_none(vod_info.get('viewsTotal')),
+            'formats': formats,
+        }
+        info.update(self._extract_channel_info(vod_info.get('channel') or {}))
+
+        return info

@@ -5,6 +5,7 @@ import re
 import json

 from .common import InfoExtractor
+from .adobepass import AdobePassIE
 from ..compat import (
     compat_etree_fromstring,
     compat_parse_qs,
@@ -448,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         return info


-class BrightcoveNewIE(InfoExtractor):
+class BrightcoveNewIE(AdobePassIE):
     IE_NAME = 'brightcove:new'
     _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
     _TESTS = [{
@@ -602,6 +603,20 @@ class BrightcoveNewIE(InfoExtractor):
                     raise ExtractorError(message, expected=True)
                 raise

+        errors = json_data.get('errors')
+        if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+            custom_fields = json_data['custom_fields']
+            tve_token = self._extract_mvpd_auth(
+                smuggled_data['source_url'], video_id,
+                custom_fields['bcadobepassrequestorid'],
+                custom_fields['bcadobepassresourceid'])
+            json_data = self._download_json(
+                api_url, video_id, headers={
+                    'Accept': 'application/json;pk=%s' % policy_key
+                }, query={
+                    'tveToken': tve_token,
+                })
+
         title = json_data['name'].strip()

         formats = []
@@ -667,7 +682,6 @@ class BrightcoveNewIE(InfoExtractor):
             })
             formats.append(f)

-        errors = json_data.get('errors')
         if not formats and errors:
             error = errors[0]
             raise ExtractorError(
@@ -684,7 +698,7 @@ class BrightcoveNewIE(InfoExtractor):

         is_live = False
         duration = float_or_none(json_data.get('duration'), 1000)
-        if duration and duration < 0:
+        if duration is not None and duration <= 0:
             is_live = True

         return {

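The TVE-auth fallback added above reads the originating page URL out of the smuggled data, so, as a hedged sketch only (the URLs below are placeholders, not real player or account IDs), this is how a source page would travel alongside a Brightcove player URL:

# Illustrative sketch only - how a source page URL is carried with a Brightcove
# player URL so the extractor can later pass it to _extract_mvpd_auth.
from youtube_dl.utils import smuggle_url, unsmuggle_url

bc_url = 'http://players.brightcove.net/123456789/default_default/index.html?videoId=ref:placeholder'
smuggled = smuggle_url(bc_url, {'source_url': 'http://example.com/page-with-embed'})
url, data = unsmuggle_url(smuggled, {})
print(url == bc_url, data['source_url'])
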
@@ -49,13 +49,13 @@ class CBSIE(CBSBaseIE):
         'only_matching': True,
     }]

-    def _extract_video_info(self, content_id):
+    def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
         items_data = self._download_xml(
             'http://can.cbs.com/thunder/player/videoPlayerService.php',
-            content_id, query={'partner': 'cbs', 'contentId': content_id})
+            content_id, query={'partner': site, 'contentId': content_id})
         video_data = xpath_element(items_data, './/item')
         title = xpath_text(video_data, 'videoTitle', 'title', True)
-        tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
+        tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
         tp_release_url = 'http://link.theplatform.com/s/' + tp_path

         asset_types = []

@@ -3,17 +3,18 @@ from __future__ import unicode_literals

 import re

-from .theplatform import ThePlatformIE
+from .cbs import CBSIE
 from ..utils import int_or_none


-class CBSInteractiveIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
+class CBSInteractiveIE(CBSIE):
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
         'info_dict': {
-            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
-            'ext': 'flv',
+            'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
+            'display_id': 'hands-on-with-microsofts-windows-8-1-update',
+            'ext': 'mp4',
             'title': 'Hands-on with Microsoft Windows 8.1 Update',
             'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
             'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
@@ -22,13 +23,19 @@ class CBSInteractiveIE(ThePlatformIE):
             'timestamp': 1396479627,
             'upload_date': '20140402',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
+        'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
         'info_dict': {
-            'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
-            'ext': 'flv',
+            'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
+            'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
+            'ext': 'mp4',
             'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
-            'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
+            'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
             'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
             'uploader': 'Ashley Esqueda',
             'duration': 1482,
@@ -38,23 +45,28 @@ class CBSInteractiveIE(ThePlatformIE):
     }, {
         'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
         'info_dict': {
-            'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
+            'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
+            'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
             'ext': 'mp4',
             'title': 'Video: Keeping Android smartphones and tablets secure',
             'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
             'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
             'uploader': 'Adrian Kingsley-Hughes',
-            'timestamp': 1448961720,
-            'upload_date': '20151201',
+            'duration': 731,
+            'timestamp': 1449129925,
+            'upload_date': '20151203',
         },
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
+    }, {
+        'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
+        'only_matching': True,
     }]
-    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
     MPX_ACCOUNTS = {
-        'cnet': 2288573011,
+        'cnet': 2198311517,
         'zdnet': 2387448114,
     }
@@ -68,7 +80,8 @@ class CBSInteractiveIE(ThePlatformIE):
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]

-        video_id = vdata['id']
+        video_id = vdata['mpxRefId']
+
         title = vdata['title']
         author = vdata.get('author')
         if author:
@@ -78,20 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
         uploader = None
         uploader_id = None

-        media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
-        formats, subtitles = [], {}
-        for (fkey, vid) in vdata['files'].items():
-            if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
-                continue
-            release_url = self.TP_RELEASE_URL_TEMPLATE % vid
-            if fkey == 'hds':
-                release_url += '&manifest=f4m'
-            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
-            formats.extend(tp_formats)
-            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
-        self._sort_formats(formats)
-
-        info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
+        info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
         info.update({
             'id': video_id,
             'display_id': display_id,
@@ -99,7 +99,5 @@ class CBSInteractiveIE(ThePlatformIE):
             'duration': int_or_none(vdata.get('duration')),
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'subtitles': subtitles,
-            'formats': formats,
         })
         return info

@@ -61,11 +61,17 @@ class CBSNewsIE(CBSIE):

         video_info = self._parse_json(self._html_search_regex(
             r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
-            webpage, 'video JSON info'), video_id)
+            webpage, 'video JSON info', default='{}'), video_id, fatal=False)

-        item = video_info['item'] if 'item' in video_info else video_info
-        guid = item['mpxRefId']
-        return self._extract_video_info(guid)
+        if video_info:
+            item = video_info['item'] if 'item' in video_info else video_info
+        else:
+            state = self._parse_json(self._search_regex(
+                r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
+                'playlist JSON info', group='json'), video_id)['state']
+            item = state['playlist'][state['pid']]
+
+        return self._extract_video_info(item['mpxRefId'], 'cbsnews')


 class CBSNewsLiveVideoIE(InfoExtractor):

@@ -376,7 +376,7 @@ class InfoExtractor(object):
             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
         m = cls._VALID_URL_RE.match(url)
         assert m
-        return m.group('id')
+        return compat_str(m.group('id'))

     @classmethod
     def working(cls):
@@ -2001,6 +2001,12 @@ class InfoExtractor(object):
             compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)

     def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+        """
+        Parse formats from ISM manifest.
+        References:
+         1. [MS-SSTR]: Smooth Streaming Protocol,
+            https://msdn.microsoft.com/en-us/library/ff469518.aspx
+        """
         if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
             return []

@@ -2022,8 +2028,11 @@ class InfoExtractor(object):
                 self.report_warning('%s is not a supported codec' % fourcc)
                 continue
             tbr = int(track.attrib['Bitrate']) // 1000
-            width = int_or_none(track.get('MaxWidth'))
-            height = int_or_none(track.get('MaxHeight'))
+            # [1] does not mention Width and Height attributes. However,
+            # they're often present while MaxWidth and MaxHeight are
+            # missing, so should be used as fallbacks
+            width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+            height = int_or_none(track.get('MaxHeight') or track.get('Height'))
             sampling_rate = int_or_none(track.get('SamplingRate'))

             track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
@@ -2196,8 +2205,9 @@ class InfoExtractor(object):

     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
-        url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
-        http_base_url = 'http' + url_base
+        url_base = self._search_regex(
+            r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
+        http_base_url = '%s:%s' % ('http', url_base)
         formats = []
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
@@ -2231,7 +2241,7 @@ class InfoExtractor(object):
         for protocol in ('rtmp', 'rtsp'):
             if protocol not in skip_protocols:
                 formats.append({
-                    'url': protocol + url_base,
+                    'url': '%s:%s' % (protocol, url_base),
                     'format_id': protocol,
                     'protocol': protocol,
                 })

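A small, hedged sketch of what the reworked URL handling in _extract_wowza_formats above does with scheme-less manifest URLs; the host name below is a placeholder, and only the regex behaviour from the diff is reproduced:

# Illustrative sketch only - the new pattern also accepts protocol-relative
# URLs, and the scheme is then re-attached per protocol, as in the diff above.
import re

for url in ('https://cdn.example.com/vod/mp4:clip/playlist.m3u8',
            '//cdn.example.com/vod/mp4:clip/playlist.m3u8'):
    url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
    url_base = re.search(r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url).group(1)
    print('%s:%s' % ('http', url_base), '%s:%s' % ('rtmp', url_base))
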
@@ -16,7 +16,6 @@ from ..utils import (
     mimetype2ext,
     orderedSet,
     parse_iso8601,
-    remove_end,
 )


@@ -50,10 +49,17 @@ class CondeNastIE(InfoExtractor):
         'wmagazine': 'W Magazine',
     }

-    _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
+        (?:
+            (?:
+                embed(?:js)?|
+                (?:script|inline)/video
+            )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
+            (?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
+        )''' % '|'.join(_SITES.keys())
     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

-    EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
+    EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())

     _TESTS = [{
         'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
@@ -89,6 +95,12 @@ class CondeNastIE(InfoExtractor):
             'upload_date': '20150916',
             'timestamp': 1442434955,
         }
+    }, {
+        'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
+        'only_matching': True,
+    }, {
+        'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
+        'only_matching': True,
     }]

     def _extract_series(self, url, webpage):
@@ -104,7 +116,7 @@ class CondeNastIE(InfoExtractor):
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)

-    def _extract_video(self, webpage, url_type):
+    def _extract_video_params(self, webpage):
         query = {}
         params = self._search_regex(
             r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
@@ -123,17 +135,30 @@ class CondeNastIE(InfoExtractor):
             'playerId': params['data-player'],
             'target': params['id'],
         })
-        video_id = query['videoId']
+        return query
+
+    def _extract_video(self, params):
+        video_id = params['videoId']
+
         video_info = None
-        info_page = self._download_json(
-            'http://player.cnevids.com/player/video.js',
-            video_id, 'Downloading video info', fatal=False, query=query)
-        if info_page:
-            video_info = info_page.get('video')
-        if not video_info:
+        if params.get('playerId'):
+            info_page = self._download_json(
+                'http://player.cnevids.com/player/video.js',
+                video_id, 'Downloading video info', fatal=False, query=params)
+            if info_page:
+                video_info = info_page.get('video')
+            if not video_info:
+                info_page = self._download_webpage(
+                    'http://player.cnevids.com/player/loader.js',
+                    video_id, 'Downloading loader info', query=params)
+        else:
             info_page = self._download_webpage(
-                'http://player.cnevids.com/player/loader.js',
-                video_id, 'Downloading loader info', query=query)
+                'https://player.cnevids.com/inline/video/%s.js' % video_id,
+                video_id, 'Downloading inline info', query={
+                    'target': params.get('target', 'embedplayer')
+                })
+
+        if not video_info:
             video_info = self._parse_json(
                 self._search_regex(
                     r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
@@ -161,9 +186,7 @@ class CondeNastIE(InfoExtractor):
             })
         self._sort_formats(formats)

-        info = self._search_json_ld(
-            webpage, video_id, fatal=False) if url_type != 'embed' else {}
-        info.update({
+        return {
             'id': video_id,
             'formats': formats,
             'title': title,
@@ -174,22 +197,26 @@ class CondeNastIE(InfoExtractor):
             'series': video_info.get('series_title'),
             'season': video_info.get('season_title'),
             'timestamp': parse_iso8601(video_info.get('premiere_date')),
-        })
-        return info
+            'categories': video_info.get('categories'),
+        }

     def _real_extract(self, url):
-        site, url_type, item_id = re.match(self._VALID_URL, url).groups()
+        video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups()

-        # Convert JS embed to regular embed
-        if url_type == 'embedjs':
-            parsed_url = compat_urlparse.urlparse(url)
-            url = compat_urlparse.urlunparse(parsed_url._replace(
-                path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
-            url_type = 'embed'
+        if video_id:
+            return self._extract_video({
+                'videoId': video_id,
+                'playerId': player_id,
+                'target': target,
+            })

-        webpage = self._download_webpage(url, item_id)
+        webpage = self._download_webpage(url, display_id)

         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            return self._extract_video(webpage, url_type)
+            params = self._extract_video_params(webpage)
+            info = self._search_json_ld(
+                webpage, display_id, fatal=False)
+            info.update(self._extract_video(params))
+            return info

@@ -10,6 +10,7 @@ from ..utils import (
     smuggle_url,
     determine_ext,
     ExtractorError,
+    extract_attributes,
 )
 from .senateisvp import SenateISVPIE
 from .ustream import UstreamIE
@@ -68,6 +69,7 @@ class CSpanIE(InfoExtractor):
             'uploader_id': '12987475',
         },
     }]
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -78,6 +80,19 @@ class CSpanIE(InfoExtractor):
         if ustream_url:
             return self.url_result(ustream_url, UstreamIE.ie_key())

+        if '&vod' not in url:
+            bc = self._search_regex(
+                r"(<[^>]+id='brightcove-player-embed'[^>]+>)",
+                webpage, 'brightcove embed', default=None)
+            if bc:
+                bc_attr = extract_attributes(bc)
+                bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+                    bc_attr.get('data-bcaccountid', '3162030207001'),
+                    bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
+                    bc_attr.get('data-newbcplayerid', 'default'),
+                    bc_attr['data-bcid'])
+                return self.url_result(smuggle_url(bc_url, {'source_url': url}))
+
         # We first look for clipid, because clipprog always appears before
         patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
         results = list(filter(None, (re.search(p, webpage) for p in patterns)))

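For the C-SPAN change above, a hedged sketch of how the Brightcove fallback assembles the player URL from the embed's data attributes; the attribute dict below is fabricated, and the defaults are simply the ones appearing in the diff:

# Illustrative sketch only - mirrors the BRIGHTCOVE_URL_TEMPLATE formatting in
# the diff; the 'data-bcid' value is a made-up example.
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'

bc_attr = {'data-bcid': '1234567890123'}
bc_url = BRIGHTCOVE_URL_TEMPLATE % (
    bc_attr.get('data-bcaccountid', '3162030207001'),
    bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
    bc_attr.get('data-newbcplayerid', 'default'),
    bc_attr['data-bcid'])
print(bc_url)
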
@@ -2,9 +2,11 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     determine_protocol,
+    try_get,
     unescapeHTML,
 )

@@ -28,8 +30,14 @@ class DailyMailIE(InfoExtractor):
         video_data = self._parse_json(self._search_regex(
             r"data-opts='({.+?})'", webpage, 'video data'), video_id)
         title = unescapeHTML(video_data['title'])
-        video_sources = self._download_json(video_data.get(
-            'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
+
+        sources_url = (try_get(
+            video_data,
+            (lambda x: x['plugins']['sources']['url'],
+             lambda x: x['sources']['url']), compat_str) or
+            'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+
+        video_sources = self._download_json(sources_url, video_id)
+
         formats = []
         for rendition in video_sources['renditions']:

@@ -38,7 +38,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):


 class DailymotionIE(DailymotionBaseInfoExtractor):
-    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
+    _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
     IE_NAME = 'dailymotion'

     _FORMATS = [
@@ -49,87 +49,82 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         ('stream_h264_hd1080_url', 'hd180'),
     ]

-    _TESTS = [
-        {
-            'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
-            'md5': '074b95bdee76b9e3654137aee9c79dfe',
-            'info_dict': {
-                'id': 'x5kesuj',
-                'ext': 'mp4',
-                'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
-                'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
-                'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
-                'duration': 187,
-                'timestamp': 1493651285,
-                'upload_date': '20170501',
-                'uploader': 'Deadline',
-                'uploader_id': 'x1xm8ri',
-                'age_limit': 0,
-                'view_count': int,
-            },
-        },
-        {
-            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
-            'md5': '2137c41a8e78554bb09225b8eb322406',
-            'info_dict': {
-                'id': 'x2iuewm',
-                'ext': 'mp4',
-                'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
-                'description': 'Several come bundled with the Steam Controller.',
-                'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
-                'duration': 74,
-                'timestamp': 1425657362,
-                'upload_date': '20150306',
-                'uploader': 'IGN',
-                'uploader_id': 'xijv66',
-                'age_limit': 0,
-                'view_count': int,
-            },
-            'skip': 'video gone',
-        },
-        # Vevo video
-        {
-            'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
-            'info_dict': {
-                'title': 'Roar (Official)',
-                'id': 'USUV71301934',
-                'ext': 'mp4',
-                'uploader': 'Katy Perry',
-                'upload_date': '20130905',
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'VEVO is only available in some countries',
-        },
-        # age-restricted video
-        {
-            'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
-            'md5': '0d667a7b9cebecc3c89ee93099c4159d',
-            'info_dict': {
-                'id': 'xyh2zz',
-                'ext': 'mp4',
-                'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
-                'uploader': 'HotWaves1012',
-                'age_limit': 18,
-            },
-            'skip': 'video gone',
-        },
-        # geo-restricted, player v5
-        {
-            'url': 'http://www.dailymotion.com/video/xhza0o',
-            'only_matching': True,
-        },
-        # with subtitles
-        {
-            'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
-            'only_matching': True,
-        },
-        {
-            'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
-            'only_matching': True,
-        }
-    ]
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
+        'md5': '074b95bdee76b9e3654137aee9c79dfe',
+        'info_dict': {
+            'id': 'x5kesuj',
+            'ext': 'mp4',
+            'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
+            'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
+            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+            'duration': 187,
+            'timestamp': 1493651285,
+            'upload_date': '20170501',
+            'uploader': 'Deadline',
+            'uploader_id': 'x1xm8ri',
+            'age_limit': 0,
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+        'md5': '2137c41a8e78554bb09225b8eb322406',
+        'info_dict': {
+            'id': 'x2iuewm',
+            'ext': 'mp4',
+            'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+            'description': 'Several come bundled with the Steam Controller.',
+            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+            'duration': 74,
+            'timestamp': 1425657362,
+            'upload_date': '20150306',
+            'uploader': 'IGN',
+            'uploader_id': 'xijv66',
+            'age_limit': 0,
+            'view_count': int,
+        },
+        'skip': 'video gone',
+    }, {
+        # Vevo video
+        'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+        'info_dict': {
+            'title': 'Roar (Official)',
+            'id': 'USUV71301934',
+            'ext': 'mp4',
+            'uploader': 'Katy Perry',
+            'upload_date': '20130905',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'VEVO is only available in some countries',
+    }, {
+        # age-restricted video
+        'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+        'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+        'info_dict': {
+            'id': 'xyh2zz',
+            'ext': 'mp4',
+            'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+            'uploader': 'HotWaves1012',
+            'age_limit': 18,
+        },
+        'skip': 'video gone',
+    }, {
+        # geo-restricted, player v5
+        'url': 'http://www.dailymotion.com/video/xhza0o',
+        'only_matching': True,
+    }, {
+        # with subtitles
+        'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
+        'only_matching': True,
+    }]

     @staticmethod
     def _extract_urls(webpage):

@@ -3,11 +3,14 @@ from __future__ import unicode_literals
 import time
 import hashlib
+import re

 from .common import InfoExtractor
 from ..utils import (
 ExtractorError,
 unescapeHTML,
+unified_strdate,
+urljoin,
 )


@@ -117,3 +120,82 @@ class DouyuTVIE(InfoExtractor):
 'uploader': uploader,
 'is_live': True,
 }


+class DouyuShowIE(InfoExtractor):
+_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
+
+_TESTS = [{
+'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
+'md5': '0c2cfd068ee2afe657801269b2d86214',
+'info_dict': {
+'id': 'rjNBdvnVXNzvE2yw',
+'ext': 'mp4',
+'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
+'duration': 7150.08,
+'thumbnail': r're:^https?://.*\.jpg$',
+'uploader': '陈一发儿',
+'uploader_id': 'XrZwYelr5wbK',
+'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
+'upload_date': '20170402',
+},
+}, {
+'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
+'only_matching': True,
+}]
+
+def _real_extract(self, url):
+url = url.replace('vmobile.', 'v.')
+video_id = self._match_id(url)
+
+webpage = self._download_webpage(url, video_id)
+
+room_info = self._parse_json(self._search_regex(
+r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
+
+video_info = None
+
+for trial in range(5):
+# Sometimes Douyu rejects our request. Let's try it more times
+try:
+video_info = self._download_json(
+'https://vmobile.douyu.com/video/getInfo', video_id,
+query={'vid': video_id},
+headers={
+'Referer': url,
+'x-requested-with': 'XMLHttpRequest',
+})
+break
+except ExtractorError:
+self._sleep(1, video_id)
+
+if not video_info:
+raise ExtractorError('Can\'t fetch video info')
+
+formats = self._extract_m3u8_formats(
+video_info['data']['video_url'], video_id,
+entry_protocol='m3u8_native', ext='mp4')
+
+upload_date = unified_strdate(self._html_search_regex(
+r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
+'upload date', fatal=False))
+
+uploader = uploader_id = uploader_url = None
+mobj = re.search(
+r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
+webpage)
+if mobj:
+uploader_id, uploader = mobj.groups()
+uploader_url = urljoin(url, '/author/' + uploader_id)
+
+return {
+'id': video_id,
+'title': room_info['name'],
+'formats': formats,
+'duration': room_info.get('duration'),
+'thumbnail': room_info.get('pic'),
+'upload_date': upload_date,
+'uploader': uploader,
+'uploader_id': uploader_id,
+'uploader_url': uploader_url,
+}

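Note: the new DouyuShowIE above is registered for v.douyu.com/vmobile.douyu.com show URLs. A minimal, hedged sketch of exercising it through youtube-dl's public Python API (the URL is the test URL from the diff; field availability depends on the site at runtime):

    # Minimal sketch, assuming a youtube-dl build that includes this change.
    import youtube_dl

    with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
        # Resolve metadata only; no media is downloaded.
        info = ydl.extract_info('https://v.douyu.com/show/rjNBdvnVXNzvE2yw', download=False)
        print(info.get('id'), info.get('title'), info.get('uploader'))
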
@@ -1,135 +1,59 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
 from ..utils import (
-int_or_none,
-parse_iso8601,
+js_to_json,
+parse_duration,
+unescapeHTML,
 )


 class DRBonanzaIE(InfoExtractor):
-_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
-_TESTS = [{
-'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
+_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+_TEST = {
+'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
 'info_dict': {
-'id': '65517',
+'id': '40312',
+'display_id': 'matador---0824-komme-fremmede-',
 'ext': 'mp4',
-'title': 'Talkshowet - Leonard Cohen',
-'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
+'title': 'MATADOR - 08:24. "Komme fremmede".',
+'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
-'timestamp': 1295537932,
-'upload_date': '20110120',
-'duration': 3664,
+'duration': 4613,
 },
-'params': {
-'skip_download': True, # requires rtmp
-},
-}, {
-'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
-'md5': '6dfe039417e76795fb783c52da3de11d',
-'info_dict': {
-'id': '59410',
-'ext': 'mp3',
-'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
-'description': 'md5:501e5a195749480552e214fbbed16c4e',
-'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
-'timestamp': 1223274900,
-'upload_date': '20081006',
-'duration': 7369,
-},
-}]
+}

 def _real_extract(self, url):
-url_id = self._match_id(url)
-webpage = self._download_webpage(url, url_id)
+mobj = re.match(self._VALID_URL, url)
+video_id, display_id = mobj.group('id', 'display_id')

-if url_id:
-info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
-else:
-# Just fetch the first video on that page
-info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
+webpage = self._download_webpage(url, display_id)

-asset_id = str(info['AssetId'])
-title = info['Title'].rstrip(' \'\"-,.:;!?')
-duration = int_or_none(info.get('Duration'), scale=1000)
-# First published online. "FirstPublished" contains the date for original airing.
-timestamp = parse_iso8601(
-re.sub(r'\.\d+$', '', info['Created']))
+info = self._parse_html5_media_entries(
+url, webpage, display_id, m3u8_id='hls',
+m3u8_entry_protocol='m3u8_native')[0]
+self._sort_formats(info['formats'])

-def parse_filename_info(url):
-match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
-if match:
-return {
-'width': int(match.group('width')),
-'height': int(match.group('height')),
-'vbr': int(match.group('bitrate')),
-'ext': match.group('ext')
-}
-match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
-if match:
-return {
-'vbr': int(match.group('bitrate')),
-'ext': match.group(2)
-}
-return {}
+asset = self._parse_json(
+self._search_regex(
+r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
+display_id, transform_source=js_to_json)

-video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
-preferencemap = {
-'VideoHigh': -1,
-'VideoMid': -2,
-'VideoLow': -3,
-'Audio': -4,
-}
+title = unescapeHTML(asset['AssetTitle']).strip()

-formats = []
-for file in info['Files']:
-if info['Type'] == 'Video':
-if file['Type'] in video_types:
-format = parse_filename_info(file['Location'])
-format.update({
-'url': file['Location'],
-'format_id': file['Type'].replace('Video', ''),
-'preference': preferencemap.get(file['Type'], -10),
-})
-if format['url'].startswith('rtmp'):
-rtmp_url = format['url']
-format['rtmp_live'] = True # --resume does not work
-if '/bonanza/' in rtmp_url:
-format['play_path'] = rtmp_url.split('/bonanza/')[1]
-formats.append(format)
-elif file['Type'] == 'Thumb':
-thumbnail = file['Location']
-elif info['Type'] == 'Audio':
-if file['Type'] == 'Audio':
-format = parse_filename_info(file['Location'])
-format.update({
-'url': file['Location'],
-'format_id': file['Type'],
-'vcodec': 'none',
-})
-formats.append(format)
-elif file['Type'] == 'Thumb':
-thumbnail = file['Location']
+def extract(field):
+return self._search_regex(
+r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
+webpage, field, default=None)

-description = '%s\n%s\n%s\n' % (
-info['Description'], info['Actors'], info['Colophon'])
+info.update({
+'id': asset.get('AssetId') or video_id,

-self._sort_formats(formats)

-display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
-display_id = re.sub(r'-+', '-', display_id)

-return {
-'id': asset_id,
 'display_id': display_id,
 'title': title,
-'formats': formats,
-'description': description,
-'thumbnail': thumbnail,
-'timestamp': timestamp,
-'duration': duration,
-}
+'description': extract('Programinfo'),
+'duration': parse_duration(extract('Tid')),
+'thumbnail': asset.get('AssetImageUrl'),
+})
+return info

@@ -20,7 +20,7 @@ class DRTVIE(InfoExtractor):
 IE_NAME = 'drtv'
 _TESTS = [{
 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
-'md5': '25e659cccc9a2ed956110a299fdf5983',
+'md5': '7ae17b4e18eb5d29212f424a7511c184',
 'info_dict': {
 'id': 'klassen-darlig-taber-10',
 'ext': 'mp4',
@@ -30,21 +30,37 @@ class DRTVIE(InfoExtractor):
 'upload_date': '20160823',
 'duration': 606.84,
 },
-'params': {
-'skip_download': True,
-},
 }, {
+# embed
 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
-'md5': '2c37175c718155930f939ef59952474a',
 'info_dict': {
 'id': 'christiania-pusher-street-ryddes-drdkrjpo',
 'ext': 'mp4',
 'title': 'LIVE Christianias rydning af Pusher Street er i gang',
-'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
+'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
 'timestamp': 1472800279,
 'upload_date': '20160902',
 'duration': 131.4,
 },
+'params': {
+'skip_download': True,
+},
+}, {
+# with SignLanguage formats
+'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
+'info_dict': {
+'id': 'historien-om-danmark-stenalder',
+'ext': 'mp4',
+'title': 'Historien om Danmark: Stenalder (1)',
+'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
+'timestamp': 1490401996,
+'upload_date': '20170325',
+'duration': 3502.04,
+'formats': 'mincount:20',
+},
+'params': {
+'skip_download': True,
+},
 }]

 def _real_extract(self, url):
@@ -88,7 +104,7 @@ class DRTVIE(InfoExtractor):
 elif kind in ('VideoResource', 'AudioResource'):
 duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
 restricted_to_denmark = asset.get('RestrictedToDenmark')
-spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
+asset_target = asset.get('Target')
 for link in asset.get('Links', []):
 uri = link.get('Uri')
 if not uri:
@@ -96,9 +112,9 @@ class DRTVIE(InfoExtractor):
 target = link.get('Target')
 format_id = target or ''
 preference = None
-if spoken_subtitles:
+if asset_target in ('SpokenSubtitles', 'SignLanguage'):
 preference = -1
-format_id += '-spoken-subtitles'
+format_id += '-%s' % asset_target
 if target == 'HDS':
 f4m_formats = self._extract_f4m_formats(
 uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',

@@ -5,9 +5,12 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-js_to_json,
-unescapeHTML,
+determine_ext,
 ExtractorError,
+int_or_none,
+js_to_json,
+mimetype2ext,
+unescapeHTML,
 )


@@ -24,14 +27,7 @@ class DVTVIE(InfoExtractor):
 'id': 'dc0768de855511e49e4b0025900fea04',
 'ext': 'mp4',
 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
-}
-}, {
-'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
-'md5': '6388f1941b48537dbd28791f712af8bf',
-'info_dict': {
-'id': '72c02230849211e49f60002590604f2e',
-'ext': 'mp4',
-'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
+'duration': 1484,
 }
 }, {
 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
@@ -44,55 +40,100 @@ class DVTVIE(InfoExtractor):
 'info_dict': {
 'id': 'b0b40906854d11e4bdad0025900fea04',
 'ext': 'mp4',
-'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
+'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
+'description': 'md5:0916925dea8e30fe84222582280b47a0',
+'timestamp': 1418760010,
+'upload_date': '20141216',
 }
 }, {
 'md5': '5f7652a08b05009c1292317b449ffea2',
 'info_dict': {
 'id': '420ad9ec854a11e4bdad0025900fea04',
 'ext': 'mp4',
-'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
+'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
+'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
+'timestamp': 1418760010,
+'upload_date': '20141216',
 }
 }, {
 'md5': '498eb9dfa97169f409126c617e2a3d64',
 'info_dict': {
 'id': '95d35580846a11e4b6d20025900fea04',
 'ext': 'mp4',
-'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
+'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
+'description': 'md5:889fe610a70fee5511dc3326a089188e',
+'timestamp': 1418760010,
+'upload_date': '20141216',
 }
 }, {
 'md5': 'b8dc6b744844032dab6ba3781a7274b9',
 'info_dict': {
 'id': '6fe14d66853511e4833a0025900fea04',
 'ext': 'mp4',
-'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
+'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
+'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
+'timestamp': 1418760010,
+'upload_date': '20141216',
 }
 }],
+}, {
+'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
+'md5': 'f8efe9656017da948369aa099788c8ea',
+'info_dict': {
+'id': '3c496fec365911e7a6500025900fea04',
+'ext': 'mp4',
+'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
+'duration': 1103,
+},
+'params': {
+'skip_download': True,
+},
 }, {
 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
 'only_matching': True,
 }]

 def _parse_video_metadata(self, js, video_id):
-metadata = self._parse_json(js, video_id, transform_source=js_to_json)
+data = self._parse_json(js, video_id, transform_source=js_to_json)

+title = unescapeHTML(data['title'])

 formats = []
-for video in metadata['sources']:
-ext = video['type'][6:]
-formats.append({
-'url': video['file'],
-'ext': ext,
-'format_id': '%s-%s' % (ext, video['label']),
-'height': int(video['label'].rstrip('p')),
-'fps': 25,
-})
+for video in data['sources']:
+video_url = video.get('file')
+if not video_url:
+continue
+video_type = video.get('type')
+ext = determine_ext(video_url, mimetype2ext(video_type))
+if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+formats.extend(self._extract_m3u8_formats(
+video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+m3u8_id='hls', fatal=False))
+elif video_type == 'application/dash+xml' or ext == 'mpd':
+formats.extend(self._extract_mpd_formats(
+video_url, video_id, mpd_id='dash', fatal=False))
+else:
+label = video.get('label')
+height = self._search_regex(
+r'^(\d+)[pP]', label or '', 'height', default=None)
+format_id = ['http']
+for f in (ext, label):
+if f:
+format_id.append(f)
+formats.append({
+'url': video_url,
+'format_id': '-'.join(format_id),
+'height': int_or_none(height),
+})
 self._sort_formats(formats)

 return {
-'id': metadata['mediaid'],
-'title': unescapeHTML(metadata['title']),
-'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
+'id': data.get('mediaid') or video_id,
+'title': title,
+'description': data.get('description'),
+'thumbnail': data.get('image'),
+'duration': int_or_none(data.get('duration')),
+'timestamp': int_or_none(data.get('pubtime')),
 'formats': formats
 }

@@ -103,7 +144,7 @@ class DVTVIE(InfoExtractor):

 # single video
 item = self._search_regex(
-r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
+r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
 webpage, 'video', default=None, fatal=False)

 if item:
@@ -113,6 +154,8 @@ class DVTVIE(InfoExtractor):
 items = re.findall(
 r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
 webpage)
+if not items:
+items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)

 if items:
 return {

@@ -90,7 +90,7 @@ from .azmedien import (
 )
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .bbc import (
 BBCCoUkIE,
 BBCCoUkArticleIE,
@@ -98,7 +98,10 @@ from .bbc import (
 BBCCoUkPlaylistIE,
 BBCIE,
 )
-from .beampro import BeamProLiveIE
+from .beampro import (
+BeamProLiveIE,
+BeamProVodIE,
+)
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .bellmedia import BellMediaIE
@@ -251,7 +254,10 @@ from .democracynow import DemocracynowIE
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .dotsub import DotsubIE
-from .douyutv import DouyuTVIE
+from .douyutv import (
+DouyuShowIE,
+DouyuTVIE,
+)
 from .dplay import (
 DPlayIE,
 DPlayItIE,
@@ -350,9 +356,9 @@ from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
-PluzzIE,
-FranceTvInfoIE,
 FranceTVIE,
+FranceTVEmbedIE,
+FranceTVInfoIE,
 GenerationQuoiIE,
 CultureboxIE,
 )
@@ -386,7 +392,6 @@ from .globo import (
 from .go import GoIE
 from .go90 import Go90IE
 from .godtube import GodTubeIE
-from .godtv import GodTVIE
 from .golem import GolemIE
 from .googledrive import GoogleDriveIE
 from .googleplus import GooglePlusIE
@@ -542,6 +547,7 @@ from .mangomolo import (
 )
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
+from .mediaset import MediasetIE
 from .medici import MediciIE
 from .meipai import MeipaiIE
 from .melonvod import MelonVODIE
@@ -630,7 +636,10 @@ from .neteasemusic import (
 NetEaseMusicProgramIE,
 NetEaseMusicDjRadioIE,
 )
-from .newgrounds import NewgroundsIE
+from .newgrounds import (
+NewgroundsIE,
+NewgroundsPlaylistIE,
+)
 from .newstube import NewstubeIE
 from .nextmedia import (
 NextMediaIE,
@@ -663,6 +672,7 @@ from .nintendo import NintendoIE
 from .njpwworld import NJPWWorldIE
 from .nobelprize import NobelPrizeIE
 from .noco import NocoIE
+from .nonktube import NonkTubeIE
 from .noovo import NoovoIE
 from .normalboots import NormalbootsIE
 from .nosvideo import NosVideoIE
@@ -732,8 +742,8 @@ from .openload import OpenloadIE
 from .ora import OraTVIE
 from .orf import (
 ORFTVthekIE,
-ORFOE1IE,
 ORFFM4IE,
+ORFOE1IE,
 ORFIPTVIE,
 )
 from .packtpub import (
@@ -1014,11 +1024,6 @@ from .trilulilu import TriluliluIE
 from .trutv import TruTVIE
 from .tube8 import Tube8IE
 from .tubitv import TubiTvIE
-from .tudou import (
-TudouIE,
-TudouPlaylistIE,
-TudouAlbumIE,
-)
 from .tumblr import TumblrIE
 from .tunein import (
 TuneInClipIE,
@@ -1098,6 +1103,10 @@ from .uplynk import (
 UplynkIE,
 UplynkPreplayIE,
 )
+from .upskill import (
+UpskillIE,
+UpskillCourseIE,
+)
 from .urort import UrortIE
 from .urplay import URPlayIE
 from .usanetwork import USANetworkIE

@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
 'format_id': f.get('name'),
 'tbr': tbr,
 'source_preference': quality(f.get('name')),
+# quality metadata of http formats may be incorrect
+'preference': -1,
 })
 # m3u8 URL format is reverse engineered from [1] (search for
 # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)

@@ -1,7 +1,10 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
+from ..compat import (
+compat_str,
+compat_urllib_parse_urlencode,
+)
 from ..utils import (
 ExtractorError,
 int_or_none,
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):

 formats = []
 for stream in streams['stream']:
-stream_type = str(stream.get('type'))
+stream_type = compat_str(stream.get('type'))
 formats.append({
 'format_id': stream_type,
 'url': stream['_content'],

@@ -5,6 +5,7 @@ import itertools
 from .common import InfoExtractor
 from ..utils import (
 get_element_by_id,
+int_or_none,
 remove_end,
 )

@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):

 formats = [{
 'url': source,
-'height': resolution,
+'height': int_or_none(resolution),
 } for source, resolution in zip(
 video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]

@@ -21,11 +21,13 @@ from .dailymotion import (


 class FranceTVBaseInfoExtractor(InfoExtractor):
-def _extract_video(self, video_id, catalogue):
+def _extract_video(self, video_id, catalogue=None):
 info = self._download_json(
-'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
-% (video_id, catalogue),
-video_id, 'Downloading video JSON')
+'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
+video_id, 'Downloading video JSON', query={
+'idDiffusion': video_id,
+'catalogue': catalogue or '',
+})

 if info.get('status') == 'NOK':
 raise ExtractorError(
@@ -109,27 +111,100 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
 }


-class PluzzIE(FranceTVBaseInfoExtractor):
-IE_NAME = 'pluzz.francetv.fr'
-_VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
+class FranceTVIE(FranceTVBaseInfoExtractor):
+_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'

-# Can't use tests, videos expire in 7 days
+_TESTS = [{
+'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
+'info_dict': {
+'id': '157550144',
+'ext': 'mp4',
+'title': '13h15, le dimanche... - Les mystères de Jésus',
+'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+'timestamp': 1494156300,
+'upload_date': '20170507',
+},
+'params': {
+# m3u8 downloads
+'skip_download': True,
+},
+}, {
+# france3
+'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
+'only_matching': True,
+}, {
+# france4
+'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
+'only_matching': True,
+}, {
+# france5
+'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
+'only_matching': True,
+}, {
+# franceo
+'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
+'only_matching': True,
+}, {
+# france2 live
+'url': 'https://www.france.tv/france-2/direct.html',
+'only_matching': True,
+}, {
+'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
+'only_matching': True,
+}, {
+'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
+'only_matching': True,
+}, {
+'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
+'only_matching': True,
+}, {
+'url': 'https://www.france.tv/142749-rouge-sang.html',
+'only_matching': True,
+}]

 def _real_extract(self, url):
 display_id = self._match_id(url)

 webpage = self._download_webpage(url, display_id)

-video_id = self._html_search_meta(
-'id_video', webpage, 'video id', default=None)
+catalogue = None
+video_id = self._search_regex(
+r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
+webpage, 'video id', default=None, group='id')

 if not video_id:
-video_id = self._search_regex(
-r'data-diffusion=["\'](\d+)', webpage, 'video id')
+video_id, catalogue = self._html_search_regex(
+r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
+webpage, 'video ID').split('@')
-return self._extract_video(video_id, 'Pluzz')
+return self._extract_video(video_id, catalogue)


-class FranceTvInfoIE(FranceTVBaseInfoExtractor):
+class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
+_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'

+_TEST = {
+'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
+'info_dict': {
+'id': 'NI_983319',
+'ext': 'mp4',
+'title': 'Le Pen Reims',
+'upload_date': '20170505',
+'timestamp': 1493981780,
+'duration': 16,
+},
+}

+def _real_extract(self, url):
+video_id = self._match_id(url)

+video = self._download_json(
+'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
+video_id)

+return self._extract_video(video['video_id'], video.get('catalog'))


+class FranceTVInfoIE(FranceTVBaseInfoExtractor):
 IE_NAME = 'francetvinfo.fr'
 _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'

@@ -233,124 +308,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
 return self._extract_video(video_id, catalogue)


-class FranceTVIE(FranceTVBaseInfoExtractor):
-IE_NAME = 'francetv'
-IE_DESC = 'France 2, 3, 4, 5 and Ô'
-_VALID_URL = r'''(?x)
-https?://
-(?:
-(?:www\.)?france[2345o]\.fr/
-(?:
-emissions/[^/]+/(?:videos|diffusions)|
-emission/[^/]+|
-videos|
-jt
-)
-/|
-embed\.francetv\.fr/\?ue=
-)
-(?P<id>[^/?]+)
-'''

-_TESTS = [
-# france2
-{
-'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
-'md5': 'c03fc87cb85429ffd55df32b9fc05523',
-'info_dict': {
-'id': '109169362',
-'ext': 'flv',
-'title': '13h15, le dimanche...',
-'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
-'upload_date': '20140914',
-'timestamp': 1410693600,
-},
-},
-# france3
-{
-'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
-'md5': '679bb8f8921f8623bd658fa2f8364da0',
-'info_dict': {
-'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
-'ext': 'mp4',
-'title': 'Le scandale du prix des médicaments',
-'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
-'upload_date': '20131113',
-'timestamp': 1384380000,
-},
-},
-# france4
-{
-'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
-'info_dict': {
-'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-'ext': 'mp4',
-'title': 'Hero Corp Making of - Extrait 1',
-'description': 'md5:c87d54871b1790679aec1197e73d650a',
-'upload_date': '20131106',
-'timestamp': 1383766500,
-},
-},
-# france5
-{
-'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
-'md5': 'f6c577df3806e26471b3d21631241fd0',
-'info_dict': {
-'id': '123327454',
-'ext': 'flv',
-'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
-'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
-'upload_date': '20150831',
-'timestamp': 1441035120,
-},
-},
-# franceo
-{
-'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
-'md5': '47d5816d3b24351cdce512ad7ab31da8',
-'info_dict': {
-'id': '125377621',
-'ext': 'flv',
-'title': 'Infô soir',
-'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
-'upload_date': '20150718',
-'timestamp': 1437241200,
-'duration': 414,
-},
-},
-{
-# francetv embed
-'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
-'info_dict': {
-'id': 'EV_30231',
-'ext': 'flv',
-'title': 'Alcaline, le concert avec Calogero',
-'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
-'upload_date': '20150226',
-'timestamp': 1424989860,
-'duration': 5400,
-},
-},
-{
-'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
-'only_matching': True,
-},
-{
-'url': 'http://www.franceo.fr/videos/125377617',
-'only_matching': True,
-}
-]

-def _real_extract(self, url):
-video_id = self._match_id(url)
-webpage = self._download_webpage(url, video_id)
-video_id, catalogue = self._html_search_regex(
-r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
-webpage, 'video ID').split('@')
-return self._extract_video(video_id, catalogue)


 class GenerationQuoiIE(InfoExtractor):
 IE_NAME = 'france2.fr:generation-quoi'
 _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'

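Note: the rewritten FranceTVBaseInfoExtractor above now queries the sivideo getInfosOeuvre endpoint with idDiffusion/catalogue parameters. A hedged sketch of that single request outside the extractor, using only what appears in the diff (Python 3 standard library used purely for illustration; JSON fields other than 'status' are not assumed):

    # Sketch of the metadata request performed by the new _extract_video().
    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def get_infos_oeuvre(video_id, catalogue=None):
        # Same endpoint and query parameters as in the diff above.
        query = urlencode({'idDiffusion': video_id, 'catalogue': catalogue or ''})
        url = 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?' + query
        with urlopen(url) as resp:
            info = json.loads(resp.read().decode('utf-8'))
        # The extractor treats status == 'NOK' as an error response.
        if info.get('status') == 'NOK':
            raise RuntimeError('getInfosOeuvre returned NOK for %s' % video_id)
        return info
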
@@ -2,15 +2,11 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import (
-compat_HTTPError,
-compat_urllib_parse_unquote_plus,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
 determine_ext,
 int_or_none,
 js_to_json,
-sanitized_Request,
 ExtractorError,
 urlencode_postdata
 )
@@ -20,6 +16,7 @@ class FunimationIE(InfoExtractor):
 _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'

 _NETRC_MACHINE = 'funimation'
+_TOKEN = None

 _TESTS = [{
 'url': 'https://www.funimation.com/shows/hacksign/role-play/',
@@ -38,56 +35,38 @@ class FunimationIE(InfoExtractor):
 }, {
 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
 'info_dict': {
-'id': '9635',
+'id': '210051',
 'display_id': 'broadcast-dub-preview',
 'ext': 'mp4',
 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
-'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
 'thumbnail': r're:https?://.*\.(?:jpg|png)',
 },
-'skip': 'Access without user interaction is forbidden by CloudFlare',
+'params': {
+# m3u8 download
+'skip_download': True,
+},
 }, {
 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
 'only_matching': True,
 }]

-_LOGIN_URL = 'http://www.funimation.com/login'

-def _extract_cloudflare_session_ua(self, url):
-ci_session_cookie = self._get_cookies(url).get('ci_session')
-if ci_session_cookie:
-ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
-# ci_session is a string serialized by PHP function serialize()
-# This case is simple enough to use regular expressions only
-return self._search_regex(
-r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
-default=None)

 def _login(self):
 (username, password) = self._get_login_info()
 if username is None:
 return
-data = urlencode_postdata({
-'email_field': username,
-'password_field': password,
-})
-user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
-if not user_agent:
-user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
-login_request = sanitized_Request(self._LOGIN_URL, data, headers={
-'User-Agent': user_agent,
-'Content-Type': 'application/x-www-form-urlencoded'
-})
-login_page = self._download_webpage(
-login_request, None, 'Logging in as %s' % username)
-if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
-return
-error = self._html_search_regex(
-r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
-login_page, 'error messages', default=None)
-if error:
-raise ExtractorError('Unable to login: %s' % error, expected=True)
-raise ExtractorError('Unable to log in')
+try:
+data = self._download_json(
+'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
+None, 'Logging in as %s' % username, data=urlencode_postdata({
+'username': username,
+'password': password,
+}))
+self._TOKEN = data['token']
+except ExtractorError as e:
+if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+error = self._parse_json(e.cause.read().decode(), None)['error']
+raise ExtractorError(error, expected=True)
+raise

 def _real_initialize(self):
 self._login()
@@ -125,9 +104,12 @@ class FunimationIE(InfoExtractor):
 description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)

 try:
+headers = {}
+if self._TOKEN:
+headers['Authorization'] = 'Token %s' % self._TOKEN
 sources = self._download_json(
 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
-video_id)['items']
+video_id, headers=headers)['items']
 except ExtractorError as e:
 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
 error = self._parse_json(e.cause.read(), video_id)['errors'][0]

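Note: the Funimation change above swaps the CloudFlare/webform login for a token from the dadcdigital auth API, with the token later sent as an Authorization header when fetching sources. A rough sketch of that flow using only the endpoints and field names visible in the diff (the 'requests' library is used here purely for illustration; youtube-dl itself goes through _download_json):

    # Sketch, assuming the endpoints behave as the extractor expects.
    import requests

    def funimation_token(username, password):
        resp = requests.post(
            'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
            data={'username': username, 'password': password})
        if resp.status_code == 401:
            # The extractor surfaces the 'error' field on HTTP 401.
            raise RuntimeError(resp.json().get('error', 'login failed'))
        return resp.json()['token']

    def fetch_sources(video_id, token=None):
        # 'Authorization: Token <token>' mirrors the header added in the diff.
        headers = {'Authorization': 'Token %s' % token} if token else {}
        url = ('https://prod-api-funimationnow.dadcdigital.com'
               '/api/source/catalog/video/%s/signed/' % video_id)
        return requests.get(url, headers=headers).json()['items']
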
@@ -6,62 +6,52 @@ from .common import InfoExtractor
 from ..utils import (
 float_or_none,
 int_or_none,
-js_to_json,
 unified_strdate,
 )


 class GaskrankIE(InfoExtractor):
-"""InfoExtractor for gaskrank.tv"""
-_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
-_TESTS = [
-{
-'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
-'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
-'info_dict': {
-'id': '201601/26955',
-'ext': 'mp4',
-'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
-'thumbnail': r're:^https?://.*\.jpg$',
-'categories': ['motorrad-fun'],
-'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
-'uploader_id': 'Bikefun',
-'upload_date': '20170110',
-'uploader_url': None,
-}
-},
-{
-'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
-'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
-'info_dict': {
-'id': '201106/15920',
-'ext': 'mp4',
-'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
-'thumbnail': r're:^https?://.*\.jpg$',
-'categories': ['racing'],
-'display_id': 'isle-of-man-tt-2011-michael-du-15920',
-'uploader_id': 'IOM',
-'upload_date': '20160506',
-'uploader_url': 'www.iomtt.com',
-}
+_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
+_TESTS = [{
+'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
+'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
+'info_dict': {
+'id': '201601/26955',
+'ext': 'mp4',
+'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
+'thumbnail': r're:^https?://.*\.jpg$',
+'categories': ['motorrad-fun'],
+'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
+'uploader_id': 'Bikefun',
+'upload_date': '20170110',
+'uploader_url': None,
 }
-]
+}, {
+'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
+'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
+'info_dict': {
+'id': '201106/15920',
+'ext': 'mp4',
+'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
+'thumbnail': r're:^https?://.*\.jpg$',
+'categories': ['racing'],
+'display_id': 'isle-of-man-tt-2011-michael-du-15920',
+'uploader_id': 'IOM',
+'upload_date': '20170523',
+'uploader_url': 'www.iomtt.com',
+}
+}]

 def _real_extract(self, url):
-"""extract information from gaskrank.tv"""
-def fix_json(code):
-"""Removes trailing comma in json: {{},} --> {{}}"""
-return re.sub(r',\s*}', r'}', js_to_json(code))

 display_id = self._match_id(url)

 webpage = self._download_webpage(url, display_id)

+title = self._og_search_title(
+webpage, default=None) or self._html_search_meta(
+'title', webpage, fatal=True)

 categories = [re.match(self._VALID_URL, url).group('categories')]
-title = self._search_regex(
-r'movieName\s*:\s*\'([^\']*)\'',
-webpage, 'title')
-thumbnail = self._search_regex(
-r'poster\s*:\s*\'([^\']*)\'',
-webpage, 'thumbnail', default=None)

 mobj = re.search(
 r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
@@ -89,29 +79,14 @@ class GaskrankIE(InfoExtractor):
 if average_rating:
 average_rating = float_or_none(average_rating.replace(',', '.'))

-playlist = self._parse_json(
-self._search_regex(
-r'playlist\s*:\s*\[([^\]]*)\]',
-webpage, 'playlist', default='{}'),
-display_id, transform_source=fix_json, fatal=False)

 video_id = self._search_regex(
 r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
-playlist.get('0').get('src'), 'video id')
+webpage, 'video id', default=display_id)

-formats = []
-for key in playlist:
-formats.append({
-'url': playlist[key]['src'],
-'format_id': key,
-'quality': playlist[key].get('quality')})
-self._sort_formats(formats, field_preference=['format_id'])
+entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
+entry.update({

-return {
 'id': video_id,
 'title': title,
-'formats': formats,
-'thumbnail': thumbnail,
 'categories': categories,
 'display_id': display_id,
 'uploader_id': uploader_id,
@@ -120,4 +95,7 @@ class GaskrankIE(InfoExtractor):
 'tags': tags,
 'view_count': view_count,
 'average_rating': average_rating,
-}
+})
+self._sort_formats(entry['formats'])

+return entry

@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
|||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
@@ -88,6 +89,8 @@ from .rutube import RutubeIE
|
|||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
youtube_dl/extractor/generic.py
 from .washingtonpost import WashingtonPostIE
+from .wistia import WistiaIE
+from .mediaset import MediasetIE


 class GenericIE(InfoExtractor):
@@ -1718,6 +1721,19 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [WashingtonPostIE.ie_key()],
         },
+        {
+            # Mediaset embed
+            'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+            'info_dict': {
+                'id': '720642',
+                'ext': 'mp4',
+                'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [MediasetIE.ie_key()],
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1892,14 +1908,14 @@ class GenericIE(InfoExtractor):
         content_type = head_response.headers.get('Content-Type', '').lower()
         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
-            format_id = m.group('format_id')
+            format_id = compat_str(m.group('format_id'))
             if format_id.endswith('mpegurl'):
                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
             elif format_id == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id)
             else:
                 formats = [{
-                    'format_id': m.group('format_id'),
+                    'format_id': format_id,
                     'url': url,
                     'vcodec': 'none' if m.group('type') == 'audio' else None
                 }]
@@ -2111,57 +2127,20 @@ class GenericIE(InfoExtractor):
                 playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)

         # Look for embedded Wistia player
-        match = re.search(
-            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
-        if match:
-            embed_url = self._proto_relative_url(
-                unescapeHTML(match.group('url')))
+        wistia_url = WistiaIE._extract_url(webpage)
+        if wistia_url:
             return {
                 '_type': 'url_transparent',
-                'url': embed_url,
-                'ie_key': 'Wistia',
+                'url': self._proto_relative_url(wistia_url),
+                'ie_key': WistiaIE.ie_key(),
                 'uploader': video_uploader,
             }

-        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
-        if match:
-            return {
-                '_type': 'url_transparent',
-                'url': 'wistia:%s' % match.group('id'),
-                'ie_key': 'Wistia',
-                'uploader': video_uploader,
-            }
-
-        match = re.search(
-            r'''(?sx)
-                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
-                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
-            ''', webpage)
-        if match:
-            return self.url_result(self._proto_relative_url(
-                'wistia:%s' % match.group('id')), 'Wistia')
-
         # Look for SVT player
         svt_url = SVTIE._extract_url(webpage)
         if svt_url:
             return self.url_result(svt_url, 'SVT')

-        # Look for embedded condenast player
-        matches = re.findall(
-            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
-            webpage)
-        if matches:
-            return {
-                '_type': 'playlist',
-                'entries': [{
-                    '_type': 'url',
-                    'ie_key': 'CondeNast',
-                    'url': ma,
-                } for ma in matches],
-                'title': video_title,
-                'id': video_id,
-            }
-
         # Look for Bandcamp pages with custom domain
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
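Note: the three inline Wistia patterns above are removed in favour of a single `WistiaIE._extract_url(webpage)` call. A minimal standalone sketch of the kind of helper this implies, built only from the regexes that were removed here; the function name `extract_wistia_url` and the sample HTML are illustrative, not the actual youtube-dl implementation:

```python
import re

def extract_wistia_url(webpage):
    # iframe/meta embeds, e.g. //fast.wistia.net/embed/iframe/<id>
    match = re.search(
        r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1',
        webpage)
    if match:
        return match.group('url')
    # JS embeds that only expose the video id
    match = re.search(
        r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)',
        webpage)
    if match:
        return 'wistia:%s' % match.group('id')
    return None

# Illustrative page snippet (hypothetical)
sample = '<iframe src="//fast.wistia.net/embed/iframe/abc123xyz"></iframe>'
print(extract_wistia_url(sample))  # //fast.wistia.net/embed/iframe/abc123xyz
```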
@@ -2555,29 +2534,6 @@ class GenericIE(InfoExtractor):
             return self.playlist_result(
                 limelight_urls, video_id, video_title, video_description)

-        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
-        if mobj:
-            lm = {
-                'Media': 'media',
-                'Channel': 'channel',
-                'ChannelList': 'channel_list',
-            }
-            return self.url_result(smuggle_url('limelight:%s:%s' % (
-                lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
-                'Limelight%s' % mobj.group(1), mobj.group(2))
-
-        mobj = re.search(
-            r'''(?sx)
-                <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
-                <param[^>]+
-                    name=(["\'])flashVars\2[^>]+
-                    value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
-            ''', webpage)
-        if mobj:
-            return self.url_result(smuggle_url(
-                'limelight:media:%s' % mobj.group('id'),
-                {'source_url': url}), 'LimelightMedia', mobj.group('id'))
-
         # Look for Anvato embeds
         anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
         if anvato_urls:
@@ -2707,6 +2663,12 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())

+        # Look for Mediaset embeds
+        mediaset_urls = MediasetIE._extract_urls(webpage)
+        if mediaset_urls:
+            return self.playlist_from_matches(
+                mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
youtube_dl/extractor/gfycat.py
@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
             video_url = gfy.get('%sUrl' % format_id)
             if not video_url:
                 continue
-            filesize = gfy.get('%sSize' % format_id)
+            filesize = int_or_none(gfy.get('%sSize' % format_id))
             formats.append({
                 'url': video_url,
                 'format_id': format_id,
youtube_dl/extractor/godtv.py (deleted file, 66 lines)
@@ -1,66 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import js_to_json
-
-
-class GodTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
-        'info_dict': {
-            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
-            'ext': 'mp4',
-            'title': 'Randy Needham',
-            'duration': 3615.08,
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        'url': 'http://god.tv/playlist/bible-study',
-        'info_dict': {
-            'id': 'bible-study',
-        },
-        'playlist_mincount': 37,
-    }, {
-        'url': 'http://god.tv/node/15097',
-        'only_matching': True,
-    }, {
-        'url': 'http://god.tv/live/africa',
-        'only_matching': True,
-    }, {
-        'url': 'http://god.tv/liveevents',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        settings = self._parse_json(
-            self._search_regex(
-                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-                webpage, 'settings', default='{}'),
-            display_id, transform_source=js_to_json, fatal=False)
-
-        ooyala_id = None
-
-        if settings:
-            playlist = settings.get('playlist')
-            if playlist and isinstance(playlist, list):
-                entries = [
-                    OoyalaIE._build_url_result(video['content_id'])
-                    for video in playlist if video.get('content_id')]
-                if entries:
-                    return self.playlist_result(entries, display_id)
-            ooyala_id = settings.get('ooyala', {}).get('content_id')
-
-        if not ooyala_id:
-            ooyala_id = self._search_regex(
-                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
-                webpage, 'ooyala id', group='id')
-
-        return OoyalaIE._build_url_result(ooyala_id)
youtube_dl/extractor/golem.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..compat import (
+    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
                 continue

             formats.append({
-                'format_id': e.tag,
+                'format_id': compat_str(e.tag),
                 'url': compat_urlparse.urljoin(self._PREFIX, url),
                 'height': self._int(e.get('height'), 'height'),
                 'width': self._int(e.get('width'), 'width'),
youtube_dl/extractor/hitbox.py
@@ -16,8 +16,8 @@ from ..utils import (

 class HitboxIE(InfoExtractor):
     IE_NAME = 'hitbox'
-    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.hitbox.tv/video/203213',
         'info_dict': {
             'id': '203213',
@@ -38,13 +38,15 @@ class HitboxIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
+        'only_matching': True,
+    }]

     def _extract_metadata(self, url, video_id):
         thumb_base = 'https://edge.sf.hitbox.tv'
         metadata = self._download_json(
-            '%s/%s' % (url, video_id), video_id,
-            'Downloading metadata JSON')
+            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

         date = 'media_live_since'
         media_type = 'livestream'
@@ -63,14 +65,15 @@ class HitboxIE(InfoExtractor):
         views = int_or_none(video_meta.get('media_views'))
         timestamp = parse_iso8601(video_meta.get(date), ' ')
         categories = [video_meta.get('category_name')]
-        thumbs = [
-            {'url': thumb_base + video_meta.get('media_thumbnail'),
-             'width': 320,
-             'height': 180},
-            {'url': thumb_base + video_meta.get('media_thumbnail_large'),
-             'width': 768,
-             'height': 432},
-        ]
+        thumbs = [{
+            'url': thumb_base + video_meta.get('media_thumbnail'),
+            'width': 320,
+            'height': 180
+        }, {
+            'url': thumb_base + video_meta.get('media_thumbnail_large'),
+            'width': 768,
+            'height': 432
+        }]

         return {
             'id': video_id,
@@ -90,7 +93,7 @@ class HitboxIE(InfoExtractor):
         video_id = self._match_id(url)

         player_config = self._download_json(
-            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
+            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
             video_id, 'Downloading video JSON')

         formats = []
@@ -121,8 +124,7 @@ class HitboxIE(InfoExtractor):
         self._sort_formats(formats)

         metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/video',
-            video_id)
+            'https://www.smashcast.tv/api/media/video', video_id)
         metadata['formats'] = formats

         return metadata
@@ -130,8 +132,8 @@ class HitboxIE(InfoExtractor):

 class HitboxLiveIE(HitboxIE):
     IE_NAME = 'hitbox:live'
-    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'http://www.hitbox.tv/dimak',
         'info_dict': {
             'id': 'dimak',
@@ -146,13 +148,20 @@ class HitboxLiveIE(HitboxIE):
             # live
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'https://www.smashcast.tv/dimak',
+        'only_matching': True,
+    }]

+    @classmethod
+    def suitable(cls, url):
+        return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)

         player_config = self._download_json(
-            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
+            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
             video_id)

         formats = []
@@ -197,8 +206,7 @@ class HitboxLiveIE(HitboxIE):
         self._sort_formats(formats)

         metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/live',
-            video_id)
+            'https://www.smashcast.tv/api/media/live', video_id)
         metadata['formats'] = formats
         metadata['is_live'] = True
         metadata['title'] = self._live_title(metadata.get('title'))
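Note: both extractors now accept hitbox.tv and smashcast.tv hosts, and the live extractor defers to the VOD one via `suitable()`. A quick standalone check of the two updated patterns (the `which_extractor` helper is illustrative only):

```python
import re

HITBOX_VOD = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
HITBOX_LIVE = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'

def which_extractor(url):
    # Mirrors the new HitboxLiveIE.suitable(): the VOD pattern wins when both match
    if re.match(HITBOX_VOD, url):
        return 'hitbox'
    if re.match(HITBOX_LIVE, url):
        return 'hitbox:live'
    return None

print(which_extractor('https://www.smashcast.tv/hitboxlive/videos/203213'))  # hitbox
print(which_extractor('https://www.smashcast.tv/dimak'))                     # hitbox:live
```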
youtube_dl/extractor/imdb.py
@@ -13,7 +13,7 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'

     _TESTS = [{
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -35,6 +35,9 @@ class ImdbIE(InfoExtractor):
     }, {
         'url': 'http://www.imdb.com/videoplayer/vi1562949145',
         'only_matching': True,
+    }, {
+        'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
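Note: the relaxed `_VALID_URL` now also covers title-page video players. A quick check of the new pattern against the test URLs from this hunk (standalone, illustrative):

```python
import re

IMDB_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'

for url in (
        'http://www.imdb.com/video/imdb/vi2524815897',
        'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561'):
    print(re.match(IMDB_URL, url).group('id'))
# 2524815897
# 2608641561
```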
youtube_dl/extractor/jove.py
@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
             webpage, 'description', fatal=False)
         publish_date = unified_strdate(self._html_search_meta(
             'citation_publication_date', webpage, 'publish date', fatal=False))
-        comment_count = self._html_search_regex(
+        comment_count = int(self._html_search_regex(
             r'<meta name="num_comments" content="(\d+) Comments?"',
-            webpage, 'comment count', fatal=False)
+            webpage, 'comment count', fatal=False))

         return {
             'id': video_id,
youtube_dl/extractor/liveleak.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
@@ -11,10 +10,10 @@ class LiveLeakIE(InfoExtractor):
     _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
     _TESTS = [{
         'url': 'http://www.liveleak.com/view?i=757_1364311680',
-        'md5': '50f79e05ba149149c1b4ea961223d5b3',
+        'md5': '0813c2430bea7a46bf13acf3406992f4',
         'info_dict': {
             'id': '757_1364311680',
-            'ext': 'flv',
+            'ext': 'mp4',
             'description': 'extremely bad day for this guy..!',
             'uploader': 'ljfriel2',
             'title': 'Most unlucky car accident',
@@ -22,7 +21,7 @@ class LiveLeakIE(InfoExtractor):
         }
     }, {
         'url': 'http://www.liveleak.com/view?i=f93_1390833151',
-        'md5': 'b13a29626183c9d33944e6a04f41aafc',
+        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
         'info_dict': {
             'id': 'f93_1390833151',
             'ext': 'mp4',
@@ -32,6 +31,7 @@ class LiveLeakIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$'
         }
     }, {
+        # Prochan embed
         'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
         'md5': '42c6d97d54f1db107958760788c5f48f',
         'info_dict': {
@@ -41,11 +41,13 @@ class LiveLeakIE(InfoExtractor):
             'uploader': 'CapObveus',
             'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
             'age_limit': 18,
-        }
+        },
+        'skip': 'Video is dead',
     }, {
         # Covers https://github.com/rg3/youtube-dl/pull/5983
+        # Multiple resolutions
         'url': 'http://www.liveleak.com/view?i=801_1409392012',
-        'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
+        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
         'info_dict': {
             'id': '801_1409392012',
             'ext': 'mp4',
@@ -93,57 +95,39 @@ class LiveLeakIE(InfoExtractor):
             webpage, 'age limit', default=None))
         video_thumbnail = self._og_search_thumbnail(webpage)

-        sources_raw = self._search_regex(
-            r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
-        if sources_raw is None:
-            alt_source = self._search_regex(
-                r'(file: ".*?"),', webpage, 'video URL', default=None)
-            if alt_source:
-                sources_raw = '[{ %s}]' % alt_source
-            else:
-                # Maybe an embed?
-                embed_url = self._search_regex(
-                    r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
-                    webpage, 'embed URL')
-                return {
-                    '_type': 'url_transparent',
-                    'url': embed_url,
-                    'id': video_id,
-                    'title': video_title,
-                    'description': video_description,
-                    'uploader': video_uploader,
-                    'age_limit': age_limit,
-                }
+        entries = self._parse_html5_media_entries(url, webpage, video_id)
+        if not entries:
+            # Maybe an embed?
+            embed_url = self._search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
+                webpage, 'embed URL')
+            return {
+                '_type': 'url_transparent',
+                'url': embed_url,
+                'id': video_id,
+                'title': video_title,
+                'description': video_description,
+                'uploader': video_uploader,
+                'age_limit': age_limit,
+            }

-        sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
-        sources = json.loads(sources_json)
+        info_dict = entries[0]

-        formats = [{
-            'format_id': '%s' % i,
-            'format_note': s.get('label'),
-            'url': s['file'],
-        } for i, s in enumerate(sources)]
+        for a_format in info_dict['formats']:
+            if not a_format.get('height'):
+                a_format['height'] = int_or_none(self._search_regex(
+                    r'([0-9]+)p\.mp4', a_format['url'], 'height label',
+                    default=None))

-        for i, s in enumerate(sources):
-            # Removing '.h264_*.mp4' gives the raw video, which is essentially
-            # the same video without the LiveLeak logo at the top (see
-            # https://github.com/rg3/youtube-dl/pull/4768)
-            orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
-            if s['file'] != orig_url:
-                formats.append({
-                    'format_id': 'original-%s' % i,
-                    'format_note': s.get('label'),
-                    'url': orig_url,
-                    'preference': 1,
-                })
-        self._sort_formats(formats)
+        self._sort_formats(info_dict['formats'])

-        return {
+        info_dict.update({
             'id': video_id,
             'title': video_title,
             'description': video_description,
             'uploader': video_uploader,
-            'formats': formats,
             'age_limit': age_limit,
             'thumbnail': video_thumbnail,
-        }
+        })
+
+        return info_dict
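Note: the rewrite derives formats from `_parse_html5_media_entries` and fills in a missing height from the `<N>p.mp4` suffix in the format URL. A small standalone sketch of that fallback; the format dicts and the sample URL are hypothetical:

```python
import re

def fill_heights(formats):
    # Infer a missing 'height' from a trailing '<N>p.mp4' in the format URL
    for f in formats:
        if not f.get('height'):
            m = re.search(r'([0-9]+)p\.mp4', f['url'])
            f['height'] = int(m.group(1)) if m else None
    return formats

formats = [{'url': 'https://cdn.example.com/clip.h264_720p.mp4'}]  # illustrative URL
print(fill_heights(formats))  # [{'url': '...', 'height': 720}]
```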
youtube_dl/extractor/medialaan.py
@@ -17,7 +17,7 @@ from ..utils import (
 class MedialaanIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:www\.)?
+                        (?:www\.|nieuws\.)?
                         (?:
                             (?P<site_id>vtm|q2|vtmkzoom)\.be/
                             (?:
@@ -85,6 +85,22 @@ class MedialaanIE(InfoExtractor):
         # clip
         'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
         'only_matching': True,
+    }, {
+        # http/s redirect
+        'url': 'https://vtmkzoom.be/video?aid=45724',
+        'info_dict': {
+            'id': '257136373657000',
+            'ext': 'mp4',
+            'title': 'K3 Dansstudio Ushuaia afl.6',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires account credentials',
+    }, {
+        # nieuws.vtm.be
+        'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
+        'only_matching': True,
     }]

     def _real_initialize(self):
@@ -146,6 +162,8 @@ class MedialaanIE(InfoExtractor):
             video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
         if player:
             video = player[-1]
+            if video['videoUrl'] in ('http', 'https'):
+                return self.url_result(video['url'], MedialaanIE.ie_key())
             info = {
                 'id': video_id,
                 'url': video['videoUrl'],
youtube_dl/extractor/mediaset.py (new file, 118 lines)
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    parse_duration,
+    try_get,
+    unified_strdate,
+)
+
+
+class MediasetIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        mediaset:|
+                        https?://
+                            (?:www\.)?video\.mediaset\.it/
+                            (?:
+                                (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
+                                player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
+                            )
+                    )(?P<id>[0-9]+)
+                    '''
+    _TESTS = [{
+        # full episode
+        'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
+        'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
+        'info_dict': {
+            'id': '661824',
+            'ext': 'mp4',
+            'title': 'Quarta puntata',
+            'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1414,
+            'creator': 'mediaset',
+            'upload_date': '20161107',
+            'series': 'Hello Goodbye',
+            'categories': ['reality'],
+        },
+        'expected_warnings': ['is not a supported codec'],
+    }, {
+        # clip
+        'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
+        'only_matching': True,
+    }, {
+        # iframe simple
+        'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
+        'only_matching': True,
+    }, {
+        # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
+        'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
+        'only_matching': True,
+    }, {
+        'url': 'mediaset:661824',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video_list = self._download_json(
+            'http://cdnsel01.mediaset.net/GetCdn.aspx',
+            video_id, 'Downloading video CDN JSON', query={
+                'streamid': video_id,
+                'format': 'json',
+            })['videoList']
+
+        formats = []
+        for format_url in video_list:
+            if '.ism' in format_url:
+                formats.extend(self._extract_ism_formats(
+                    format_url, video_id, ism_id='mss', fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                    'format_id': determine_ext(format_url),
+                })
+        self._sort_formats(formats)
+
+        mediainfo = self._download_json(
+            'http://plr.video.mediaset.it/html/metainfo.sjson',
+            video_id, 'Downloading video info JSON', query={
+                'id': video_id,
+            })['video']
+
+        title = mediainfo['title']
+
+        creator = try_get(
+            mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
+        category = try_get(
+            mediainfo, lambda x: x['brand-info']['category'], compat_str)
+        categories = [category] if category else None
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': mediainfo.get('short-description'),
+            'thumbnail': mediainfo.get('thumbnail'),
+            'duration': parse_duration(mediainfo.get('duration')),
+            'creator': creator,
+            'upload_date': unified_strdate(mediainfo.get('production-date')),
+            'webpage_url': mediainfo.get('url'),
+            'series': mediainfo.get('brand-value'),
+            'categories': categories,
+            'formats': formats,
+        }
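Note: GenericIE hooks into this new extractor through `MediasetIE._extract_urls`. A standalone sketch of the same iframe discovery, using the pattern from the new file on a made-up page snippet; the helper name and sample HTML are illustrative:

```python
import re

MEDIASET_IFRAME = (
    r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/'
    r'player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1')

def extract_mediaset_urls(webpage):
    # Collect every embedded Mediaset player iframe URL found in the page
    return [m.group('url') for m in re.finditer(MEDIASET_IFRAME, webpage)]

sample = ('<iframe src="http://www.video.mediaset.it/player/'
          'playerIFrame.shtml?id=720642&autoplay=true"></iframe>')  # illustrative
print(extract_mediaset_urls(sample))
```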
youtube_dl/extractor/mitele.py
@@ -136,11 +136,9 @@
             video_id, 'Downloading gigya script')

         # Get a appKey/uuid for getting the session key
-        appKey_var = self._search_regex(
-            r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
-            gigya_sc, 'appKey variable')
         appKey = self._search_regex(
-            r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
+            r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
+            gigya_sc, 'appKey')

         session_json = self._download_json(
             'https://appgrid-api.cloud.accedo.tv/session',
youtube_dl/extractor/msn.py
@@ -68,10 +68,6 @@ class MSNIE(InfoExtractor):
                 format_url = file_.get('url')
                 if not format_url:
                     continue
-                ext = determine_ext(format_url)
-                if ext == 'ism':
-                    formats.extend(self._extract_ism_formats(
-                        format_url + '/Manifest', display_id, 'mss', fatal=False))
                 if 'm3u8' in format_url:
                     # m3u8_native should not be used here until
                     # https://github.com/rg3/youtube-dl/issues/9913 is fixed
@@ -79,6 +75,9 @@ class MSNIE(InfoExtractor):
                         format_url, display_id, 'mp4',
                         m3u8_id='hls', fatal=False)
                     formats.extend(m3u8_formats)
+                elif determine_ext(format_url) == 'ism':
+                    formats.extend(self._extract_ism_formats(
+                        format_url + '/Manifest', display_id, 'mss', fatal=False))
                 else:
                     formats.append({
                         'url': format_url,
youtube_dl/extractor/myspace.py
@@ -12,64 +12,62 @@ from ..utils import (


 class MySpaceIE(InfoExtractor):
-    _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        myspace\.com/[^/]+/
+                        (?P<mediatype>
+                            video/[^/]+/(?P<video_id>\d+)|
+                            music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
+                        )
+                    '''

-    _TESTS = [
-        {
-            'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
-            'md5': '9c1483c106f4a695c47d2911feed50a7',
-            'info_dict': {
-                'id': '109594919',
-                'ext': 'mp4',
-                'title': 'Little Big Town',
-                'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
-                'uploader': 'Five Minutes to the Stage',
-                'uploader_id': 'fiveminutestothestage',
-                'timestamp': 1414108751,
-                'upload_date': '20141023',
-            },
+    _TESTS = [{
+        'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
+        'md5': '9c1483c106f4a695c47d2911feed50a7',
+        'info_dict': {
+            'id': '109594919',
+            'ext': 'mp4',
+            'title': 'Little Big Town',
+            'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+            'uploader': 'Five Minutes to the Stage',
+            'uploader_id': 'fiveminutestothestage',
+            'timestamp': 1414108751,
+            'upload_date': '20141023',
         },
+    }, {
         # songs
-        {
-            'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
-            'md5': '1d7ee4604a3da226dd69a123f748b262',
-            'info_dict': {
-                'id': '93388656',
-                'ext': 'm4a',
-                'title': 'Of weakened soul...',
-                'uploader': 'Killsorrow',
-                'uploader_id': 'killsorrow',
-            },
-        }, {
-            'add_ie': ['Youtube'],
-            'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
-            'info_dict': {
-                'id': 'xqds0B_meys',
-                'ext': 'webm',
-                'title': 'Three Days Grace - Animal I Have Become',
-                'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
-                'uploader': 'ThreeDaysGraceVEVO',
-                'uploader_id': 'ThreeDaysGraceVEVO',
-                'upload_date': '20091002',
-            },
-        }, {
-            'add_ie': ['Youtube'],
-            'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
-            'info_dict': {
-                'id': 'ypWvQgnJrSU',
-                'ext': 'mp4',
-                'title': 'Starset - First Light',
-                'description': 'md5:2d5db6c9d11d527683bcda818d332414',
-                'uploader': 'Yumi K',
-                'uploader_id': 'SorenPromotions',
-                'upload_date': '20140725',
-            }
+        'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
+        'md5': '1d7ee4604a3da226dd69a123f748b262',
+        'info_dict': {
+            'id': '93388656',
+            'ext': 'm4a',
+            'title': 'Of weakened soul...',
+            'uploader': 'Killsorrow',
+            'uploader_id': 'killsorrow',
         },
-    ]
+    }, {
+        'add_ie': ['Youtube'],
+        'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
+        'info_dict': {
+            'id': 'xqds0B_meys',
+            'ext': 'webm',
+            'title': 'Three Days Grace - Animal I Have Become',
+            'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
+            'uploader': 'ThreeDaysGraceVEVO',
+            'uploader_id': 'ThreeDaysGraceVEVO',
+            'upload_date': '20091002',
+        },
+    }, {
+        'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+        'only_matching': True,
+    }, {
+        'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('video_id') or mobj.group('song_id')
         is_song = mobj.group('mediatype').startswith('music/song')
         webpage = self._download_webpage(url, video_id)
         player_url = self._search_regex(
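Note: the new verbose `_VALID_URL` captures the id from different positions for videos and songs. A quick standalone check against the test URLs from this hunk (pattern copied from the change):

```python
import re

MYSPACE_URL = r'''(?x)
    https?://
        myspace\.com/[^/]+/
        (?P<mediatype>
            video/[^/]+/(?P<video_id>\d+)|
            music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
        )
    '''

for url in (
        'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
        'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681'):
    m = re.match(MYSPACE_URL, url)
    # One of the two groups is always None, mirroring the extractor's fallback
    print(m.group('video_id') or m.group('song_id'))
# 109594919
# 93388656
```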
youtube_dl/extractor/nbc.py
@@ -5,10 +5,8 @@ import re
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
 from .adobepass import AdobePassIE
-from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     find_xpath_attr,
-    lowercase_escape,
     smuggle_url,
     unescapeHTML,
     update_url_query,
@@ -17,7 +15,7 @@ from ..utils import (


 class NBCIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'

     _TESTS = [
         {
@@ -36,16 +34,6 @@ class NBCIE(AdobePassIE):
                 'skip_download': True,
             },
         },
-        {
-            'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
-            'info_dict': {
-                'id': '176',
-                'ext': 'flv',
-                'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
-                'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
-            },
-            'skip': '404 Not Found',
-        },
         {
             'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
             'info_dict': {
@@ -63,11 +51,6 @@ class NBCIE(AdobePassIE):
             },
             'skip': 'Only works from US',
         },
-        {
-            # This video has expired but with an escaped embedURL
-            'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
-            'only_matching': True,
-        },
         {
             # HLS streams requires the 'hdnea3' cookie
             'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
@@ -88,59 +71,38 @@ class NBCIE(AdobePassIE):
     ]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        info = {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'id': video_id,
+        permalink, video_id = re.match(self._VALID_URL, url).groups()
+        video_data = self._download_json(
+            'https://api.nbc.com/v3/videos', video_id, query={
+                'filter[permalink]': permalink,
+            })['data'][0]['attributes']
+        query = {
+            'mbr': 'true',
+            'manifest': 'm3u',
+        }
+        video_id = video_data['guid']
+        title = video_data['title']
+        if video_data.get('entitlement') == 'auth':
+            resource = self._get_mvpd_resource(
+                'nbcentertainment', title, video_id,
+                video_data.get('vChipRating'))
+            query['auth'] = self._extract_mvpd_auth(
+                url, video_id, 'nbcentertainment', resource)
+        theplatform_url = smuggle_url(update_url_query(
+            'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
+            query), {'force_smil_url': True})
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'title': title,
+            'url': theplatform_url,
+            'description': video_data.get('description'),
+            'keywords': video_data.get('keywords'),
+            'season_number': int_or_none(video_data.get('seasonNumber')),
+            'episode_number': int_or_none(video_data.get('episodeNumber')),
+            'series': video_data.get('showName'),
+            'ie_key': 'ThePlatform',
         }
-        video_data = None
-        preload = self._search_regex(
-            r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
-        if preload:
-            preload_data = self._parse_json(preload, video_id)
-            path = compat_urllib_parse_urlparse(url).path.rstrip('/')
-            entity_id = preload_data.get('xref', {}).get(path)
-            video_data = preload_data.get('entities', {}).get(entity_id)
-        if video_data:
-            query = {
-                'mbr': 'true',
-                'manifest': 'm3u',
-            }
-            video_id = video_data['guid']
-            title = video_data['title']
-            if video_data.get('entitlement') == 'auth':
-                resource = self._get_mvpd_resource(
-                    'nbcentertainment', title, video_id,
-                    video_data.get('vChipRating'))
-                query['auth'] = self._extract_mvpd_auth(
-                    url, video_id, 'nbcentertainment', resource)
-            theplatform_url = smuggle_url(update_url_query(
-                'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
-                query), {'force_smil_url': True})
-            info.update({
-                'id': video_id,
-                'title': title,
-                'url': theplatform_url,
-                'description': video_data.get('description'),
-                'keywords': video_data.get('keywords'),
-                'season_number': int_or_none(video_data.get('seasonNumber')),
-                'episode_number': int_or_none(video_data.get('episodeNumber')),
-                'series': video_data.get('showName'),
-            })
-        else:
-            theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
-                [
-                    r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
-                    r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
-                    r'"embedURL"\s*:\s*"([^"]+)"'
-                ],
-                webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
-            if theplatform_url.startswith('//'):
-                theplatform_url = 'http:' + theplatform_url
-            info['url'] = smuggle_url(theplatform_url, {'source_url': url})
-        return info


 class NBCSportsVPlayerIE(InfoExtractor):
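Note: instead of scraping PRELOAD data from the page, the extractor now resolves the page permalink through the public videos API. A hedged standalone sketch of that lookup (Python 3, urllib); the endpoint, the `filter[permalink]` parameter and the `data[0].attributes` layout come from the diff and may change on NBC's side:

```python
import json
from urllib.parse import urlencode
from urllib.request import urlopen

def lookup_nbc_video(permalink):
    # Same endpoint and filter the updated extractor uses
    query = urlencode({'filter[permalink]': permalink})
    with urlopen('https://api.nbc.com/v3/videos?' + query) as resp:
        attributes = json.load(resp)['data'][0]['attributes']
    # 'guid' and 'title' are the fields the extractor reads from the response
    return attributes['guid'], attributes['title']

# Example (permalink taken from the test list above); uncomment to hit the API:
# print(lookup_nbc_video('http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821'))
```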
youtube_dl/extractor/newgrounds.py
@@ -1,6 +1,15 @@
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+    parse_duration,
+    parse_filesize,
+    unified_timestamp,
+)


 class NewgroundsIE(InfoExtractor):
@@ -13,7 +22,10 @@ class NewgroundsIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'B7 - BusMode',
             'uploader': 'Burn7',
-        }
+            'timestamp': 1378878540,
+            'upload_date': '20130911',
+            'duration': 143,
+        },
     }, {
         'url': 'https://www.newgrounds.com/portal/view/673111',
         'md5': '3394735822aab2478c31b1004fe5e5bc',
@@ -22,25 +34,133 @@ class NewgroundsIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Dancin',
             'uploader': 'Squirrelman82',
+            'timestamp': 1460256780,
+            'upload_date': '20160410',
+        },
+    }, {
+        # source format unavailable, additional mp4 formats
+        'url': 'http://www.newgrounds.com/portal/view/689400',
+        'info_dict': {
+            'id': '689400',
+            'ext': 'mp4',
+            'title': 'ZTV News Episode 8',
+            'uploader': 'BennettTheSage',
+            'timestamp': 1487965140,
+            'upload_date': '20170224',
+        },
+        'params': {
+            'skip_download': True,
         },
     }]

     def _real_extract(self, url):
         media_id = self._match_id(url)

         webpage = self._download_webpage(url, media_id)

         title = self._html_search_regex(
             r'<title>([^>]+)</title>', webpage, 'title')

-        uploader = self._html_search_regex(
-            r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
+        media_url = self._parse_json(self._search_regex(
+            r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)

-        music_url = self._parse_json(self._search_regex(
-            r'"url":("[^"]+"),', webpage, ''), media_id)
+        formats = [{
+            'url': media_url,
+            'format_id': 'source',
+            'quality': 1,
+        }]
+
+        max_resolution = int_or_none(self._search_regex(
+            r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
+            default=None))
+        if max_resolution:
+            url_base = media_url.rpartition('.')[0]
+            for resolution in (360, 720, 1080):
+                if resolution > max_resolution:
+                    break
+                formats.append({
+                    'url': '%s.%dp.mp4' % (url_base, resolution),
+                    'format_id': '%dp' % resolution,
+                    'height': resolution,
+                })
+
+        self._check_formats(formats, media_id)
+        self._sort_formats(formats)
+
+        uploader = self._search_regex(
+            r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+            fatal=False)
+
+        timestamp = unified_timestamp(self._search_regex(
+            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+            default=None))
+        duration = parse_duration(self._search_regex(
+            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
+            default=None))
+
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+            default=None))
+        if len(formats) == 1:
+            formats[0]['filesize_approx'] = filesize_approx
+
+        if '<dd>Song' in webpage:
+            formats[0]['vcodec'] = 'none'
+
         return {
             'id': media_id,
             'title': title,
-            'url': music_url,
             'uploader': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
         }
+
+
+class NewgroundsPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.newgrounds.com/collection/cats',
+        'info_dict': {
+            'id': 'cats',
+            'title': 'Cats',
+        },
+        'playlist_mincount': 46,
+    }, {
+        'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
+        'info_dict': {
+            'id': 'ZONE-SAMA',
+            'title': 'Portal Search: ZONE-SAMA',
+        },
+        'playlist_mincount': 47,
+    }, {
+        'url': 'http://www.newgrounds.com/audio/search/title/cats',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        title = self._search_regex(
+            r'<title>([^>]+)</title>', webpage, 'title', default=None)
+
+        # cut left menu
+        webpage = self._search_regex(
+            r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
+            webpage, 'wide column', default=webpage)
+
+        entries = []
+        for a, path, media_id in re.findall(
+                r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
+                webpage):
+            a_class = extract_attributes(a).get('class')
+            if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
+                continue
+            entries.append(
+                self.url_result(
+                    'https://www.newgrounds.com/%s' % path,
+                    ie=NewgroundsIE.ie_key(), video_id=media_id))
+
+        return self.playlist_result(entries, playlist_id, title)
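Note: the new code derives extra MP4 renditions from `max_resolution` by rewriting the source URL. A standalone sketch of that ladder, mirroring the logic added above; the sample media URL is illustrative, not taken from the site:

```python
def build_format_ladder(media_url, max_resolution):
    # Source format first, then fixed 360/720/1080 rungs capped by max_resolution
    formats = [{'url': media_url, 'format_id': 'source', 'quality': 1}]
    url_base = media_url.rpartition('.')[0]
    for resolution in (360, 720, 1080):
        if resolution > max_resolution:
            break
        formats.append({
            'url': '%s.%dp.mp4' % (url_base, resolution),
            'format_id': '%dp' % resolution,
            'height': resolution,
        })
    return formats

print(build_format_ladder('https://cdn.example.com/689400_alternate.mp4', 720))
# source + 360p + 720p entries; 1080p is skipped because it exceeds the cap
```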
youtube_dl/extractor/njpwworld.py
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    extract_attributes,
     get_element_by_class,
     urlencode_postdata,
 )
@@ -56,17 +57,24 @@ class NJPWWorldIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

         formats = []
-        for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
-            player_url = compat_urlparse.urljoin(url, player_url)
+        for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
+            player = extract_attributes(mobj.group(0))
+            player_path = player.get('href')
+            if not player_path:
+                continue
+            kind = self._search_regex(
+                r'(low|high)$', player.get('class') or '', 'kind',
+                default='low')
+            player_url = compat_urlparse.urljoin(url, player_path)
             player_page = self._download_webpage(
                 player_url, video_id, note='Downloading player page')

             entries = self._parse_html5_media_entries(
                 player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
-                m3u8_entry_protocol='m3u8_native',
-                preference=2 if 'hq' in kind else 1)
-            formats.extend(entries[0]['formats'])
+                m3u8_entry_protocol='m3u8_native')
+            kind_formats = entries[0]['formats']
+            for f in kind_formats:
+                f['quality'] = 2 if kind == 'high' else 1
+            formats.extend(kind_formats)

         self._sort_formats(formats)

youtube_dl/extractor/nonktube.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .nuevo import NuevoBaseIE
+
+
+class NonkTubeIE(NuevoBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nonktube\.com/(?:(?:video|embed)/|media/nuevo/embed\.php\?.*?\bid=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.nonktube.com/video/118636/sensual-wife-uncensored-fucked-in-hairy-pussy-and-facialized',
+        'info_dict': {
+            'id': '118636',
+            'ext': 'mp4',
+            'title': 'Sensual Wife Uncensored Fucked In Hairy Pussy And Facialized',
+            'age_limit': 18,
+            'duration': 1150.98,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.nonktube.com/embed/118636',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._extract_nuevo(
+            'https://www.nonktube.com/media/nuevo/econfig.php?key=%s'
+            % video_id, video_id)
+
+        info['age_limit'] = 18
+        return info
youtube_dl/extractor/nuevo.py
@@ -10,9 +10,10 @@ from ..utils import (


 class NuevoBaseIE(InfoExtractor):
-    def _extract_nuevo(self, config_url, video_id):
+    def _extract_nuevo(self, config_url, video_id, headers={}):
         config = self._download_xml(
-            config_url, video_id, transform_source=lambda s: s.strip())
+            config_url, video_id, transform_source=lambda s: s.strip(),
+            headers=headers)

         title = xpath_text(config, './title', 'title', fatal=True).strip()
         video_id = xpath_text(config, './mediaid', default=video_id)
@@ -2,8 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import calendar
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
@@ -144,77 +142,25 @@ class ORFTVthekIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ORFOE1IE(InfoExtractor):
|
class ORFRadioIE(InfoExtractor):
|
||||||
IE_NAME = 'orf:oe1'
|
|
||||||
IE_DESC = 'Radio Österreich 1'
|
|
||||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
|
|
||||||
|
|
||||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
show_id = self._match_id(url)
|
|
||||||
data = self._download_json(
|
|
||||||
'http://oe1.orf.at/programm/%s/konsole' % show_id,
|
|
||||||
show_id
|
|
||||||
)
|
|
||||||
|
|
||||||
timestamp = datetime.datetime.strptime('%s %s' % (
|
|
||||||
data['item']['day_label'],
|
|
||||||
data['item']['time']
|
|
||||||
), '%d.%m.%Y %H:%M')
|
|
||||||
unix_timestamp = calendar.timegm(timestamp.utctimetuple())
|
|
||||||
|
|
-        return {
-            'id': show_id,
-            'title': data['item']['title'],
-            'url': data['item']['url_stream'],
-            'ext': 'mp3',
-            'description': data['item'].get('info'),
-            'timestamp': unix_timestamp
-        }
-
-
-class ORFFM4IE(InfoExtractor):
-    IE_NAME = 'orf:fm4'
-    IE_DESC = 'radio FM4'
-    _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
-
-    _TEST = {
-        'url': 'http://fm4.orf.at/player/20160110/IS/',
-        'md5': '01e736e8f1cef7e13246e880a59ad298',
-        'info_dict': {
-            'id': '2016-01-10_2100_tl_54_7DaysSun13_11244',
-            'ext': 'mp3',
-            'title': 'Im Sumpf',
-            'description': 'md5:384c543f866c4e422a55f66a62d669cd',
-            'duration': 7173,
-            'timestamp': 1452456073,
-            'upload_date': '20160110',
-        },
-        'skip': 'Live streams on FM4 got deleted soon',
-    }
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        station = mobj.group('station')
         show_date = mobj.group('date')
         show_id = mobj.group('show')

+        if station == 'fm4':
+            show_id = '4%s' % show_id
+
         data = self._download_json(
-            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+            'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
             show_id
         )

         def extract_entry_dict(info, title, subtitle):
             return {
                 'id': info['loopStreamId'].replace('.mp3', ''),
-                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
                 'title': title,
                 'description': subtitle,
                 'duration': (info['end'] - info['start']) / 1000,
@@ -233,6 +179,47 @@ class ORFFM4IE(InfoExtractor):
         }


+class ORFFM4IE(ORFRadioIE):
+    IE_NAME = 'orf:fm4'
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    _TEST = {
+        'url': 'http://fm4.orf.at/player/20170107/CC',
+        'md5': '2b0be47375432a7ef104453432a19212',
+        'info_dict': {
+            'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
+            'ext': 'mp3',
+            'title': 'Solid Steel Radioshow',
+            'description': 'Die Mixshow von Coldcut und Ninja Tune.',
+            'duration': 3599,
+            'timestamp': 1483819257,
+            'upload_date': '20170107',
+        },
+        'skip': 'Shows from ORF radios are only available for 7 days.'
+    }
+
+
+class ORFOE1IE(ORFRadioIE):
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    _TEST = {
+        'url': 'http://oe1.orf.at/player/20170108/456544',
+        'md5': '34d8a6e67ea888293741c86a099b745b',
+        'info_dict': {
+            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
+            'ext': 'mp3',
+            'title': 'Morgenjournal',
+            'duration': 609,
+            'timestamp': 1483858796,
+            'upload_date': '20170108',
+        },
+        'skip': 'Shows from ORF radios are only available for 7 days.'
+    }
+
+
 class ORFIPTVIE(InfoExtractor):
     IE_NAME = 'orf:iptv'
     IE_DESC = 'iptv.ORF.at'
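Under the refactor above, the shared ORFRadioIE carries all of the request logic, so a station class only declares its metadata and a _VALID_URL whose station group feeds the common API URL. A minimal sketch of how one more station could be wired up under the new base class; the class name, IE names and station slug below are hypothetical and not part of this changeset:

class ORFWIENIE(ORFRadioIE):  # hypothetical subclass, mirrors ORFFM4IE/ORFOE1IE above
    IE_NAME = 'orf:wien'
    IE_DESC = 'Radio Wien'
    _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
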
@@ -1,9 +1,13 @@
 from __future__ import unicode_literals

+import json
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -34,6 +38,25 @@ class PacktPubIE(PacktPubBaseIE):
             'upload_date': '20170331',
         },
     }
+    _NETRC_MACHINE = 'packtpub'
+    _TOKEN = None
+
+    def _real_initialize(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        try:
+            self._TOKEN = self._download_json(
+                self._MAPT_REST + '/users/tokens', None,
+                'Downloading Authorization Token', data=json.dumps({
+                    'email': username,
+                    'password': password,
+                }).encode())['data']['access']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
+                message = self._parse_json(e.cause.read().decode(), None)['message']
+                raise ExtractorError(message, expected=True)
+            raise

     def _handle_error(self, response):
         if response.get('status') != 'success':
@@ -51,14 +74,17 @@ class PacktPubIE(PacktPubBaseIE):
         course_id, chapter_id, video_id = mobj.group(
             'course_id', 'chapter_id', 'id')

+        headers = {}
+        if self._TOKEN:
+            headers['Authorization'] = 'Bearer ' + self._TOKEN
         video = self._download_json(
             '%s/users/me/products/%s/chapters/%s/sections/%s'
             % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
-            'Downloading JSON video')['data']
+            'Downloading JSON video', headers=headers)['data']

         content = video.get('content')
         if not content:
-            raise ExtractorError('This video is locked', expected=True)
+            self.raise_login_required('This video is locked')

         video_url = content['file']

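For reference, the token exchange performed by the new _real_initialize boils down to a single JSON POST. A standalone sketch using only the standard library follows; the base URL is a placeholder (the real value comes from _MAPT_REST, defined elsewhere in packtpub.py and not visible in this hunk), so treat it as an illustration of the request shape rather than a drop-in client:

import json
from urllib.request import Request, urlopen

MAPT_REST = 'https://example-mapt-api'  # placeholder; real base URL not shown in this hunk

def fetch_packtpub_token(email, password):
    # POST the credentials as JSON and read data.access, as _real_initialize above does
    req = Request(
        MAPT_REST + '/users/tokens',
        data=json.dumps({'email': email, 'password': password}).encode())
    with urlopen(req) as resp:
        return json.loads(resp.read().decode())['data']['access']

The extractor then sends the token as an 'Authorization: Bearer <token>' header on the per-section request, as the last hunk shows.
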
@@ -252,11 +252,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):

         playlist = self._parse_json(
             self._search_regex(
-                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
-            playlist_id)
+                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
+                'playlist', default='{}'),
+            playlist_id, fatal=False)
+        title = playlist.get('title') or self._search_regex(
+            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)

         return self.playlist_result(
-            entries, playlist_id, playlist.get('title'), playlist.get('description'))
+            entries, playlist_id, title, playlist.get('description'))


 class PornHubPlaylistIE(PornHubPlaylistBaseIE):
@@ -296,6 +299,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
             except ExtractorError as e:
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                     break
+                raise
             page_entries = self._extract_entries(webpage)
             if not page_entries:
                 break

@@ -15,7 +15,7 @@ class RtlNlIE(InfoExtractor):
         https?://(?:www\.)?
         (?:
             rtlxl\.nl/[^\#]*\#!/[^/]+/|
-            rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
+            rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
         )
         (?P<id>[0-9a-f-]+)'''

@@ -70,6 +70,9 @@ class RtlNlIE(InfoExtractor):
     }, {
         'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
         'only_matching': True,
+    }, {
+        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

@@ -13,11 +13,15 @@ from ..utils import (
 class RUTVIE(InfoExtractor):
     IE_DESC = 'RUTV.RU'
     _VALID_URL = r'''(?x)
-        https?://player\.(?:rutv\.ru|vgtrk\.com)/
-        (?P<path>flash\d+v/container\.swf\?id=
-        |iframe/(?P<type>swf|video|live)/id/
-        |index/iframe/cast_id/)
-        (?P<id>\d+)'''
+        https?://
+            (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
+            (?P<path>
+                flash\d+v/container\.swf\?id=|
+                iframe/(?P<type>swf|video|live)/id/|
+                index/iframe/cast_id/
+            )
+            (?P<id>\d+)
+        '''

     _TESTS = [
         {
@@ -99,17 +103,21 @@ class RUTVIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
+            'only_matching': True,
+        },
     ]

     @classmethod
     def _extract_url(cls, webpage):
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
         if mobj:
             return mobj.group('url')

         mobj = re.search(
-            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
             webpage)
         if mobj:
             return mobj.group('url')

@@ -16,7 +16,6 @@ from ..utils import (

 class SafariBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
-    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
     _NETRC_MACHINE = 'safari'

     _API_BASE = 'https://www.safaribooksonline.com/api/v1'
@@ -28,10 +27,6 @@ class SafariBaseIE(InfoExtractor):
         self._login()

     def _login(self):
-        # We only need to log in once for courses or individual videos
-        if self.LOGGED_IN:
-            return
-
         (username, password) = self._get_login_info()
         if username is None:
             return
@@ -39,11 +34,17 @@ class SafariBaseIE(InfoExtractor):
         headers = std_headers.copy()
         if 'Referer' not in headers:
             headers['Referer'] = self._LOGIN_URL
-        login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)

         login_page = self._download_webpage(
-            login_page_request, None,
-            'Downloading login form')
+            self._LOGIN_URL, None, 'Downloading login form', headers=headers)
+
+        def is_logged(webpage):
+            return any(re.search(p, webpage) for p in (
+                r'href=["\']/accounts/logout/', r'>Sign Out<'))
+
+        if is_logged(login_page):
+            self.LOGGED_IN = True
+            return

         csrf = self._html_search_regex(
             r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
@@ -62,14 +63,12 @@ class SafariBaseIE(InfoExtractor):
         login_page = self._download_webpage(
             request, None, 'Logging in as %s' % username)

-        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+        if not is_logged(login_page):
             raise ExtractorError(
                 'Login failed; make sure your credentials are correct and try again.',
                 expected=True)

-        SafariBaseIE.LOGGED_IN = True
-
-        self.to_screen('Login successful')
+        self.LOGGED_IN = True


 class SafariIE(SafariBaseIE):

@@ -32,8 +32,9 @@ class SexuIE(InfoExtractor):
         formats = [{
             'url': source['file'].replace('\\', ''),
             'format_id': source.get('label'),
-            'height': self._search_regex(
-                r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+            'height': int(self._search_regex(
+                r'^(\d+)[pP]', source.get('label', ''), 'height',
+                default=None)),
         } for source in sources if source.get('file')]
         self._sort_formats(formats)

@@ -8,7 +8,11 @@ from ..compat import (
     compat_str,
     compat_urllib_parse_urlencode,
 )
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+)


 class SohuIE(InfoExtractor):
@@ -169,10 +173,11 @@ class SohuIE(InfoExtractor):
             formats.append({
                 'url': video_url,
                 'format_id': format_id,
-                'filesize': data['clipsBytes'][i],
-                'width': data['width'],
-                'height': data['height'],
-                'fps': data['fps'],
+                'filesize': int_or_none(
+                    try_get(data, lambda x: x['clipsBytes'][i])),
+                'width': int_or_none(data.get('width')),
+                'height': int_or_none(data.get('height')),
+                'fps': int_or_none(data.get('fps')),
             })
         self._sort_formats(formats)

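The new format dict leans on two youtube-dl helpers: int_or_none coerces a value to int and returns None on failure, and try_get runs a getter inside a try/except so a missing key or index yields None instead of an exception. A simplified re-implementation of try_get for illustration only (the real helper in youtube_dl/utils.py also accepts an expected_type argument):

def try_get(src, getter):
    # return getter(src), or None if the lookup fails on missing data
    try:
        return getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None

data = {'clipsBytes': [123456]}
assert try_get(data, lambda x: x['clipsBytes'][0]) == 123456
assert try_get(data, lambda x: x['clipsBytes'][5]) is None  # out-of-range index, no IndexError
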
@@ -21,6 +21,17 @@ class StreamangoIE(InfoExtractor):
             'ext': 'mp4',
             'title': '20170315_150006.mp4',
         }
+    }, {
+        # no og:title
+        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
+        'info_dict': {
+            'id': 'foqebrpftarclpob',
+            'ext': 'mp4',
+            'title': 'foqebrpftarclpob',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
         'only_matching': True,
@@ -31,7 +42,7 @@ class StreamangoIE(InfoExtractor):

         webpage = self._download_webpage(url, video_id)

-        title = self._og_search_title(webpage)
+        title = self._og_search_title(webpage, default=video_id)

         formats = []
         for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):

@@ -26,7 +26,7 @@ class StreamCZIE(InfoExtractor):

     _TESTS = [{
         'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
-        'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
+        'md5': '934bb6a6d220d99c010783c9719960d5',
         'info_dict': {
             'id': '765767',
             'ext': 'mp4',
@@ -37,7 +37,7 @@ class StreamCZIE(InfoExtractor):
         },
     }, {
         'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
-        'md5': 'e54a254fb8b871968fd8403255f28589',
+        'md5': '849a88c1e1ca47d41403c2ba5e59e261',
         'info_dict': {
             'id': '10002447',
             'ext': 'mp4',
@@ -85,6 +85,14 @@ class StreamCZIE(InfoExtractor):
         else:
             title = data['name']

+        subtitles = {}
+        srt_url = data.get('subtitles_srt')
+        if srt_url:
+            subtitles['cs'] = [{
+                'ext': 'srt',
+                'url': srt_url,
+            }]
+
         return {
             'id': video_id,
             'title': title,
@@ -93,4 +101,5 @@ class StreamCZIE(InfoExtractor):
             'description': data.get('web_site_text'),
             'duration': int_or_none(data.get('duration')),
             'view_count': int_or_none(data.get('views')),
+            'subtitles': subtitles,
         }

@@ -3,10 +3,6 @@ from __future__ import unicode_literals
 from .common import InfoExtractor

 from ..compat import compat_urlparse
-from ..utils import (
-    int_or_none,
-    qualities,
-)


 class TheSceneIE(InfoExtractor):
@@ -24,6 +20,9 @@ class TheSceneIE(InfoExtractor):
             'season': 'Ready To Wear Spring 2013',
             'tags': list,
             'categories': list,
+            'upload_date': '20120913',
+            'timestamp': 1347512400,
+            'uploader': 'vogue',
         },
     }

@@ -37,32 +36,9 @@ class TheSceneIE(InfoExtractor):
             self._html_search_regex(
                 r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))

-        player = self._download_webpage(player_url, display_id)
-        info = self._parse_json(
-            self._search_regex(
-                r'(?m)video\s*:\s*({.+?}),$', player, 'info json'),
-            display_id)
-
-        video_id = info['id']
-        title = info['title']
-
-        qualities_order = qualities(('low', 'high'))
-        formats = [{
-            'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
-            'url': f['src'],
-            'quality': qualities_order(f['quality']),
-        } for f in info['sources']]
-        self._sort_formats(formats)
-
         return {
-            'id': video_id,
+            '_type': 'url_transparent',
             'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': info.get('poster_frame'),
-            'duration': int_or_none(info.get('duration')),
-            'series': info.get('series_title'),
-            'season': info.get('season_title'),
-            'tags': info.get('tags'),
-            'categories': info.get('categories'),
+            'url': player_url,
+            'ie_key': 'CondeNast',
         }

@@ -17,7 +17,7 @@ from ..utils import (

 class ToggleIE(InfoExtractor):
     IE_NAME = 'toggle'
-    _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
         'info_dict': {
@@ -73,6 +73,12 @@ class ToggleIE(InfoExtractor):
     }, {
         'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
         'only_matching': True,
+    }, {
+        'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
+        'only_matching': True,
     }]

     _FORMAT_PREFERENCES = {

@@ -6,42 +6,48 @@ import re


 class ToypicsIE(InfoExtractor):
-    IE_DESC = 'Toypics user profile'
-    _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+    IE_DESC = 'Toypics video'
+    _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
         'md5': '16e806ad6d6f58079d210fe30985e08b',
         'info_dict': {
             'id': '514',
             'ext': 'mp4',
-            'title': 'Chance-Bulge\'d, 2',
+            'title': "Chance-Bulge'd, 2",
             'age_limit': 18,
             'uploader': 'kidsune',
         }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        page = self._download_webpage(url, video_id)
-        video_url = self._html_search_regex(
-            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
-        title = self._html_search_regex(
-            r'<title>Toypics - ([^<]+)</title>', page, 'title')
-        username = self._html_search_regex(
-            r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = self._parse_html5_media_entries(
+            url, webpage, video_id)[0]['formats']
+        title = self._html_search_regex([
+            r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
+            r'<title>([^<]+) - Toypics</title>',
+        ], webpage, 'title')
+
+        uploader = self._html_search_regex(
+            r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
+            fatal=False)

         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'title': title,
-            'uploader': username,
+            'uploader': uploader,
             'age_limit': 18,
         }


 class ToypicsUserIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
-    _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://videos.toypics.net/Mikey',
         'info_dict': {
@@ -51,8 +57,7 @@ class ToypicsUserIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        username = mobj.group('username')
+        username = self._match_id(url)

         profile_page = self._download_webpage(
             url, username, note='Retrieving profile page')
@@ -71,7 +76,7 @@ class ToypicsUserIE(InfoExtractor):
                 note='Downloading page %d/%d' % (n, page_count))
             urls.extend(
                 re.findall(
-                    r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
+                    r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
                     lpage))

         return {

@@ -3,138 +3,6 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    InAdvancePagedList,
-    float_or_none,
-    unescapeHTML,
-)
-
-
-class TudouIE(InfoExtractor):
-    IE_NAME = 'tudou'
-    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
-    _TESTS = [{
-        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
-        'md5': '140a49ed444bd22f93330985d8475fcb',
-        'info_dict': {
-            'id': '159448201',
-            'ext': 'f4v',
-            'title': '卡马乔国足开大脚长传冲吊集锦',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1372113489000,
-            'description': '卡马乔卡家军,开大脚先进战术不完全集锦!',
-            'duration': 289.04,
-            'view_count': int,
-            'filesize': int,
-        }
-    }, {
-        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
-        'info_dict': {
-            'id': '117049447',
-            'ext': 'f4v',
-            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1349207518000,
-            'description': 'md5:294612423894260f2dcd5c6c04fe248b',
-            'duration': 5478.33,
-            'view_count': int,
-            'filesize': int,
-        }
-    }]
-
-    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
-
-    # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
-    # 0001, 0002 and 4001 are not included as they indicate temporary issues
-    TVC_ERRORS = {
-        '0003': 'The video is deleted or does not exist',
-        '1001': 'This video is unavailable due to licensing issues',
-        '1002': 'This video is unavailable as it\'s under review',
-        '1003': 'This video is unavailable as it\'s under review',
-        '3001': 'Password required',
-        '5001': 'This video is available in Mainland China only due to licensing issues',
-        '7001': 'This video is unavailable',
-        '8001': 'This video is unavailable due to licensing issues',
-    }
-
-    def _url_for_id(self, video_id, quality=None):
-        info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
-        if quality:
-            info_url += '&hd' + quality
-        xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
-        error = xml_data.attrib.get('error')
-        if error is not None:
-            raise ExtractorError('Tudou said: %s' % error, expected=True)
-        final_url = xml_data.text
-        return final_url
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        item_data = self._download_json(
-            'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
-
-        youku_vcode = item_data.get('vcode')
-        if youku_vcode:
-            return self.url_result('youku:' + youku_vcode, ie='Youku')
-
-        if not item_data.get('itemSegs'):
-            tvc_code = item_data.get('tvcCode')
-            if tvc_code:
-                err_msg = self.TVC_ERRORS.get(tvc_code)
-                if err_msg:
-                    raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
-                raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
-            raise ExtractorError('Unxpected error returned from Tudou')
-
-        title = unescapeHTML(item_data['kw'])
-        description = item_data.get('desc')
-        thumbnail_url = item_data.get('pic')
-        view_count = int_or_none(item_data.get('playTimes'))
-        timestamp = int_or_none(item_data.get('pt'))
-
-        segments = self._parse_json(item_data['itemSegs'], video_id)
-        # It looks like the keys are the arguments that have to be passed as
-        # the hd field in the request url, we pick the higher
-        # Also, filter non-number qualities (see issue #3643).
-        quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
-                         key=lambda k: int(k))[-1]
-        parts = segments[quality]
-        len_parts = len(parts)
-        if len_parts > 1:
-            self.to_screen('%s: found %s parts' % (video_id, len_parts))
-
-        def part_func(partnum):
-            part = parts[partnum]
-            part_id = part['k']
-            final_url = self._url_for_id(part_id, quality)
-            ext = (final_url.split('?')[0]).split('.')[-1]
-            return [{
-                'id': '%s' % part_id,
-                'url': final_url,
-                'ext': ext,
-                'title': title,
-                'thumbnail': thumbnail_url,
-                'description': description,
-                'view_count': view_count,
-                'timestamp': timestamp,
-                'duration': float_or_none(part.get('seconds'), 1000),
-                'filesize': int_or_none(part.get('size')),
-                'http_headers': {
-                    'Referer': self._PLAYER_URL,
-                },
-            }]
-
-        entries = InAdvancePagedList(part_func, len_parts, 1)
-
-        return {
-            '_type': 'multi_video',
-            'entries': entries,
-            'id': video_id,
-            'title': title,
-        }


 class TudouPlaylistIE(InfoExtractor):

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -49,7 +50,7 @@ class TurboIE(InfoExtractor):
         for child in item:
             m = re.search(r'url_video_(?P<quality>.+)', child.tag)
             if m:
-                quality = m.group('quality')
+                quality = compat_str(m.group('quality'))
                 formats.append({
                     'format_id': quality,
                     'url': child.text,

@@ -13,6 +13,7 @@ from ..utils import (
     xpath_attr,
     update_url_query,
     ExtractorError,
+    strip_or_none,
 )


@@ -163,17 +164,21 @@ class TurnerBaseIE(AdobePassIE):
             'height': int_or_none(image.get('height')),
         } for image in video_data.findall('images/image')]

+        is_live = xpath_text(video_data, 'isLive') == 'true'
+
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'formats': formats,
             'subtitles': subtitles,
             'thumbnails': thumbnails,
-            'description': xpath_text(video_data, 'description'),
+            'thumbnail': xpath_text(video_data, 'poster'),
+            'description': strip_or_none(xpath_text(video_data, 'description')),
             'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
             'timestamp': self._extract_timestamp(video_data),
             'upload_date': xpath_attr(video_data, 'metas', 'version'),
             'series': xpath_text(video_data, 'showTitle'),
             'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+            'is_live': is_live,
         }

@@ -48,7 +48,7 @@ class TVPlayerIE(InfoExtractor):
             'https://tvplayer.com/watch/context', display_id,
             'Downloading JSON context', query={
                 'resource': resource_id,
-                'nonce': token,
+                'gen': token,
             })

         validate = context['validate']

@@ -52,6 +52,10 @@ class UdemyIE(InfoExtractor):
         # new URL schema
         'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
         'only_matching': True,
+    }, {
+        # no url in outputs format entry
+        'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
+        'only_matching': True,
     }]

     def _extract_course_info(self, webpage, video_id):
@@ -219,7 +223,7 @@ class UdemyIE(InfoExtractor):

         def extract_output_format(src, f_id):
             return {
-                'url': src['url'],
+                'url': src.get('url'),
                 'format_id': '%sp' % (src.get('height') or f_id),
                 'width': int_or_none(src.get('width')),
                 'height': int_or_none(src.get('height')),

youtube_dl/extractor/upskill.py (new file, 176 lines)
@@ -0,0 +1,176 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .wistia import WistiaIE
+from ..compat import compat_str
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    get_element_by_class,
+    urlencode_postdata,
+    urljoin,
+)
+
+
+class UpskillBaseIE(InfoExtractor):
+    _LOGIN_URL = 'http://upskillcourses.com/sign_in'
+    _NETRC_MACHINE = 'upskill'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page, urlh = self._download_webpage_handle(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        login_url = compat_str(urlh.geturl())
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'user[email]': username,
+            'user[password]': password,
+        })
+
+        post_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
+            'post url', default=login_url, group='url')
+
+        if not post_url.startswith('http'):
+            post_url = urljoin(login_url, post_url)
+
+        response = self._download_webpage(
+            post_url, None, 'Logging in',
+            data=urlencode_postdata(login_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': login_url,
+            })
+
+        # Successful login
+        if any(re.search(p, response) for p in (
+                r'class=["\']user-signout',
+                r'<a[^>]+\bhref=["\']/sign_out',
+                r'>\s*Log out\s*<')):
+            return
+
+        message = get_element_by_class('alert', response)
+        if message is not None:
+            raise ExtractorError(
+                'Unable to login: %s' % clean_html(message), expected=True)
+
+        raise ExtractorError('Unable to log in')
+
+
+class UpskillIE(UpskillBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+        'info_dict': {
+            'id': 'uzw6zw58or',
+            'ext': 'mp4',
+            'title': 'Welcome to the Course!',
+            'description': 'md5:8d66c13403783370af62ca97a7357bdd',
+            'duration': 138.763,
+            'timestamp': 1479846621,
+            'upload_date': '20161122',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        wistia_url = WistiaIE._extract_url(webpage)
+        if not wistia_url:
+            if any(re.search(p, webpage) for p in (
+                    r'class=["\']lecture-contents-locked',
+                    r'>\s*Lecture contents locked',
+                    r'id=["\']lecture-locked')):
+                self.raise_login_required('Lecture contents locked')
+
+        title = self._og_search_title(webpage, default=None)
+
+        return {
+            '_type': 'url_transparent',
+            'url': wistia_url,
+            'ie_key': WistiaIE.ie_key(),
+            'title': title,
+        }
+
+
+class UpskillCourseIE(UpskillBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
+        'info_dict': {
+            'id': '119763',
+            'title': 'The Essential Web Developer Course (Free)',
+        },
+        'playlist_count': 192,
+    }, {
+        'url': 'http://upskillcourses.com/courses/119763/',
+        'only_matching': True,
+    }, {
+        'url': 'http://upskillcourses.com/courses/enrolled/119763',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if UpskillIE.suitable(url) else super(
+            UpskillCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, course_id)
+
+        course_id = self._search_regex(
+            r'data-course-id=["\'](\d+)', webpage, 'course id',
+            default=course_id)
+
+        entries = []
+
+        for mobj in re.finditer(
+                r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
+                webpage):
+            li = mobj.group('li')
+            if 'fa-youtube-play' not in li:
+                continue
+            lecture_url = self._search_regex(
+                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
+                'lecture url', default=None, group='url')
+            if not lecture_url:
+                continue
+            lecture_id = self._search_regex(
+                r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
+            title = self._html_search_regex(
+                r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
+                'title', default=None)
+            entries.append(
+                self.url_result(
+                    urljoin('http://upskillcourses.com/', lecture_url),
+                    ie=UpskillIE.ie_key(), video_id=lecture_id,
+                    video_title=clean_html(title)))
+
+        course_title = self._html_search_regex(
+            (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
+             r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
+            webpage, 'course title', fatal=False)
+
+        return self.playlist_result(entries, course_id, course_title)

@@ -5,24 +5,30 @@ import re
 import itertools

 from .common import InfoExtractor
+from ..utils import (
+    urlencode_postdata,
+    int_or_none,
+    unified_strdate,
+)


 class VierIE(InfoExtractor):
     IE_NAME = 'vier'
     IE_DESC = 'vier.be and vijf.be'
     _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+    _NETRC_MACHINE = 'vier'
     _TESTS = [{
         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+        'md5': 'e4ae2054a6b040ef1e289e20d111b46e',
         'info_dict': {
             'id': '16129',
             'display_id': 'het-wordt-warm-de-moestuin',
             'ext': 'mp4',
             'title': 'Het wordt warm in De Moestuin',
             'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'upload_date': '20121025',
+            'series': 'Plan B',
+            'tags': ['De Moestuin', 'Moestuin', 'meisjes', 'Tomaat', 'Wim', 'Droom'],
         },
     }, {
         'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
@@ -30,32 +36,103 @@ class VierIE(InfoExtractor):
             'id': '2561614',
             'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
             'ext': 'mp4',
-            'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
-            'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
+            'title': 'md5:84f45fe48b8c1fa296a7f6d208d080a7',
+            'description': 'md5:0356d4981e58b8cbee19355cbd51a8fe',
+            'upload_date': '20170228',
+            'series': 'Temptation Island',
+            'tags': list,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
     }, {
-        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
+        'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
+        'info_dict': {
+            'id': '2674839',
+            'display_id': 'jani-gaat-naar-tokio-aflevering-4',
+            'ext': 'mp4',
+            'title': 'Jani gaat naar Tokio - Aflevering 4',
+            'description': 'md5:aa8d611541db6ae9e863125704511f88',
+            'upload_date': '20170501',
+            'series': 'Jani gaat',
+            'episode_number': 4,
+            'tags': ['Jani Gaat', 'Volledige Aflevering'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires account credentials',
+    }, {
+        # Requires account credentials but bypassed extraction via v3/embed page
+        # without metadata
+        'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
+        'info_dict': {
+            'id': '2674839',
+            'display_id': 'jani-gaat-naar-tokio-aflevering-4',
+            'ext': 'mp4',
+            'title': 'jani-gaat-naar-tokio-aflevering-4',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'expected_warnings': ['Log in to extract metadata'],
+    }, {
+        # Without video id in URL
+        'url': 'http://www.vier.be/planb/videos/dit-najaar-plan-b',
         'only_matching': True,
     }, {
         'url': 'http://www.vier.be/video/v3/embed/16129',
         'only_matching': True,
     }]

+    def _real_initialize(self):
+        self._logged_in = False
+
+    def _login(self, site):
+        username, password = self._get_login_info()
+        if username is None or password is None:
+            return
+
+        login_page = self._download_webpage(
+            'http://www.%s.be/user/login' % site,
+            None, note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata({
+                'form_id': 'user_login',
+                'name': username,
+                'pass': password,
+            }),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        login_error = self._html_search_regex(
+            r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<',
+            login_page, 'login error', default=None)
+        if login_error:
+            self.report_warning('Unable to log in: %s' % login_error)
+        else:
+            self._logged_in = True
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         embed_id = mobj.group('embed_id')
         display_id = mobj.group('display_id') or embed_id
+        video_id = mobj.group('id') or embed_id
         site = mobj.group('site')

+        if not self._logged_in:
+            self._login(site)
+
         webpage = self._download_webpage(url, display_id)

+        if r'id="user-login"' in webpage:
+            self.report_warning(
+                'Log in to extract metadata', video_id=display_id)
+            webpage = self._download_webpage(
+                'http://www.%s.be/video/v3/embed/%s' % (site, video_id),
+                display_id)
+
         video_id = self._search_regex(
             [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
-            webpage, 'video id')
+            webpage, 'video id', default=video_id or display_id)
         application = self._search_regex(
             [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
             webpage, 'application', default=site + '_vod')
@@ -64,12 +141,25 @@ class VierIE(InfoExtractor):
             webpage, 'filename')

         playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
-        formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
+        formats = self._extract_wowza_formats(
+            playlist_url, display_id, skip_protocols=['dash'])
         self._sort_formats(formats)

         title = self._og_search_title(webpage, default=display_id)
-        description = self._og_search_description(webpage, default=None)
+        description = self._html_search_regex(
+            r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-type-text-with-summary\b[^>]*?\1[^>]*>.*?<p>(?P<value>.+?)</p>',
+            webpage, 'description', default=None, group='value')
         thumbnail = self._og_search_thumbnail(webpage, default=None)
+        upload_date = unified_strdate(self._html_search_regex(
+            r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-name-post-date\b[^>]*?\1[^>]*>.*?(?P<value>\d{2}/\d{2}/\d{4})',
+            webpage, 'upload date', default=None, group='value'))
+
+        series = self._search_regex(
+            r'data-program=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'series', default=None, group='value')
+        episode_number = int_or_none(self._search_regex(
+            r'(?i)aflevering (\d+)', title, 'episode number', default=None))
+        tags = re.findall(r'<a\b[^>]+\bhref=["\']/tags/[^>]+>([^<]+)<', webpage)

         return {
             'id': video_id,
@@ -77,6 +167,10 @@ class VierIE(InfoExtractor):
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'series': series,
+            'episode_number': episode_number,
+            'tags': tags,
             'formats': formats,
         }

@@ -151,10 +151,16 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                 else:
                     mpd_manifest_urls = [(format_id, manifest_url)]
                 for f_id, m_url in mpd_manifest_urls:
-                    formats.extend(self._extract_mpd_formats(
+                    mpd_formats = self._extract_mpd_formats(
                         m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
                         'Downloading %s MPD information' % cdn_name,
-                        fatal=False))
+                        fatal=False)
+                    for f in mpd_formats:
+                        if f.get('vcodec') == 'none':
+                            f['preference'] = -50
+                        elif f.get('acodec') == 'none':
+                            f['preference'] = -40
+                    formats.extend(mpd_formats)

         subtitles = {}
         text_tracks = config['request'].get('text_tracks')

@@ -112,21 +112,41 @@ class VRVIE(VRVBaseIE):

         audio_locale = streams_json.get('audio_locale')
         formats = []
-        for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
-            stream_url = stream.get('url')
-            if not stream_url:
-                continue
-            stream_id = stream_id or audio_locale
-            m3u8_formats = self._extract_m3u8_formats(
-                stream_url, video_id, 'mp4', m3u8_id=stream_id,
-                note='Downloading %s m3u8 information' % stream_id,
-                fatal=False)
-            if audio_locale:
-                for f in m3u8_formats:
-                    f['language'] = audio_locale
-            formats.extend(m3u8_formats)
+        for stream_type, streams in streams_json.get('streams', {}).items():
+            if stream_type in ('adaptive_hls', 'adaptive_dash'):
+                for stream in streams.values():
+                    stream_url = stream.get('url')
+                    if not stream_url:
+                        continue
+                    stream_id = stream.get('hardsub_locale') or audio_locale
+                    format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
+                    if stream_type == 'adaptive_hls':
+                        adaptive_formats = self._extract_m3u8_formats(
+                            stream_url, video_id, 'mp4', m3u8_id=format_id,
+                            note='Downloading %s m3u8 information' % stream_id,
+                            fatal=False)
+                    else:
+                        adaptive_formats = self._extract_mpd_formats(
+                            stream_url, video_id, mpd_id=format_id,
+                            note='Downloading %s MPD information' % stream_id,
+                            fatal=False)
+                    if audio_locale:
+                        for f in adaptive_formats:
+                            if f.get('acodec') != 'none':
+                                f['language'] = audio_locale
+                    formats.extend(adaptive_formats)
         self._sort_formats(formats)

+        subtitles = {}
+        for subtitle in streams_json.get('subtitles', {}).values():
+            subtitle_url = subtitle.get('url')
+            if not subtitle_url:
+                continue
+            subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
+                'url': subtitle_url,
+                'ext': subtitle.get('format', 'ass'),
+            })
+
         thumbnails = []
         for thumbnail in video_data.get('images', {}).get('thumbnails', []):
             thumbnail_url = thumbnail.get('source')
@@ -142,6 +162,7 @@ class VRVIE(VRVBaseIE):
             'id': video_id,
             'title': title,
             'formats': formats,
+            'subtitles': subtitles,
             'thumbnails': thumbnails,
             'description': video_data.get('description'),
             'duration': float_or_none(video_data.get('duration_ms'), 1000),

@@ -1,10 +1,13 @@
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
+    unescapeHTML,
 )


@@ -34,6 +37,25 @@ class WistiaIE(InfoExtractor):
         'only_matching': True,
     }]

+    @staticmethod
+    def _extract_url(webpage):
+        match = re.search(
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+        if match:
+            return unescapeHTML(match.group('url'))
+
+        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
+
+        match = re.search(
+            r'''(?sx)
+                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
+            ''', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)

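The new WistiaIE._extract_url helper returns either a full embed URL or a 'wistia:<id>' pseudo-URL, and it is what the upskill.py extractor added earlier in this compare delegates to. A minimal sketch of how a host-page extractor can use it; the class name and URL pattern here are hypothetical and not part of this changeset:

class SomeWistiaHostIE(InfoExtractor):  # hypothetical host-site extractor
    _VALID_URL = r'https?://(?:www\.)?example\.com/lessons/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        wistia_url = WistiaIE._extract_url(webpage)
        if not wistia_url:
            raise ExtractorError('No Wistia embed found', expected=True)
        # Hand the actual extraction off to WistiaIE
        return self.url_result(wistia_url, ie=WistiaIE.ie_key())
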
|
@@ -10,7 +10,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,6 +29,7 @@ class XFileShareIE(InfoExtractor):
|
|||||||
(r'vidabc\.com', 'Vid ABC'),
|
(r'vidabc\.com', 'Vid ABC'),
|
||||||
(r'vidbom\.com', 'VidBom'),
|
(r'vidbom\.com', 'VidBom'),
|
||||||
(r'vidlo\.us', 'vidlo'),
|
(r'vidlo\.us', 'vidlo'),
|
||||||
|
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
|
||||||
)
|
)
|
||||||
|
|
||||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||||
@@ -109,6 +109,9 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -130,12 +133,12 @@ class XFileShareIE(InfoExtractor):
         if countdown:
             self._sleep(countdown, video_id)
 
-        post = urlencode_postdata(fields)
-
-        req = sanitized_Request(url, post)
-        req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
-        webpage = self._download_webpage(req, video_id, 'Downloading video page')
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading video page',
+            data=urlencode_postdata(fields), headers={
+                'Referer': url,
+                'Content-type': 'application/x-www-form-urlencoded',
+            })
 
         title = (self._search_regex(
             (r'style="z-index: [0-9]+;">([^<]+)</span>',
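
The POST is now issued through _download_webpage itself instead of a hand-built sanitized_Request; urlencode_postdata still produces the body bytes. A rough illustration of what it returns (the field names here are just examples):

from youtube_dl.utils import urlencode_postdata

fields = {'op': 'download2', 'id': 'b667kprndr8w'}  # example form fields
post_body = urlencode_postdata(fields)
print(post_body)  # URL-encoded bytes, suitable for the data= argument above
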
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     int_or_none,
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'Ruseful2011',
             'duration': 893,
             'age_limit': 18,
+            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
         },
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'jojo747400',
             'duration': 200,
             'age_limit': 18,
+            'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
         },
         'params': {
             'skip_download': True,
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'parejafree',
             'duration': 72,
             'age_limit': 18,
+            'categories': ['Amateur', 'Blowjobs'],
         },
         'params': {
             'skip_download': True,
@@ -104,7 +108,7 @@ class XHamsterIE(InfoExtractor):
             webpage, 'upload date', fatal=False))
 
         uploader = self._html_search_regex(
-            r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>',
+            r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
             webpage, 'uploader', default='anonymous')
 
         thumbnail = self._search_regex(
@@ -120,7 +124,7 @@ class XHamsterIE(InfoExtractor):
             r'content=["\']User(?:View|Play)s:(\d+)',
             webpage, 'view count', fatal=False))
 
-        mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
+        mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
         (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
 
         mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        categories_html = self._search_regex(
+            r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+            'categories', default=None)
+        categories = [clean_html(category) for category in re.findall(
+            r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
         return {
             'id': video_id,
             'title': title,
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
             'dislike_count': int_or_none(dislike_count),
             'comment_count': int_or_none(comment_count),
             'age_limit': age_limit,
+            'categories': categories,
             'formats': formats,
         }
 
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import base64
 import itertools
 import random
 import re
@@ -9,15 +8,13 @@ import string
 import time
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_ord,
-    compat_str,
-    compat_urllib_parse_urlencode,
-)
 from ..utils import (
     ExtractorError,
-    get_element_by_attribute,
-    try_get,
+    get_element_by_class,
+    js_to_json,
+    str_or_none,
+    strip_jsonp,
+    urljoin,
 )
 
 
@@ -26,7 +23,9 @@ class YoukuIE(InfoExtractor):
     IE_DESC = '优酷'
     _VALID_URL = r'''(?x)
         (?:
-            http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+            https?://(
+                (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+                video\.tudou\.com/v/)|
         youku:)
         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
     '''
@@ -35,9 +34,15 @@ class YoukuIE(InfoExtractor):
         # MD5 is unstable
         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
         'info_dict': {
-            'id': 'XMTc1ODE5Njcy_part1',
+            'id': 'XMTc1ODE5Njcy',
             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
-            'ext': 'flv'
+            'ext': 'mp4',
+            'duration': 74.73,
+            'thumbnail': r're:^https?://.*',
+            'uploader': '。躲猫猫、',
+            'uploader_id': '36017967',
+            'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
+            'tags': list,
         }
     }, {
         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
@@ -46,25 +51,42 @@ class YoukuIE(InfoExtractor):
         'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
         'info_dict': {
             'id': 'XODgxNjg1Mzk2',
+            'ext': 'mp4',
             'title': '武媚娘传奇 85',
+            'duration': 1999.61,
+            'thumbnail': r're:^https?://.*',
+            'uploader': '疯狂豆花',
+            'uploader_id': '62583473',
+            'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
+            'tags': list,
         },
-        'playlist_count': 11,
-        'skip': 'Available in China only',
     }, {
         'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
         'info_dict': {
             'id': 'XMTI1OTczNDM5Mg',
+            'ext': 'mp4',
             'title': '花千骨 04',
+            'duration': 2363,
+            'thumbnail': r're:^https?://.*',
+            'uploader': '放剧场-花千骨',
+            'uploader_id': '772849359',
+            'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
+            'tags': list,
         },
-        'playlist_count': 13,
     }, {
         'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
         'note': 'Video protected with password',
         'info_dict': {
             'id': 'XNjA1NzA2Njgw',
+            'ext': 'mp4',
             'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
+            'duration': 7264.5,
+            'thumbnail': r're:^https?://.*',
+            'uploader': 'FoxJin1006',
+            'uploader_id': '322014285',
+            'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
+            'tags': list,
         },
-        'playlist_count': 19,
         'params': {
             'videopassword': '100600',
         },
@@ -73,130 +95,38 @@ class YoukuIE(InfoExtractor):
         'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
         'info_dict': {
             'id': 'XOTUxMzg4NDMy',
+            'ext': 'mp4',
             'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
+            'duration': 702.08,
+            'thumbnail': r're:^https?://.*',
+            'uploader': '明月庄主moon',
+            'uploader_id': '38465621',
+            'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
+            'tags': list,
         },
-        'playlist_count': 6,
+    }, {
+        'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
+        'info_dict': {
+            'id': 'XMjIyNzAzMTQ4NA',
+            'ext': 'mp4',
+            'title': '卡马乔国足开大脚长传冲吊集锦',
+            'duration': 289,
+            'thumbnail': r're:^https?://.*',
+            'uploader': '阿卜杜拉之星',
+            'uploader_id': '2382249',
+            'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
+            'tags': list,
+        },
+    }, {
+        'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
+        'only_matching': True,
     }]
 
-    def construct_video_urls(self, data):
-        # get sid, token
-        def yk_t(s1, s2):
-            ls = list(range(256))
-            t = 0
-            for i in range(256):
-                t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
-                ls[i], ls[t] = ls[t], ls[i]
-            s = bytearray()
-            x, y = 0, 0
-            for i in range(len(s2)):
-                y = (y + 1) % 256
-                x = (x + ls[y]) % 256
-                ls[x], ls[y] = ls[y], ls[x]
-                s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
-            return bytes(s)
-
-        sid, token = yk_t(
-            b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
-        ).decode('ascii').split('_')
-
-        # get oip
-        oip = data['security']['ip']
-
-        fileid_dict = {}
-        for stream in data['stream']:
-            if stream.get('channel_type') == 'tail':
-                continue
-            format = stream.get('stream_type')
-            fileid = try_get(
-                stream, lambda x: x['segs'][0]['fileid'],
-                compat_str) or stream['stream_fileid']
-            fileid_dict[format] = fileid
-
-        def get_fileid(format, n):
-            number = hex(int(str(n), 10))[2:].upper()
-            if len(number) == 1:
-                number = '0' + number
-            streamfileids = fileid_dict[format]
-            fileid = streamfileids[0:8] + number + streamfileids[10:]
-            return fileid
-
-        # get ep
-        def generate_ep(format, n):
-            fileid = get_fileid(format, n)
-            ep_t = yk_t(
-                b'bf7e5f01',
-                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
-            )
-            ep = base64.b64encode(ep_t).decode('ascii')
-            return ep
-
-        # generate video_urls
-        video_urls_dict = {}
-        for stream in data['stream']:
-            if stream.get('channel_type') == 'tail':
-                continue
-            format = stream.get('stream_type')
-            video_urls = []
-            for dt in stream['segs']:
-                n = str(stream['segs'].index(dt))
-                param = {
-                    'K': dt['key'],
-                    'hd': self.get_hd(format),
-                    'myp': 0,
-                    'ypp': 0,
-                    'ctype': 12,
-                    'ev': 1,
-                    'token': token,
-                    'oip': oip,
-                    'ep': generate_ep(format, n)
-                }
-                video_url = \
-                    'http://k.youku.com/player/getFlvPath/' + \
-                    'sid/' + sid + \
-                    '_00' + \
-                    '/st/' + self.parse_ext_l(format) + \
-                    '/fileid/' + get_fileid(format, n) + '?' + \
-                    compat_urllib_parse_urlencode(param)
-                video_urls.append(video_url)
-            video_urls_dict[format] = video_urls
-
-        return video_urls_dict
-
     @staticmethod
     def get_ysuid():
         return '%d%s' % (int(time.time()), ''.join([
             random.choice(string.ascii_letters) for i in range(3)]))
 
-    def get_hd(self, fm):
-        hd_id_dict = {
-            '3gp': '0',
-            '3gphd': '1',
-            'flv': '0',
-            'flvhd': '0',
-            'mp4': '1',
-            'mp4hd': '1',
-            'mp4hd2': '1',
-            'mp4hd3': '1',
-            'hd2': '2',
-            'hd3': '3',
-        }
-        return hd_id_dict[fm]
-
-    def parse_ext_l(self, fm):
-        ext_dict = {
-            '3gp': 'flv',
-            '3gphd': 'mp4',
-            'flv': 'flv',
-            'flvhd': 'flv',
-            'mp4': 'mp4',
-            'mp4hd': 'mp4',
-            'mp4hd2': 'flv',
-            'mp4hd3': 'flv',
-            'hd2': 'flv',
-            'hd3': 'flv',
-        }
-        return ext_dict[fm]
-
     def get_format_name(self, fm):
         _dict = {
             '3gp': 'h6',
@@ -210,32 +140,40 @@ class YoukuIE(InfoExtractor):
             'hd2': 'h2',
             'hd3': 'h1',
         }
-        return _dict[fm]
+        return _dict.get(fm)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
+        self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
 
-        def retrieve_data(req_url, note):
-            headers = {
-                'Referer': req_url,
-            }
-            headers.update(self.geo_verification_headers())
-            self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
-
-            raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
-
-            return raw_data['data']
-
-        video_password = self._downloader.params.get('videopassword')
+        _, urlh = self._download_webpage_handle(
+            'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
+        # The etag header is '"foobar"'; let's remove the double quotes
+        cna = urlh.headers['etag'][1:-1]
 
         # request basic data
-        basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id
-        if video_password:
-            basic_data_url += '&pwd=%s' % video_password
-
-        data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
+        basic_data_params = {
+            'vid': video_id,
+            'ccode': '0402' if 'tudou.com' in url else '0401',
+            'client_ip': '192.168.1.1',
+            'utid': cna,
+            'client_ts': time.time() / 1000,
+        }
+
+        video_password = self._downloader.params.get('videopassword')
+        if video_password:
+            basic_data_params['password'] = video_password
+
+        headers = {
+            'Referer': url,
+        }
+        headers.update(self.geo_verification_headers())
+        data = self._download_json(
+            'https://ups.youku.com/ups/get.json', video_id,
+            'Downloading JSON metadata',
+            query=basic_data_params, headers=headers)['data']
 
         error = data.get('error')
         if error:
@@ -253,86 +191,87 @@ class YoukuIE(InfoExtractor):
             raise ExtractorError(msg)
 
         # get video title
-        title = data['video']['title']
-
-        # generate video_urls_dict
-        video_urls_dict = self.construct_video_urls(data)
-
-        # construct info
-        entries = [{
-            'id': '%s_part%d' % (video_id, i + 1),
-            'title': title,
-            'formats': [],
-            # some formats are not available for all parts, we have to detect
-            # which one has all
-        } for i in range(max(len(v.get('segs')) for v in data['stream']))]
-        for stream in data['stream']:
-            if stream.get('channel_type') == 'tail':
-                continue
-            fm = stream.get('stream_type')
-            video_urls = video_urls_dict[fm]
-            for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
-                entry['formats'].append({
-                    'url': video_url,
-                    'format_id': self.get_format_name(fm),
-                    'ext': self.parse_ext_l(fm),
-                    'filesize': int(seg['size']),
-                    'width': stream.get('width'),
-                    'height': stream.get('height'),
-                })
+        video_data = data['video']
+        title = video_data['title']
+
+        formats = [{
+            'url': stream['m3u8_url'],
+            'format_id': self.get_format_name(stream.get('stream_type')),
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+            'filesize': int(stream.get('size')),
+            'width': stream.get('width'),
+            'height': stream.get('height'),
+        } for stream in data['stream'] if stream.get('channel_type') != 'tail']
+        self._sort_formats(formats)
 
         return {
-            '_type': 'multi_video',
             'id': video_id,
             'title': title,
-            'entries': entries,
+            'formats': formats,
+            'duration': video_data.get('seconds'),
+            'thumbnail': video_data.get('logo'),
+            'uploader': video_data.get('username'),
+            'uploader_id': str_or_none(video_data.get('userid')),
+            'uploader_url': data.get('uploader', {}).get('homepage'),
+            'tags': video_data.get('tags'),
         }
 
 
 class YoukuShowIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
+    _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
    IE_NAME = 'youku:show'
 
     _TEST = {
-        'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
+        'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
         'info_dict': {
             'id': 'zc7c670be07ff11e48b3f',
             'title': '花千骨 未删减版',
-            'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
+            'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
         },
         'playlist_count': 50,
     }
 
     _PAGE_SIZE = 40
 
-    def _find_videos_in_page(self, webpage):
-        videos = re.findall(
-            r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
-        return [
-            self.url_result(video_url, YoukuIE.ie_key(), title)
-            for video_url, title in videos]
-
     def _real_extract(self, url):
         show_id = self._match_id(url)
         webpage = self._download_webpage(url, show_id)
 
-        entries = self._find_videos_in_page(webpage)
-
-        playlist_title = self._html_search_regex(
-            r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
-        detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
-        playlist_description = self._html_search_regex(
-            r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
-            detail_div, 'playlist description', fatal=False)
-
-        for idx in itertools.count(1):
-            episodes_page = self._download_webpage(
-                'http://www.youku.com/show_episode/id_%s.html' % show_id,
-                show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
-                note='Downloading episodes page %d' % idx)
-            new_entries = self._find_videos_in_page(episodes_page)
+        entries = []
+        page_config = self._parse_json(self._search_regex(
+            r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
+            show_id, transform_source=js_to_json)
+        for idx in itertools.count(0):
+            if idx == 0:
+                playlist_data_url = 'http://list.youku.com/show/module'
+                query = {'id': page_config['showid'], 'tab': 'point'}
+            else:
+                playlist_data_url = 'http://list.youku.com/show/point'
+                query = {
+                    'id': page_config['showid'],
+                    'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
+                }
+            query['callback'] = 'cb'
+            playlist_data = self._download_json(
+                playlist_data_url, show_id, query=query,
+                note='Downloading playlist data page %d' % (idx + 1),
+                transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
+            video_urls = re.findall(
+                r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
+                playlist_data)
+            new_entries = [
+                self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
+                for video_url in video_urls]
             entries.extend(new_entries)
             if len(new_entries) < self._PAGE_SIZE:
                 break
 
-        return self.playlist_result(entries, show_id, playlist_title, playlist_description)
+        desc = self._html_search_meta('description', webpage, fatal=False)
+        playlist_title = desc.split(',')[0] if desc else None
+        detail_li = get_element_by_class('p-intro', webpage)
+        playlist_description = get_element_by_class(
+            'intro-more', detail_li) if detail_li else None
+
+        return self.playlist_result(
+            entries, show_id, playlist_title, playlist_description)
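
For orientation, the reworked Youku extractor boils the metadata request down to a GET against ups.youku.com with the parameters assembled above; a standalone sketch (the cna value is a placeholder, the real one is read from the etag header of log.mmstat.com/eg.js):

import time

video_id = 'XMTc1ODE5Njcy'        # sample id taken from the tests above
cna = 'DOgOEvp0takCAbfBc1IZmOWj'  # placeholder; normally parsed from the etag header

basic_data_params = {
    'vid': video_id,
    'ccode': '0401',              # '0402' is used for video.tudou.com URLs
    'client_ip': '192.168.1.1',
    'utid': cna,
    'client_ts': time.time() / 1000,
}
# youtube-dl sends these as the query string of
# https://ups.youku.com/ups/get.json together with a Referer header.
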
@@ -1353,10 +1353,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             start_time = parse_duration(time_point)
             if start_time is None:
                 continue
+            if start_time > duration:
+                break
             end_time = (duration if next_num == len(chapter_lines)
                         else parse_duration(chapter_lines[next_num][1]))
             if end_time is None:
                 continue
+            if end_time > duration:
+                end_time = duration
+            if start_time > end_time:
+                break
             chapter_title = re.sub(
                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
             chapter_title = re.sub(r'\s+', ' ', chapter_title)
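
The added checks simply discard or clamp chapters that fall outside the reported duration. Schematically (the values are made up):

duration = 600  # reported video length in seconds (example)
chapters = []
for start_time, end_time, title in [(0, 120, 'Intro'), (120, 900, 'Main part'), (700, 800, 'Outro')]:
    if start_time > duration:
        break                # chapter starts after the video ends
    if end_time > duration:
        end_time = duration  # clamp to the video length
    if start_time > end_time:
        break
    chapters.append({'start_time': start_time, 'end_time': end_time, 'title': title})
print(chapters)  # keeps 'Intro' and a clamped 'Main part'; 'Outro' is dropped
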
@@ -1435,6 +1441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             age_gate = False
             video_info = None
+            sts = None
             # Try looking directly into the video webpage
             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
             if ytplayer_config:
@@ -1451,6 +1458,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
                     is_live = True
+                sts = ytplayer_config.get('sts')
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                 # We also try looking in get_video_info since it may contain different dashmpd
                 # URL that points to a DASH manifest with possibly different itag set (some itags
@@ -1459,17 +1467,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 # The general idea is to take a union of itags of both DASH manifests (for example
                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
                 self.report_video_info_webpage_download(video_id)
-                for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                    video_info_url = (
-                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                        % (proto, video_id, el_type))
+                for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+                    query = {
+                        'video_id': video_id,
+                        'ps': 'default',
+                        'eurl': '',
+                        'gl': 'US',
+                        'hl': 'en',
+                    }
+                    if el:
+                        query['el'] = el
+                    if sts:
+                        query['sts'] = sts
                     video_info_webpage = self._download_webpage(
-                        video_info_url,
+                        '%s://www.youtube.com/get_video_info' % proto,
                         video_id, note=False,
-                        errnote='unable to download video info webpage')
+                        errnote='unable to download video info webpage',
+                        fatal=False, query=query)
+                    if not video_info_webpage:
+                        continue
                     get_video_info = compat_parse_qs(video_info_webpage)
-                    if get_video_info.get('use_cipher_signature') != ['True']:
-                        add_dash_mpd(get_video_info)
+                    add_dash_mpd(get_video_info)
                     if not video_info:
                         video_info = get_video_info
                     if 'token' in get_video_info:
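
Passing query= lets the downloader build the URL itself; the request is roughly equivalent to the following (the video id and sts values are placeholders):

try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

query = {
    'video_id': 'dQw4w9WgXcQ',  # placeholder id
    'ps': 'default',
    'eurl': '',
    'gl': 'US',
    'hl': 'en',
    'el': 'detailpage',  # only set when el is non-empty
    'sts': '17232',      # only set when ytplayer_config carried an sts value
}
print('https://www.youtube.com/get_video_info?' + urlencode(query))
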
@@ -1703,12 +1721,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 format_id = url_data['itag'][0]
                 url = url_data['url'][0]
-
-                if 'sig' in url_data:
-                    url += '&signature=' + url_data['sig'][0]
-                elif 's' in url_data:
-                    encrypted_sig = url_data['s'][0]
+                if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
 
                     jsplayer_url_json = self._search_regex(
                         ASSETS_RE,
                         embed_webpage if age_gate else video_webpage,
@@ -1729,6 +1743,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             video_webpage, 'age gate player URL')
                     player_url = json.loads(player_url_json)
 
+                if 'sig' in url_data:
+                    url += '&signature=' + url_data['sig'][0]
+                elif 's' in url_data:
+                    encrypted_sig = url_data['s'][0]
+
                     if self._downloader.params.get('verbose'):
                         if player_url is None:
                             player_version = 'unknown'
@@ -6,6 +6,7 @@ import re
 
 from .utils import (
     ExtractorError,
+    remove_quotes,
 )
 
 _OPERATORS = [
@@ -57,7 +58,6 @@ class JSInterpreter(object):
 
     def interpret_expression(self, expr, local_vars, allow_recursion):
         expr = expr.strip()
-
         if expr == '':  # Empty expression
             return None
 
@@ -121,11 +121,19 @@ class JSInterpreter(object):
             pass
 
         m = re.match(
-            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
+            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
+        if m:
+            val = local_vars[m.group('in')]
+            idx = self.interpret_expression(
+                m.group('idx'), local_vars, allow_recursion - 1)
+            return val[idx]
+
+        m = re.match(
+            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
             expr)
         if m:
             variable = m.group('var')
-            member = m.group('member')
+            member = remove_quotes(m.group('member') or m.group('member2'))
             arg_str = m.group('args')
 
             if variable in local_vars:
@@ -173,14 +181,6 @@ class JSInterpreter(object):
 
             return obj[member](argvals)
 
-        m = re.match(
-            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
-        if m:
-            val = local_vars[m.group('in')]
-            idx = self.interpret_expression(
-                m.group('idx'), local_vars, allow_recursion - 1)
-            return val[idx]
-
         for op, opfunc in _OPERATORS:
             m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
             if not m:
@@ -211,21 +211,25 @@ class JSInterpreter(object):
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
     def extract_object(self, objname):
+        _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
         obj_m = re.search(
-            (r'(?<!this\.)%s\s*=\s*\{' % re.escape(objname)) +
-            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
-            r'\}\s*;',
+            r'''(?x)
+                (?<!this\.)%s\s*=\s*{\s*
+                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
+                }\s*;
+            ''' % (re.escape(objname), _FUNC_NAME_RE),
             self.code)
         fields = obj_m.group('fields')
         # Currently, it only supports function definitions
         fields_m = re.finditer(
-            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
-            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            r'''(?x)
+                (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
+            ''' % _FUNC_NAME_RE,
             fields)
         for f in fields_m:
             argnames = f.group('args').split(',')
-            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+            obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
 
         return obj
 
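
remove_quotes (imported from ..utils at the top of the file) behaves roughly like the sketch below; it is what lets a member written as obj["foo"] or {"foo": function(...)} resolve to the same key as obj.foo:

def remove_quotes(s):
    # Rough equivalent of the utils helper: strip one pair of matching quotes.
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'"):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s

print(remove_quotes('"splice"'))  # splice
print(remove_quotes("reverse"))   # reverse
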
@@ -310,7 +310,7 @@ def parseOpts(overrideArguments=None):
         metavar='FILTER', dest='match_filter', default=None,
         help=(
             'Generic video filter. '
-            'Specify any key (see help for -o for a list of available keys) to '
+            'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
             'match if the key is present, '
             '!key to check if the key is not present, '
             'key > NUMBER (like "comment_count > 12", also works with '
@@ -618,7 +618,7 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-j', '--dump-json',
         action='store_true', dest='dumpjson', default=False,
-        help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
+        help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
     verbosity.add_option(
         '-J', '--dump-single-json',
         action='store_true', dest='dump_single_json', default=False,
@@ -814,11 +814,12 @@ def parseOpts(overrideArguments=None):
         '--metadata-from-title',
         metavar='FORMAT', dest='metafromtitle',
         help='Parse additional metadata like song title / artist from the video title. '
-             'The format syntax is the same as --output, '
-             'the parsed parameters replace existing values. '
-             'Additional templates: %(album)s, %(artist)s. '
+             'The format syntax is the same as --output. Regular expression with '
+             'named capture groups may also be used. '
+             'The parsed parameters replace existing values. '
              'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
-             '"Coldplay - Paradise"')
+             '"Coldplay - Paradise". '
+             'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
     postproc.add_option(
         '--xattrs',
         action='store_true', dest='xattrs', default=False,
@@ -444,7 +444,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 
         chapters = info.get('chapters', [])
         if chapters:
-            metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
+            metadata_filename = replace_extension(filename, 'meta')
             with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
                 def ffmpeg_escape(text):
                     return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
@@ -9,7 +9,9 @@ class MetadataFromTitlePP(PostProcessor):
     def __init__(self, downloader, titleformat):
         super(MetadataFromTitlePP, self).__init__(downloader)
         self._titleformat = titleformat
-        self._titleregex = self.format_to_regex(titleformat)
+        self._titleregex = (self.format_to_regex(titleformat)
+                            if re.search(r'%\(\w+\)s', titleformat)
+                            else titleformat)
 
     def format_to_regex(self, fmt):
         r"""
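
With this change --metadata-from-title accepts either the %(...)s template syntax or a plain regular expression with named groups; both end up as a pattern along the lines of the one sketched here (the exact escaping produced by format_to_regex may differ slightly):

import re

# Template form: format_to_regex turns '%(artist)s - %(title)s' into a
# named-group pattern roughly equivalent to this one.
titleregex = r'(?P<artist>.+?) - (?P<title>.+)'

match = re.match(titleregex, 'Coldplay - Paradise')
if match:
    print(match.groupdict())  # artist -> 'Coldplay', title -> 'Paradise'
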
@@ -36,6 +36,7 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_HTMLParseError,
     compat_HTMLParser,
     compat_basestring,
     compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
     """
     parser = HTMLAttributeParser()
-    parser.feed(html_element)
-    parser.close()
+    try:
+        parser.feed(html_element)
+        parser.close()
+    # Older Python may throw HTMLParseError in case of malformed HTML
+    except compat_HTMLParseError:
+        pass
     return parser.attrs
 
 
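
In practice this means malformed attribute markup no longer aborts extraction on interpreters whose HTMLParser raises; well-formed input keeps working as before, for example:

from youtube_dl.utils import extract_attributes

attrs = extract_attributes('<a href="https://example.com/video" class="player">')
print(attrs['href'])   # https://example.com/video
print(attrs['class'])  # player
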
@@ -932,14 +937,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         except zlib.error:
             return zlib.decompress(data)
 
-    @staticmethod
-    def addinfourl_wrapper(stream, headers, url, code):
-        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
-            return compat_urllib_request.addinfourl(stream, headers, url, code)
-        ret = compat_urllib_request.addinfourl(stream, headers, url)
-        ret.code = code
-        return ret
-
     def http_request(self, req):
         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
         # always respected by websites, some tend to give out URLs with non percent-encoded
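
The removed wrapper only existed for interpreters whose addinfourl could not carry a status code; on the Python versions still supported the constructor accepts it directly, for example:

import io

try:
    from urllib.response import addinfourl  # Python 3
except ImportError:
    from urllib import addinfourl  # Python 2

resp = addinfourl(io.BytesIO(b'body'), {}, 'http://example.com', 200)
print(resp.getcode())  # 200
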
@@ -991,13 +988,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                     break
                 else:
                     raise original_ioerror
-            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
             del resp.headers['Content-encoding']
         # deflate
         if resp.headers.get('Content-encoding', '') == 'deflate':
             gz = io.BytesIO(self.deflate(resp.read()))
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
             del resp.headers['Content-encoding']
         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
@@ -1187,7 +1184,7 @@ def unified_timestamp(date_str, day_first=True):
     if date_str is None:
         return None
 
-    date_str = date_str.replace(',', ' ')
+    date_str = re.sub(r'[,|]', '', date_str)
 
     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
     timezone, date_str = extract_timezone(date_str)
@@ -2211,7 +2208,12 @@ def parse_age_limit(s):
 
 def strip_jsonp(code):
     return re.sub(
-        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'''(?sx)^
+            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+            (?:\s*&&\s*(?P=func_name))?
+            \s*\(\s*(?P<callback_data>.*)\);?
+            \s*?(?://[^\n]*)*$''',
+        r'\g<callback_data>', code)
 
 
 def js_to_json(code):
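
The extended pattern also handles the window.cb && cb(...) style wrappers that some JSONP endpoints emit. A quick check with the same regular expression:

import re

def strip_jsonp(code):
    # Same substitution as in the hunk above.
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)

print(strip_jsonp('window.cb && cb({"status": "ok"});'))       # {"status": "ok"}
print(strip_jsonp('callback({"a": 1}); // trailing comment'))  # {"a": 1}
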
@@ -2360,11 +2362,11 @@ def parse_codecs(codecs_str):
         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
             if not vcodec:
                 vcodec = full_codec
-        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
+        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
             if not acodec:
                 acodec = full_codec
         else:
-            write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
+            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
     if not vcodec and not acodec:
         if len(splited_codecs) == 2:
             return {
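
The extra entries make E-AC-3 and the DTS variants land on the audio side instead of triggering the unknown-codec warning. The classification step in isolation (not the full parse_codecs function):

def classify(full_codec):
    codec = full_codec.split('.')[0]
    if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                 'h263', 'h264', 'mp4v'):
        return 'video'
    elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3',
                   'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
        return 'audio'
    return 'unknown'

print(classify('ec-3'))         # audio (newly recognised)
print(classify('avc1.64001f'))  # video
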
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.05.07'
+__version__ = '2017.06.12'