Compare commits
159 Commits
2016.10.21
...
2016.11.27
Author | SHA1 | Date | |
---|---|---|---|
![]() |
2b380fc299 | ||
![]() |
294d4926d7 | ||
![]() |
83f1481baa | ||
![]() |
f25e1c8d8c | ||
![]() |
6901673868 | ||
![]() |
560c8c6ec0 | ||
![]() |
9338a0eae3 | ||
![]() |
74394b5e10 | ||
![]() |
1db058466d | ||
![]() |
e94eeb1dd3 | ||
![]() |
8b27d83e4e | ||
![]() |
8eb7b5c3f1 | ||
![]() |
b68599ed47 | ||
![]() |
44444f0d3b | ||
![]() |
c867adc68c | ||
![]() |
3b5daf0736 | ||
![]() |
c8f56741dd | ||
![]() |
868630fbe5 | ||
![]() |
1d6ae5628f | ||
![]() |
6334794f2a | ||
![]() |
4eece8ba57 | ||
![]() |
2574721a81 | ||
![]() |
dbcc4a6b32 | ||
![]() |
0bb58a208b | ||
![]() |
dc6a9e4195 | ||
![]() |
8f8f182d0b | ||
![]() |
2176e466e0 | ||
![]() |
303b38fa84 | ||
![]() |
fb27d0ce5e | ||
![]() |
0aacd2deb1 | ||
![]() |
08ec95a6db | ||
![]() |
df46b19cb8 | ||
![]() |
748a462fbe | ||
![]() |
c131fc3372 | ||
![]() |
b25459b88a | ||
![]() |
5f75c4a4ad | ||
![]() |
689f31fde5 | ||
![]() |
582be35847 | ||
![]() |
073d5bf583 | ||
![]() |
315cb86a95 | ||
![]() |
b2fc1c4fb9 | ||
![]() |
d76767c90e | ||
![]() |
eceba9f805 | ||
![]() |
d755396804 | ||
![]() |
58355a3bf1 | ||
![]() |
49b69ad91c | ||
![]() |
6b4dfa2819 | ||
![]() |
9f60134a9d | ||
![]() |
b3d4bd05f9 | ||
![]() |
dbffd00ba9 | ||
![]() |
50913b8241 | ||
![]() |
7e08e2cab0 | ||
![]() |
690355551c | ||
![]() |
754e6c8322 | ||
![]() |
e58609b22c | ||
![]() |
4ea4c0bb22 | ||
![]() |
577281b0c6 | ||
![]() |
3d2729514f | ||
![]() |
f076d7972c | ||
![]() |
8b1aeadc33 | ||
![]() |
95ad9ce573 | ||
![]() |
189935f159 | ||
![]() |
bc40b3a5ba | ||
![]() |
3eaaa8abac | ||
![]() |
db3367f43e | ||
![]() |
6590925c27 | ||
![]() |
4719af097c | ||
![]() |
9946aa5ccf | ||
![]() |
c58e07a7aa | ||
![]() |
f700afa24c | ||
![]() |
5d47b38cf5 | ||
![]() |
ebc7ab1e23 | ||
![]() |
97726317ac | ||
![]() |
cb882540e8 | ||
![]() |
98708e6cbd | ||
![]() |
b52c9ef165 | ||
![]() |
e28ed498e6 | ||
![]() |
5021ca6c13 | ||
![]() |
37e7a71c6c | ||
![]() |
f5c4b06f17 | ||
![]() |
519d897049 | ||
![]() |
b61cd51869 | ||
![]() |
f420902a3b | ||
![]() |
de328af362 | ||
![]() |
b30e4c2754 | ||
![]() |
09ffe34b00 | ||
![]() |
640aff1d0c | ||
![]() |
c897af8aac | ||
![]() |
f3c705f8ec | ||
![]() |
f93ac1d175 | ||
![]() |
c4c9b8440c | ||
![]() |
32f2627aed | ||
![]() |
9d64e1dcdc | ||
![]() |
10380e55de | ||
![]() |
22979993e7 | ||
![]() |
b47ecd0b74 | ||
![]() |
3a86b2c51e | ||
![]() |
b811b4c93b | ||
![]() |
f4dfa9a5ed | ||
![]() |
3b4b66b50c | ||
![]() |
4119a96ce5 | ||
![]() |
26aae56690 | ||
![]() |
4f9cd4d36f | ||
![]() |
cc99a77ac1 | ||
![]() |
8956d6608a | ||
![]() |
3365ea8929 | ||
![]() |
a18aeee803 | ||
![]() |
1616f9b452 | ||
![]() |
02dc0a36b7 | ||
![]() |
639e3b5c99 | ||
![]() |
b2758123c5 | ||
![]() |
f449c061d0 | ||
![]() |
9c82bba05d | ||
![]() |
e3577722b0 | ||
![]() |
b82c33dd67 | ||
![]() |
e5a088dc4b | ||
![]() |
2c6da7df4a | ||
![]() |
7e7a028aa4 | ||
![]() |
e70a5e6566 | ||
![]() |
3bf55be466 | ||
![]() |
a901fc5fc2 | ||
![]() |
cae6bc0118 | ||
![]() |
d9ee2e5cf6 | ||
![]() |
e1a0b3b81c | ||
![]() |
2a048f9878 | ||
![]() |
ea331f40e6 | ||
![]() |
f02700a1fa | ||
![]() |
f3517569f6 | ||
![]() |
c725333d41 | ||
![]() |
a5a8877f9c | ||
![]() |
43c53a1700 | ||
![]() |
ec8705117a | ||
![]() |
3d8d44c7b1 | ||
![]() |
88839f4380 | ||
![]() |
83e9374464 | ||
![]() |
773017c648 | ||
![]() |
777d90dc28 | ||
![]() |
3791d84acc | ||
![]() |
9305a0dc60 | ||
![]() |
94e08950e3 | ||
![]() |
ee824a8d06 | ||
![]() |
d3b6b3b95b | ||
![]() |
b17422753f | ||
![]() |
b0b28b8241 | ||
![]() |
81cb7a5978 | ||
![]() |
d2e96a8ed4 | ||
![]() |
2e7c8cab55 | ||
![]() |
d7d4481c6a | ||
![]() |
5ace137bf4 | ||
![]() |
9dde0e04e6 | ||
![]() |
f16f8505b1 | ||
![]() |
9dc13a6780 | ||
![]() |
9aa929d337 | ||
![]() |
425f3fdfcb | ||
![]() |
e034cbc581 | ||
![]() |
5378f8ce0d | ||
![]() |
b64d04c119 | ||
![]() |
00ca755231 | ||
![]() |
6cbb20bb09 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.27**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.10.21.1
|
||||
[debug] youtube-dl version 2016.11.27
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -30,6 +30,9 @@ updates_key.pem
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.3gp
|
||||
*.wav
|
||||
*.ape
|
||||
*.mkv
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
|
2
AUTHORS
2
AUTHORS
@@ -188,3 +188,5 @@ Xie Yanbo
|
||||
Philip Xu
|
||||
John Hawkinson
|
||||
Rich Leeper
|
||||
Zhong Jianxin
|
||||
Thor77
|
||||
|
@@ -92,7 +92,7 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
@@ -245,7 +245,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
|
149
ChangeLog
149
ChangeLog
@@ -1,3 +1,152 @@
|
||||
version 2016.11.27
|
||||
|
||||
Extractors
|
||||
+ [webcaster] Add support for webcaster.pro
|
||||
+ [azubu] Add support for azubu.uol.com.br (#11305)
|
||||
* [viki] Prefer hls formats
|
||||
* [viki] Fix rtmp formats extraction (#11255)
|
||||
* [puls4] Relax URL regular expression (#11267)
|
||||
* [vevo] Improve artist extraction (#10911)
|
||||
* [mitele] Relax URL regular expression and extract more metadata (#11244)
|
||||
+ [cbslocal] Recognize New York site (#11285)
|
||||
+ [youtube:playlist] Pass disable_polymer in URL query (#11193)
|
||||
|
||||
|
||||
version 2016.11.22
|
||||
|
||||
Extractors
|
||||
* [hellporno] Fix video extension extraction (#11247)
|
||||
+ [hellporno] Add support for hellporno.net (#11247)
|
||||
+ [amcnetworks] Recognize more BBC America URLs (#11263)
|
||||
* [funnyordie] Improve extraction (#11208)
|
||||
* [extractor/generic] Improve limelight embeds support
|
||||
- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
|
||||
* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
|
||||
* [twitter:card] Relax URL regular expression (#11225)
|
||||
+ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
|
||||
|
||||
|
||||
version 2016.11.18
|
||||
|
||||
Extractors
|
||||
* [youtube:live] Relax URL regular expression (#11164)
|
||||
* [openload] Fix extraction (#10408, #11122)
|
||||
* [vlive] Prefer locale over language for subtitles id (#11203)
|
||||
|
||||
|
||||
version 2016.11.14.1
|
||||
|
||||
Core
|
||||
+ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
|
||||
* [extractor/common] Fix media templates with Bandwidth substitution pattern in
|
||||
MPD manifests (#11175)
|
||||
* [extractor/common] Improve thumbnail extraction from JSON-LD
|
||||
|
||||
Extractors
|
||||
+ [nrk] Workaround geo restriction
|
||||
+ [nrk] Improve error detection and messages
|
||||
+ [afreecatv] Add support for vod.afreecatv.com (#11174)
|
||||
* [cda] Fix and improve extraction (#10929, #10936)
|
||||
* [plays] Fix extraction (#11165)
|
||||
* [eagleplatform] Fix extraction (#11160)
|
||||
+ [audioboom] Recognize /posts/ URLs (#11149)
|
||||
|
||||
|
||||
version 2016.11.08.1
|
||||
|
||||
Extractors
|
||||
* [espn:article] Fix support for espn.com articles
|
||||
* [franceculture] Fix extraction (#11140)
|
||||
|
||||
|
||||
version 2016.11.08
|
||||
|
||||
Extractors
|
||||
* [tmz:article] Fix extraction (#11052)
|
||||
* [espn] Fix extraction (#11041)
|
||||
* [mitele] Fix extraction after website redesign (#10824)
|
||||
- [ard] Remove age restriction check (#11129)
|
||||
* [generic] Improve support for pornhub.com embeds (#11100)
|
||||
+ [generic] Add support for redtube.com embeds (#11099)
|
||||
+ [generic] Add support for drtuber.com embeds (#11098)
|
||||
+ [redtube] Add support for embed URLs
|
||||
+ [drtuber] Add support for embed URLs
|
||||
+ [yahoo] Improve content id extraction (#11088)
|
||||
* [toutv] Relax URL regular expression (#11121)
|
||||
|
||||
|
||||
version 2016.11.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
|
||||
manifests (#11113)
|
||||
* [downloader/ism] Fix AVC Decoder Configuration Record
|
||||
|
||||
Extractors
|
||||
+ [fox9] Add support for fox9.com (#11110)
|
||||
+ [anvato] Extract more metadata and improve formats extraction
|
||||
* [vodlocker] Improve removed videos detection (#11106)
|
||||
+ [vzaar] Add support for vzaar.com (#11093)
|
||||
+ [vice] Add support for uplynk preplay videos (#11101)
|
||||
* [tubitv] Fix extraction (#11061)
|
||||
+ [shahid] Add support for authentication (#11091)
|
||||
+ [radiocanada] Add subtitles support (#11096)
|
||||
+ [generic] Add support for ISM manifests
|
||||
|
||||
|
||||
version 2016.11.02
|
||||
|
||||
Core
|
||||
+ Add basic support for Smooth Streaming protocol (#8118, #10969)
|
||||
* Improve MPD manifest base URL extraction (#10909, #11079)
|
||||
* Fix --match-filter for int-like strings (#11082)
|
||||
|
||||
Extractors
|
||||
+ [mva] Add support for ISM formats
|
||||
+ [msn] Add support for ISM formats
|
||||
+ [onet] Add support for ISM formats
|
||||
+ [tvp] Add support for ISM formats
|
||||
+ [nicknight] Add support for nicknight sites (#10769)
|
||||
|
||||
|
||||
version 2016.10.30
|
||||
|
||||
Extractors
|
||||
* [facebook] Improve 1080P video detection (#11073)
|
||||
* [imgur] Recognize /r/ URLs (#11071)
|
||||
* [beeg] Fix extraction (#11069)
|
||||
* [openload] Fix extraction (#10408)
|
||||
* [gvsearch] Modernize and fix search request (#11051)
|
||||
* [adultswim] Fix extraction (#10979)
|
||||
+ [nobelprize] Add support for nobelprize.org (#9999)
|
||||
* [hornbunny] Fix extraction (#10981)
|
||||
* [tvp] Improve video id extraction (#10585)
|
||||
|
||||
|
||||
version 2016.10.26
|
||||
|
||||
Extractors
|
||||
+ [rentv] Add support for ren.tv (#10620)
|
||||
+ [ard] Detect unavailable videos (#11018)
|
||||
* [vk] Fix extraction (#11022)
|
||||
|
||||
|
||||
version 2016.10.25
|
||||
|
||||
Core
|
||||
* Running youtube-dl in the background is fixed (#10996, #10706, #955)
|
||||
|
||||
Extractors
|
||||
+ [jamendo] Add support for jamendo.com (#10132, #10736)
|
||||
+ [pandatv] Add support for panda.tv (#10736)
|
||||
+ [dotsub] Support Vimeo embed (#10964)
|
||||
* [litv] Fix extraction
|
||||
+ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
|
||||
* [vivo] Fix extraction (#11003)
|
||||
+ [twitch:stream] Add support for rebroadcasts (#10995)
|
||||
* [pluralsight] Fix subtitles conversion (#10990)
|
||||
|
||||
|
||||
version 2016.10.21.1
|
||||
|
||||
Extractors
|
||||
|
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
@@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
|
||||
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
|
||||
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
|
||||
|
||||
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
|
||||
|
||||
@@ -930,7 +930,7 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
@@ -1083,7 +1083,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
|
@@ -25,5 +25,6 @@ def build_completion(opt_parser):
|
||||
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@@ -2,11 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import mimetypes
|
||||
import netrc
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@@ -90,16 +92,23 @@ class GitHubReleaser(object):
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
|
||||
parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
if len(args) != 3:
|
||||
parser.error('Expected a version and a build directory')
|
||||
|
||||
version, build_path = args
|
||||
changelog_file, version, build_path = args
|
||||
|
||||
with io.open(changelog_file, encoding='utf-8') as inf:
|
||||
changelog = inf.read()
|
||||
|
||||
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
||||
body = mobj.group(1) if mobj else ''
|
||||
|
||||
releaser = GitHubReleaser()
|
||||
|
||||
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
|
||||
new_release = releaser.create_release(
|
||||
version, name='youtube-dl %s' % version, body=body)
|
||||
release_id = new_release['id']
|
||||
|
||||
for asset in os.listdir(build_path):
|
||||
|
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
|
||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
|
||||
out, _ = prog.communicate(secret_msg)
|
||||
return out
|
||||
|
||||
|
||||
iv = key = [0x20, 0x15] + 14 * [0]
|
||||
|
||||
r = openssl_encode('aes-128-cbc', key, iv)
|
||||
|
@@ -32,5 +32,6 @@ def main():
|
||||
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
|
||||
sitesf.write(template)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -28,5 +28,6 @@ def main():
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
|
||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||
return s
|
||||
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
|
@@ -41,5 +41,6 @@ def main():
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -74,5 +74,6 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||
|
||||
ROOT=$(pwd)
|
||||
python devscripts/create-github-release.py $version "$ROOT/build/$version"
|
||||
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
|
||||
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
|
||||
|
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
|
||||
with open(ZSH_COMPLETION_FILE, "w") as f:
|
||||
f.write(template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@@ -225,6 +225,7 @@
|
||||
- **EroProfile**
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
@@ -247,6 +248,7 @@
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **FOX9**
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
@@ -332,6 +334,8 @@
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
@@ -481,11 +485,13 @@
|
||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
- **nicknight**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
@@ -527,6 +533,7 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **PandaTV**: 熊猫TV
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
@@ -586,6 +593,8 @@
|
||||
- **RDS**: RDS.ca
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **RENTV**
|
||||
- **RENTVArticle**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
@@ -641,7 +650,7 @@
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **Shahid**
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **Shared**: shared.sx
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
@@ -762,6 +771,8 @@
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
@@ -848,6 +859,7 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Vivo**: vivo.sx
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
@@ -862,6 +874,7 @@
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Walla**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
@@ -869,6 +882,8 @@
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **WeiqiTV**: WQTV
|
||||
|
@@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'extractor': 'TEST',
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
'playlist_id': '42',
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
@@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'duration': 10,
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
'playlist_id': '43',
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
@@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('playlist_id = 42')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
def test_playlist_items_selection(self):
|
||||
entries = [{
|
||||
'id': compat_str(i),
|
||||
|
@@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -60,6 +60,7 @@ def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
|
||||
defs = gettestcases()
|
||||
|
||||
|
||||
@@ -217,6 +218,7 @@ def generator(test_case):
|
||||
|
||||
return test_template
|
||||
|
||||
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
test_method = generator(test_case)
|
||||
|
@@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
|
||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
||||
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
|
||||
ie._login()
|
||||
self.assertTrue('unable to log in:' in logger.messages[0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}''')
|
||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||
|
||||
def test_call(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return 2; }
|
||||
function y(a) { return x() + a; }
|
||||
function z() { return y(3); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('z'), 5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -69,6 +69,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
base_url,
|
||||
urlencode_postdata,
|
||||
urshift,
|
||||
update_url_query,
|
||||
@@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
|
||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
'trailer.mp4')
|
||||
|
||||
def test_base_url(self):
|
||||
self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
|
||||
self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@@ -1067,5 +1075,6 @@ The first line
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
self.to_stderr = self.to_screen
|
||||
|
||||
|
||||
params = get_params({
|
||||
'writeannotations': True,
|
||||
'skip_download': True,
|
||||
@@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
try_rm(ANNOTATIONS_FILE)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
|
||||
|
||||
for test_spec in _TESTS:
|
||||
make_tfunc(*test_spec)
|
||||
|
||||
|
@@ -1658,7 +1658,7 @@ class YoutubeDL(object):
|
||||
video_ext, audio_ext = audio.get('ext'), video.get('ext')
|
||||
if video_ext and audio_ext:
|
||||
COMPATIBLE_EXTS = (
|
||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
|
||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
|
||||
('webm')
|
||||
)
|
||||
for exts in COMPATIBLE_EXTS:
|
||||
|
@@ -95,8 +95,7 @@ def _real_main(argv=None):
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file could not be read')
|
||||
all_urls = batch_urls + args
|
||||
all_urls = [url.strip() for url in all_urls]
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
||||
@@ -450,4 +449,5 @@ def main(argv=None):
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
@@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
|
||||
return plaintext
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
|
||||
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
|
||||
@@ -328,4 +329,5 @@ def inc(data):
|
||||
break
|
||||
return data
|
||||
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
@@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass
|
||||
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
@@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
|
||||
return real_add_option(self, *bargs, **bkwargs)
|
||||
optparse.OptionGroup.add_option = _compat_add_option
|
||||
|
||||
|
||||
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
||||
compat_get_terminal_size = shutil.get_terminal_size
|
||||
else:
|
||||
|
@@ -7,6 +7,7 @@ from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
@@ -24,6 +25,7 @@ PROTOCOL_MAP = {
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD):
|
||||
class AVconvFD(FFmpegFD):
|
||||
pass
|
||||
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
|
@@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
urlh = self.ydl.urlopen(man_url)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
|
||||
@@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': url_parsed.geturl(),
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
@@ -59,7 +59,8 @@ class HlsFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
|
||||
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
|
||||
@@ -112,7 +113,10 @@ class HlsFD(FragmentFD):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
|
271
youtube_dl/downloader/ism.py
Normal file
271
youtube_dl/downloader/ism.py
Normal file
@@ -0,0 +1,271 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
u88 = struct.Struct(b'>Bx')
|
||||
u16 = struct.Struct(b'>H')
|
||||
u1616 = struct.Struct(b'>Hxx')
|
||||
u32 = struct.Struct(b'>I')
|
||||
u64 = struct.Struct(b'>Q')
|
||||
|
||||
s88 = struct.Struct(b'>bx')
|
||||
s16 = struct.Struct(b'>h')
|
||||
s1616 = struct.Struct(b'>hxx')
|
||||
s32 = struct.Struct(b'>i')
|
||||
|
||||
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
|
||||
|
||||
TRACK_ENABLED = 0x1
|
||||
TRACK_IN_MOVIE = 0x2
|
||||
TRACK_IN_PREVIEW = 0x4
|
||||
|
||||
SELF_CONTAINED = 0x1
|
||||
|
||||
|
||||
def box(box_type, payload):
|
||||
return u32.pack(8 + len(payload)) + box_type + payload
|
||||
|
||||
|
||||
def full_box(box_type, version, flags, payload):
|
||||
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
|
||||
|
||||
|
||||
def write_piff_header(stream, params):
|
||||
track_id = params['track_id']
|
||||
fourcc = params['fourcc']
|
||||
duration = params['duration']
|
||||
timescale = params.get('timescale', 10000000)
|
||||
language = params.get('language', 'und')
|
||||
height = params.get('height', 0)
|
||||
width = params.get('width', 0)
|
||||
is_audio = width == 0 and height == 0
|
||||
creation_time = modification_time = int(time.time())
|
||||
|
||||
ftyp_payload = b'isml' # major brand
|
||||
ftyp_payload += u32.pack(1) # minor version
|
||||
ftyp_payload += b'piff' + b'iso2' # compatible brands
|
||||
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
|
||||
|
||||
mvhd_payload = u64.pack(creation_time)
|
||||
mvhd_payload += u64.pack(modification_time)
|
||||
mvhd_payload += u32.pack(timescale)
|
||||
mvhd_payload += u64.pack(duration)
|
||||
mvhd_payload += s1616.pack(1) # rate
|
||||
mvhd_payload += s88.pack(1) # volume
|
||||
mvhd_payload += u16.pack(0) # reserved
|
||||
mvhd_payload += u32.pack(0) * 2 # reserved
|
||||
mvhd_payload += unity_matrix
|
||||
mvhd_payload += u32.pack(0) * 6 # pre defined
|
||||
mvhd_payload += u32.pack(0xffffffff) # next track id
|
||||
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
|
||||
|
||||
tkhd_payload = u64.pack(creation_time)
|
||||
tkhd_payload += u64.pack(modification_time)
|
||||
tkhd_payload += u32.pack(track_id) # track id
|
||||
tkhd_payload += u32.pack(0) # reserved
|
||||
tkhd_payload += u64.pack(duration)
|
||||
tkhd_payload += u32.pack(0) * 2 # reserved
|
||||
tkhd_payload += s16.pack(0) # layer
|
||||
tkhd_payload += s16.pack(0) # alternate group
|
||||
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
|
||||
tkhd_payload += u16.pack(0) # reserved
|
||||
tkhd_payload += unity_matrix
|
||||
tkhd_payload += u1616.pack(width)
|
||||
tkhd_payload += u1616.pack(height)
|
||||
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
|
||||
|
||||
mdhd_payload = u64.pack(creation_time)
|
||||
mdhd_payload += u64.pack(modification_time)
|
||||
mdhd_payload += u32.pack(timescale)
|
||||
mdhd_payload += u64.pack(duration)
|
||||
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
|
||||
mdhd_payload += u16.pack(0) # pre defined
|
||||
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
|
||||
|
||||
hdlr_payload = u32.pack(0) # pre defined
|
||||
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
|
||||
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
|
||||
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
|
||||
|
||||
if is_audio:
|
||||
smhd_payload = s88.pack(0) # balance
|
||||
smhd_payload = u16.pack(0) # reserved
|
||||
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
||||
else:
|
||||
vmhd_payload = u16.pack(0) # graphics mode
|
||||
vmhd_payload += u16.pack(0) * 3 # opcolor
|
||||
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
|
||||
minf_payload = media_header_box
|
||||
|
||||
dref_payload = u32.pack(1) # entry count
|
||||
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
|
||||
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
|
||||
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
|
||||
|
||||
stsd_payload = u32.pack(1) # entry count
|
||||
|
||||
sample_entry_payload = u8.pack(0) * 6 # reserved
|
||||
sample_entry_payload += u16.pack(1) # data reference index
|
||||
if is_audio:
|
||||
sample_entry_payload += u32.pack(0) * 2 # reserved
|
||||
sample_entry_payload += u16.pack(params.get('channels', 2))
|
||||
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u1616.pack(params['sampling_rate'])
|
||||
|
||||
if fourcc == 'AACL':
|
||||
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
||||
else:
|
||||
sample_entry_payload = sample_entry_payload
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
||||
sample_entry_payload += u16.pack(width)
|
||||
sample_entry_payload += u16.pack(height)
|
||||
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
|
||||
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
|
||||
sample_entry_payload += u32.pack(0) # reserved
|
||||
sample_entry_payload += u16.pack(1) # frame count
|
||||
sample_entry_payload += u8.pack(0) * 32 # compressor name
|
||||
sample_entry_payload += u16.pack(0x18) # depth
|
||||
sample_entry_payload += s16.pack(-1) # pre defined
|
||||
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'])
|
||||
if fourcc in ('H264', 'AVC1'):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
avcc_payload += u8.pack(1) # number of pps
|
||||
avcc_payload += u16.pack(len(pps))
|
||||
avcc_payload += pps
|
||||
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
|
||||
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
|
||||
stsd_payload += sample_entry_box
|
||||
|
||||
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
|
||||
|
||||
stts_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
|
||||
|
||||
stsc_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
|
||||
|
||||
stco_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
|
||||
|
||||
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
|
||||
|
||||
mdia_payload += box(b'minf', minf_payload) # Media Information Box
|
||||
|
||||
trak_payload += box(b'mdia', mdia_payload) # Media Box
|
||||
|
||||
moov_payload += box(b'trak', trak_payload) # Track Box
|
||||
|
||||
mehd_payload = u64.pack(duration)
|
||||
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
|
||||
|
||||
trex_payload = u32.pack(track_id) # track id
|
||||
trex_payload += u32.pack(1) # default sample description index
|
||||
trex_payload += u32.pack(0) # default sample duration
|
||||
trex_payload += u32.pack(0) # default sample size
|
||||
trex_payload += u32.pack(0) # default sample flags
|
||||
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
|
||||
|
||||
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
|
||||
stream.write(box(b'moov', moov_payload)) # Movie Box
|
||||
|
||||
|
||||
def extract_box_data(data, box_sequence):
|
||||
data_reader = io.BytesIO(data)
|
||||
while True:
|
||||
box_size = u32.unpack(data_reader.read(4))[0]
|
||||
box_type = data_reader.read(4)
|
||||
if box_type == box_sequence[0]:
|
||||
box_data = data_reader.read(box_size - 8)
|
||||
if len(box_sequence) == 1:
|
||||
return box_data
|
||||
return extract_box_data(box_data, box_sequence[1:])
|
||||
data_reader.seek(box_size - 8, 1)
|
||||
|
||||
|
||||
class IsmFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a ISM manifest
|
||||
"""
|
||||
|
||||
FD_NAME = 'ism'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
track_written = False
|
||||
for i, segment in enumerate(segments):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % i
|
||||
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
down_data = down.read()
|
||||
if not track_written:
|
||||
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
track_written = True
|
||||
ctx['dest_stream'].write(down_data)
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(segment_name)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
@@ -96,6 +96,27 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -163,6 +184,8 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
elif video_info.get('id'):
|
||||
segment_ids = [video_info['id']]
|
||||
else:
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
|
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -18,12 +19,18 @@ from ..utils import (
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)^
|
||||
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html))
|
||||
\?.*?\bnTitleNo=(?P<id>\d+)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/PLAYER/STATION/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
@@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
info_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='afbbs.afreecatv.com:8080',
|
||||
path='/api/video/get_video_info.php'))
|
||||
video_xml = self._download_xml(info_url, video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
|
||||
|
||||
if xpath_element(video_xml, './track/video/file') is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
@@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps(payload).encode('utf-8'))
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
|
||||
video_id = anvplayer_data['video']
|
||||
access_key = anvplayer_data['accessKey']
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id):
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
|
||||
formats = []
|
||||
for published_url in video_data['published_urls']:
|
||||
video_url = published_url['embed_url']
|
||||
media_format = published_url.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'smil':
|
||||
if ext == 'smil' or media_format == 'smil':
|
||||
formats.extend(self._extract_smil_formats(video_url, video_id))
|
||||
continue
|
||||
|
||||
@@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'tbr': tbr if tbr != 0 else None,
|
||||
}
|
||||
|
||||
if ext == 'm3u8':
|
||||
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
|
||||
# Not using _extract_m3u8_formats here as individual media
|
||||
# playlists are also included in published_urls.
|
||||
if tbr is None:
|
||||
@@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif ext == 'mp3':
|
||||
elif ext == 'mp3' or media_format == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
else:
|
||||
a_format.update({
|
||||
@@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'title': video_data.get('def_title'),
|
||||
'description': video_data.get('def_description'),
|
||||
'tags': video_data.get('def_tags', '').split(','),
|
||||
'categories': video_data.get('categories'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'timestamp': int_or_none(video_data.get(
|
||||
'ts_published') or video_data.get('ts_added')),
|
||||
'uploader': video_data.get('mcp_id'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
@@ -174,11 +174,15 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
)
|
||||
|
||||
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
|
@@ -6,8 +6,8 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
|
||||
'uploader': 'Steve Czaban',
|
||||
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
|
||||
@@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://azubu.uol.com.br/adolfz',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user = self._match_id(url)
|
||||
|
@@ -1,7 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -12,6 +14,9 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor):
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
all_info = self._parse_json(self._search_regex(
|
||||
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
|
||||
info = all_info[0]
|
||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||
mp3_info = info['downloads']['mp3-320']
|
||||
# If we try to use this url it says the link has expired
|
||||
initial_url = mp3_info['url']
|
||||
m_url = re.match(
|
||||
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
|
||||
initial_url)
|
||||
# We build the url we will use to get the final track url
|
||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
||||
# If we could correctly generate the .rand field the url would be
|
||||
# in the "download_url" key
|
||||
final_url = self._proto_relative_url(self._search_regex(
|
||||
r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'url': final_url,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url'),
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*(\d+)', cpl,
|
||||
'beeg version', default=None) or self._search_regex(
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '1750'
|
||||
beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
|
||||
beeg_version = beeg_version or '2000'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
|
||||
video = self._download_json(
|
||||
'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
|
||||
def split(o, e):
|
||||
|
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import unified_timestamp
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1463440500,
|
||||
'upload_date': '20160516',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
@@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
|
||||
'Syndication\\Curb.tv',
|
||||
'Content\\News'
|
||||
],
|
||||
'tags': ['CBS 2 News Evening'],
|
||||
},
|
||||
}, {
|
||||
# SendtoNews embed
|
||||
@@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1479962220,
|
||||
'upload_date': '20161124',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -62,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
timestamp = unified_timestamp(time_str)
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
|
||||
if time_str:
|
||||
timestamp = unified_timestamp(time_str)
|
||||
else:
|
||||
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
|
@@ -5,14 +5,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
parse_duration
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CDAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
|
||||
_BASE_URL = 'http://www.cda.pl/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cda.pl/video/5749950c',
|
||||
'md5': '6f844bf51b15f31fae165365707ae970',
|
||||
@@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'height': 720,
|
||||
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
|
||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'average_rating': float,
|
||||
'duration': 39
|
||||
}
|
||||
}, {
|
||||
@@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
|
||||
'id': '57413289',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lądowanie na lotnisku na Maderze',
|
||||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137
|
||||
}
|
||||
}, {
|
||||
@@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
webpage = self._download_webpage(
|
||||
self._BASE_URL + '/video/' + video_id, video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
|
||||
formats = []
|
||||
|
||||
uploader = self._search_regex(r'''(?x)
|
||||
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
|
||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||
''', webpage, 'uploader', default=None, group='uploader')
|
||||
view_count = self._search_regex(
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
webpage, 'rating', fatal=False, group='rating_value')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
'average_rating': float_or_none(average_rating),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
unpacked = decode_packed_codes(page)
|
||||
format_url = self._search_regex(
|
||||
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
|
||||
'%s url' % version, fatal=False, group='url')
|
||||
if not format_url:
|
||||
json_str = self._search_regex(
|
||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||
'%s player_json' % version, fatal=False, group='player_data')
|
||||
if not json_str:
|
||||
return
|
||||
player_data = self._parse_json(
|
||||
json_str, '%s player_data' % version, fatal=False)
|
||||
if not player_data:
|
||||
return
|
||||
video = player_data.get('video')
|
||||
if not video or 'file' not in video:
|
||||
self.report_warning('Unable to extract %s version information' % version)
|
||||
return
|
||||
f = {
|
||||
'url': format_url,
|
||||
'url': video['file'],
|
||||
}
|
||||
m = re.search(
|
||||
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
|
||||
@@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
|
||||
})
|
||||
info_dict['formats'].append(f)
|
||||
if not info_dict['duration']:
|
||||
info_dict['duration'] = parse_duration(self._search_regex(
|
||||
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
|
||||
unpacked, 'duration', fatal=False, group='duration'))
|
||||
info_dict['duration'] = parse_duration(video.get('duration'))
|
||||
|
||||
extract_format(webpage, 'default')
|
||||
|
||||
@@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
|
||||
self._BASE_URL + href, video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
# Manually report warning because empty page is returned when
|
||||
# invalid version is requested.
|
||||
|
@@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
@@ -885,7 +886,7 @@ class InfoExtractor(object):
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl'),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@@ -1279,9 +1280,10 @@ class InfoExtractor(object):
|
||||
}
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
f['width'] = int(mobj.group('width'))
|
||||
f['height'] = int(mobj.group('height'))
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
@@ -1539,7 +1541,7 @@ class InfoExtractor(object):
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
@@ -1701,7 +1703,7 @@ class InfoExtractor(object):
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
@@ -1719,7 +1721,7 @@ class InfoExtractor(object):
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
@@ -1780,6 +1782,105 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||
res = self._download_webpage_handle(
|
||||
ism_url, video_id,
|
||||
note=note or 'Downloading ISM manifest',
|
||||
errnote=errnote or 'Failed to download ISM manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
ism, urlh = res
|
||||
|
||||
return self._parse_ism_formats(
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
|
||||
return []
|
||||
|
||||
duration = int(ism_doc.attrib['Duration'])
|
||||
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
|
||||
|
||||
formats = []
|
||||
for stream in ism_doc.findall('StreamIndex'):
|
||||
stream_type = stream.get('Type')
|
||||
if stream_type not in ('video', 'audio'):
|
||||
continue
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
for track in stream.findall('QualityLevel'):
|
||||
fourcc = track.get('FourCC')
|
||||
# TODO: add support for WVC1 and WMAP
|
||||
if fourcc not in ('H264', 'AVC1', 'AACL'):
|
||||
self.report_warning('%s is not a supported codec' % fourcc)
|
||||
continue
|
||||
tbr = int(track.attrib['Bitrate']) // 1000
|
||||
width = int_or_none(track.get('MaxWidth'))
|
||||
height = int_or_none(track.get('MaxHeight'))
|
||||
sampling_rate = int_or_none(track.get('SamplingRate'))
|
||||
|
||||
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
|
||||
track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
|
||||
|
||||
fragments = []
|
||||
fragment_ctx = {
|
||||
'time': 0,
|
||||
}
|
||||
stream_fragments = stream.findall('c')
|
||||
for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
|
||||
fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
|
||||
fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
|
||||
fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
|
||||
if not fragment_ctx['duration']:
|
||||
try:
|
||||
next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
|
||||
except IndexError:
|
||||
next_fragment_time = duration
|
||||
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
|
||||
for _ in range(fragment_repeat):
|
||||
fragments.append({
|
||||
'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
|
||||
'duration': fragment_ctx['duration'] / stream_timescale,
|
||||
})
|
||||
fragment_ctx['time'] += fragment_ctx['duration']
|
||||
|
||||
format_id = []
|
||||
if ism_id:
|
||||
format_id.append(ism_id)
|
||||
if stream_name:
|
||||
format_id.append(stream_name)
|
||||
format_id.append(compat_str(tbr))
|
||||
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': ism_url,
|
||||
'manifest_url': ism_url,
|
||||
'ext': 'ismv' if stream_type == 'video' else 'isma',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
'asr': sampling_rate,
|
||||
'vcodec': 'none' if stream_type == 'audio' else fourcc,
|
||||
'acodec': 'none' if stream_type == 'video' else fourcc,
|
||||
'protocol': 'ism',
|
||||
'fragments': fragments,
|
||||
'_download_params': {
|
||||
'duration': duration,
|
||||
'timescale': stream_timescale,
|
||||
'width': width or 0,
|
||||
'height': height or 0,
|
||||
'fourcc': fourcc,
|
||||
'codec_private_data': track.get('CodecPrivateData'),
|
||||
'sampling_rate': sampling_rate,
|
||||
'channels': int_or_none(track.get('Channels', 2)),
|
||||
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
|
||||
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
|
||||
},
|
||||
})
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
@@ -236,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: yes
|
||||
output += """ScaledBorderAndShadow: no
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'md5': '21c7ff600f545358134fea762a6d42b6',
|
||||
'info_dict': {
|
||||
@@ -24,7 +24,24 @@ class DotsubIE(InfoExtractor):
|
||||
'upload_date': '20131130',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
|
||||
'md5': '2bb4a83896434d5c26be868c609429a3',
|
||||
'info_dict': {
|
||||
'id': '168006778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Apartments and flats in Raipur the white symphony',
|
||||
'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
|
||||
'duration': 290,
|
||||
'timestamp': 1476767794.2809999,
|
||||
'upload_date': '20160525',
|
||||
'uploader': 'parthivi001',
|
||||
'uploader_id': 'user52596202',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
[r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
|
||||
webpage, 'video url')
|
||||
webpage, 'video url', default=None)
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
if not video_url:
|
||||
setup_data = self._parse_json(self._html_search_regex(
|
||||
r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
|
||||
webpage, 'setup data', group='content'), video_id)
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': setup_data['src'],
|
||||
}
|
||||
|
||||
info_dict.update({
|
||||
'title': info['title'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('screenshotURI'),
|
||||
@@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor):
|
||||
'uploader': info.get('user'),
|
||||
'timestamp': float_or_none(info.get('dateCreated'), 1000),
|
||||
'view_count': int_or_none(info.get('numberOfViews')),
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
|
||||
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
|
||||
'info_dict': {
|
||||
@@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.drtuber.com/embed/489939',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><p>([^<]+)<',
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
if status != 200:
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
|
||||
try:
|
||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||
except ExtractorError as ee:
|
||||
@@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
m3u8_formats_dict = {}
|
||||
for f in m3u8_formats:
|
||||
if f.get('height') is not None:
|
||||
m3u8_formats_dict[f['height']] = f
|
||||
|
||||
mp4_data = self._download_json(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON')
|
||||
mp4_url_basename = url_basename(mp4_url)
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
|
||||
if mobj:
|
||||
http_format = m3u8_format.copy()
|
||||
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
|
||||
if not self._is_valid_url(video_url, video_id):
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
continue
|
||||
http_format.update({
|
||||
'url': video_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(http_format)
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
f = m3u8_formats_dict[height].copy()
|
||||
f.update({
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
else:
|
||||
f = {
|
||||
'format_id': 'http-%s' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -1,38 +1,117 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'id': '10365079',
|
||||
'ext': 'mp4',
|
||||
'title': '30 for 30 Shorts: Judging Jewell',
|
||||
'description': None,
|
||||
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
|
||||
'timestamp': 1390936111,
|
||||
'upload_date': '20140128',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'id': '2743663',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
|
||||
'timestamp': 1449446454,
|
||||
'upload_date': '20151207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip/_/id/17989860',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
clip = self._download_json(
|
||||
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
|
||||
video_id)['videos'][0]
|
||||
|
||||
title = clip['headline']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
|
||||
def traverse_source(source, base_source_id=None):
|
||||
for source_id, source in source.items():
|
||||
if isinstance(source, compat_str):
|
||||
extract_source(source, base_source_id)
|
||||
elif isinstance(source, dict):
|
||||
traverse_source(
|
||||
source,
|
||||
'%s-%s' % (base_source_id, source_id)
|
||||
if base_source_id else source_id)
|
||||
|
||||
def extract_source(source_url, source_id=None):
|
||||
if source_url in format_urls:
|
||||
return
|
||||
format_urls.add(source_url)
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url, video_id, f4m_id=source_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=source_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': source_id,
|
||||
})
|
||||
|
||||
traverse_source(clip['links']['source'])
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clip.get('caption') or clip.get('description')
|
||||
thumbnail = clip.get('thumbnail')
|
||||
duration = int_or_none(clip.get('duration'))
|
||||
timestamp = unified_timestamp(clip.get('originalPublishDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ESPNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
|
||||
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
cms = 'espn'
|
||||
if 'data-source="intl"' in webpage:
|
||||
cms = 'intl'
|
||||
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
|
||||
player = self._download_webpage(
|
||||
player_url, video_id)
|
||||
|
||||
pcode = self._search_regex(
|
||||
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
|
||||
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
'- ESPN Video').strip()
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
|
||||
'ie_key': 'OoyalaExternal',
|
||||
'title': title,
|
||||
}
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
|
@@ -267,7 +267,10 @@ from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .espn import ESPNIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
ESPNArticleIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
@@ -296,6 +299,7 @@ from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .fox9 import FOX9IE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
@@ -408,6 +412,10 @@ from .ivi import (
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jamendo import (
|
||||
JamendoIE,
|
||||
JamendoAlbumIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
@@ -592,6 +600,7 @@ from .nhl import (
|
||||
from .nick import (
|
||||
NickIE,
|
||||
NickDeIE,
|
||||
NickNightIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import (
|
||||
@@ -601,6 +610,7 @@ from .ninecninemedia import (
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@@ -667,6 +677,7 @@ from .orf import (
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .pandatv import PandaTVIE
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -740,6 +751,10 @@ from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .rentv import (
|
||||
RENTVIE,
|
||||
RENTVArticleIE,
|
||||
)
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
@@ -796,7 +811,10 @@ from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .shared import (
|
||||
SharedIE,
|
||||
VivoIE,
|
||||
)
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
@@ -947,6 +965,10 @@ from .tv2 import (
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
)
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
@@ -1087,6 +1109,7 @@ from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@@ -1098,6 +1121,10 @@ from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
)
|
||||
from .webcaster import (
|
||||
WebcasterIE,
|
||||
WebcasterFeedIE,
|
||||
)
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
|
||||
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
|
||||
@@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
|
||||
'uploader': 'S. Saint',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'note': 'swf params escaped',
|
||||
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
|
||||
@@ -119,6 +120,18 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
'title': 'Holocaust survivor becomes US citizen',
|
||||
'timestamp': 1477818095,
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -227,43 +240,13 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = item[2][0]['videoData']
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
for item in video_data:
|
||||
format_id = item['stream_type']
|
||||
ret.setdefault(format_id, []).append(item)
|
||||
return ret
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
return webpage, False
|
||||
@@ -276,7 +259,8 @@ class FacebookIE(InfoExtractor):
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
formats = []
|
||||
for format_id, f in video_data.items():
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
if f and isinstance(f, dict):
|
||||
f = [f]
|
||||
if not f or not isinstance(f, list):
|
||||
|
43
youtube_dl/extractor/fox9.py
Normal file
43
youtube_dl/extractor/fox9.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class FOX9IE(AnvatoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fox9.com/news/215123287-story',
|
||||
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
|
||||
'info_dict': {
|
||||
'id': '314473',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bear climbs tree in downtown Duluth',
|
||||
'description': 'md5:6a36bfb5073a411758a752455408ac90',
|
||||
'duration': 51,
|
||||
'timestamp': 1478123580,
|
||||
'upload_date': '20161102',
|
||||
'uploader': 'EPFOX',
|
||||
'categories': ['News', 'Sports'],
|
||||
'tags': ['news', 'video'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.fox9.com/news/investigators/214070684-story',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._parse_json(
|
||||
self._search_regex(
|
||||
r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
|
||||
webpage, 'anvato playlist'),
|
||||
video_id, transform_source=js_to_json)[0]['video']
|
||||
|
||||
return self._get_anvato_videos(
|
||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
|
||||
video_id)
|
@@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
|
||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
|
||||
webpage, 'video path')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
@@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
|
||||
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
|
@@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
'description': 'Please use this to sell something. www.jonlajoie.com',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
|
||||
'only_matching': True,
|
||||
@@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
source_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
|
||||
'format_id': '%s-%d' % (link[1], bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
if source_formats:
|
||||
self._sort_formats(source_formats)
|
||||
|
||||
for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
|
||||
for path, ext in links:
|
||||
ff = f.copy()
|
||||
if ff:
|
||||
if ext != 'mp4':
|
||||
ff = dict(
|
||||
[(k, v) for k, v in ff.items()
|
||||
if k in ('height', 'width', 'format_id')])
|
||||
ff.update({
|
||||
'format_id': ff['format_id'].replace('hls', ext),
|
||||
'ext': ext,
|
||||
'protocol': 'http',
|
||||
})
|
||||
else:
|
||||
ff.update({
|
||||
'format_id': '%s-%d' % (ext, bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
ff['url'] = self._proto_relative_url(
|
||||
'%s%d.%s' % (path, bitrate, ext))
|
||||
formats.append(ff)
|
||||
self._check_formats(formats, video_id)
|
||||
|
||||
formats.extend(m3u8_formats)
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
subtitles = {}
|
||||
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||
|
@@ -47,6 +47,8 @@ from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .redtube import RedTubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
@@ -58,6 +60,7 @@ from .screenwavemedia import ScreenwaveMediaIE
|
||||
from .mtv import MTVServicesEmbeddedIE
|
||||
from .pladform import PladformIE
|
||||
from .videomore import VideomoreIE
|
||||
from .webcaster import WebcasterFeedIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
@@ -1208,20 +1211,6 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 51690,
|
||||
},
|
||||
},
|
||||
# JWPlayer with M3U8
|
||||
{
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
|
||||
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
|
||||
{
|
||||
@@ -1648,6 +1637,10 @@ class GenericIE(InfoExtractor):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif doc.tag == 'SmoothStreamingMedia':
|
||||
info_dict['formats'] = self._parse_ism_formats(doc, url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
smil = self._parse_smil(doc, url, video_id)
|
||||
self._sort_formats(smil['formats'])
|
||||
@@ -1991,11 +1984,6 @@ class GenericIE(InfoExtractor):
|
||||
if sportbox_urls:
|
||||
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
|
||||
|
||||
# Look for embedded PornHub player
|
||||
pornhub_url = PornHubIE._extract_url(webpage)
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
if xhamster_urls:
|
||||
@@ -2006,6 +1994,21 @@ class GenericIE(InfoExtractor):
|
||||
if tnaflix_urls:
|
||||
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||
|
||||
# Look for embedded PornHub player
|
||||
pornhub_urls = PornHubIE._extract_urls(webpage)
|
||||
if pornhub_urls:
|
||||
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
|
||||
|
||||
# Look for embedded DrTuber player
|
||||
drtuber_urls = DrTuberIE._extract_urls(webpage)
|
||||
if drtuber_urls:
|
||||
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
|
||||
|
||||
# Look for embedded RedTube player
|
||||
redtube_urls = RedTubeIE._extract_urls(webpage)
|
||||
if redtube_urls:
|
||||
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -2138,6 +2141,11 @@ class GenericIE(InfoExtractor):
|
||||
if videomore_url:
|
||||
return self.url_result(videomore_url)
|
||||
|
||||
# Look for Webcaster embeds
|
||||
webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
|
||||
if webcaster_url:
|
||||
return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
|
||||
|
||||
# Look for Playwire embeds
|
||||
mobj = re.search(
|
||||
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
|
||||
@@ -2230,6 +2238,16 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||
''', webpage)
|
||||
if mobj:
|
||||
return self.url_result('limelight:media:%s' % mobj.group('id'))
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
@@ -2463,6 +2481,21 @@ class GenericIE(InfoExtractor):
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
elif ext == 'f4m':
|
||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
|
||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||
# Just matching .ism/manifest is not enough to be reliably sure
|
||||
# whether it's actually an ISM manifest or some other streaming
|
||||
# manifest since there are various streaming URL formats
|
||||
# possible (see [1]) as well as some other shenanigans like
|
||||
# .smil/manifest URLs that actually serve an ISM (see [2]) and
|
||||
# so on.
|
||||
# Thus the most reasonable way to solve this is to delegate
|
||||
# to generic extractor in order to look into the contents of
|
||||
# the manifest itself.
|
||||
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
|
||||
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
|
||||
entry_info_dict = self.url_result(
|
||||
smuggle_url(video_url, {'to_generic': True}),
|
||||
GenericIE.ie_key())
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
|
@@ -4,9 +4,6 @@ import itertools
|
||||
import re
|
||||
|
||||
from .common import SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class GoogleSearchIE(SearchInfoExtractor):
|
||||
@@ -34,13 +31,16 @@ class GoogleSearchIE(SearchInfoExtractor):
|
||||
}
|
||||
|
||||
for pagenum in itertools.count():
|
||||
result_url = (
|
||||
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
|
||||
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
|
||||
|
||||
webpage = self._download_webpage(
|
||||
result_url, 'gvsearch:' + query,
|
||||
note='Downloading result page ' + str(pagenum + 1))
|
||||
'http://www.google.com/search',
|
||||
'gvsearch:' + query,
|
||||
note='Downloading result page %s' % (pagenum + 1),
|
||||
query={
|
||||
'tbm': 'vid',
|
||||
'q': query,
|
||||
'start': pagenum * 10,
|
||||
'hl': 'en',
|
||||
})
|
||||
|
||||
for hit_idx, mobj in enumerate(re.finditer(
|
||||
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||
|
@@ -6,12 +6,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
remove_end,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class HellPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
|
||||
'md5': '1fee339c610d2049699ef2aa699439f1',
|
||||
'info_dict': {
|
||||
@@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor):
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://hellporno.net/v/186271/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor):
|
||||
|
||||
video_id = flashvars.get('video_id')
|
||||
thumbnail = flashvars.get('preview_url')
|
||||
ext = flashvars.get('postfix', '.mp4')[1:]
|
||||
ext = determine_ext(flashvars.get('postfix'), 'mp4')
|
||||
|
||||
formats = []
|
||||
for video_url_key in ['video_url', 'video_alt_url']:
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -14,29 +12,24 @@ class HornBunnyIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
|
||||
'md5': '95e40865aedd08eff60272b704852ad7',
|
||||
'md5': 'e20fd862d1894b67564c96f180f43924',
|
||||
'info_dict': {
|
||||
'id': '5227',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'panty slut jerk off instruction',
|
||||
'duration': 550,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, note='Downloading initial webpage')
|
||||
title = self._html_search_regex(
|
||||
r'class="title">(.*?)</h2>', webpage, 'title')
|
||||
redirect_url = self._html_search_regex(
|
||||
r'pg&settings=(.*?)\|0"\);', webpage, 'title')
|
||||
webpage2 = self._download_webpage(redirect_url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'flvMask:(.*?);', webpage2, 'video_url')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
|
||||
@@ -45,12 +38,12 @@ class HornBunnyIE(InfoExtractor):
|
||||
r'<strong>Views:</strong>\s*(\d+)</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'ext': 'flv',
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
107
youtube_dl/extractor/jamendo.py
Normal file
107
youtube_dl/extractor/jamendo.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class JamendoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
|
||||
'info_dict': {
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Stories from Emona I',
|
||||
'thumbnail': 're:^https?://.*\.jpg'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
track_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
% (sub_domain, track_id, format_id),
|
||||
'format_id': format_id,
|
||||
'ext': ext,
|
||||
'quality': quality,
|
||||
} for quality, (format_id, sub_domain, ext) in enumerate((
|
||||
('mp31', 'mp3l', 'mp3'),
|
||||
('mp32', 'mp3d', 'mp3'),
|
||||
('ogg1', 'ogg', 'ogg'),
|
||||
('flac', 'flac', 'flac'),
|
||||
))]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'image', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
'title': 'Duck On Cover'
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
||||
'info_dict': {
|
||||
'id': '1032333',
|
||||
'ext': 'flac',
|
||||
'title': 'Warmachine'
|
||||
}
|
||||
}, {
|
||||
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
||||
'info_dict': {
|
||||
'id': '1032330',
|
||||
'ext': 'flac',
|
||||
'title': 'Without Your Ghost'
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
album_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, mobj.group('display_id'))
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, m.group('path')),
|
||||
ie=JamendoIE.ie_key(),
|
||||
video_id=self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'),
|
||||
'track id', default=None))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)
|
||||
]
|
||||
|
||||
return self.playlist_result(entries, album_id, title)
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -52,8 +51,8 @@ class LiTVIE(InfoExtractor):
|
||||
'skip': 'Georestricted to Taiwan',
|
||||
}]
|
||||
|
||||
def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
|
||||
episode_title = view_data['title']
|
||||
def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
|
||||
episode_title = program_info['title']
|
||||
content_id = season_list['contentId']
|
||||
|
||||
if prompt:
|
||||
@@ -61,7 +60,7 @@ class LiTVIE(InfoExtractor):
|
||||
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
|
||||
self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
|
||||
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||
for episode in season_list['episode']]
|
||||
|
||||
@@ -80,19 +79,15 @@ class LiTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
|
||||
r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
|
||||
webpage)))
|
||||
|
||||
vod_data = self._parse_json(self._search_regex(
|
||||
'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
video_id)
|
||||
|
||||
season_list = list(vod_data.get('seasonList', {}).values())
|
||||
season_list = list(program_info.get('seasonList', {}).values())
|
||||
if season_list:
|
||||
if not noplaylist:
|
||||
return self._extract_playlist(
|
||||
season_list[0], video_id, vod_data, view_data,
|
||||
season_list[0], video_id, program_info,
|
||||
prompt=noplaylist_prompt)
|
||||
|
||||
if noplaylist_prompt:
|
||||
@@ -102,8 +97,8 @@ class LiTVIE(InfoExtractor):
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
# If georestricted, there are no embedded data, so an extra request is
|
||||
# necessary to get the error code
|
||||
if 'assetId' not in view_data:
|
||||
view_data = self._download_json(
|
||||
if 'assetId' not in program_info:
|
||||
program_info = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
|
||||
query={'contentId': video_id},
|
||||
headers={'Accept': 'application/json'})
|
||||
@@ -112,9 +107,9 @@ class LiTVIE(InfoExtractor):
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
if not video_data:
|
||||
payload = {
|
||||
'assetId': view_data['assetId'],
|
||||
'watchDevices': view_data['watchDevices'],
|
||||
'contentType': view_data['contentType'],
|
||||
'assetId': program_info['assetId'],
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
}
|
||||
video_data = self._download_json(
|
||||
'https://www.litv.tv/vod/getMainUrl', video_id,
|
||||
@@ -136,11 +131,11 @@ class LiTVIE(InfoExtractor):
|
||||
# LiTV HLS segments doesn't like compressions
|
||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
|
||||
|
||||
title = view_data['title'] + view_data.get('secondaryMark', '')
|
||||
description = view_data.get('description')
|
||||
thumbnail = view_data.get('imageFile')
|
||||
categories = [item['name'] for item in vod_data.get('category', [])]
|
||||
episode = int_or_none(view_data.get('episode'))
|
||||
title = program_info['title'] + program_info.get('secondaryMark', '')
|
||||
description = program_info.get('description')
|
||||
thumbnail = program_info.get('imageFile')
|
||||
categories = [item['name'] for item in program_info.get('category', [])]
|
||||
episode = int_or_none(program_info.get('episode'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
|
||||
formats = []
|
||||
|
||||
for sources in settings.findall(compat_xpath('.//MediaSources')):
|
||||
if sources.get('videoType') == 'smoothstreaming':
|
||||
continue
|
||||
sources_type = sources.get('videoType')
|
||||
for source in sources.findall(compat_xpath('./MediaSource')):
|
||||
video_url = source.text
|
||||
if not video_url or not video_url.startswith('http'):
|
||||
continue
|
||||
if sources_type == 'smoothstreaming':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
video_url, video_id, 'mss', fatal=False))
|
||||
continue
|
||||
video_mode = source.get('videoMode')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
|
||||
|
@@ -1,19 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
smuggle_url,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -72,76 +73,133 @@ class MiTeleBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class MiTeleIE(MiTeleBaseIE):
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
# MD5 is unstable
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
'id': '57b0dfb9c715da65618b4afa',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
'series': 'Diario de',
|
||||
'season': 'La redacción',
|
||||
'season_number': 14,
|
||||
'season_id': 'diario_de_t14_11981',
|
||||
'episode': 'Programa 144',
|
||||
'episode_number': 3,
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'duration': 2913,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
'info_dict': {
|
||||
'id': 'eLZSwoEd1S3pVyUm8lc6F',
|
||||
'display_id': 'programa-226',
|
||||
'id': '57b0de3dc915da14058b4876',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
|
||||
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
|
||||
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
||||
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
||||
'series': 'Cuarto Milenio',
|
||||
'season': 'Temporada 6',
|
||||
'season_number': 6,
|
||||
'season_id': 'cuarto_milenio_t06_12715',
|
||||
'episode': 'Programa 226',
|
||||
'episode_number': 24,
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'duration': 7312,
|
||||
'duration': 7313,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
gigya_url = self._search_regex(
|
||||
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
|
||||
webpage, 'gigya', default=None)
|
||||
gigya_sc = self._download_webpage(
|
||||
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
|
||||
video_id, 'Downloading gigya script')
|
||||
|
||||
info = self._get_player_info(url, webpage)
|
||||
# Get a appKey/uuid for getting the session key
|
||||
appKey_var = self._search_regex(
|
||||
r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
|
||||
gigya_sc, 'appKey variable')
|
||||
appKey = self._search_regex(
|
||||
r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
|
||||
webpage, 'title', default=None)
|
||||
session_json = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/session',
|
||||
video_id, 'Downloading session keys', query={
|
||||
'appKey': appKey,
|
||||
'uuid': compat_str(uuid.uuid4()),
|
||||
})
|
||||
|
||||
mobj = re.search(r'''(?sx)
|
||||
class="Destacado-text"[^>]*>.*?<h1>\s*
|
||||
<span>(?P<series>[^<]+)</span>\s*
|
||||
<span>(?P<season>[^<]+)</span>\s*
|
||||
<span>(?P<episode>[^<]+)</span>''', webpage)
|
||||
series, season, episode = mobj.groups() if mobj else [None] * 3
|
||||
paths = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
|
||||
video_id, 'Downloading paths JSON',
|
||||
query={'sessionKey': compat_str(session_json['sessionKey'])})
|
||||
|
||||
if not title:
|
||||
if mobj:
|
||||
title = '%s - %s - %s' % (series, season, episode)
|
||||
else:
|
||||
title = remove_start(self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
|
||||
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
||||
source = self._download_json(
|
||||
'http://%s%s%s/docs/%s' % (
|
||||
ooyala_s['base_url'], ooyala_s['full_path'],
|
||||
ooyala_s['provider_id'], video_id),
|
||||
video_id, 'Downloading data JSON', query={
|
||||
'include_titles': 'Series,Season',
|
||||
'product_name': 'test',
|
||||
'format': 'full',
|
||||
})['hits']['hits'][0]['_source']
|
||||
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
embedCode = source['offers'][0]['embed_codes'][0]
|
||||
titles = source['localizable_titles'][0]
|
||||
|
||||
title = titles.get('title_medium') or titles['title_long']
|
||||
|
||||
description = titles.get('summary_long') or titles.get('summary_medium')
|
||||
|
||||
def get(key1, key2):
|
||||
value1 = source.get(key1)
|
||||
if not value1 or not isinstance(value1, list):
|
||||
return
|
||||
if not isinstance(value1[0], dict):
|
||||
return
|
||||
return value1[0].get(key2)
|
||||
|
||||
series = get('localizable_titles_series', 'title_medium')
|
||||
|
||||
season = get('localizable_titles_season', 'title_medium')
|
||||
season_number = int_or_none(source.get('season_number'))
|
||||
season_id = source.get('season_id')
|
||||
|
||||
episode = titles.get('title_sort_name')
|
||||
episode_number = int_or_none(source.get('episode_number'))
|
||||
|
||||
duration = parse_duration(get('videos', 'duration'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
# for some reason only HLS is supported
|
||||
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'description': description,
|
||||
'series': series,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'season_id': season_id,
|
||||
'episode': episode,
|
||||
})
|
||||
return info
|
||||
'episode_number': episode_number,
|
||||
'duration': duration,
|
||||
'thumbnail': get('images', 'url'),
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
|
||||
'md5': '42b5a0352d4933a7bd54f2104f481244',
|
||||
|
@@ -69,10 +69,9 @@ class MSNIE(InfoExtractor):
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
# .ism is not yet supported (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8118)
|
||||
if ext == 'ism':
|
||||
continue
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
||||
if 'm3u8' in format_url:
|
||||
# m3u8_native should not be used here until
|
||||
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import update_url_query
|
||||
|
||||
@@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor):
|
||||
|
||||
class NickDeIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nick.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
|
||||
'only_matching': True,
|
||||
@@ -79,15 +81,43 @@ class NickDeIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_mrss_url(self, webpage, host):
|
||||
return update_url_query(self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
|
||||
{'siteKey': host})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mrss_url = update_url_query(self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
|
||||
{'siteKey': 'nick.de'})
|
||||
mrss_url = self._extract_mrss_url(webpage, host)
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
|
||||
class NickNightIE(NickDeIE):
|
||||
IE_NAME = 'nicknight'
|
||||
_VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nicknight.at/shows/977-awkward',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nicknight.at/shows/1900-faking-it',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_mrss_url(self, webpage, *args):
|
||||
return self._search_regex(
|
||||
r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
|
||||
'mrss url', group='url')
|
||||
|
62
youtube_dl/extractor/nobelprize.py
Normal file
62
youtube_dl/extractor/nobelprize.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NobelPrizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
|
||||
'info_dict': {
|
||||
'id': '2636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Announcement of the 2016 Nobel Prize in Physics',
|
||||
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
|
||||
'config'), video_id, js_to_json)['media']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for source in media.get('source', []):
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(source_src, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('itemprop', 'description', webpage),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,6 +15,25 @@ from ..utils import (
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_faked_ip = None
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
# NRK checks X-Forwarded-For HTTP header in order to figure out the
|
||||
# origin of the client behind proxy. This allows to bypass geo
|
||||
# restriction by faking this header's value to some Norway IP.
|
||||
# We will do so once we encounter any geo restriction error.
|
||||
if self._faked_ip:
|
||||
# NB: str is intentional
|
||||
kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
|
||||
return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
def _fake_ip(self):
|
||||
# Use fake IP from 37.191.128.0/17 in order to workaround geo
|
||||
# restriction
|
||||
def octet(lb=0, ub=255):
|
||||
return random.randint(lb, ub)
|
||||
self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -24,6 +44,8 @@ class NRKBaseIE(InfoExtractor):
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
|
||||
|
||||
entries = []
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
@@ -54,6 +76,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'http_headers': http_headers,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
@@ -70,10 +93,23 @@ class NRKBaseIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
if data.get('usageRights', {}).get('isGeoBlocked'):
|
||||
raise ExtractorError(
|
||||
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
expected=True)
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type and not self._faked_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted, trying to fake IP')
|
||||
self._fake_ip()
|
||||
return self._real_extract(url)
|
||||
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
expected=True)
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
|
@@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor):
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if format_id == 'ism':
|
||||
# TODO: Support Microsoft Smooth Streaming
|
||||
continue
|
||||
formats.extend(self._extract_ism_formats(
|
||||
video_url, video_id, 'mss', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
|
@@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
||||
|
||||
def _extract(self, content_tree_url, video_id, domain='example.org'):
|
||||
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
|
||||
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
||||
metadata = content_tree[list(content_tree)[0]]
|
||||
embed_code = metadata['embed_code']
|
||||
@@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': 'mp4,rtmp,m3u8,hds',
|
||||
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
|
||||
}), video_id)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
@@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
embed_code = self._match_id(url)
|
||||
domain = smuggled_data.get('domain')
|
||||
supportedformats = smuggled_data.get('supportedformats')
|
||||
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
|
||||
return self._extract(content_tree_url, embed_code, domain)
|
||||
return self._extract(content_tree_url, embed_code, domain, supportedformats)
|
||||
|
||||
|
||||
class OoyalaExternalIE(OoyalaBaseIE):
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
@@ -10,6 +12,10 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..jsinterp import (
|
||||
JSInterpreter,
|
||||
_NAME_RE
|
||||
)
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
@@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def openload_decode(self, txt):
|
||||
symbol_dict = {
|
||||
'(゚Д゚) [゚Θ゚]': '_',
|
||||
'(゚Д゚) [゚ω゚ノ]': 'a',
|
||||
'(゚Д゚) [゚Θ゚ノ]': 'b',
|
||||
'(゚Д゚) [\'c\']': 'c',
|
||||
'(゚Д゚) [゚ー゚ノ]': 'd',
|
||||
'(゚Д゚) [゚Д゚ノ]': 'e',
|
||||
'(゚Д゚) [1]': 'f',
|
||||
'(゚Д゚) [\'o\']': 'o',
|
||||
'(o゚ー゚o)': 'u',
|
||||
'(゚Д゚) [\'c\']': 'c',
|
||||
'((゚ー゚) + (o^_^o))': '7',
|
||||
'((o^_^o) +(o^_^o) +(c^_^o))': '6',
|
||||
'((゚ー゚) + (゚Θ゚))': '5',
|
||||
'(-~3)': '4',
|
||||
'(-~-~1)': '3',
|
||||
'(-~1)': '2',
|
||||
'(-~0)': '1',
|
||||
'((c^_^o)-(c^_^o))': '0',
|
||||
}
|
||||
delim = '(゚Д゚)[゚ε゚]+'
|
||||
end_token = '(゚Д゚)[゚o゚]'
|
||||
symbols = '|'.join(map(re.escape, symbol_dict.keys()))
|
||||
txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
|
||||
ret = ''
|
||||
for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
|
||||
for aachar in aacode.split(delim):
|
||||
if aachar.isdigit():
|
||||
ret += compat_chr(int(aachar, 8))
|
||||
else:
|
||||
m = re.match(r'^u([\da-f]{4})$', aachar)
|
||||
if m:
|
||||
ret += compat_chr(int(m.group(1), 16))
|
||||
else:
|
||||
self.report_warning("Cannot decode: %s" % aachar)
|
||||
return ret
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||
@@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor):
|
||||
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
|
||||
webpage, 'encrypted data')
|
||||
|
||||
magic = compat_ord(enc_data[-1])
|
||||
enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
|
||||
webpage, 'encrypted code')
|
||||
|
||||
js_code = self.openload_decode(enc_code)
|
||||
jsi = JSInterpreter(js_code)
|
||||
|
||||
m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
|
||||
m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
|
||||
|
||||
offset = jsi.call_function(m_offset_fun)
|
||||
diff = jsi.call_function(m_diff_fun)
|
||||
|
||||
video_url_chars = []
|
||||
|
||||
for idx, c in enumerate(enc_data):
|
||||
j = compat_ord(c)
|
||||
if j == magic:
|
||||
j -= 1
|
||||
elif j == magic - 1:
|
||||
j += 1
|
||||
if j >= 33 and j <= 126:
|
||||
j = ((j + 14) % 94) + 33
|
||||
if idx == len(enc_data) - 1:
|
||||
j += 2
|
||||
if idx == len(enc_data) - offset:
|
||||
j += diff
|
||||
video_url_chars += compat_chr(j)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
||||
|
91
youtube_dl/extractor/pandatv.py
Normal file
91
youtube_dl/extractor/pandatv.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class PandaTVIE(InfoExtractor):
|
||||
IE_DESC = '熊猫TV'
|
||||
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.panda.tv/10091',
|
||||
'info_dict': {
|
||||
'id': '10091',
|
||||
'title': 're:.+',
|
||||
'uploader': '囚徒',
|
||||
'ext': 'flv',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Live stream is offline',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_json(
|
||||
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
||||
|
||||
error_code = config.get('errno', 0)
|
||||
if error_code is not 0:
|
||||
raise ExtractorError(
|
||||
'%s returned error %s: %s'
|
||||
% (self.IE_NAME, error_code, config['errmsg']),
|
||||
expected=True)
|
||||
|
||||
data = config['data']
|
||||
video_info = data['videoinfo']
|
||||
|
||||
# 2 = live, 3 = offline
|
||||
if video_info.get('status') != '2':
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
|
||||
title = data['roominfo']['name']
|
||||
uploader = data.get('hostinfo', {}).get('name')
|
||||
room_key = video_info['room_key']
|
||||
stream_addr = video_info.get(
|
||||
'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
|
||||
|
||||
# Reverse engineered from web player swf
|
||||
# (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
|
||||
# writing).
|
||||
plflag0, plflag1 = video_info['plflag'].split('_')
|
||||
plflag0 = int(plflag0) - 1
|
||||
if plflag1 == '21':
|
||||
plflag0 = 10
|
||||
plflag1 = '4'
|
||||
live_panda = 'live_panda' if plflag0 < 1 else ''
|
||||
|
||||
quality_key = qualities(['OD', 'HD', 'SD'])
|
||||
suffix = ['_small', '_mid', '']
|
||||
formats = []
|
||||
for k, v in stream_addr.items():
|
||||
if v != '1':
|
||||
continue
|
||||
quality = quality_key(k)
|
||||
if quality <= 0:
|
||||
continue
|
||||
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
||||
formats.append({
|
||||
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
||||
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
||||
'format_id': '%s-%s' % (k, ext),
|
||||
'quality': quality,
|
||||
'source_preference': pref,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
@@ -8,30 +8,31 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class PlaysTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
|
||||
_TEST = {
|
||||
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||
_VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
|
||||
_TESTS = [{
|
||||
'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||
'md5': 'dfeac1198506652b5257a62762cec7bc',
|
||||
'info_dict': {
|
||||
'id': '56af17f56c95335490',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you outplay the Azir wall',
|
||||
'title': 'Bjergsen - When you outplay the Azir wall',
|
||||
'description': 'Posted by Bjergsen',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://plays.tv/embeds/56af17f56c95335490',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://plays.tv/video/%s' % video_id, video_id)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id,)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
content = self._parse_json(
|
||||
self._search_regex(
|
||||
r'R\.bindContent\(({.+?})\);', webpage,
|
||||
'content'), video_id)['content']
|
||||
mpd_url, sources = re.search(
|
||||
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
|
||||
content).groups()
|
||||
webpage).groups()
|
||||
formats = self._extract_mpd_formats(
|
||||
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
|
||||
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
|
||||
@@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -119,14 +120,17 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
@staticmethod
|
||||
def _convert_subtitles(duration, subs):
|
||||
srt = ''
|
||||
TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
|
||||
TEXT_KEYS = ('text', 'Text')
|
||||
for num, current in enumerate(subs):
|
||||
current = subs[num]
|
||||
start, text = float_or_none(
|
||||
current.get('DisplayTimeOffset')), current.get('Text')
|
||||
start, text = (
|
||||
float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
|
||||
dict_get(current, TEXT_KEYS))
|
||||
if start is None or text is None:
|
||||
continue
|
||||
end = duration if num == len(subs) - 1 else float_or_none(
|
||||
subs[num + 1].get('DisplayTimeOffset'))
|
||||
dict_get(subs[num + 1], TIME_OFFSET_KEYS))
|
||||
if end is None:
|
||||
continue
|
||||
srt += os.linesep.join(
|
||||
|
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
|
||||
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[0-9a-z]+)
|
||||
(?P<id>[\da-z]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
return str_to_int(self._search_regex(
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class Puls4IE(ProSiebenSat1BaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
|
||||
'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
|
||||
@@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE):
|
||||
'upload_date': '20160830',
|
||||
'uploader': 'PULS_4',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_TOKEN = 'puls4'
|
||||
_SALT = '01!kaNgaiNgah1Ie4AeSha'
|
||||
|
@@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
|
||||
if closed_caption_url:
|
||||
subtitles['fr'] = [{
|
||||
'url': closed_caption_url,
|
||||
'ext': determine_ext(closed_caption_url, 'vtt'),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_meta('Title'),
|
||||
@@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'season_number': int_or_none('SrcSaison'),
|
||||
'episode_number': int_or_none('SrcEpisode'),
|
||||
'upload_date': unified_strdate(get_meta('Date')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -10,8 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.redtube.com/66418',
|
||||
'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
'info_dict': {
|
||||
@@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.redtube.com/%s' % video_id, video_id)
|
||||
|
||||
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
|
||||
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
|
||||
|
76
youtube_dl/extractor/rentv.py
Normal file
76
youtube_dl/extractor/rentv.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class RENTVIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ren.tv/video/epizod/118577',
|
||||
'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
|
||||
'info_dict': {
|
||||
'id': '118577',
|
||||
'ext': 'mp4',
|
||||
'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ren.tv/player/118577',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'rentv:118577',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
|
||||
jw_config = self._parse_json(self._search_regex(
|
||||
r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
|
||||
return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
|
||||
|
||||
|
||||
class RENTVArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
|
||||
'md5': 'ebd63c4680b167693745ab91343df1d6',
|
||||
'info_dict': {
|
||||
'id': '136472',
|
||||
'ext': 'mp4',
|
||||
'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
|
||||
'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
|
||||
}
|
||||
}, {
|
||||
# TODO: invalid m3u8
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
|
||||
entries = []
|
||||
for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
|
||||
media_id = config_profile.get('mediaid')
|
||||
if not media_id:
|
||||
continue
|
||||
media_id = compat_str(media_id)
|
||||
entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
|
||||
return self.playlist_result(entries, display_id)
|
@@ -1,17 +1,24 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
urlencode_postdata,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ShahidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
|
||||
_NETRC_MACHINE = 'shahid'
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
|
||||
'info_dict': {
|
||||
@@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# shahid plus subscriber only
|
||||
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _call_api(self, path, video_id, note):
|
||||
data = self._download_json(
|
||||
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
}).get('data', {})
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
|
||||
try:
|
||||
user_data = self._download_json(
|
||||
'https://shahid.mbc.net/wd/service/users/login',
|
||||
None, 'Logging in', data=json.dumps({
|
||||
'email': email,
|
||||
'password': password,
|
||||
'basic': 'false',
|
||||
}).encode('utf-8'), headers={
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
})['user']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
fail_data = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), None, fatal=False)
|
||||
if fail_data:
|
||||
faults = fail_data.get('faults', [])
|
||||
faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
|
||||
if faults_message:
|
||||
raise ExtractorError(faults_message, expected=True)
|
||||
raise
|
||||
|
||||
self._download_webpage(
|
||||
'https://shahid.mbc.net/populateContext',
|
||||
None, 'Populate Context', data=urlencode_postdata({
|
||||
'firstName': user_data['firstName'],
|
||||
'lastName': user_data['lastName'],
|
||||
'userName': user_data['email'],
|
||||
'csg_user_name': user_data['email'],
|
||||
'subscriberId': user_data['id'],
|
||||
'sessionId': user_data['sessionId'],
|
||||
}))
|
||||
|
||||
def _get_api_data(self, response):
|
||||
data = response.get('data', {})
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
@@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
|
||||
return data
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
player = self._call_api(
|
||||
'Content/Episode/%s' % video_id,
|
||||
video_id, 'Downloading player JSON')
|
||||
player = self._get_api_data(self._download_json(
|
||||
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
|
||||
video_id, 'Downloading player JSON'))
|
||||
|
||||
if player.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
@@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = self._call_api(
|
||||
'episode/%s' % video_id, video_id,
|
||||
'Downloading video JSON')['episode']
|
||||
video = self._get_api_data(self._download_json(
|
||||
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
|
||||
video_id, 'Downloading video JSON', query={
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
}))[page_type]
|
||||
|
||||
title = video['title']
|
||||
categories = [
|
||||
|
@@ -10,11 +10,38 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SharedIE(InfoExtractor):
|
||||
IE_DESC = 'shared.sx and vivo.sx'
|
||||
_VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
|
||||
class SharedBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
_TESTS = [{
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
if self._FILE_NOT_FOUND in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = self._extract_video_url(webpage, video_id, url)
|
||||
|
||||
title = base64.b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'filesize': filesize,
|
||||
'title': title,
|
||||
}
|
||||
|
||||
|
||||
class SharedIE(SharedBaseIE):
|
||||
IE_DESC = 'shared.sx'
|
||||
_VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
|
||||
_FILE_NOT_FOUND = '>File does not exist<'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://shared.sx/0060718775',
|
||||
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
|
||||
'info_dict': {
|
||||
@@ -23,7 +50,32 @@ class SharedIE(InfoExtractor):
|
||||
'title': 'Bmp4',
|
||||
'filesize': 1720110,
|
||||
},
|
||||
}, {
|
||||
}
|
||||
|
||||
def _extract_video_url(self, webpage, video_id, url):
|
||||
download_form = self._hidden_inputs(webpage)
|
||||
|
||||
video_page = self._download_webpage(
|
||||
url, video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(download_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
video_page, 'video URL', group='url')
|
||||
|
||||
return video_url
|
||||
|
||||
|
||||
class VivoIE(SharedBaseIE):
|
||||
IE_DESC = 'vivo.sx'
|
||||
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
|
||||
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://vivo.sx/d7ddda0e78',
|
||||
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
|
||||
'info_dict': {
|
||||
@@ -32,43 +84,13 @@ class SharedIE(InfoExtractor):
|
||||
'title': 'Chicken',
|
||||
'filesize': 528031,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
if '>File does not exist<' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = self._hidden_inputs(webpage)
|
||||
|
||||
video_page = self._download_webpage(
|
||||
urlh.geturl(), video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(download_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': urlh.geturl(),
|
||||
})
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
video_page, 'video URL', group='url')
|
||||
title = base64.b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
video_page, 'thumbnail', default=None, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'filesize': filesize,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
def _extract_video_url(self, webpage, video_id, *args):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream', group='url'),
|
||||
video_id,
|
||||
transform_source=lambda x: base64.b64decode(
|
||||
x.encode('ascii')).decode('utf-8'))[0]
|
||||
|
@@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
|
||||
_TEST = {
|
||||
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
|
||||
'md5': 'e482a414a38db73087450e3a6ce69d00',
|
||||
'md5': '3316ff838ae5bb7f642537825e1e90d2',
|
||||
'info_dict': {
|
||||
'id': '0_6snoelag',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
|
||||
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
|
||||
'timestamp': 1429467813,
|
||||
'upload_date': '20150419',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embedded_video_info_str = self._html_search_regex(
|
||||
r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
|
||||
|
||||
embedded_video_info = self._parse_json(
|
||||
embedded_video_info_str, video_id,
|
||||
transform_source=lambda s: s.replace('\\', ''))
|
||||
embedded_video_info = self._parse_json(self._html_search_regex(
|
||||
r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(
|
||||
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
|
||||
|
@@ -15,11 +15,11 @@ from ..utils import (
|
||||
class TouTvIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'toutv'
|
||||
IE_NAME = 'tou.tv'
|
||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
|
||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
|
||||
_access_token = None
|
||||
_claims = None
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||
'info_dict': {
|
||||
'id': '122017',
|
||||
@@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ici.tou.tv/hackers',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
@@ -53,7 +56,7 @@ class TouTvIE(InfoExtractor):
|
||||
'state': state,
|
||||
})
|
||||
login_form = self._search_regex(
|
||||
r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form')
|
||||
r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form')
|
||||
form_data = self._hidden_inputs(login_form)
|
||||
form_data.update({
|
||||
'login-email': email,
|
||||
|
@@ -9,7 +9,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'tubitv'
|
||||
_TEST = {
|
||||
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
|
||||
'md5': '43ac06be9326f41912dc64ccf7a80320',
|
||||
'info_dict': {
|
||||
'id': '283829',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Comedian at The Friday',
|
||||
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
|
||||
'uploader': 'Indie Rights Films',
|
||||
'upload_date': '20160111',
|
||||
'timestamp': 1452555979,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS download',
|
||||
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
|
||||
title = video_data['n']
|
||||
title = video_data['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mh'], video_id, 'mp4', 'm3u8_native')
|
||||
self._proto_relative_url(video_data['url']),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_url in video_data.get('thumbnails', []):
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail_url),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for sub in video_data.get('sb', []):
|
||||
sub_url = sub.get('u')
|
||||
for sub in video_data.get('subtitles', []):
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('l', 'en'), []).append({
|
||||
'url': sub_url,
|
||||
subtitles.setdefault(sub.get('lang', 'English'), []).append({
|
||||
'url': self._proto_relative_url(sub_url),
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': video_data.get('ph'),
|
||||
'description': video_data.get('d'),
|
||||
'duration': int_or_none(video_data.get('s')),
|
||||
'timestamp': parse_iso8601(video_data.get('u')),
|
||||
'uploader': video_data.get('on'),
|
||||
'thumbnails': thumbnails,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'uploader_id': video_data.get('publisher_id'),
|
||||
}
|
||||
|
65
youtube_dl/extractor/tvanouvelles.py
Normal file
65
youtube_dl/extractor/tvanouvelles.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
|
||||
|
||||
class TVANouvellesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tvanouvelles.ca/videos/5117035533001',
|
||||
'info_dict': {
|
||||
'id': '5117035533001',
|
||||
'ext': 'mp4',
|
||||
'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications',
|
||||
'description': 'md5:479653b7c8cf115747bf5118066bd8b3',
|
||||
'uploader_id': '1741764581',
|
||||
'timestamp': 1473352030,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
BrightcoveNewIE.ie_key(), brightcove_id)
|
||||
|
||||
|
||||
class TVANouvellesArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte',
|
||||
'info_dict': {
|
||||
'id': 'des-policiers-qui-ont-la-meche-un-peu-courte',
|
||||
'title': 'Des policiers qui ont «la mèche un peu courte»?',
|
||||
'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'),
|
||||
ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id'))
|
||||
for mobj in re.finditer(
|
||||
r'data-video-id=(["\'])?(?P<id>\d+)', webpage)]
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, display_id, title, description)
|
@@ -69,7 +69,8 @@ class TVPIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._search_regex([
|
||||
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
|
||||
"object_id\s*:\s*'(\d+)'"], webpage, 'video id')
|
||||
r"object_id\s*:\s*'(\d+)'",
|
||||
r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'tvp:' + video_id,
|
||||
@@ -138,6 +139,9 @@ class TVPEmbedIE(InfoExtractor):
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url_base + '.ism/video.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
video_url_base + '.ism/Manifest',
|
||||
video_id, 'mss', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_base + '.ism/video.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
|
@@ -398,7 +398,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
stream = self._call_api(
|
||||
'kraken/streams/%s' % channel_id, channel_id,
|
||||
'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
|
||||
'Downloading stream JSON').get('stream')
|
||||
|
||||
if not stream:
|
||||
@@ -417,6 +417,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
query = {
|
||||
'allow_source': 'true',
|
||||
'allow_audio_only': 'true',
|
||||
'allow_spectre': 'true',
|
||||
'p': random.randint(1000000, 10000000),
|
||||
'player': 'twitchweb',
|
||||
'segment_preference': '4',
|
||||
|
@@ -25,7 +25,7 @@ class TwitterBaseIE(InfoExtractor):
|
||||
|
||||
class TwitterCardIE(TwitterBaseIE):
|
||||
IE_NAME = 'twitter:card'
|
||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||
@@ -84,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'title': 'Twitter web player',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/videos/752274308186120192',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VesselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
|
||||
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
||||
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
||||
_NETRC_MACHINE = 'vessel'
|
||||
@@ -32,12 +32,18 @@ class VesselIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/F01_dsLj1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/RRX-sir-J',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE):
|
||||
'artist': 'Hurts',
|
||||
'genre': 'Pop',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||
@@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE):
|
||||
'artist': 'Cassadee Pope',
|
||||
'genre': 'Country',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
|
||||
}, {
|
||||
'note': 'Age-limited video',
|
||||
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||
@@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE):
|
||||
'artist': 'Justin Timberlake',
|
||||
'genre': 'Pop',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
|
||||
}, {
|
||||
'note': 'No video_info',
|
||||
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
||||
@@ -91,15 +91,33 @@ class VevoIE(VevoBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'USUV71503000',
|
||||
'ext': 'mp4',
|
||||
'title': 'K Camp - Till I Die',
|
||||
'title': 'K Camp ft. T.I. - Till I Die',
|
||||
'age_limit': 18,
|
||||
'timestamp': 1449468000,
|
||||
'upload_date': '20151207',
|
||||
'uploader': 'K Camp',
|
||||
'track': 'Till I Die',
|
||||
'artist': 'K Camp',
|
||||
'genre': 'Rap/Hip-Hop',
|
||||
'genre': 'Hip-Hop',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
|
||||
}, {
|
||||
'note': 'Featured test',
|
||||
'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
|
||||
'md5': 'd28675e5e8805035d949dc5cf161071d',
|
||||
'info_dict': {
|
||||
'id': 'USUV71402190',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lemaitre ft. LoLo - Wait',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1413432000,
|
||||
'upload_date': '20141016',
|
||||
'uploader': 'Lemaitre',
|
||||
'track': 'Wait',
|
||||
'artist': 'Lemaitre',
|
||||
'genre': 'Electronic',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
|
||||
}, {
|
||||
'note': 'Only available via webpage',
|
||||
'url': 'http://www.vevo.com/watch/GBUV71600656',
|
||||
@@ -242,8 +260,11 @@ class VevoIE(VevoBaseIE):
|
||||
|
||||
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||
artists = video_info.get('artists')
|
||||
if artists:
|
||||
artist = uploader = artists[0]['name']
|
||||
for curr_artist in artists:
|
||||
if curr_artist.get('role') == 'Featured':
|
||||
featured_artist = curr_artist['name']
|
||||
else:
|
||||
artist = uploader = curr_artist['name']
|
||||
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||
|
||||
for video_version in video_versions:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user