Compare commits
214 Commits
2016.05.21
...
2016.06.11
Author | SHA1 | Date | |
---|---|---|---|
![]() |
33751818d3 | ||
![]() |
698f127c1a | ||
![]() |
fe458b6596 | ||
![]() |
21ac1a8ac3 | ||
![]() |
79027c0ea0 | ||
![]() |
4cad2929cd | ||
![]() |
62666af99f | ||
![]() |
9ddc289f88 | ||
![]() |
6626c214e1 | ||
![]() |
d845622b2e | ||
![]() |
1058f56e96 | ||
![]() |
0434358823 | ||
![]() |
3841256c2c | ||
![]() |
bdf16f8140 | ||
![]() |
836ab0c554 | ||
![]() |
6c0376fe4f | ||
![]() |
1fa309da40 | ||
![]() |
daa0df9e8b | ||
![]() |
09728d5fbc | ||
![]() |
c16f8a4659 | ||
![]() |
a225238530 | ||
![]() |
55b2f099c0 | ||
![]() |
9631a94fb5 | ||
![]() |
cc4444662c | ||
![]() |
de3eb07ed6 | ||
![]() |
5de008e8c3 | ||
![]() |
3e74b444e7 | ||
![]() |
e1e0a10c56 | ||
![]() |
436214baf7 | ||
![]() |
506d0e9693 | ||
![]() |
55290788d3 | ||
![]() |
bc7e7adf51 | ||
![]() |
b0aebe702c | ||
![]() |
416878f41f | ||
![]() |
c0fed3bda5 | ||
![]() |
bb1e44cc8e | ||
![]() |
21efee5f8b | ||
![]() |
e2713d32f4 | ||
![]() |
e21c26daf9 | ||
![]() |
1594a4932f | ||
![]() |
6869d634c6 | ||
![]() |
50918c4ee0 | ||
![]() |
6c33d24b46 | ||
![]() |
be6217b261 | ||
![]() |
9d51a0a9a1 | ||
![]() |
39da509f67 | ||
![]() |
a479b8f687 | ||
![]() |
48a5eabc48 | ||
![]() |
11380753b5 | ||
![]() |
411c590a1f | ||
![]() |
6da8d7de69 | ||
![]() |
c6308b3153 | ||
![]() |
fc0a45fa41 | ||
![]() |
e6e90515db | ||
![]() |
22a0a95247 | ||
![]() |
50ce1c331c | ||
![]() |
7264e38591 | ||
![]() |
33d9f3707c | ||
![]() |
a26a9d6239 | ||
![]() |
a4a8201c02 | ||
![]() |
a6571f1073 | ||
![]() |
57b6e9652e | ||
![]() |
3d9b3605a3 | ||
![]() |
74193838f7 | ||
![]() |
fb94e260b5 | ||
![]() |
345dec937f | ||
![]() |
4315f74fa8 | ||
![]() |
e67f688025 | ||
![]() |
db59b37d0b | ||
![]() |
244fe977fe | ||
![]() |
7b0d1c2859 | ||
![]() |
21d0a8e48b | ||
![]() |
47f12ad3e3 | ||
![]() |
8f1aaa97a1 | ||
![]() |
9d78524cbe | ||
![]() |
bc270284b5 | ||
![]() |
c93b4eaceb | ||
![]() |
71b9cb3107 | ||
![]() |
633b444fd2 | ||
![]() |
51c4d85ce7 | ||
![]() |
631d4c87ee | ||
![]() |
1e236d7e23 | ||
![]() |
2c34735267 | ||
![]() |
39b32571df | ||
![]() |
db56f281d9 | ||
![]() |
e92b552a10 | ||
![]() |
1ae6c83bce | ||
![]() |
0fc832e1b2 | ||
![]() |
7def35712a | ||
![]() |
cad88f96dc | ||
![]() |
762d44c956 | ||
![]() |
4d8856d511 | ||
![]() |
c917106be4 | ||
![]() |
76e9cd7f24 | ||
![]() |
bf4c6a38e1 | ||
![]() |
7f3c3dfa52 | ||
![]() |
9c3c447eb3 | ||
![]() |
ad73083ff0 | ||
![]() |
1e8b59243f | ||
![]() |
c88270271e | ||
![]() |
b96f007eeb | ||
![]() |
9a4aec8b7e | ||
![]() |
54fb199681 | ||
![]() |
8c32e5dc32 | ||
![]() |
0ea590076f | ||
![]() |
4a684895c0 | ||
![]() |
f4e4aa9b6b | ||
![]() |
5e3856a2c5 | ||
![]() |
6e6b9f600f | ||
![]() |
6a1df4fb5f | ||
![]() |
dde1ce7c06 | ||
![]() |
811586ebcf | ||
![]() |
0ff3749bfe | ||
![]() |
28bab13348 | ||
![]() |
877032314f | ||
![]() |
e7d85c4ef7 | ||
![]() |
8ec2b2c41c | ||
![]() |
197a5da1d0 | ||
![]() |
abbb2938fa | ||
![]() |
f657b1a5f2 | ||
![]() |
86a52881c6 | ||
![]() |
8267423652 | ||
![]() |
917a3196f8 | ||
![]() |
56bd028a0f | ||
![]() |
681b923b5c | ||
![]() |
9ed6d8c6c5 | ||
![]() |
f3fb420b82 | ||
![]() |
165e3561e9 | ||
![]() |
27f17c0eab | ||
![]() |
44c8892369 | ||
![]() |
f574103d7c | ||
![]() |
6d138e98e3 | ||
![]() |
2a329110b9 | ||
![]() |
2bee7b25f3 | ||
![]() |
92cf872a48 | ||
![]() |
6461f2b7ec | ||
![]() |
807cf7b07f | ||
![]() |
de7d76af52 | ||
![]() |
11c70deba7 | ||
![]() |
f36532404d | ||
![]() |
77b8b4e696 | ||
![]() |
2615fa7584 | ||
![]() |
3a686853e1 | ||
![]() |
949fc42e00 | ||
![]() |
33a1ff7113 | ||
![]() |
bec2c14f2c | ||
![]() |
37f972954d | ||
![]() |
3874e6ea66 | ||
![]() |
fac2af3c51 | ||
![]() |
6f8cb24219 | ||
![]() |
448bb5f333 | ||
![]() |
293c255688 | ||
![]() |
ac88d2316e | ||
![]() |
5950cb1d6d | ||
![]() |
761052db92 | ||
![]() |
240b60453e | ||
![]() |
85b0fe7d64 | ||
![]() |
0a5685b26f | ||
![]() |
6f748df43f | ||
![]() |
b410cb83d4 | ||
![]() |
da9d82840a | ||
![]() |
4ee0b8afdb | ||
![]() |
1de32771e1 | ||
![]() |
688c634b7d | ||
![]() |
0d6ee97508 | ||
![]() |
6b43132ce9 | ||
![]() |
a4690b3244 | ||
![]() |
444417edb5 | ||
![]() |
277c7465f5 | ||
![]() |
25bcd3550e | ||
![]() |
a4760d204f | ||
![]() |
e8593f346a | ||
![]() |
05b651e3a5 | ||
![]() |
42a7439717 | ||
![]() |
b1e9ebd080 | ||
![]() |
0c50eeb987 | ||
![]() |
4b464a6a78 | ||
![]() |
5db9df622f | ||
![]() |
5181759c0d | ||
![]() |
e54373204a | ||
![]() |
102810ef04 | ||
![]() |
78d3b3e213 | ||
![]() |
7a46542f97 | ||
![]() |
eb7941e3e6 | ||
![]() |
db3b8b2103 | ||
![]() |
c5f5155100 | ||
![]() |
4a12077855 | ||
![]() |
a4a7c44bd3 | ||
![]() |
70346165fe | ||
![]() |
c776b99691 | ||
![]() |
e9297256d4 | ||
![]() |
e5871c672b | ||
![]() |
9b06b0fb92 | ||
![]() |
4f3a25c2b4 | ||
![]() |
21a19aa94d | ||
![]() |
c6b9cf05e1 | ||
![]() |
4d8819d249 | ||
![]() |
898f4b49cc | ||
![]() |
0150a00f33 | ||
![]() |
c8831015f4 | ||
![]() |
92d221ad48 | ||
![]() |
0db9a05f88 | ||
![]() |
93fdb14177 | ||
![]() |
370d4eb8ad | ||
![]() |
3452c3a27c | ||
![]() |
81f35fee2f | ||
![]() |
0fdbe3146c | ||
![]() |
8d93c21466 | ||
![]() |
1dbfd78754 | ||
![]() |
22e35adefd | ||
![]() |
833b644fff | ||
![]() |
57cf9b7f06 | ||
![]() |
14f7a2b8af | ||
![]() |
c0837a12c8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.21.2
|
||||
[debug] youtube-dl version 2016.06.11.2
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
6
.gitignore
vendored
6
.gitignore
vendored
@@ -28,12 +28,16 @@ updates_key.pem
|
||||
*.mp4
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
||||
# IntelliJ related files
|
||||
.idea
|
||||
.idea/*
|
||||
*.iml
|
||||
|
||||
tmp/
|
||||
|
@@ -14,7 +14,6 @@ script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
|
4
Makefile
4
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -69,7 +69,7 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
|
58
README.md
58
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
@@ -25,15 +25,21 @@ If you do not have curl, you can alternatively use a recent wget:
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
|
||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||
|
||||
brew install youtube-dl
|
||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||
|
||||
You can also use pip:
|
||||
|
||||
sudo pip install youtube-dl
|
||||
sudo pip install --upgrade youtube-dl
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
Or with [MacPorts](https://www.macports.org/):
|
||||
|
||||
sudo port install youtube-dl
|
||||
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||
|
||||
@@ -73,8 +79,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: Do not read the user
|
||||
in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
@@ -162,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
(experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT Maximum download rate in bytes per second
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
@@ -249,18 +255,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
--write-annotations Write video annotations to a
|
||||
.annotations.xml file
|
||||
--load-info FILE JSON file containing the video information
|
||||
--load-info-json FILE JSON file containing the video information
|
||||
(created with the "--write-info-json"
|
||||
option)
|
||||
--cookies FILE File to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment, only
|
||||
YouTube player files (for videos with
|
||||
obfuscated signatures) are cached, but that
|
||||
may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
@@ -433,7 +440,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
@@ -504,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id`: Playlist identifier
|
||||
- `playlist_title`: Playlist title
|
||||
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
@@ -693,6 +703,10 @@ hash -r
|
||||
|
||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||
|
||||
### youtube-dl is extremely slow to start on Windows
|
||||
|
||||
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
@@ -780,9 +794,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
### The exe throws an error due to missing `MSVCR100.dll`
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
@@ -837,6 +851,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because
|
||||
|
||||
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
||||
|
||||
# Why do I need to go through that much red tape when filing bugs?
|
||||
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
|
||||
|
||||
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
|
@@ -1,17 +1,38 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from socketserver import ThreadingMixIn
|
||||
import argparse
|
||||
import ctypes
|
||||
import functools
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
import os.path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||
from youtube_dl.compat import (
|
||||
compat_input,
|
||||
compat_http_server,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
|
||||
# These are not used outside of buildserver.py thus not in compat.py
|
||||
|
||||
try:
|
||||
import winreg as compat_winreg
|
||||
except ImportError: # Python 2
|
||||
import _winreg as compat_winreg
|
||||
|
||||
try:
|
||||
import socketserver as compat_socketserver
|
||||
except ImportError: # Python 2
|
||||
import SocketServer as compat_socketserver
|
||||
|
||||
|
||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||
allow_reuse_address = True
|
||||
|
||||
|
||||
@@ -191,7 +212,7 @@ def main(args=None):
|
||||
action='store_const', dest='action', const='service',
|
||||
help='Run as a Windows service')
|
||||
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
||||
action='store', default='localhost:8142',
|
||||
action='store', default='0.0.0.0:8142',
|
||||
help='Bind to host:port (default %default)')
|
||||
options = parser.parse_args(args=args)
|
||||
|
||||
@@ -216,7 +237,7 @@ def main(args=None):
|
||||
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
||||
thr = threading.Thread(target=srv.serve_forever)
|
||||
thr.start()
|
||||
input('Press ENTER to shut down')
|
||||
compat_input('Press ENTER to shut down')
|
||||
srv.shutdown()
|
||||
thr.join()
|
||||
|
||||
@@ -231,8 +252,6 @@ def rmtree(path):
|
||||
os.remove(fname)
|
||||
os.rmdir(path)
|
||||
|
||||
#==============================================================================
|
||||
|
||||
|
||||
class BuildError(Exception):
|
||||
def __init__(self, output, code=500):
|
||||
@@ -249,15 +268,25 @@ class HTTPError(BuildError):
|
||||
|
||||
class PythonBuilder(object):
|
||||
def __init__(self, **kwargs):
|
||||
pythonVersion = kwargs.pop('python', '2.7')
|
||||
python_version = kwargs.pop('python', '3.4')
|
||||
python_path = None
|
||||
for node in ('Wow6432Node\\', ''):
|
||||
try:
|
||||
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
|
||||
key = compat_winreg.OpenKey(
|
||||
compat_winreg.HKEY_LOCAL_MACHINE,
|
||||
r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
|
||||
try:
|
||||
self.pythonPath, _ = _winreg.QueryValueEx(key, '')
|
||||
python_path, _ = compat_winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
_winreg.CloseKey(key)
|
||||
compat_winreg.CloseKey(key)
|
||||
break
|
||||
except Exception:
|
||||
raise BuildError('No such Python version: %s' % pythonVersion)
|
||||
pass
|
||||
|
||||
if not python_path:
|
||||
raise BuildError('No such Python version: %s' % python_version)
|
||||
|
||||
self.pythonPath = python_path
|
||||
|
||||
super(PythonBuilder, self).__init__(**kwargs)
|
||||
|
||||
@@ -305,8 +334,10 @@ class YoutubeDLBuilder(object):
|
||||
|
||||
def build(self):
|
||||
try:
|
||||
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
cwd=self.buildPath)
|
||||
proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
|
||||
proc.wait()
|
||||
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
# cwd=self.buildPath)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise BuildError(e.output)
|
||||
|
||||
@@ -369,12 +400,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
|
||||
pass
|
||||
|
||||
|
||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
||||
|
||||
def do_GET(self):
|
||||
path = urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
|
||||
path = compat_urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
|
||||
action, _, path = path.path.strip('/').partition('/')
|
||||
if path:
|
||||
path = path.split('/')
|
||||
@@ -388,7 +419,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
builder.close()
|
||||
except BuildError as e:
|
||||
self.send_response(e.code)
|
||||
msg = unicode(e).encode('UTF-8')
|
||||
msg = compat_str(e).encode('UTF-8')
|
||||
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
||||
self.send_header('Content-Length', len(msg))
|
||||
self.end_headers()
|
||||
@@ -400,7 +431,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.send_response(500, 'Malformed URL')
|
||||
|
||||
#==============================================================================
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
111
devscripts/create-github-release.py
Normal file
111
devscripts/create-github-release.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import mimetypes
|
||||
import netrc
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_basestring,
|
||||
compat_input,
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from youtube_dl.utils import (
|
||||
make_HTTPS_handler,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class GitHubReleaser(object):
|
||||
_API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
|
||||
_UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
|
||||
_NETRC_MACHINE = 'github.com'
|
||||
|
||||
def __init__(self, debuglevel=0):
|
||||
self._init_github_account()
|
||||
https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
|
||||
self._opener = compat_urllib_request.build_opener(https_handler)
|
||||
|
||||
def _init_github_account(self):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
self._username = info[0]
|
||||
self._password = info[2]
|
||||
compat_print('Using GitHub credentials found in .netrc...')
|
||||
return
|
||||
else:
|
||||
compat_print('No GitHub credentials found in .netrc')
|
||||
except (IOError, netrc.NetrcParseError):
|
||||
compat_print('Unable to parse .netrc')
|
||||
self._username = compat_input(
|
||||
'Type your GitHub username or email address and press [Return]: ')
|
||||
self._password = compat_getpass(
|
||||
'Type your GitHub password and press [Return]: ')
|
||||
|
||||
def _call(self, req):
|
||||
if isinstance(req, compat_basestring):
|
||||
req = sanitized_Request(req)
|
||||
# Authorizing manually since GitHub does not respond with 401 with
|
||||
# WWW-Authenticate header set (see
|
||||
# https://developer.github.com/v3/#basic-authentication)
|
||||
b64 = base64.b64encode(
|
||||
('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
|
||||
req.add_header('Authorization', 'Basic %s' % b64)
|
||||
response = self._opener.open(req).read().decode('utf-8')
|
||||
return json.loads(response)
|
||||
|
||||
def list_releases(self):
|
||||
return self._call(self._API_URL)
|
||||
|
||||
def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
|
||||
data = {
|
||||
'tag_name': tag_name,
|
||||
'target_commitish': 'master',
|
||||
'name': name,
|
||||
'body': body,
|
||||
'draft': draft,
|
||||
'prerelease': prerelease,
|
||||
}
|
||||
req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
|
||||
return self._call(req)
|
||||
|
||||
def create_asset(self, release_id, asset):
|
||||
asset_name = os.path.basename(asset)
|
||||
url = self._UPLOADS_URL % (release_id, asset_name)
|
||||
# Our files are small enough to be loaded directly into memory.
|
||||
data = open(asset, 'rb').read()
|
||||
req = sanitized_Request(url, data)
|
||||
mime_type, _ = mimetypes.guess_type(asset_name)
|
||||
req.add_header('Content-Type', mime_type or 'application/octet-stream')
|
||||
return self._call(req)
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
parser.error('Expected a version and a build directory')
|
||||
|
||||
version, build_path = args
|
||||
|
||||
releaser = GitHubReleaser()
|
||||
|
||||
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
|
||||
release_id = new_release['id']
|
||||
|
||||
for asset in os.listdir(build_path):
|
||||
compat_print('Uploading %s...' % asset)
|
||||
releaser.create_asset(release_id, os.path.join(build_path, asset))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@@ -1,13 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.error('Expected an output filename')
|
||||
|
||||
outfile, = args
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(readme)
|
||||
|
||||
|
||||
def filter_options(readme):
|
||||
ret = ''
|
||||
@@ -37,27 +70,5 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -6,7 +6,7 @@
|
||||
# * the git config user.signingkey is properly set
|
||||
|
||||
# You will need
|
||||
# pip install coverage nose rsa
|
||||
# pip install coverage nose rsa wheel
|
||||
|
||||
# TODO
|
||||
# release notes
|
||||
@@ -15,10 +15,28 @@
|
||||
set -e
|
||||
|
||||
skip_tests=true
|
||||
if [ "$1" = '--run-tests' ]; then
|
||||
buildserver='localhost:8142'
|
||||
|
||||
while true
|
||||
do
|
||||
case "$1" in
|
||||
--run-tests)
|
||||
skip_tests=false
|
||||
shift
|
||||
fi
|
||||
;;
|
||||
--buildserver)
|
||||
buildserver="$2"
|
||||
shift 2
|
||||
;;
|
||||
--*)
|
||||
echo "ERROR: unknown option $1"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||
version="$1"
|
||||
@@ -35,6 +53,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
@@ -66,7 +85,7 @@ git push origin "$version"
|
||||
REV=$(git rev-parse HEAD)
|
||||
make youtube-dl youtube-dl.tar.gz
|
||||
read -p "VM running? (y/n) " -n 1
|
||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
mkdir -p "build/$version"
|
||||
mv youtube-dl youtube-dl.exe "build/$version"
|
||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||
@@ -76,15 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
||||
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
|
||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
||||
|
||||
ROOT=$(pwd)
|
||||
python devscripts/create-github-release.py $version "$ROOT/build/$version"
|
||||
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
|
||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
||||
ROOT=$(pwd)
|
||||
(
|
||||
set -e
|
||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
||||
|
@@ -28,6 +28,7 @@
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
@@ -43,8 +44,8 @@
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **ARD:mediathek**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -55,6 +56,7 @@
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **AudiMedia**
|
||||
@@ -136,6 +138,7 @@
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
@@ -205,6 +208,7 @@
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
@@ -250,6 +254,7 @@
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
@@ -326,8 +331,8 @@
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
- **life**: Life.ru
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
@@ -336,6 +341,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **loc**: Library of Congress
|
||||
- **LocalNews8**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
@@ -512,6 +518,8 @@
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiocanada**
|
||||
- **RadioCanadaAudioVideo**
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
@@ -521,8 +529,10 @@
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
@@ -561,6 +571,7 @@
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
@@ -682,8 +693,8 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
@@ -729,6 +740,7 @@
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
@@ -764,17 +776,15 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **vulture.com**
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
@@ -810,6 +820,7 @@
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **youku**: 优酷
|
||||
- **youku:show**
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
|
1
setup.py
1
setup.py
@@ -122,6 +122,7 @@ setup(
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
def test_compat_etree_fromstring_doctype(self):
    """Parse a document that carries a DOCTYPE declaration.

    The test has no assertion: it passes as long as
    compat_etree_fromstring() does not raise on the input.
    """
    smil_document = '\n'.join((
        '<?xml version="1.0"?>',
        '<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">',
        '<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>',
    ))
    compat_etree_fromstring(smil_document)
|
||||
|
||||
def test_struct_unpack(self):
|
||||
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||
|
||||
|
@@ -16,6 +16,15 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
    """Return the port number the given HTTP server's socket is bound to.

    ``httpd`` is any object exposing a ``socket`` attribute; the port is
    the second element of that socket's ``getsockname()`` result.
    """
    sock = httpd.socket
    if os.name == 'java' and isinstance(sock, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket, so the
        # underlying socket has to be reached through its ``sock`` attribute.
        sock = sock.sock
    return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
@@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Type', 'video/mp4')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||
elif self.path == '/302':
|
||||
if sys.version_info[0] == 3:
|
||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
return
|
||||
|
||||
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
|
||||
self.send_response(302)
|
||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||
self.end_headers()
|
||||
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||
else:
|
||||
assert False
|
||||
|
||||
@@ -47,18 +72,32 @@ class FakeLogger(object):
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
    """Plain-HTTP tests run against a local HTTPTestRequestHandler server."""

    def setUp(self):
        """Start the test HTTP server on an OS-assigned port in a daemon thread."""
        server_address = ('localhost', 0)  # port 0: let the OS pick a free port
        self.httpd = compat_http_server.HTTPServer(
            server_address, HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)

        worker = threading.Thread(target=self.httpd.serve_forever)
        worker.daemon = True
        self.server_thread = worker
        self.server_thread.start()

    def test_unicode_path_redirection(self):
        """Following a 302 to a non-ASCII path must end at the embedded video URL."""
        if sys.version_info[0] == 3:
            # XXX: Python 3 http server does not allow non-ASCII header values
            return

        base_url = 'http://localhost:%d' % self.port
        downloader = YoutubeDL({'logger': FakeLogger()})
        result = downloader.extract_info(base_url + '/302')
        self.assertEqual(result['url'], base_url + '/vid.mp4')
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
if os.name == 'java':
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = self.httpd.socket.sock
|
||||
else:
|
||||
sock = self.httpd.socket
|
||||
self.port = sock.getsockname()[1]
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
@@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
self.port = self.proxy.socket.getsockname()[1]
|
||||
self.port = http_server_port(self.proxy)
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||
self.cn_port = http_server_port(self.cn_proxy)
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
|
@@ -157,8 +157,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||
|
||||
self.assertEqual(sanitize_filename(
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
|
||||
|
||||
def test_sanitize_ids(self):
|
||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||
@@ -249,6 +249,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(unescapeHTML('é'), 'é')
|
||||
self.assertEqual(unescapeHTML('�'), '�')
|
||||
# HTML5 entities
|
||||
self.assertEqual(unescapeHTML('.''), '.\'')
|
||||
|
||||
def test_date_from_str(self):
|
||||
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
||||
|
@@ -1223,6 +1223,10 @@ class YoutubeDL(object):
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result')
|
||||
|
||||
if not isinstance(info_dict['id'], compat_str):
|
||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
||||
info_dict['id'] = compat_str(info_dict['id'])
|
||||
|
||||
if 'playlist' not in info_dict:
|
||||
# It isn't part of a playlist
|
||||
info_dict['playlist'] = None
|
||||
|
@@ -18,7 +18,6 @@ from .options import (
|
||||
from .compat import (
|
||||
compat_expanduser,
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
compat_shlex_split,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
@@ -76,7 +75,7 @@ def _real_main(argv=None):
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
compat_print(std_headers['User-Agent'])
|
||||
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
@@ -101,10 +100,10 @@ def _real_main(argv=None):
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
for mu in matchedUrls:
|
||||
compat_print(' ' + mu)
|
||||
write_string(' ' + mu + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
@@ -117,7 +116,7 @@ def _real_main(argv=None):
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
compat_print(desc)
|
||||
write_string(desc + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
|
2263
youtube_dl/compat.py
2263
youtube_dl/compat.py
File diff suppressed because it is too large
Load Diff
@@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD):
|
||||
# args += ['-http_proxy', proxy]
|
||||
env = os.environ.copy()
|
||||
compat_setenv('HTTP_PROXY', proxy, env=env)
|
||||
compat_setenv('http_proxy', proxy, env=env)
|
||||
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
|
@@ -319,7 +319,7 @@ class F4mFD(FragmentFD):
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
if requested_bitrate is None or len(formats) == 1:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
|
@@ -23,11 +23,17 @@ class HlsFD(FragmentFD):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
||||
r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# event media playlists [4]
|
||||
|
||||
# This heuristic is not reliable either, since segments are not necessarily appended.
|
||||
# Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
|
||||
# no more segments will ever be appended to the end of the playlist.
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
|
133
youtube_dl/extractor/afreecatv.py
Normal file
133
youtube_dl/extractor/afreecatv.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
    """Extractor for afreecatv.com videos addressed by an ``nTitleNo`` query parameter.

    A video may consist of several files; in that case the result is a
    ``multi_video`` whose entries are the individual parts.
    """

    IE_DESC = 'afreecatv.com'
    _VALID_URL = r'''(?x)^
        https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
        (?:
            /app/(?:index|read_ucc_bbs)\.cgi|
            /player/[Pp]layer\.(?:swf|html))
        \?.*?\bnTitleNo=(?P<id>\d+)'''
    _TESTS = [{
        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
        'info_dict': {
            'id': '36164052',
            'ext': 'mp4',
            'title': '데일리 에이프릴 요정들의 시상식!',
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
            'upload_date': '20160503',
        }
    }, {
        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
        'info_dict': {
            'id': '36153164',
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
            'uploader': 'dailyapril',
            'uploader_id': 'dailyapril',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': 'd8b7c174568da61d774ef0203159bf97',
            'info_dict': {
                'id': '36153164_1',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }, {
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
            'info_dict': {
                'id': '36153164_2',
                'ext': 'mp4',
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
                'upload_date': '20160502',
            },
        }],
    }, {
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
        'only_matching': True,
    }]

    @staticmethod
    def parse_video_key(key):
        """Split a file ``key`` attribute into its upload date and part number.

        A key of the form ``<8 digits>_<word characters>_<digits>`` yields a
        dict with 'upload_date' and 'part' (both strings); any other key
        yields an empty dict.
        """
        match = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
        if not match:
            return {}
        return {
            'upload_date': match.group('upload_date'),
            'part': match.group('part'),
        }

    def _real_extract(self, url):
        """Fetch the video info XML for ``url`` and build the info dict.

        Raises ExtractorError (expected) when the XML lists no video file,
        or when none of the listed files has a parseable key.
        """
        video_id = self._match_id(url)

        # Keep the original query string but point it at the info endpoint.
        info_url = compat_urlparse.urlunparse(
            compat_urllib_parse_urlparse(url)._replace(
                netloc='afbbs.afreecatv.com:8080',
                path='/api/video/get_video_info.php'))
        video_xml = self._download_xml(info_url, video_id)

        if xpath_element(video_xml, './track/video/file') is None:
            raise ExtractorError(
                'Specified AfreecaTV video does not exist', expected=True)

        title = xpath_text(video_xml, './track/title', 'title')
        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
        duration = int_or_none(
            xpath_text(video_xml, './track/duration', 'duration'))
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')

        entries = []
        file_elements = video_xml.findall('./track/video/file')
        for position, file_el in enumerate(file_elements, 1):
            key_info = self.parse_video_key(file_el.get('key', ''))
            if not key_info:
                # Files whose key does not match the expected layout are skipped.
                continue
            entries.append({
                'id': '%s_%s' % (video_id, key_info.get('part', position)),
                'title': title,
                'upload_date': key_info.get('upload_date'),
                'duration': int_or_none(file_el.get('duration')),
                'url': file_el.text,
            })

        if not entries:
            raise ExtractorError(
                'No files found for the specified AfreecaTV video, either'
                ' the URL is incorrect or the video has been made private.',
                expected=True)

        info = {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
        }

        if len(entries) == 1:
            # A single part is returned as a plain video.
            only_entry = entries[0]
            info['url'] = only_entry['url']
            info['upload_date'] = only_entry.get('upload_date')
        else:
            info['_type'] = 'multi_video'
            info['entries'] = entries

        return info
|
@@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url)
|
||||
@@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
return video_id, lang
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
@@ -235,6 +178,74 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return info_dict
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
    """Extractor for ``arte.tv/guide/<lang>/...`` pages ('arte.tv:+7').

    URLs that ArteTVPlaylistIE accepts are deliberately rejected here so
    that they are left to the playlist extractor.
    """

    IE_NAME = 'arte.tv:+7'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` unless ArteTVPlaylistIE is suitable for it."""
        if ArteTVPlaylistIE.suitable(url):
            return False
        return super(ArteTVPlus7IE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the page behind ``url`` and extract the video from it."""
        video_id, lang = self._extract_url_info(url)
        page = self._download_webpage(url, video_id)
        return self._extract_from_webpage(page, video_id, lang)

    def _extract_from_webpage(self, webpage, video_id, lang):
        """Locate the player JSON URL in ``webpage`` and extract from it.

        Lookup order: a JSON URL embedded directly in the page, an iframe
        carrying a ``json_url`` query parameter (in the page, in the oEmbed
        player HTML, or in the ``program`` JSON's ``embed_html``). When no
        JSON URL is found, the first iframe ``src`` is returned as a URL
        result instead.
        """
        def find_iframe_url(html, default=NO_DEFAULT):
            # Without an explicit default the search is fatal when nothing matches.
            return self._html_search_regex(
                r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
                html, 'iframe url', group='url', default=default)

        templates = (
            r'arte_vp_url=["\'](.*?%s.*?)["\']',
            r'data-url=["\']([^"]+%s[^"]+)["\']',
        )
        # some pages contain multiple videos (like
        # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
        # so we first try to look for json URLs that contain the video id from
        # the 'vid' parameter.
        patterns = [
            template % re.escape(candidate_id)
            for candidate_id in (video_id, '')
            for template in templates]
        json_url = self._html_search_regex(
            patterns, webpage, 'json vp url', default=None)

        if not json_url:
            iframe_url = find_iframe_url(webpage, None)
            if not iframe_url:
                oembed_url = self._html_search_regex(
                    r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
                if oembed_url:
                    player = self._download_json(
                        oembed_url, video_id, 'Downloading player page')
                    iframe_url = find_iframe_url(player['html'])
            # en and es URLs produce react-based pages with different layout (e.g.
            # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
            if not iframe_url:
                program = self._search_regex(
                    r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
                    webpage, 'program', default=None)
                if program:
                    program_data = self._parse_json(program, video_id)
                    if program_data:
                        iframe_url = find_iframe_url(program_data['embed_html'])
            if iframe_url:
                iframe_query = compat_urllib_parse_urlparse(iframe_url).query
                json_url = compat_parse_qs(iframe_query)['json_url'][0]

        if not json_url:
            # Different kind of embed URL (e.g.
            # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
            fallback_url = self._search_regex(
                r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
                webpage, 'embed url', group='url')
            return self.url_result(fallback_url)

        title = self._search_regex(
            r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
            webpage, 'title', default=None, group='title')
        return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
@@ -267,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
@@ -275,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
@@ -300,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
@@ -318,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
@@ -328,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
'upload_date': '20140128',
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
||||
'info_dict': {
|
||||
@@ -345,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
'upload_date': '20160122',
|
||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||
@@ -390,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
    """Extractor for arte.tv guide collections addressed by a ``#collection/PL-<digits>`` fragment."""

    IE_NAME = 'arte.tv:playlist'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'

    _TESTS = [{
        'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
        'info_dict': {
            'id': 'PL-013263',
            'title': 'Areva & Uramin',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the collection data and return its videos as a playlist.

        Videos without a ``jsonUrl`` are left out; a video without a
        ``programId`` is extracted under the playlist id.
        """
        playlist_id, lang = self._extract_url_info(url)
        collection_url = (
            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
            % (lang, playlist_id))
        collection = self._download_json(collection_url, playlist_id)

        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')

        entries = []
        for video in collection['videos']:
            json_url = video.get('jsonUrl')
            if not json_url:
                continue
            entry_id = video.get('programId') or playlist_id
            entries.append(
                self._extract_from_json_url(video['jsonUrl'], entry_id, lang))

        return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
@@ -6,6 +6,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
@@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
result[resultkey] = api_response[apikey]
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': api_response.get('id', song_id),
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
|
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(data['id']),
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
|
@@ -1,34 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308313,
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'timestamp': 1397983878,
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
@@ -36,75 +44,186 @@ class BiliBiliIE(InfoExtractor):
|
||||
'id': '1041170',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'uploader': '枫叶逝去',
|
||||
'timestamp': 1396501299,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
||||
'info_dict': {
|
||||
'id': '4808130_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
||||
'info_dict': {
|
||||
'id': '4808130_part2',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
||||
'info_dict': {
|
||||
'id': '4808130_part3',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '7b795e214166501e9141139eea236e91',
|
||||
'info_dict': {
|
||||
'id': '4808130_part4',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'ext': 'flv',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['upload time'],
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
page_num = mobj.group('page_num') or '1'
|
||||
|
||||
view_data = self._download_json(
|
||||
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
|
||||
video_id)
|
||||
if 'error' in view_data:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cid = view_data['cid']
|
||||
title = unescapeHTML(view_data['title'])
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
|
||||
cid,
|
||||
'Downloading page %s/%s' % (page_num, view_data['pages'])
|
||||
)
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
if xpath_text(doc, './result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in doc.findall('./durl'):
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'ext': 'flv',
|
||||
}]
|
||||
backup_urls = durl.find('./backup_url')
|
||||
if backup_urls is not None:
|
||||
for backup_url in backup_urls.findall('./url'):
|
||||
formats.append({'url': backup_url.text})
|
||||
formats.reverse()
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'title': title,
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'title': title,
|
||||
'description': view_data.get('description'),
|
||||
'thumbnail': view_data.get('pic'),
|
||||
'uploader': view_data.get('author'),
|
||||
'timestamp': int_or_none(view_data.get('created')),
|
||||
'view_count': int_or_none(view_data.get('play')),
|
||||
'duration': int_or_none(xpath_text(doc, './timelength')),
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
if len(entries) == 1:
|
||||
entries[0].update(info)
|
||||
return entries[0]
|
||||
else:
|
||||
info.update({
|
||||
for idx, entry in enumerate(entries):
|
||||
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
qualities,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -16,24 +16,38 @@ from ..utils import (
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
)
|
||||
|
||||
'''
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||
_SITE_ID_MAP = {
|
||||
'canalplus.fr': 'cplus',
|
||||
'piwiplus.fr': 'teletoon',
|
||||
'd8.tv': 'd8',
|
||||
'itele.fr': 'itele',
|
||||
'canalplus': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'd17': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
|
||||
'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'md5': '41f438a4904f7664b91b4ed0dec969dc',
|
||||
'info_dict': {
|
||||
'id': '1263092',
|
||||
'id': '1192814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Zapping - 13/05/15',
|
||||
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
|
||||
'upload_date': '20150513',
|
||||
'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
|
||||
'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
|
||||
'upload_date': '20150105',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
@@ -46,35 +60,45 @@ class CanalplusIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
|
||||
'info_dict': {
|
||||
'id': '966289',
|
||||
'ext': 'flv',
|
||||
'title': 'Campagne intime - Documentaire exceptionnel',
|
||||
'description': 'md5:d2643b799fb190846ae09c61e59a859f',
|
||||
'upload_date': '20131108',
|
||||
},
|
||||
'skip': 'videos get deleted after a while',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||
'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
|
||||
'info_dict': {
|
||||
'id': '1213714',
|
||||
'id': '1390231',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
|
||||
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
|
||||
'upload_date': '20150211',
|
||||
'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
|
||||
'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
|
||||
'upload_date': '20160512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
|
||||
'info_dict': {
|
||||
'id': '1398334',
|
||||
'ext': 'mp4',
|
||||
'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
|
||||
'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
|
||||
'upload_date': '20160607',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.groupdict().get('id')
|
||||
video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
|
||||
|
||||
site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal']
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = url_basename(mobj.group('path'))
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
@@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@@ -66,7 +68,8 @@ class CBSIE(CBSBaseIE):
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not content_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
|
@@ -20,10 +20,9 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
_TESTS = [{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
@@ -36,10 +35,10 @@ class Channel9IE(InfoExtractor):
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
||||
'Mads Kristensen'],
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
@@ -51,8 +50,7 @@ class Channel9IE(InfoExtractor):
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
@@ -66,8 +64,20 @@ class Channel9IE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
||||
'info_dict': {
|
||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||
'title': 'Channel 9',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
|
||||
entries = [self.url_result(session_url.text, 'Channel9')
|
||||
for session_url in rss.findall('./channel/item/link')]
|
||||
title_text = rss.find('./channel/title').text
|
||||
return self.playlist_result(entries, content_path, title_text)
|
||||
return self.playlist_result(entries, video_id, title_text)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
content_path = mobj.group('contentpath')
|
||||
rss = mobj.group('rss')
|
||||
|
||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
||||
if rss:
|
||||
return self._extract_list(content_path, url)
|
||||
|
||||
page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
|
||||
if page_type_m is not None:
|
||||
page_type = page_type_m.group('pagetype')
|
||||
webpage = self._download_webpage(
|
||||
url, content_path, 'Downloading web page')
|
||||
|
||||
page_type = self._search_regex(
|
||||
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
||||
webpage, 'page type', default=None, group='pagetype')
|
||||
if page_type:
|
||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
|
||||
return self._extract_list(content_path)
|
||||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
|
||||
else: # Assuming list
|
||||
return self._extract_list(content_path)
|
||||
|
@@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
@@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
|
@@ -45,6 +45,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
@@ -987,7 +988,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest',
|
||||
@@ -1001,11 +1002,11 @@ class InfoExtractor(object):
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
|
||||
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
@@ -1029,9 +1030,26 @@ class InfoExtractor(object):
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
|
||||
bootstrap_info = xpath_element(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
height = int_or_none(media_el.attrib.get('height'))
|
||||
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
|
||||
# If <bootstrapInfo> is present, the specified f4m is a
|
||||
# stream-level manifest, and only set-level manifests may refer to
|
||||
# external resources. See section 11.4 and section 4 of F4M spec
|
||||
if bootstrap_info is None:
|
||||
media_url = None
|
||||
# @href is introduced in 2.0, see section 11.6 of F4M spec
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
media_url = media_el.attrib.get('href')
|
||||
if media_url is None:
|
||||
media_url = media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
@@ -1041,19 +1059,37 @@ class InfoExtractor(object):
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal))
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
# Sometimes stream-level manifest contains single media entry that
|
||||
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
|
||||
# At the same time parent's media entry in set-level manifest may
|
||||
# contain it. We will copy it from parent in such cases.
|
||||
if len(f4m_formats) == 1:
|
||||
f = f4m_formats[0]
|
||||
f.update({
|
||||
'tbr': f.get('tbr') or tbr,
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=m3u8_id, fatal=fatal))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'ext': 'flv' if bootstrap_info is not None else None,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
|
143
youtube_dl/extractor/coub.py
Normal file
143
youtube_dl/extractor/coub.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class CoubIE(InfoExtractor):
    """Extractor for coub.com videos, based on the ``/api/v2/coubs/<id>.json`` API."""

    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'

    _TESTS = [{
        'url': 'http://coub.com/view/5u5n1',
        'info_dict': {
            'id': '5u5n1',
            'ext': 'mp4',
            'title': 'The Matrix Moonwalk',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4.6,
            'timestamp': 1428527772,
            'upload_date': '20150408',
            'uploader': 'Артём Лоскутников',
            'uploader_id': 'artyom.loskutnikov',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
    }, {
        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
        'only_matching': True,
    }, {
        'url': 'coub:5u5n1',
        'only_matching': True,
    }, {
        # longer video id
        'url': 'http://coub.com/view/237d5l5h',
        'only_matching': True,
    }]

    @staticmethod
    def _dict_or_empty(value):
        """Return *value* if it is a dict, otherwise an empty dict.

        ``dict.get(key, {})`` only falls back when the key is missing; a JSON
        ``null`` comes back as ``None`` and would break a chained ``.get``.
        """
        return value if isinstance(value, dict) else {}

    def _real_extract(self, url):
        """Fetch the coub's JSON description and turn it into an info dict.

        Raises ExtractorError (expected) when the API response carries an
        ``error`` field.  ``title`` and ``file_versions`` are mandatory keys
        of the response; everything else is optional.
        """
        video_id = self._match_id(url)

        coub = self._download_json(
            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)

        if coub.get('error'):
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)

        title = coub['title']

        file_versions = coub['file_versions']

        QUALITIES = ('low', 'med', 'high')

        MOBILE = 'mobile'
        IPHONE = 'iphone'
        HTML5 = 'html5'

        # Later entries rank higher: html5 is preferred over iphone over mobile.
        SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)

        quality_key = qualities(QUALITIES)
        preference_key = qualities(SOURCE_PREFERENCE)

        formats = []

        html5_versions = self._dict_or_empty(file_versions.get(HTML5))
        for kind, items in html5_versions.items():
            if kind not in ('video', 'audio'):
                continue
            if not isinstance(items, dict):
                continue
            for quality, item in items.items():
                if not isinstance(item, dict):
                    continue
                item_url = item.get('url')
                if not item_url:
                    continue
                formats.append({
                    'url': item_url,
                    'format_id': '%s-%s-%s' % (HTML5, kind, quality),
                    'filesize': int_or_none(item.get('size')),
                    'vcodec': 'none' if kind == 'audio' else None,
                    'quality': quality_key(quality),
                    'preference': preference_key(HTML5),
                })

        iphone_url = self._dict_or_empty(file_versions.get(IPHONE)).get('url')
        if iphone_url:
            formats.append({
                'url': iphone_url,
                'format_id': IPHONE,
                'preference': preference_key(IPHONE),
            })

        mobile_url = self._dict_or_empty(file_versions.get(MOBILE)).get('audio_url')
        if mobile_url:
            formats.append({
                'url': mobile_url,
                'format_id': '%s-audio' % MOBILE,
                'preference': preference_key(MOBILE),
            })

        self._sort_formats(formats)

        thumbnail = coub.get('picture')
        duration = float_or_none(coub.get('duration'))
        timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))

        channel = self._dict_or_empty(coub.get('channel'))
        uploader = channel.get('title')
        uploader_id = channel.get('permalink')

        view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
        like_count = int_or_none(coub.get('likes_count'))
        repost_count = int_or_none(coub.get('recoubs_count'))
        comment_count = int_or_none(coub.get('comments_count'))

        age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
        if age_restricted is not None:
            # Only an explicit ``true`` marks the coub as adult content.
            age_limit = 18 if age_restricted is True else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'formats': formats,
        }
|
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||
@@ -31,6 +34,18 @@ class DWIE(InfoExtractor):
|
||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}, {
|
||||
# DW documentaries, only last for one or two weeks
|
||||
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||
'info_dict': {
|
||||
'id': '19274438',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the 90s – Hip Hop',
|
||||
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||
'upload_date': '20160521',
|
||||
},
|
||||
'skip': 'Video removed',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -38,6 +53,7 @@ class DWIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
media_id = hidden_inputs.get('media_id') or media_id
|
||||
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
@@ -49,13 +65,20 @@ class DWIE(InfoExtractor):
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
upload_date = hidden_inputs.get('display_date')
|
||||
if not upload_date:
|
||||
upload_date = self._html_search_regex(
|
||||
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||
'upload date', default=None)
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': hidden_inputs.get('preview_image'),
|
||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||
'upload_date': hidden_inputs.get('display_date'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -11,8 +11,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
@@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'ext': 'mp4',
|
||||
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
|
@@ -21,6 +21,7 @@ from .adobetv import (
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import AENetworksIE
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
@@ -56,6 +57,7 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
@@ -143,6 +145,7 @@ from .cnn import (
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
@@ -231,6 +234,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
@@ -288,6 +292,7 @@ from .globo import (
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
@@ -379,6 +384,7 @@ from .leeco import (
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libraryofcongress import LibraryOfCongressIE
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
@@ -617,6 +623,10 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .r7 import R7IE
|
||||
from .radiocanada import (
|
||||
RadioCanadaIE,
|
||||
RadioCanadaAudioVideoIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
@@ -630,8 +640,12 @@ from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .revision3 import (
|
||||
Revision3EmbedIE,
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
@@ -670,6 +684,7 @@ from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
@@ -827,7 +842,10 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvp import (
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
@@ -894,6 +912,7 @@ from .videomore import (
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidio import VidioIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
@@ -939,21 +958,21 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -994,7 +1013,10 @@ from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youku import (
|
||||
YoukuIE,
|
||||
YoukuShowIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
|
64
youtube_dl/extractor/eyedotv.py
Normal file
64
youtube_dl/extractor/eyedotv.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EyedoTVIE(InfoExtractor):
    """Extractor for eyedo.tv live events and their replays.

    Metadata is read from the site's XML ``GetLive`` API; the HLS playlist
    URL is built from the video id and the event's live state.
    """

    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        }
    }
    # Base URL of the streaming server used for non-CDN live streams and replays.
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        """Return the info dict for the eyedo.tv event addressed by ``url``.

        Raises ExtractorError (expected) when the API reports the event
        state as 'avenir', i.e. the video is not yet available.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)

        def _add_ns(path):
            # Qualify an element path with the API's XML namespace.
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # Label the lookup with its own name so that a missing element is
        # not reported as a missing title.
        state_live_code = xpath_text(
            video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
                expected=True)

        is_live = state_live_code == 'live'
        # URL scheme taken from http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): unlike every other lookup in this method, 'Cdn'
            # is queried without the namespace helper - confirm that this
            # path can actually match an element of the API response.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
            else:
                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
        else:
            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)

        return {
            'id': video_id,
            'title': title,
            # Live streams use the 'm3u8' protocol, replays 'm3u8_native'.
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }
|
@@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -62,6 +62,8 @@ from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .vessel import VesselIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -625,13 +627,13 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# MTVServices embed
|
||||
{
|
||||
'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
|
||||
'md5': '35727f82f58c76d996fc188f9755b0d5',
|
||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||
'info_dict': {
|
||||
'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
|
||||
'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Review',
|
||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||
},
|
||||
},
|
||||
# YouTube embed via <data-embed-url="">
|
||||
@@ -783,6 +785,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# Another Livestream embed, without 'new.' in URL
|
||||
{
|
||||
'url': 'https://www.freespeech.org/',
|
||||
'info_dict': {
|
||||
'id': '123537347',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# Live stream
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
@@ -867,18 +882,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
}
|
||||
},
|
||||
# Kaltura embed
|
||||
{
|
||||
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'info_dict': {
|
||||
'id': '1_eergr3h1',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150226',
|
||||
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
||||
'timestamp': int,
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Kaltura embed (different embed code)
|
||||
{
|
||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||
@@ -904,6 +907,19 @@ class GenericIE(InfoExtractor):
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Kaltura embed with single quotes
|
||||
{
|
||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||
'info_dict': {
|
||||
'id': '0_izeg5utt',
|
||||
'ext': 'mp4',
|
||||
'title': '35871',
|
||||
'timestamp': 1355743100,
|
||||
'upload_date': '20121217',
|
||||
'uploader_id': 'batchUser',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -1016,16 +1032,31 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': 1389118457,
|
||||
},
|
||||
},
|
||||
# NBC News embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
|
||||
'md5': '1aa589c675898ae6d37a17913cf68d66',
|
||||
'info_dict': {
|
||||
'id': '701714499682',
|
||||
'ext': 'mp4',
|
||||
'title': 'PREVIEW: On Assignment: David Letterman',
|
||||
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
{
|
||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||
'url': 'https://video.udn.com/news/300346',
|
||||
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
||||
'info_dict': {
|
||||
'id': '300346',
|
||||
'ext': 'mp4',
|
||||
'title': '中一中男師變性 全校師生力挺',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Ooyala embed
|
||||
{
|
||||
@@ -1193,6 +1224,16 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Lake8737',
|
||||
}
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
'info_dict': {
|
||||
'id': '149298443_480_16c25b74_2',
|
||||
'ext': 'mp4',
|
||||
'title': 'vs. Blue Orange Spring Game',
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1499,6 +1540,16 @@ class GenericIE(InfoExtractor):
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
|
||||
# Look for ThePlatform embeds
|
||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||
if tp_urls:
|
||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||
|
||||
# Look for Vessel embeds
|
||||
vessel_urls = VesselIE._extract_urls(webpage)
|
||||
if vessel_urls:
|
||||
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
@@ -1806,14 +1857,6 @@ class GenericIE(InfoExtractor):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
# Look for embedded vulture.com player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
if mtvservices_url:
|
||||
@@ -1862,7 +1905,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
@@ -1874,7 +1917,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result(smuggle_url(
|
||||
@@ -1926,6 +1969,12 @@ class GenericIE(InfoExtractor):
|
||||
if nbc_sports_url:
|
||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||
|
||||
# Look for NBC News embeds
|
||||
nbc_news_embed_url = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
|
||||
if nbc_news_embed_url:
|
||||
return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
|
||||
|
||||
# Look for Google Drive embeds
|
||||
google_drive_url = GoogleDriveIE._extract_url(webpage)
|
||||
if google_drive_url:
|
||||
@@ -2105,7 +2154,7 @@ class GenericIE(InfoExtractor):
|
||||
raise UnsupportedError(url)
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
for video_url in orderedSet(found):
|
||||
video_url = unescapeHTML(video_url)
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
66
youtube_dl/extractor/godtv.py
Normal file
66
youtube_dl/extractor/godtv.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class GodTVIE(InfoExtractor):
    """Extractor for god.tv pages, which embed Ooyala videos or playlists."""

    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
        'info_dict': {
            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
            'ext': 'mp4',
            'title': 'Randy Needham',
            'duration': 3615.08,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://god.tv/playlist/bible-study',
        'info_dict': {
            'id': 'bible-study',
        },
        'playlist_mincount': 37,
    }, {
        'url': 'http://god.tv/node/15097',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/live/africa',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/liveevents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a god.tv page to an Ooyala video or a playlist of them.

        The Drupal settings blob is tried first: a non-empty 'playlist'
        list with content ids yields a playlist result, otherwise the
        single 'ooyala' content id is used.  If the settings give no id,
        the raw page is searched for a content_id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The settings are optional: a missing blob becomes '{}' and a
        # parse failure is non-fatal.
        settings_source = self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            webpage, 'settings', default='{}')
        settings = self._parse_json(
            settings_source, display_id,
            transform_source=js_to_json, fatal=False)

        ooyala_id = None
        if settings:
            playlist = settings.get('playlist')
            if playlist and isinstance(playlist, list):
                entries = []
                for video in playlist:
                    if video.get('content_id'):
                        entries.append(
                            OoyalaIE._build_url_result(video['content_id']))
                if entries:
                    return self.playlist_result(entries, display_id)
            ooyala_id = settings.get('ooyala', {}).get('content_id')

        if ooyala_id:
            return OoyalaIE._build_url_result(ooyala_id)

        # Last resort: look for the id anywhere in the page source.
        return OoyalaIE._build_url_result(self._search_regex(
            r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
            webpage, 'ooyala id', group='id'))
|
@@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
|
||||
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '42428ce8a00585f9bc36e49226eae7a1',
|
||||
'info_dict': {
|
||||
'id': 'fk6OhWpXgIQ',
|
||||
'ext': 'mp4',
|
||||
@@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
|
||||
'uploader_id': 'groupon',
|
||||
'uploader': 'Groupon',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
|
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||
'md5': '7d45932269a288149483144f01b99789',
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
|
||||
'duration': 56.823,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
|
||||
} for video_url in video_urls]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
143
youtube_dl/extractor/libraryofcongress.py
Normal file
143
youtube_dl/extractor/libraryofcongress.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class LibraryOfCongressIE(InfoExtractor):
    """Extractor for Library of Congress (loc.gov) items and webcasts."""

    IE_NAME = 'loc'
    IE_DESC = 'Library of Congress'
    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
    _TESTS = [{
        # embedded via <div class="media-player"
        'url': 'http://loc.gov/item/90716351/',
        'md5': '353917ff7f0255aa6d4b80a034833de8',
        'info_dict': {
            'id': '90716351',
            'ext': 'mp4',
            'title': "Pa's trip to Mars",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 0,
            'view_count': int,
        },
    }, {
        # webcast embedded via mediaObjectId
        'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
        'info_dict': {
            'id': '5578',
            'ext': 'mp4',
            'title': 'Help! Preservation Training Needs Here, There & Everywhere',
            'duration': 3765,
            'view_count': int,
            'subtitles': 'mincount:1',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # with direct download links
        'url': 'https://www.loc.gov/item/78710669/',
        'info_dict': {
            'id': '78710669',
            'ext': 'mp4',
            'title': 'La vie et la passion de Jesus-Christ',
            'duration': 0,
            'view_count': int,
            'formats': 'mincount:4',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the loc.gov item addressed by ``url``.

        The media id found in the page is resolved through the
        media.loc.gov JSON service; formats are built from the first
        derivative plus any direct download links offered by the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            (r'id=(["\'])media-player-(?P<id>.+?)\1',
             r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
             r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
            webpage, 'media id', group='id')

        data = self._download_json(
            'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
            video_id)['mediaObject']

        derivative = data['derivatives'][0]
        media_url = derivative['derivativeUrl']

        title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
            webpage)

        # Following algorithm was extracted from setAVSource js function
        # found in webpage
        media_url = media_url.replace('rtmp', 'https')

        is_video = data.get('mediaType', 'v').lower() == 'v'
        ext = determine_ext(media_url)
        if ext not in ('mp4', 'mp3'):
            media_url += '.mp4' if is_video else '.mp3'

        # Always start from a list: a media URL matching neither marker
        # below must not leave `formats` unbound for the code that follows.
        formats = []
        if 'vod/mp4:' in media_url:
            formats.append({
                'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
                'format_id': 'hls',
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                'quality': 1,
            })
        elif 'vod/mp3:' in media_url:
            formats.append({
                'url': media_url.replace('vod/mp3:', ''),
                'vcodec': 'none',
            })

        # Direct download links; the option label is the format name,
        # optionally followed by the file size in parentheses.
        download_urls = set()
        for m in re.finditer(
                r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
            format_id = m.group('id').lower()
            if format_id == 'gif':
                continue
            download_url = m.group('url')
            # The same file may be offered more than once; keep the first.
            if download_url in download_urls:
                continue
            download_urls.add(download_url)
            formats.append({
                'url': download_url,
                'format_id': format_id,
                'filesize_approx': parse_filesize(m.group('size')),
            })

        self._sort_formats(formats)

        duration = float_or_none(data.get('duration'))
        view_count = int_or_none(data.get('viewCount'))

        subtitles = {}
        cc_url = data.get('ccUrl')
        if cc_url:
            subtitles.setdefault('en', []).append({
                'url': cc_url,
                'ext': 'ttml',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -7,48 +7,53 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
IE_NAME = 'life'
|
||||
IE_DESC = 'Life.ru'
|
||||
_VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single video embedded via video/source
|
||||
'url': 'http://lifenews.ru/news/98736',
|
||||
'url': 'https://life.ru/t/новости/98736',
|
||||
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
||||
'info_dict': {
|
||||
'id': '98736',
|
||||
'ext': 'mp4',
|
||||
'title': 'Мужчина нашел дома архив оборонного завода',
|
||||
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
||||
'timestamp': 1344154740,
|
||||
'upload_date': '20120805',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# single video embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/152125',
|
||||
'url': 'https://life.ru/t/новости/152125',
|
||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||
'info_dict': {
|
||||
'id': '152125',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'timestamp': 1427961840,
|
||||
'upload_date': '20150402',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# two videos embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'url': 'https://life.ru/t/новости/153461',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'timestamp': 1430825520,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
@@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}, {
|
||||
@@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'url': 'https://life.ru/t/новости/213035',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
section = mobj.group('section')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_urls = re.findall(
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
@@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
' - Первый по срочным новостям — LIFE | NEWS')
|
||||
' - Life.ru')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||
webpage, 'comment count', fatal=False)
|
||||
r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
|
||||
webpage, 'view count', fatal=False, group='value')
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
|
||||
webpage, 'upload date', fatal=False, group='value'))
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
def make_entry(video_id, video_url, index=None):
|
||||
@@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='m3u8'))
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@@ -98,13 +98,19 @@ class LimelightBaseIE(InfoExtractor):
|
||||
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
|
||||
|
||||
subtitles = {}
|
||||
for caption in properties.get('captions', {}):
|
||||
for caption in properties.get('captions', []):
|
||||
lang = caption.get('language_code')
|
||||
subtitles_url = caption.get('url')
|
||||
if lang and subtitles_url:
|
||||
subtitles[lang] = [{
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitles_url,
|
||||
}]
|
||||
})
|
||||
closed_captions_url = properties.get('closed_captions_url')
|
||||
if closed_captions_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': closed_captions_url,
|
||||
'ext': 'ttml',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -123,7 +129,18 @@ class LimelightBaseIE(InfoExtractor):
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
_VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:media:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bmediaId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
||||
'info_dict': {
|
||||
@@ -158,6 +175,9 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'media'
|
||||
_API_PATH = 'media'
|
||||
@@ -176,15 +196,29 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel'
|
||||
_VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:channel:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bchannelId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
|
||||
'info_dict': {
|
||||
'id': 'ab6a524c379342f9b23642917020c082',
|
||||
'title': 'Javascript Sample Code',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'channel'
|
||||
_API_PATH = 'channels'
|
||||
|
||||
@@ -207,15 +241,29 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
|
||||
class LimelightChannelListIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight:channel_list'
|
||||
_VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
limelight:channel_list:|
|
||||
https?://
|
||||
(?:
|
||||
link\.videoplatform\.limelight\.com/media/|
|
||||
assets\.delvenetworks\.com/player/loader\.swf
|
||||
)
|
||||
\?.*?\bchannelListId=
|
||||
)
|
||||
(?P<id>[a-z0-9]{32})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||
'info_dict': {
|
||||
'id': '301b117890c4465c8179ede21fd92e2b',
|
||||
'title': 'Website - Hero Player',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'channel_list'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_stream_info(self, stream_info):
|
||||
broadcast_id = stream_info['broadcast_id']
|
||||
broadcast_id = compat_str(stream_info['broadcast_id'])
|
||||
is_live = stream_info.get('is_live')
|
||||
|
||||
formats = []
|
||||
@@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor):
|
||||
if not videos_info:
|
||||
break
|
||||
for v in videos_info:
|
||||
v_id = compat_str(v['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']),
|
||||
'Livestream', v['id'], v['caption']))
|
||||
'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
|
||||
'Livestream', v_id, v.get('caption')))
|
||||
last_video = videos_info[-1]['id']
|
||||
return self.playlist_result(entries, event_id, event_data['full_name'])
|
||||
|
||||
|
@@ -1,93 +1,94 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class LyndaBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||
_SIGNIN_URL = 'https://www.lynda.com/signin'
|
||||
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@staticmethod
|
||||
def _check_error(json_string, key_or_keys):
|
||||
keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
|
||||
for key in keys:
|
||||
error = json_string.get(key)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
|
||||
action_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
|
||||
'post url', default=fallback_action_url, group='url')
|
||||
|
||||
if not action_url.startswith('http'):
|
||||
action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
|
||||
|
||||
form_data = self._hidden_inputs(form_html)
|
||||
form_data.update(extra_form_data)
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
action_url, None, note,
|
||||
data=urlencode_postdata(form_data),
|
||||
headers={
|
||||
'Referer': referrer_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
response = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
self._check_error(response, ('email', 'password'))
|
||||
raise
|
||||
|
||||
self._check_error(response, 'ErrorMessage')
|
||||
|
||||
return response, action_url
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
# Step 1: download signin page
|
||||
signin_page = self._download_webpage(
|
||||
self._SIGNIN_URL, None, 'Downloading signin page')
|
||||
|
||||
# Not (yet) logged in
|
||||
m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
|
||||
if m is not None:
|
||||
response = m.group('json')
|
||||
response_json = json.loads(response)
|
||||
state = response_json['state']
|
||||
|
||||
if state == 'notlogged':
|
||||
raise ExtractorError(
|
||||
'Unable to login, incorrect username and/or password',
|
||||
expected=True)
|
||||
|
||||
# This is when we get popup:
|
||||
# > You're already logged in to lynda.com on two devices.
|
||||
# > If you log in here, we'll log you out of another device.
|
||||
# So, we need to confirm this.
|
||||
if state == 'conflicted':
|
||||
confirm_form = {
|
||||
'username': '',
|
||||
'password': '',
|
||||
'resolve': 'true',
|
||||
'remember': 'false',
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(confirm_form))
|
||||
login_page = self._download_webpage(
|
||||
request, None,
|
||||
'Confirming log in and log out from another device')
|
||||
|
||||
if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
||||
if 'login error' in login_page:
|
||||
mobj = re.search(
|
||||
r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
|
||||
login_page)
|
||||
if mobj:
|
||||
raise ExtractorError(
|
||||
'lynda returned error: %s - %s'
|
||||
% (mobj.group('title'), clean_html(mobj.group('description'))),
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _logout(self):
|
||||
username, _ = self._get_login_info()
|
||||
if username is None:
|
||||
# Already logged in
|
||||
if any(re.search(p, signin_page) for p in (
|
||||
'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
||||
return
|
||||
|
||||
self._download_webpage(
|
||||
'http://www.lynda.com/ajax/logout.aspx', None,
|
||||
'Logging out', 'Unable to log out', fatal=False)
|
||||
# Step 2: submit email
|
||||
signin_form = self._search_regex(
|
||||
r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
|
||||
signin_page, 'signin form')
|
||||
signin_page, signin_url = self._login_step(
|
||||
signin_form, self._PASSWORD_URL, {'email': username},
|
||||
'Submitting email', self._SIGNIN_URL)
|
||||
|
||||
# Step 3: submit password
|
||||
password_form = signin_page['body']
|
||||
self._login_step(
|
||||
password_form, self._USER_URL, {'email': username, 'password': password},
|
||||
'Submitting password', signin_url)
|
||||
|
||||
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
@@ -212,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
|
||||
self._logout()
|
||||
|
||||
if course.get('Status') == 'NotFound':
|
||||
raise ExtractorError(
|
||||
'Course %s does not exist' % course_id, expected=True)
|
||||
@@ -246,5 +245,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
||||
|
||||
course_title = course.get('Title')
|
||||
course_description = course.get('Description')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
return self.playlist_result(entries, course_id, course_title, course_description)
|
||||
|
@@ -67,6 +67,23 @@ class NBCIE(InfoExtractor):
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# HLS streams requires the 'hdnea3' cookie
|
||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||
'info_dict': {
|
||||
'id': 'n1806',
|
||||
'ext': 'mp4',
|
||||
'title': 'Goliath',
|
||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||
'timestamp': 1237100400,
|
||||
'upload_date': '20090315',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
}
|
||||
]
|
||||
|
||||
@@ -249,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
|
||||
'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -272,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
if bootstrap_json:
|
||||
bootstrap = self._parse_json(bootstrap_json, display_id)
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, display_id, transform_source=unescapeHTML)
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
else:
|
||||
player_instance_json = self._search_regex(
|
||||
r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
|
||||
if not player_instance_json:
|
||||
player_instance_json = self._html_search_regex(
|
||||
r'data-video="([^"]+)"', webpage, 'video json')
|
||||
info = self._parse_json(player_instance_json, display_id)
|
||||
info = bootstrap
|
||||
video_id = info['mpxId']
|
||||
title = info['title']
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
determine_ext,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
@@ -15,56 +16,49 @@ from ..compat import compat_urllib_parse_urlencode
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
_PLAYER_BASE = 'http://player.ooyala.com/'
|
||||
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
||||
|
||||
def _extract(self, content_tree_url, video_id, domain='example.org'):
|
||||
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
||||
metadata = content_tree[list(content_tree)[0]]
|
||||
embed_code = metadata['embed_code']
|
||||
pcode = metadata.get('asset_pcode') or embed_code
|
||||
video_info = {
|
||||
'id': embed_code,
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
}
|
||||
title = metadata['title']
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
|
||||
auth_data = self._download_json(
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': supported_format
|
||||
}),
|
||||
video_id, 'Downloading %s JSON' % supported_format)
|
||||
'supportedFormats': 'mp4,rtmp,m3u8,hds',
|
||||
}), video_id)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
url = base64.b64decode(
|
||||
s_url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if url in urls:
|
||||
if s_url in urls:
|
||||
continue
|
||||
urls.append(url)
|
||||
urls.append(s_url)
|
||||
ext = determine_ext(s_url, None)
|
||||
delivery_type = stream['delivery_type']
|
||||
if delivery_type == 'hls' or '.m3u8' in url:
|
||||
if delivery_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, embed_code, 'mp4', 'm3u8_native',
|
||||
s_url, embed_code, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif delivery_type == 'hds' or '.f4m' in url:
|
||||
elif delivery_type == 'hds' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif '.smil' in url:
|
||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
url, embed_code, fatal=False))
|
||||
s_url, embed_code, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': stream.get('delivery_type'),
|
||||
'url': s_url,
|
||||
'ext': ext or stream.get('delivery_type'),
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
@@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
self.IE_NAME, cur_auth_data['message']), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_info['formats'] = formats
|
||||
return video_info
|
||||
subtitles = {}
|
||||
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles[lang] = [{
|
||||
'url': sub_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': embed_code,
|
||||
'title': title,
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class OoyalaIE(OoyalaBaseIE):
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://openload.io/f/ZAn6oz-VZGE/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://openload.co/f/_-ztPaZtMhM/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
|
||||
# for title and ext
|
||||
@@ -100,7 +103,7 @@ class OpenloadIE(InfoExtractor):
|
||||
raise ExtractorError('File not found', expected=True)
|
||||
|
||||
code = self._search_regex(
|
||||
r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>',
|
||||
r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
|
||||
webpage, 'JS code')
|
||||
|
||||
decoded = self.openload_decode(code)
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
|
||||
@@ -42,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
|
||||
uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
|
||||
uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
|
||||
user = broadcast_data.get('user', {})
|
||||
|
||||
uploader = broadcast.get('user_display_name') or user.get('display_name')
|
||||
uploader_id = (broadcast.get('username') or user.get('username') or
|
||||
broadcast.get('user_id') or user.get('id'))
|
||||
|
||||
title = '%s - %s' % (uploader, status) if uploader else status
|
||||
state = broadcast.get('state').lower()
|
||||
@@ -92,6 +98,7 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'LularoeHusbandMike',
|
||||
'title': 'LULAROE HUSBAND MIKE',
|
||||
'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
|
||||
},
|
||||
# Periscope only shows videos in the last 24 hours, so it's possible to
|
||||
# get 0 videos
|
||||
@@ -103,16 +110,19 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
|
||||
broadcast_data = self._parse_json(self._html_search_meta(
|
||||
'broadcast-data', webpage, default='{}'), user_id)
|
||||
username = broadcast_data.get('user', {}).get('display_name')
|
||||
user_broadcasts = self._parse_json(
|
||||
self._html_search_meta('user-broadcasts', webpage, default='{}'),
|
||||
data_store = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-store=(["\'])(?P<data>.+?)\1',
|
||||
webpage, 'data store', default='{}', group='data')),
|
||||
user_id)
|
||||
|
||||
user = data_store.get('User', {}).get('user', {})
|
||||
title = user.get('display_name') or user.get('username')
|
||||
description = user.get('description')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
|
||||
for broadcast in user_broadcasts.get('broadcasts', [])]
|
||||
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
|
||||
|
||||
return self.playlist_result(entries, user_id, username)
|
||||
return self.playlist_result(entries, user_id, title, description)
|
||||
|
@@ -4,9 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
|
||||
'duration': 145.94,
|
||||
},
|
||||
}, {
|
||||
# m3u8 in f4m
|
||||
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
|
||||
'info_dict': {
|
||||
'id': '4840492',
|
||||
'ext': 'mp4',
|
||||
'title': 'ITV EL SHOW FULL',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Multiple resolutions while bitrates missing
|
||||
'url': 'http://cdn.playwire.com/11625/embed/85228.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
|
||||
thumbnail = content.get('poster')
|
||||
src = content['media']['f4m']
|
||||
|
||||
f4m = self._download_xml(src, video_id)
|
||||
base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
|
||||
formats = []
|
||||
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media.get('bitrate'))
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
f = {
|
||||
'url': '%s/%s' % (base_url, media.attrib['url']),
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
if not (tbr or width or height):
|
||||
f['quality'] = 1 if '-hd.' in media_url else 0
|
||||
formats.append(f)
|
||||
formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
if not dict_get(a_format, ['tbr', 'width', 'height']):
|
||||
a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
130
youtube_dl/extractor/radiocanada.py
Normal file
130
youtube_dl/extractor/radiocanada.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RadioCanadaIE(InfoExtractor):
|
||||
IE_NAME = 'radiocanada'
|
||||
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'flv',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
formats = []
|
||||
# TODO: extract m3u8 and f4m formats
|
||||
# m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in ('flash',):
|
||||
v_data = self._download_xml(
|
||||
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
|
||||
video_id, note='Downloading %s XML' % device_type, query={
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
'connectionType': 'broadband',
|
||||
'multibitrate': 'true',
|
||||
'deviceType': device_type,
|
||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
||||
'bypasslock': 'NZt5K62gRqfc',
|
||||
})
|
||||
v_url = xpath_text(v_data, 'url')
|
||||
if not v_url:
|
||||
continue
|
||||
if v_url == 'null':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
|
||||
ext = determine_ext(v_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
ext = determine_ext(v_url)
|
||||
bitrates = xpath_element(v_data, 'bitrates')
|
||||
for url_e in bitrates.findall('url'):
|
||||
tbr = int_or_none(url_e.get('bitrate'))
|
||||
if not tbr:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
|
||||
'ext': 'flv',
|
||||
'protocol': 'rtmp',
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
|
||||
video_id, note='Downloading metadata XML', query={
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
})
|
||||
|
||||
def get_meta(name):
|
||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
||||
return el.text if el is not None else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_meta('Title'),
|
||||
'description': get_meta('Description') or get_meta('ShortDescription'),
|
||||
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
|
||||
'duration': int_or_none(get_meta('length')),
|
||||
'series': get_meta('Emission'),
|
||||
'season_number': int_or_none('SrcSaison'),
|
||||
'episode_number': int_or_none('SrcEpisode'),
|
||||
'upload_date': unified_strdate(get_meta('Date')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
'radiocanada:audiovideo'
|
||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||
'info_dict': {
|
||||
'id': '7527184',
|
||||
'ext': 'flv',
|
||||
'title': 'Barack Obama au Vietnam',
|
||||
'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
|
||||
'upload_date': '20160523',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
69
youtube_dl/extractor/reuters.py
Normal file
69
youtube_dl/extractor/reuters.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class ReutersIE(InfoExtractor):
    """Extractor for reuters.com pages that carry a ``videoId`` query parameter."""
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video whose id is embedded in *url*.

        The player iframe page is downloaded, the argument of
        ``Reuters.yovideo.drawPlayer(...)`` is converted with ``js_to_json``
        and individual values are then pulled out of that text by regex.
        The ``flv`` value supplies the two numeric ids used to query the
        yospace MAS endpoint for the list of formats.

        Raises whatever ``_search_regex`` raises when the title, the ``flv``
        value or the ids inside it cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))

        def get_json_value(key, fatal=False):
            # Raw string: the pattern contains regex escapes (\s).
            return self._search_regex(
                r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)

        title = unescapeHTML(get_json_value('title', fatal=True))
        flv_url = get_json_value('flv', fatal=True)
        # Go through _search_regex so an unexpected flv value is reported as
        # a regular extraction failure rather than an AttributeError on None.
        mmid = self._search_regex(r',/(\d+)\?f=\d+', flv_url, 'mmid')
        fid = self._search_regex(r',/\d+\?f=(\d+)', flv_url, 'fid')

        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # Entries delivered with the 'mobile' method are labelled 3gp
                # and carry no container value.
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }
|
@@ -13,8 +13,64 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class Revision3EmbedIE(InfoExtractor):
    """Extractor for Revision3-family embed players and ``revision3:`` URLs.

    Accepts either ``revision3:[<playlist_type>:]<id>`` or a
    ``/player/embed?videoId=<id>`` URL on the hosts listed in ``_VALID_URL``.
    """
    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Query getPlaylist.json and describe the first item it returns.

        The query key naming the id is the playlist type from the URL, or
        ``video_id`` when the URL does not specify one.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('playlist_id')
        playlist_type = mobj.group('playlist_type') or 'video_id'
        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
                'api_key': self._API_KEY,
                'codecs': 'h264,vp8,theora',
                playlist_type: playlist_id,
            })['items'][0]

        formats = []
        for vcodec, media in video_data['media'].items():
            for quality_id, quality in media.items():
                if quality_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        quality['url'], playlist_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'url': quality['url'],
                        'format_id': '%s-%s' % (vcodec, quality_id),
                        'tbr': int_or_none(quality.get('bitrate')),
                        'vcodec': vcodec,
                    })
        self._sort_formats(formats)

        # 'show' is optional metadata: treat a missing or null value as an
        # empty mapping so the uploader fields simply come out as None.
        show = video_data.get('show') or {}

        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class Revision3IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
|
||||
IE_NAME = 'revision'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
|
||||
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
|
||||
@@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
# Show
|
||||
'url': 'http://testtube.com/brainstuff',
|
||||
'info_dict': {
|
||||
'id': '251',
|
||||
'title': 'BrainStuff',
|
||||
'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
|
||||
},
|
||||
'playlist_mincount': 93,
|
||||
}, {
|
||||
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
|
||||
'info_dict': {
|
||||
'id': '58227',
|
||||
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
|
||||
'duration': 275,
|
||||
'ext': 'webm',
|
||||
'title': '5 Weird Ways Plants Can Eat Animals',
|
||||
'description': 'Why have some plants evolved to eat meat?',
|
||||
'upload_date': '20150120',
|
||||
'timestamp': 1421763300,
|
||||
'uploader': 'DNews',
|
||||
'uploader_id': 'dnews',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'info_dict': {
|
||||
'id': '71618',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
|
||||
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
|
||||
'uploader': 'Editors\' Picks',
|
||||
'uploader_id': 'tt-editors-picks',
|
||||
'timestamp': 1453309200,
|
||||
'upload_date': '20160120',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'http://revision3.com/variant',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Tag
|
||||
'url': 'http://testtube.com/tech-news',
|
||||
'info_dict': {
|
||||
'id': '21018',
|
||||
'title': 'tech news',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'url': 'http://revision3.com/vr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
|
||||
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
@@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
|
||||
video_id)['items'][0]
|
||||
|
||||
formats = []
|
||||
for vcodec, media in video_data['media'].items():
|
||||
for quality_id, quality in media.items():
|
||||
if quality_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': quality['url'],
|
||||
'format_id': '%s-%s' % (vcodec, quality_id),
|
||||
'tbr': int_or_none(quality.get('bitrate')),
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'title': unescapeHTML(video_data['title']),
|
||||
'description': unescapeHTML(video_data.get('summary')),
|
||||
'uploader': video_data.get('show', {}).get('name'),
|
||||
'uploader_id': video_data.get('show', {}).get('slug'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'formats': formats,
|
||||
'_type': 'url_transparent',
|
||||
'url': 'revision3:%s' % video_id,
|
||||
})
|
||||
return info
|
||||
else:
|
||||
|
57
youtube_dl/extractor/seeker.py
Normal file
57
youtube_dl/extractor/seeker.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SeekerIE(InfoExtractor):
    """Extractor for seeker.com articles that embed Revision3 players.

    An article either loads a single item through
    ``player.loadRevision3Item(...)`` or embeds one or more seekernetwork
    player iframes; both cases are delegated to ``Revision3Embed``.
    """
    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
    _TESTS = [{
        # player.loadRevision3Item
        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
        'md5': '30c1dc4030cc715cf05b423d0947ac18',
        'info_dict': {
            'id': '76243',
            'ext': 'webm',
            'title': 'Should Trump Be Required To Release His Tax Returns?',
            'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
            'uploader': 'Seeker Daily',
            'uploader_id': 'seekerdaily',
        }
    }, {
        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
        'playlist': [
            {
                'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
                'info_dict': {
                    'id': '67558',
                    'ext': 'mp4',
                    'title': 'The Pros & Cons Of Zoos',
                    'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
                    'uploader': 'DNews',
                    'uploader_id': 'dnews',
                },
            }
        ],
        'info_dict': {
            'id': '1834116536',
            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
        },
    }]

    def _real_extract(self, url):
        """Return a single URL result or a playlist of the embedded players."""
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('display_id')
        article_id = url_match.group('article_id')
        webpage = self._download_webpage(url, display_id)

        # Single item loaded through the page's player script.
        item = re.search(
            r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
        if item:
            playlist_type = item.group(1)
            playlist_id = item.group(2)
            return self.url_result(
                'revision3:%s:%s' % (playlist_type, playlist_id),
                'Revision3Embed', playlist_id)

        # Otherwise gather every seekernetwork embed iframe on the page.
        embedded_ids = re.findall(
            r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)',
            webpage)
        entries = []
        for embedded_id in embedded_ids:
            entries.append(self.url_result(
                'revision3:video_id:%s' % embedded_id, 'Revision3Embed', embedded_id))
        return self.playlist_result(
            entries, article_id,
            self._og_search_title(webpage), self._og_search_description(webpage))
|
@@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor):
|
||||
formats = []
|
||||
for height, video_url in zip(heights, video_urls):
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
_, quality = path.split('/')[4].split('_')[:2]
|
||||
f = {
|
||||
'url': video_url,
|
||||
'height': height,
|
||||
}
|
||||
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
|
||||
if tbr:
|
||||
f.update({
|
||||
'tbr': int(tbr),
|
||||
'format_id': '%dp' % height,
|
||||
})
|
||||
m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
|
||||
if m:
|
||||
tbr = int(m.group('tbr'))
|
||||
height = int(m.group('height'))
|
||||
else:
|
||||
f['format_id'] = quality
|
||||
formats.append(f)
|
||||
tbr = None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
|
||||
'md5': '3d6361864d7cac20b57c8784da17166f',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
'info_dict': {
|
||||
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@@ -14,11 +14,13 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
xpath_with_ns,
|
||||
mimetype2ext,
|
||||
find_xpath_attr,
|
||||
@@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE):
|
||||
if OnceIE.suitable(_format['url']):
|
||||
formats.extend(self._extract_once_formats(_format['url']))
|
||||
else:
|
||||
media_url = _format['url']
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
hdnea2 = self._get_cookies(media_url).get('hdnea2')
|
||||
if hdnea2:
|
||||
_format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
|
||||
|
||||
formats.append(_format)
|
||||
|
||||
subtitles = self._parse_smil_subtitles(meta, default_ns)
|
||||
@@ -151,6 +159,22 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_urls(cls, webpage):
    """Return the player.theplatform.com URLs found in *webpage* as a list.

    A ``<meta>`` tag whose property is ``og:video``, ``og:video:url``,
    ``og:video:secure_url`` or ``twitter:player`` takes precedence and
    yields a one-element list. Otherwise every ``<iframe>``/``<script>``
    ``src`` pointing at the player is returned in document order.

    The result is always a list (empty when nothing matches), so callers
    can both truth-test and iterate it without caring which path matched.
    """
    meta = re.search(
        r'''(?x)
                <meta\s+
                    property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
                    content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
        ''', webpage)
    if meta:
        return [meta.group('url')]

    # findall yields (quote, url) pairs; only the URL is of interest.
    return [src for _, src in re.findall(
        r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1',
        webpage)]
|
||||
|
||||
@staticmethod
|
||||
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||
flags = '10' if include_qs else '00'
|
||||
|
@@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor):
|
||||
s|
|
||||
song/comments/list|
|
||||
song
|
||||
)/(?P<id>[A-Za-z0-9]+)/?$'''
|
||||
)/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$'''
|
||||
_SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
|
||||
_SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
|
||||
_THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
|
||||
@@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor):
|
||||
'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
_DECODE_MAP = {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -6,20 +6,13 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TvpIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl'
|
||||
_VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
|
||||
'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
|
||||
'info_dict': {
|
||||
'id': '4278035',
|
||||
'ext': 'wmv',
|
||||
'title': 'Ogniem i mieczem, odc. 2',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
|
||||
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
|
||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
||||
'info_dict': {
|
||||
'id': '194536',
|
||||
@@ -36,12 +29,22 @@ class TvpIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||
'md5': 'c3b15ed1af288131115ff17a17c19dda',
|
||||
'info_dict': {
|
||||
'id': '17834272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Na sygnale, odc. 39',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -92,8 +95,8 @@ class TvpIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TvpSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl:Series'
|
||||
class TVPSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp:series'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor):
|
||||
videos_paths = re.findall(
|
||||
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
|
||||
entries = [
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
|
||||
for v_path in videos_paths]
|
||||
|
||||
return {
|
||||
|
@@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
'nauth': access_token['token'],
|
||||
'nauthsig': access_token['sig'],
|
||||
})),
|
||||
item_id, 'mp4')
|
||||
item_id, 'mp4', entry_protocol='m3u8_native')
|
||||
|
||||
self._prefer_source(formats)
|
||||
info['formats'] = formats
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
remove_end,
|
||||
@@ -52,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
'id': 'dq4Oj5quskI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ubuntu 11.10 Overview',
|
||||
'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
|
||||
'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...',
|
||||
'upload_date': '20111013',
|
||||
'uploader': 'OMG! Ubuntu!',
|
||||
'uploader_id': 'omgubuntu',
|
||||
@@ -116,6 +117,9 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
|
||||
|
||||
if video_url:
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
else:
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
@@ -207,6 +211,7 @@ class TwitterIE(InfoExtractor):
|
||||
'uploader_id': 'giphz',
|
||||
},
|
||||
'expected_warnings': ['height', 'width'],
|
||||
'skip': 'Account suspended',
|
||||
}, {
|
||||
'url': 'https://twitter.com/starwars/status/665052190608723968',
|
||||
'md5': '39b7199856dee6cd4432e72c74bc69d4',
|
||||
@@ -239,10 +244,10 @@ class TwitterIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '700207533655363584',
|
||||
'ext': 'mp4',
|
||||
'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||
'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'jay',
|
||||
'uploader': 'Donte The Dumbass',
|
||||
'uploader_id': 'jaydingeer',
|
||||
},
|
||||
'params': {
|
||||
@@ -262,7 +267,6 @@ class TwitterIE(InfoExtractor):
|
||||
'add_ie': ['Vine'],
|
||||
}, {
|
||||
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
||||
# md5 constantly changes
|
||||
'info_dict': {
|
||||
'id': '719944021058060289',
|
||||
'ext': 'mp4',
|
||||
@@ -271,6 +275,9 @@ class TwitterIE(InfoExtractor):
|
||||
'uploader_id': 'captainamerica',
|
||||
'uploader': 'Captain America',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -278,7 +285,11 @@ class TwitterIE(InfoExtractor):
|
||||
user_id = mobj.group('user_id')
|
||||
twid = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
self._TEMPLATE_URL % (user_id, twid), twid)
|
||||
|
||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||
|
||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||
|
||||
|
@@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
|
||||
r'>Logout<'))
|
||||
|
||||
# already logged in
|
||||
if is_logged(login_popup):
|
||||
|
@@ -2,10 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
@@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
|
||||
_TESTS = [{
|
||||
'url': 'http://video.udn.com/embed/news/300040',
|
||||
'md5': 'de06b4c90b042c128395a88f0384817e',
|
||||
'info_dict': {
|
||||
'id': '300040',
|
||||
'ext': 'mp4',
|
||||
'title': '生物老師男變女 全校挺"做自己"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.udn.com/embed/news/300040',
|
||||
'only_matching': True,
|
||||
@@ -38,39 +44,53 @@ class UDNEmbedIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
options = json.loads(js_to_json(self._html_search_regex(
|
||||
r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
|
||||
r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
|
||||
|
||||
video_urls = options['video']
|
||||
|
||||
if video_urls.get('youtube'):
|
||||
return self.url_result(video_urls.get('youtube'), 'Youtube')
|
||||
|
||||
try:
|
||||
del video_urls['youtube']
|
||||
except KeyError:
|
||||
pass
|
||||
formats = []
|
||||
for video_type, api_url in video_urls.items():
|
||||
if not api_url:
|
||||
continue
|
||||
|
||||
formats = [{
|
||||
'url': self._download_webpage(
|
||||
video_url = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, api_url), video_id,
|
||||
'retrieve url for %s video' % video_type),
|
||||
'format_id': video_type,
|
||||
'preference': 0 if video_type == 'mp4' else -1,
|
||||
} for video_type, api_url in video_urls.items() if api_url]
|
||||
note='retrieve url for %s video' % video_type)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No videos found', expected=True)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
else:
|
||||
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
# video_type may be 'mp4', which confuses YoutubeDL
|
||||
'format_id': 'http-' + video_type,
|
||||
}
|
||||
if mobj:
|
||||
a_format.update({
|
||||
'height': int_or_none(mobj.group('height')),
|
||||
'tbr': int_or_none(mobj.group('tbr')),
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = None
|
||||
|
||||
if options.get('gallery') and len(options['gallery']):
|
||||
thumbnail = options['gallery'][0].get('original')
|
||||
thumbnails = [{
|
||||
'url': img_url,
|
||||
'id': img_type,
|
||||
} for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': options['title'],
|
||||
'thumbnail': thumbnail
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
@@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
|
||||
'uploader': 'afp-news',
|
||||
'duration': 123,
|
||||
},
|
||||
'skip': 'This video has been deleted.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -12,11 +13,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class VesselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
||||
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
||||
_NETRC_MACHINE = 'vessel'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vessel.com/videos/HDN7G5UMs',
|
||||
'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
|
||||
'info_dict': {
|
||||
@@ -28,7 +29,16 @@ class VesselIE(InfoExtractor):
|
||||
'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
|
||||
'timestamp': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Return the ``src`` of every vessel.com embed iframe in *webpage*."""
    embed_re = r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1'
    found = []
    for hit in re.finditer(embed_re, webpage):
        # Group 1 is the quote character, group 2 the URL itself.
        found.append(hit.group(2))
    return found
|
||||
|
||||
@staticmethod
|
||||
def make_json_request(url, data):
|
||||
@@ -98,16 +108,24 @@ class VesselIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for f in video_asset.get('sources', []):
|
||||
if f['name'] == 'hls-index':
|
||||
location = f.get('location')
|
||||
if not location:
|
||||
continue
|
||||
name = f.get('name')
|
||||
if name == 'hls-index':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
|
||||
location, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
|
||||
elif name == 'dash-index':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
location, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': f['name'],
|
||||
'format_id': name,
|
||||
'tbr': f.get('bitrate'),
|
||||
'height': f.get('height'),
|
||||
'width': f.get('width'),
|
||||
'url': f['location'],
|
||||
'url': location,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'md5': 'e9d77741f9e42ba583e683cd170660f7',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'flv',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.vice.com/video/how-to-hack-a-car',
|
||||
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
|
||||
@@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
|
||||
'uploader': 'Motherboard',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
|
73
youtube_dl/extractor/vidio.py
Normal file
73
youtube_dl/extractor/vidio.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class VidioIE(InfoExtractor):
    """Extractor for vidio.com ``/watch/<id>-<slug>`` pages."""
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
        'md5': 'cd2801394afc164e9775db6a140b91fe',
        'info_dict': {
            'id': '165683',
            'display_id': 'dj_ambred-booyah-live-2015',
            'ext': 'mp4',
            'title': 'DJ_AMBRED - Booyah (Live 2015)',
            'description': 'md5:27dc15f819b6a78a626490881adbadf8',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149,
            'like_count': int,
        },
    }, {
        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a vidio.com watch page.

        The HLS URL, duration and thumbnail are read from the first entry of
        the page's ``data-json-clips`` attribute when it is present. Each
        value falls back to another source in the page when the clip data
        does not provide it. Only the HLS URL is mandatory; a missing
        duration or like count does not abort extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, display_id = mobj.group('id', 'display_id')

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        m3u8_url, duration, thumbnail = [None] * 3

        clips = self._parse_json(
            self._html_search_regex(
                r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
                webpage, 'video data', default='[]', group='data'),
            display_id, fatal=False)
        if clips:
            clip = clips[0]
            # `or [{}]` also covers an empty or null sources list, not only
            # a missing key.
            m3u8_url = (clip.get('sources') or [{}])[0].get('file')
            duration = clip.get('clip_duration')
            thumbnail = clip.get('image')

        # group= is required in both fallbacks below: the first capture group
        # of each pattern is only the quote character around the value.
        m3u8_url = m3u8_url or self._search_regex(
            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1',
            webpage, 'hls url', group='url')
        formats = self._extract_m3u8_formats(
            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        duration = int_or_none(duration or self._search_regex(
            r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
            'duration', fatal=False, group='duration'))
        thumbnail = thumbnail or self._og_search_thumbnail(webpage)

        like_count = int_or_none(self._search_regex(
            (r'<span[^>]+data-comment-vote-count=["\'](\d+)',
             r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
            webpage, 'like count', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
            'duration': duration,
            'like_count': like_count,
            'formats': formats,
        }
|
@@ -141,6 +141,10 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.kesari.tv/news/video/1461919076414',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Was once Kaltura embed
|
||||
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -217,7 +217,6 @@ class VKIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info_url = url
|
||||
if video_id:
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
|
@@ -1,8 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import division, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -10,6 +9,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
@@ -23,7 +23,7 @@ class VLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "[V] Girl's Day's Broadcast",
|
||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
},
|
||||
@@ -35,24 +35,12 @@ class VLiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
# UTC+x - UTC+9 (KST)
|
||||
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
|
||||
tz_offset = -tz // 60 - 9 * 60
|
||||
self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
|
||||
|
||||
status_params = self._download_json(
|
||||
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
|
||||
video_id, 'Downloading JSON status',
|
||||
headers={'Referer': url.encode('utf-8')})
|
||||
status = status_params.get('status')
|
||||
air_start = status_params.get('onAirStartAt', '')
|
||||
is_live = status_params.get('isLive')
|
||||
|
||||
video_params = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
|
||||
r'\bvlive\.video\.init\(([^)]+)\)',
|
||||
webpage, 'video params')
|
||||
live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[1:4]
|
||||
status, _, _, live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[2:8]
|
||||
status = remove_start(status, 'PRODUCT_')
|
||||
|
||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
||||
@@ -61,8 +49,6 @@ class VLiveIE(InfoExtractor):
|
||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
elif is_live:
|
||||
status = 'LIVE_END'
|
||||
else:
|
||||
status = 'COMING_SOON'
|
||||
|
||||
@@ -70,7 +56,7 @@ class VLiveIE(InfoExtractor):
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
expected=True)
|
||||
elif status == 'COMING_SOON':
|
||||
raise ExtractorError('Coming soon! %s' % air_start, expected=True)
|
||||
raise ExtractorError('Coming soon!', expected=True)
|
||||
elif status == 'CANCELED':
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
|
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
|
||||
'md5': '0554a24d1657915aa8e8f84e15dc9353',
|
||||
'md5': 'b9174d651323f17783000876347116e3',
|
||||
'info_dict': {
|
||||
'id': '2296',
|
||||
'display_id': 'watching-the-watchers-building-a-sousveillance-state',
|
||||
'ext': 'm4a',
|
||||
'title': 'Watching the Watchers: Building a Sousveillance State',
|
||||
'description': 'md5:715ba964958afa2398df615809cfecb1',
|
||||
'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||
'duration': 1800,
|
||||
'view_count': int,
|
||||
@@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor):
|
||||
if data:
|
||||
title = data['title']
|
||||
description = data.get('teaser')
|
||||
talk_id = data.get('talk_id') or display_id
|
||||
talk_id = compat_str(data.get('talk_id') or display_id)
|
||||
talk = data['talk']
|
||||
duration = int_or_none(talk.get('duration'))
|
||||
formats = [{
|
||||
|
@@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Google\'s new material design direction',
|
||||
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# data-ooyala-id
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
@@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The Nexus 6: hands-on with Google\'s phablet',
|
||||
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# volume embed
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
@@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The new frontier of LGBTQ civil rights, explained',
|
||||
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
|
||||
@@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'upload_date': '20160324',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'uploader': 'Vox',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
volume_webpage = self._download_webpage(
|
||||
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
for provider_video_type in ('ooyala', 'youtube'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if provider_video_id:
|
||||
|
@@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
@@ -27,7 +28,8 @@ class VpornIE(InfoExtractor):
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
'skip': 'video removed',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
||||
@@ -40,7 +42,7 @@ class VpornIE(InfoExtractor):
|
||||
'description': 'Hana showers at the bathroom.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Hmmmmm',
|
||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
|
||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
|
||||
'duration': 588,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
@@ -55,6 +57,10 @@ class VpornIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
|
||||
if errmsg in webpage:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
|
||||
description = self._html_search_regex(
|
||||
|
@@ -1,69 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VultureIE(InfoExtractor):
    """Extractor for player pages under video.vulture.com/video/."""

    IE_NAME = 'vulture.com'
    _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
    _TEST = {
        'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
        'md5': '8d997845642a2b5152820f7257871bc8',
        'info_dict': {
            'id': '6GHRQL3RV7MSD1H4',
            'ext': 'mp4',
            'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
            'uploader_id': 'Sarah',
            'thumbnail': 're:^http://.*\.jpg$',
            'timestamp': 1401288564,
            'upload_date': '20140528',
            'description': 'Uplifting and witty, as predicted.',
            'duration': 1015,
        }
    }

    def _real_extract(self, url):
        """Resolve the embedded player page and build the info dict from it."""
        display_id = re.match(self._VALID_URL, url).group('display_id')

        # The landing page only carries a query string; the video id is its
        # "content" parameter and the rest is forwarded to the player page.
        landing_page = self._download_webpage(url, display_id)
        query_string = self._search_regex(
            r"queryString\s*=\s*'([^']+)'", landing_page, 'query string')
        video_id = self._search_regex(
            r'content=([^&]+)', query_string, 'video ID')

        player_page = self._download_webpage(
            'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string,
            display_id, note='Downloading query page')

        # The player is configured inline; contentItem holds the metadata.
        params = json.loads(self._search_regex(
            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
            player_page, 'player params'))

        # 'posted' uses a space between date and time; swap in 'T' before
        # handing it to the ISO 8601 parser.
        timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
        uploader_id = params.get('user', {}).get('handle')

        media_item = params['media_item']
        # The title is stored with a file extension; keep only the stem.
        title, _ext = os.path.splitext(media_item['title'])
        duration = int_or_none(media_item.get('duration_seconds'))

        info = {
            'id': video_id,
            'display_id': display_id,
            'url': media_item['pipeline_xid'],
            'title': title,
            'timestamp': timestamp,
            'thumbnail': params.get('thumbnail_url'),
            'uploader_id': uploader_id,
            'description': params.get('description'),
            'duration': duration,
        }
        return info
|
@@ -11,7 +11,96 @@ from ..utils import (
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TEST = {
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
'info_dict': {
|
||||
'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Egypt finds belongings, debris from plane crash',
|
||||
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
|
||||
'upload_date': '20160520',
|
||||
'uploader': 'Reuters',
|
||||
'timestamp': 1463778452,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single Washington Post video by its UUID.

    Downloads the JSONP ``videojson`` document for the id matched from
    *url*, reads its first entry's ``contentConfig`` and builds one format
    per distinct stream URL (SMIL streams are skipped, HLS master/mobile
    playlists are expanded), plus an optional ``source_media`` format.

    Returns an info dict with ``id``, ``title``, ``description``,
    ``uploader``, ``formats``, ``duration`` and ``timestamp``.
    """
    video_id = self._match_id(url)
    video_data = self._download_json(
        'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
        video_id, transform_source=strip_jsonp)[0]['contentConfig']
    title = video_data['title']

    # Only membership is ever tested, so a set is sufficient for
    # de-duplicating stream URLs.
    seen_urls = set()
    formats = []
    for s in video_data.get('streams', []):
        s_url = s.get('url')
        if not s_url or s_url in seen_urls:
            continue
        seen_urls.add(s_url)
        video_type = s.get('type')
        if video_type == 'smil':
            continue
        elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
            m3u8_formats = self._extract_m3u8_formats(
                s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
            for m3u8_format in m3u8_formats:
                width = m3u8_format.get('width')
                height = m3u8_format.get('height')
                # Both dimensions are needed to build the pattern below;
                # skip variants lacking either instead of failing on %d.
                if not width or not height:
                    continue
                # Variant URLs embed "<width>_<height>_<bitrate>".
                vbr = self._search_regex(
                    r'%d_%d_(\d+)' % (width, height), m3u8_format['url'], 'vbr', default=None)
                if vbr:
                    m3u8_format.update({
                        'vbr': int_or_none(vbr),
                    })
            formats.extend(m3u8_formats)
        else:
            width = int_or_none(s.get('width'))
            vbr = int_or_none(s.get('bitrate'))
            # A reported width of exactly 0 marks an audio-only stream.
            has_width = width != 0
            if not width:
                format_id = video_type
            elif vbr is None:
                # No bitrate reported: '%d' would raise TypeError on None.
                format_id = '%s-%d' % (video_type, width)
            else:
                format_id = '%s-%d-%d' % (video_type, width, vbr)
            formats.append({
                'format_id': format_id,
                'vbr': vbr if has_width else None,
                'width': width,
                'height': int_or_none(s.get('height')),
                'acodec': s.get('audioCodec'),
                'vcodec': s.get('videoCodec') if has_width else 'none',
                'filesize': int_or_none(s.get('fileSize')),
                'url': s_url,
                'ext': 'mp4',
                'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
            })
    source_media_url = video_data.get('sourceMediaURL')
    if source_media_url:
        formats.append({
            'format_id': 'source_media',
            'url': source_media_url,
        })
    self._sort_formats(
        formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))

    return {
        'id': video_id,
        'title': title,
        'description': video_data.get('blurb'),
        'uploader': video_data.get('credits', {}).get('source'),
        'formats': formats,
        # videoDuration and dateFirstPublished are divided by 100 and
        # 1000 respectively before being reported.
        'duration': int_or_none(video_data.get('videoDuration'), 100),
        'timestamp': int_or_none(
            video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
    }
|
||||
|
||||
|
||||
class WashingtonPostArticleIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
@@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
|
||||
}]
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Accept *url* only if WashingtonPostIE does not already claim it."""
    # URLs matched by the dedicated video extractor are never handled here;
    # everything else is decided by the inherited check.
    if WashingtonPostIE.suitable(url):
        return False
    return super(WashingtonPostArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
@@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
|
||||
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
|
||||
data-video-uuid=
|
||||
)"([^"]+)"''', webpage)
|
||||
entries = []
|
||||
for i, uuid in enumerate(uuids, start=1):
|
||||
vinfo_all = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
|
||||
page_id,
|
||||
transform_source=strip_jsonp,
|
||||
note='Downloading information of video %d/%d' % (i, len(uuids))
|
||||
)
|
||||
vinfo = vinfo_all[0]['contentConfig']
|
||||
uploader = vinfo.get('credits', {}).get('source')
|
||||
timestamp = int_or_none(
|
||||
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
|
||||
|
||||
formats = [{
|
||||
'format_id': (
|
||||
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
|
||||
if s.get('width')
|
||||
else s.get('type')),
|
||||
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
|
||||
'width': s.get('width'),
|
||||
'height': s.get('height'),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
|
||||
'filesize': s.get('fileSize'),
|
||||
'url': s.get('url'),
|
||||
'ext': 'mp4',
|
||||
'preference': -100 if s.get('type') == 'smil' else None,
|
||||
'protocol': {
|
||||
'MP4': 'http',
|
||||
'F4F': 'f4m',
|
||||
}.get(s.get('type')),
|
||||
} for s in vinfo.get('streams', [])]
|
||||
source_media_url = vinfo.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': uuid,
|
||||
'title': vinfo['title'],
|
||||
'description': vinfo.get('blurb'),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(vinfo.get('videoDuration'), 100),
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@@ -2,25 +2,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
|
||||
_VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
|
||||
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
|
||||
'info_dict': {
|
||||
'id': '11713067',
|
||||
'display_id': 'soupe-figues-l-orange-aux-epices',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soupe de figues à l\'orange et aux épices',
|
||||
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
|
||||
@@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
|
||||
'md5': 'fbc84e4378165278e743956d9c1bf16b',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'display_id': 'gregory-lemarchal-voix-ange',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
|
||||
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
|
||||
@@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def download_video_info(self, real_id):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
|
||||
# 'contentv4' is used in the website, but it also returns the related
|
||||
# videos, we don't need them
|
||||
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
|
||||
return info['media']
|
||||
|
||||
def _real_extract(self, url):
|
||||
def real_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
real_id = mobj.group('real_id')
|
||||
if not real_id:
|
||||
short_id = mobj.group('short_id')
|
||||
webpage = self._download_webpage(url, display_id or short_id)
|
||||
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
video_info = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
geo_list = video_info.get('geoList')
|
||||
country = geo_list[0] if geo_list else ''
|
||||
|
||||
chapters = video_info['chapters']
|
||||
first_chapter = chapters[0]
|
||||
files = video_info['files']
|
||||
first_file = files[0]
|
||||
|
||||
if real_id_for_chapter(first_chapter) != real_id:
|
||||
def video_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
|
||||
if video_id_for_chapter(first_chapter) != video_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
chapter_urls = []
|
||||
for chapter in chapters:
|
||||
chapter_id = real_id_for_chapter(chapter)
|
||||
# Yes, when this chapter is processed by WatIE,
|
||||
# it will download the info again
|
||||
chapter_info = self.download_video_info(chapter_id)
|
||||
chapter_urls.append(chapter_info['url'])
|
||||
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
||||
return self.playlist_result(entries, real_id, video_info['title'])
|
||||
|
||||
upload_date = None
|
||||
if 'date_diffusion' in first_chapter:
|
||||
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
|
||||
return self.playlist_result(entries, video_id, video_info['title'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
# the video id for getting the video url
|
||||
|
||||
formats = [{
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'format_id': 'Mobile',
|
||||
}]
|
||||
date_diffusion = first_chapter.get('date_diffusion')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
|
||||
fmts = [('SD', 'web')]
|
||||
if first_file.get('hasHD'):
|
||||
fmts.append(('HD', 'webhd'))
|
||||
def extract_url(path_template, url_type):
|
||||
req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
|
||||
head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
|
||||
red_url = head.geturl()
|
||||
if req_url == red_url:
|
||||
raise ExtractorError(
|
||||
'%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
|
||||
expected=True)
|
||||
return red_url
|
||||
|
||||
def compute_token(param):
|
||||
timestamp = '%08x' % int(self._download_webpage(
|
||||
'http://www.wat.tv/servertime', real_id,
|
||||
'Downloading server time').split('|')[0])
|
||||
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
|
||||
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
|
||||
m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
|
||||
for fmt in fmts:
|
||||
webid = '/%s/%s' % (fmt[1], real_id)
|
||||
video_url = self._download_webpage(
|
||||
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
|
||||
real_id,
|
||||
'Downloading %s video URL' % fmt[0],
|
||||
'Failed to download %s video URL' % fmt[0],
|
||||
False)
|
||||
if not video_url:
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search(
|
||||
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': fmt[0],
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
m3u8_format.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
'description': first_chapter['description'],
|
||||
'view_count': video_info['views'],
|
||||
'upload_date': upload_date,
|
||||
'duration': first_file['duration'],
|
||||
'duration': video_info['files'][0]['duration'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,214 +1,224 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
qualities,
|
||||
update_url_query,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
class WDRIE(InfoExtractor):
|
||||
_PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
|
||||
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
|
||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||
_PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
|
||||
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
|
||||
# HDS download, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'mdb-362427',
|
||||
'id': 'mdb-1058683',
|
||||
'ext': 'flv',
|
||||
'title': 'Servicezeit',
|
||||
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
||||
'upload_date': '20140310',
|
||||
'is_live': False
|
||||
'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
|
||||
'title': 'Geheimnis Aachener Dom',
|
||||
'alt_title': 'Doku am Freitag',
|
||||
'upload_date': '20160304',
|
||||
'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
|
||||
'is_live': False,
|
||||
'subtitles': {'de': [{
|
||||
'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml',
|
||||
'ext': 'ttml',
|
||||
}]},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
|
||||
'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
|
||||
'md5': 'f4c1f96d01cf285240f53ea4309663d8',
|
||||
'info_dict': {
|
||||
'id': 'mdb-363194',
|
||||
'ext': 'flv',
|
||||
'title': 'Marga Spiegel ist tot',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'upload_date': '20140311',
|
||||
'is_live': False
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
|
||||
'md5': '83e9e8fefad36f357278759870805898',
|
||||
'info_dict': {
|
||||
'id': 'mdb-194332',
|
||||
'id': 'mdb-1072000',
|
||||
'ext': 'mp3',
|
||||
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'upload_date': '20091129',
|
||||
'is_live': False
|
||||
'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
|
||||
'title': 'Schriftstellerin Juli Zeh',
|
||||
'alt_title': 'WDR 3 Gespräch am Samstag',
|
||||
'upload_date': '20160312',
|
||||
'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
|
||||
'is_live': False,
|
||||
'subtitles': {}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
|
||||
'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
|
||||
'info_dict': {
|
||||
'id': 'mdb-478135',
|
||||
'ext': 'mp3',
|
||||
'title': 'Flavia Coelho: Amar é Amar',
|
||||
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
||||
'upload_date': '20140717',
|
||||
'is_live': False
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
|
||||
'playlist_mincount': 146,
|
||||
'info_dict': {
|
||||
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
|
||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-103364',
|
||||
'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'index',
|
||||
'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'alt_title': 'WDR Fernsehen Live',
|
||||
'upload_date': None,
|
||||
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20150101',
|
||||
'is_live': True
|
||||
'is_live': True,
|
||||
'subtitles': {}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||
'playlist_mincount': 8,
|
||||
'info_dict': {
|
||||
'id': 'aktuelle-stunde/aktuelle-stunde-120',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1096487',
|
||||
'ext': 'flv',
|
||||
'upload_date': 're:^[0-9]{8}$',
|
||||
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
||||
'description': '- Die Sendung mit der Maus -',
|
||||
},
|
||||
'skip': 'The id changes from week to week because of the new episode'
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
|
||||
# HDS download, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'mdb-186083',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20130919',
|
||||
'title': 'Sachgeschichte - Achterbahn ',
|
||||
'description': '- Die Sendung mit der Maus -',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||
# Live stream, MD5 unstable
|
||||
'info_dict': {
|
||||
'id': 'mdb-869971',
|
||||
'ext': 'flv',
|
||||
'title': 'Funkhaus Europa Livestream',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'upload_date': '20160101',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
    """Extract one WDR media item, or a playlist of the media pages it links to.

    The page is searched for a ``data-extension`` attribute holding a
    JS object whose ``mediaObj.url`` points at a JSONP metadata document.
    When no such attribute is found, every link on the page that carries
    ``data-extension`` and matches ``_PAGE_REGEX`` is returned as a 'WDR'
    playlist entry instead.

    Raises ExtractorError (expected) when the page has neither an
    embedded media object nor any such links.
    """
    mobj = re.match(self._VALID_URL, url)
    url_type = mobj.group('type')
    page_url = mobj.group('page_url')
    display_id = mobj.group('display_id')
    webpage = self._download_webpage(url, display_id)

    # for wdr.de the data-extension is in a tag with the class "mediaLink"
    # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
    # for wdrmaus its in a link to the page in a multiline "videoLink"-tag
    json_metadata = self._html_search_regex(
        r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
        webpage, 'media link', default=None, flags=re.MULTILINE)

    if not json_metadata:
        # No embedded media object: treat the page as a collection of links.
        # Each findall() hit is indexed with [0], i.e. it is a tuple --
        # presumably because _PAGE_REGEX contains groups of its own.
        entries = [
            self.url_result(page_url + href[0], 'WDR')
            for href in re.findall(
                r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
                webpage)
        ]

        if entries:  # Playlist page
            return self.playlist_result(entries, playlist_id=display_id)

        raise ExtractorError('No downloadable streams found', expected=True)

    media_link_obj = self._parse_json(json_metadata, display_id,
                                      transform_source=js_to_json)
    jsonp_url = media_link_obj['mediaObj']['url']

    metadata = self._download_json(
        jsonp_url, 'metadata', transform_source=strip_jsonp)

    metadata_tracker_data = metadata['trackerData']
    metadata_media_resource = metadata['mediaResource']

    formats = []

    # check if the metadata contains a direct URL to a file
    for kind, media_resource in metadata_media_resource.items():
        if kind not in ('dflt', 'alt'):
            continue

        for tag_name, medium_url in media_resource.items():
            if tag_name not in ('videoURL', 'audioURL'):
                continue

            ext = determine_ext(medium_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    medium_url, display_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls'))
            elif ext == 'f4m':
                manifest_url = update_url_query(
                    medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
                formats.extend(self._extract_f4m_formats(
                    manifest_url, display_id, f4m_id='hds', fatal=False))
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    medium_url, 'stream', fatal=False))
            else:
                a_format = {
                    'url': medium_url
                }
                if ext == 'unknown_video':
                    # The URL gives no usable extension; ask the server.
                    urlh = self._request_webpage(
                        medium_url, display_id, note='Determining extension')
                    ext = urlhandle_detect_ext(urlh)
                    a_format['ext'] = ext
                formats.append(a_format)

    self._sort_formats(formats)

    subtitles = {}
    caption_url = metadata_media_resource.get('captionURL')
    if caption_url:
        subtitles['de'] = [{
            'url': caption_url,
            'ext': 'ttml',
        }]

    title = metadata_tracker_data.get('trackerClipTitle')
    is_live = url_type == 'live'

    if is_live:
        title = self._live_title(title)
        upload_date = None
    elif 'trackerClipAirTime' in metadata_tracker_data:
        upload_date = metadata_tracker_data['trackerClipAirTime']
    else:
        upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')

    if upload_date:
        upload_date = unified_strdate(upload_date)

    return {
        'id': metadata_tracker_data.get('trackerClipId', display_id),
        'display_id': display_id,
        'title': title,
        'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
        'formats': formats,
        'upload_date': upload_date,
        'description': self._html_search_meta('Description', webpage),
        'is_live': is_live,
        'subtitles': subtitles,
    }
|
||||
|
||||
|
||||
@@ -241,81 +251,3 @@ class WDRMobileIE(InfoExtractor):
|
||||
'User-Agent': 'mobile',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class WDRMausIE(InfoExtractor):
    """Extractor for "Sendung mit der Maus" pages on wdrmaus.de.

    The player parameters are read from the query string of the page's
    ``?startVideo=1&...`` link: ``firstVideo`` is the stream URL and
    ``startPicture`` the thumbnail, resolved against the page URL.
    """
    _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for a single wdrmaus.de video page."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')

        # The title is "<air date> - <page heading>".
        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)
        upload_date = unified_strdate(
            self._html_search_meta('dc.date', webpage))

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # The thumbnail is optional: a page without startPicture must not
        # abort an otherwise successful extraction.
        start_picture = fields.get('startPicture')
        thumbnail = (
            compat_urlparse.urljoin(url, start_picture[0])
            if start_picture else None)

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        # The site's JS carries a table of stream prefix -> download prefix
        # pairs; use it to derive an extra 'http' format when a prefix fits.
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                if video_url.startswith(m.group('stream')):
                    # count=1: swap only the leading prefix, not any later
                    # repetition of the same text inside the URL.
                    http_url = video_url.replace(
                        m.group('stream'), m.group('dl'), 1)
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
|
||||
|
@@ -1,49 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class WeiboIE(InfoExtractor):
    """
    The videos in Weibo come from different sites, this IE just finds the link
    to the external video and returns it.
    """
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
        'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
        'info_dict': {
            'id': '98322879',
            'ext': 'flv',
            'title': '魔声耳机最新广告“All Eyes On Us”',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Sina'],
    }

    # Additional example videos from different sites
    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm

    def _real_extract(self, url):
        """Resolve a Weibo video page to a url result for the hosting site.

        Raises ExtractorError (expected) when the play list is empty.
        """
        # Imported here so the block does not depend on the module's
        # top-level import list; only the error path needs it.
        from ..utils import ExtractorError

        # _VALID_URL is a plain pattern, so it is matched without re.VERBOSE.
        video_id = self._match_id(url)
        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
        info = self._download_json(info_url, video_id)

        videos_urls = [v['play_page_url'] for v in info['result']['data']]
        if not videos_urls:
            raise ExtractorError('No videos found', expected=True)
        # Prefer sina video since they have thumbnails
        # (False sorts before True, so a sina URL ends up last).
        videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
        player_url = videos_urls[-1]
        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
                          player_url)
        if m_sina is not None:
            self.to_screen('Sina video detected')
            sina_id = m_sina.group(1)
            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
        return self.url_result(player_url)
|
@@ -62,7 +62,8 @@ class XFileShareIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
},
|
||||
'skip': 'Video removed',
|
||||
}, {
|
||||
'url': 'http://vidto.me/ku5glz52nqe1.html',
|
||||
'info_dict': {
|
||||
|
@@ -12,10 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'md5': '8281348b8d3c53d39fffb377d24eac4e',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'ext': 'mp4',
|
||||
@@ -24,9 +24,8 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
@@ -36,13 +35,29 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# empty seo
|
||||
'url': 'http://xhamster.com/movies/5667973/.html',
|
||||
'info_dict': {
|
||||
'id': '5667973',
|
||||
'ext': 'mp4',
|
||||
'title': '....',
|
||||
'upload_date': '20160208',
|
||||
'uploader': 'parejafree',
|
||||
'duration': 72.0,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
def extract_video_url(webpage, name):
|
||||
@@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
|
||||
webpage, 'xhamster url', default=None)
|
||||
|
||||
if not video_url:
|
||||
|
@@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor):
|
||||
'uploader_id': '242127761',
|
||||
'categories': ['電玩動漫'],
|
||||
},
|
||||
'skip': 'Video removed',
|
||||
}, {
|
||||
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
|
||||
'only_matching': True,
|
||||
|
@@ -343,7 +343,7 @@ class YahooIE(InfoExtractor):
|
||||
webpage, 'region', fatal=False, default='US')
|
||||
data = compat_urllib_parse_urlencode({
|
||||
'protocol': 'http',
|
||||
'region': region,
|
||||
'region': region.upper(),
|
||||
})
|
||||
query_url = (
|
||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
||||
|
@@ -20,10 +20,11 @@ class YandexMusicBaseIE(InfoExtractor):
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
if response.get('type') == 'captcha' or 'captcha' in response:
|
||||
YandexMusicBaseIE._raise_captcha()
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
|
||||
if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
|
||||
@staticmethod
|
||||
def _raise_captcha():
|
||||
raise ExtractorError(
|
||||
'YandexMusic has considered youtube-dl requests automated and '
|
||||
'asks you to solve a CAPTCHA. You can either wait for some '
|
||||
@@ -32,6 +33,11 @@ class YandexMusicBaseIE(InfoExtractor):
|
||||
'solve CAPTCHA, then export cookies and pass cookie file to '
|
||||
'youtube-dl with --cookies',
|
||||
expected=True)
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
    """Download a page through the parent class and detect the CAPTCHA wall.

    Arguments are passed through unchanged and the parent's result is
    returned as-is. If the page contains Yandex's "requests look
    automated" notice, ``_raise_captcha()`` is called instead.
    """
    webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
    # A non-fatal download yields False rather than page text; guard so the
    # substring test cannot raise TypeError on it.
    if webpage and 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
        self._raise_captcha()
    return webpage
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
|
@@ -2,7 +2,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import itertools
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
@@ -13,6 +15,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
@@ -275,6 +278,8 @@ class YoukuIE(InfoExtractor):
|
||||
'format_id': self.get_format_name(fm),
|
||||
'ext': self.parse_ext_l(fm),
|
||||
'filesize': int(seg['size']),
|
||||
'width': stream.get('width'),
|
||||
'height': stream.get('height'),
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -283,3 +288,52 @@ class YoukuIE(InfoExtractor):
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class YoukuShowIE(InfoExtractor):
    """Playlist extractor for youku.com ``show_page`` URLs.

    Episodes are collected from the show page itself and then from the
    paged ``show_episode`` endpoint until a page comes back short.
    """
    _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
    IE_NAME = 'youku:show'

    _TEST = {
        'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
        'info_dict': {
            'id': 'zc7c670be07ff11e48b3f',
            'title': '花千骨 未删减版',
            'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
        },
        'playlist_count': 50,
    }

    # A full episodes page holds this many entries; fewer means last page.
    _PAGE_SIZE = 40

    def _find_videos_in_page(self, webpage):
        """Return url results for every v.youku.com link found in *webpage*."""
        videos = re.findall(
            r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
        return [
            # Pass the title by keyword: the third positional parameter of
            # url_result is the video id, not the title.
            self.url_result(video_url, YoukuIE.ie_key(), video_title=title)
            for video_url, title in videos]

    def _real_extract(self, url):
        """Build the playlist (entries, title, description) for a show page."""
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        entries = self._find_videos_in_page(webpage)

        playlist_title = self._html_search_regex(
            r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
        # Fall back to '' so the description search runs on a string even
        # when the "detail" element is missing.
        detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
        playlist_description = self._html_search_regex(
            r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
            detail_div, 'playlist description', fatal=False)

        for idx in itertools.count(1):
            # Pages are addressed by divid=reload_<offset>: 41, 81, ...
            episodes_page = self._download_webpage(
                'http://www.youku.com/show_episode/id_%s.html' % show_id,
                show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
                note='Downloading episodes page %d' % idx)
            new_entries = self._find_videos_in_page(episodes_page)
            entries.extend(new_entries)
            if len(new_entries) < self._PAGE_SIZE:
                break

        return self.playlist_result(entries, show_id, playlist_title, playlist_description)
|
||||
|
@@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
@@ -1986,7 +1988,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
|
||||
class YoutubeUserIE(YoutubeChannelIE):
|
||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||
IE_NAME = 'youtube:user'
|
||||
|
||||
@@ -1999,6 +2001,9 @@ class YoutubeUserIE(YoutubeChannelIE):
|
||||
}, {
|
||||
'url': 'ytuser:phihag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/gametrailers',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@@ -395,8 +395,8 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
downloader.add_option(
|
||||
'-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT',
|
||||
'-r', '--limit-rate', '--rate-limit',
|
||||
dest='ratelimit', metavar='RATE',
|
||||
help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
|
||||
downloader.add_option(
|
||||
'-R', '--retries',
|
||||
@@ -668,7 +668,7 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='writeannotations', default=False,
|
||||
help='Write video annotations to a .annotations.xml file')
|
||||
filesystem.add_option(
|
||||
'--load-info',
|
||||
'--load-info-json', '--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='JSON file containing the video information (created with the "--write-info-json" option)')
|
||||
filesystem.add_option(
|
||||
|
@@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
print_notes(to_screen, versions_info['versions'])
|
||||
|
||||
filename = sys.argv[0]
|
||||
# Py2EXE: Filename could be different
|
||||
if hasattr(sys, 'frozen') and not os.path.isfile(filename):
|
||||
if os.path.isfile(filename + '.exe'):
|
||||
filename += '.exe'
|
||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||
filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % filename)
|
||||
@@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
# Py2EXE
|
||||
if hasattr(sys, 'frozen'):
|
||||
exe = os.path.abspath(filename)
|
||||
exe = filename
|
||||
directory = os.path.dirname(exe)
|
||||
if not os.access(directory, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % directory)
|
||||
|
@@ -39,6 +39,7 @@ from .compat import (
|
||||
compat_chr,
|
||||
compat_etree_fromstring,
|
||||
compat_html_entities,
|
||||
compat_html_entities_html5,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
@@ -105,9 +106,9 @@ KNOWN_EXTENSIONS = (
|
||||
'f4f', 'f4m', 'm3u8', 'smil')
|
||||
|
||||
# needed for sanitizing filenames in restricted mode
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy')))
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
@@ -456,12 +457,19 @@ def orderedSet(iterable):
|
||||
return res
|
||||
|
||||
|
||||
def _htmlentity_transform(entity):
|
||||
def _htmlentity_transform(entity_with_semicolon):
|
||||
"""Transforms an HTML entity to a character."""
|
||||
entity = entity_with_semicolon[:-1]
|
||||
|
||||
# Known non-numeric HTML entity
|
||||
if entity in compat_html_entities.name2codepoint:
|
||||
return compat_chr(compat_html_entities.name2codepoint[entity])
|
||||
|
||||
# TODO: HTML5 allows entities without a semicolon. For example,
|
||||
# '&Eacute;ric' should be decoded as 'Éric'.
|
||||
if entity_with_semicolon in compat_html_entities_html5:
|
||||
return compat_html_entities_html5[entity_with_semicolon]
|
||||
|
||||
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
|
||||
if mobj is not None:
|
||||
numstr = mobj.group(1)
|
||||
@@ -486,7 +494,7 @@ def unescapeHTML(s):
|
||||
assert type(s) == compat_str
|
||||
|
||||
return re.sub(
|
||||
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
|
||||
r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
|
||||
|
||||
|
||||
def get_subprocess_encoding():
|
||||
@@ -861,9 +869,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
|
||||
if sys.version_info >= (3, 0):
|
||||
location = location.encode('iso-8859-1').decode('utf-8')
|
||||
else:
|
||||
location = location.decode('utf-8')
|
||||
location_escaped = escape_url(location)
|
||||
if location != location_escaped:
|
||||
del resp.headers['Location']
|
||||
if sys.version_info < (3, 0):
|
||||
location_escaped = location_escaped.encode('utf-8')
|
||||
resp.headers['Location'] = location_escaped
|
||||
return resp
|
||||
|
||||
@@ -1035,6 +1047,7 @@ def unified_strdate(date_str, day_first=True):
|
||||
format_expressions.extend([
|
||||
'%d-%m-%Y',
|
||||
'%d.%m.%Y',
|
||||
'%d.%m.%y',
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
@@ -1055,7 +1068,10 @@ def unified_strdate(date_str, day_first=True):
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
try:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
except ValueError:
|
||||
pass
|
||||
if upload_date is not None:
|
||||
return compat_str(upload_date)
|
||||
|
||||
@@ -1907,7 +1923,7 @@ def parse_age_limit(s):
|
||||
|
||||
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the bare payload text.

    ``callback(<payload>);`` becomes ``<payload>``. The callback name may
    consist of letters, digits, ``_``, ``.`` and ``$``. An optional
    trailing semicolon, trailing whitespace and trailing ``//`` comments
    are dropped. Input that does not look like a JSONP call is returned
    unchanged.
    """
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
|
||||
|
||||
def js_to_json(code):
|
||||
@@ -2012,6 +2028,9 @@ def mimetype2ext(mt):
|
||||
|
||||
ext = {
|
||||
'audio/mp4': 'm4a',
|
||||
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
|
||||
# it's the most popular one
|
||||
'audio/mpeg': 'mp3',
|
||||
}.get(mt)
|
||||
if ext is not None:
|
||||
return ext
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.05.21.2'
|
||||
__version__ = '2016.06.11.2'
|
||||
|
Reference in New Issue
Block a user