Compare commits
198 Commits
2015.03.03
...
2015.03.24
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
48c971e073 | ||
|
|
f5e2efbbf0 | ||
|
|
b0872c19ea | ||
|
|
9f790b9901 | ||
|
|
93f787070f | ||
|
|
f9544f6e8f | ||
|
|
336d19044c | ||
|
|
7866c9e173 | ||
|
|
1a4123de04 | ||
|
|
cf2e2eb1c0 | ||
|
|
2051acdeb2 | ||
|
|
cefdf970cc | ||
|
|
a1d0aa7b88 | ||
|
|
49aeedb8cb | ||
|
|
ef249a2cd7 | ||
|
|
a09141548a | ||
|
|
5379a2d40d | ||
|
|
c9450c7ab1 | ||
|
|
faa1b5c292 | ||
|
|
393d9fc6d2 | ||
|
|
4e6a228689 | ||
|
|
179d6678b1 | ||
|
|
85698c5086 | ||
|
|
a7d9ded45d | ||
|
|
531980d89c | ||
|
|
1887ecd4d6 | ||
|
|
cd32c2caba | ||
|
|
1c9a1457fc | ||
|
|
038b0eb1da | ||
|
|
f20bf146e2 | ||
|
|
01218f919b | ||
|
|
2684871bc1 | ||
|
|
ccf3960eec | ||
|
|
eecc0685c9 | ||
|
|
2ed849eccf | ||
|
|
3378d67a18 | ||
|
|
f3c0c667a6 | ||
|
|
0ae8bbac2d | ||
|
|
cbc3cfcab4 | ||
|
|
b30ef07c6c | ||
|
|
73900846b1 | ||
|
|
d1dc7e3991 | ||
|
|
3073a6d5e9 | ||
|
|
aae53774f2 | ||
|
|
7a757b7194 | ||
|
|
fa8ce26904 | ||
|
|
2c2c06e359 | ||
|
|
ee580538fa | ||
|
|
c3c5c31517 | ||
|
|
ed9a25dd61 | ||
|
|
9ef4f12b53 | ||
|
|
84f8101606 | ||
|
|
b1337948eb | ||
|
|
98f02fdde2 | ||
|
|
048fdc2292 | ||
|
|
2ca1c5aa9f | ||
|
|
674fb0fcc5 | ||
|
|
00bfe40e4d | ||
|
|
cd459b1d49 | ||
|
|
92a4793b3c | ||
|
|
dc03a42537 | ||
|
|
219da6bb68 | ||
|
|
0499cd866e | ||
|
|
13047f4135 | ||
|
|
af69cab21d | ||
|
|
d41a3fa1b4 | ||
|
|
733be371af | ||
|
|
576904bce6 | ||
|
|
cf47794f09 | ||
|
|
c06a9f8730 | ||
|
|
2e90dff2c2 | ||
|
|
90183a46d8 | ||
|
|
b68eedba23 | ||
|
|
d5b559393b | ||
|
|
1de4ac1385 | ||
|
|
39aa42ffbb | ||
|
|
ec1b9577ba | ||
|
|
3b4444f99a | ||
|
|
613b2d9dc6 | ||
|
|
8f4cc22455 | ||
|
|
7c42327e0e | ||
|
|
873383e9bd | ||
|
|
8508557e77 | ||
|
|
4d1652484f | ||
|
|
88cf6fb368 | ||
|
|
e7db87f700 | ||
|
|
2cb434e53e | ||
|
|
cd65491c30 | ||
|
|
082b1155a3 | ||
|
|
9202b1b787 | ||
|
|
a7e01c438d | ||
|
|
05be67e77d | ||
|
|
85741b9986 | ||
|
|
f247a199fe | ||
|
|
29171bc2d2 | ||
|
|
7be5a62ed7 | ||
|
|
3647136f24 | ||
|
|
13598940e3 | ||
|
|
0eb365868e | ||
|
|
28c6411e49 | ||
|
|
bba3fc7960 | ||
|
|
fcd877013e | ||
|
|
ba1d4c0488 | ||
|
|
517bcca299 | ||
|
|
1b53778175 | ||
|
|
b7a0304d92 | ||
|
|
545315a985 | ||
|
|
3f4327520c | ||
|
|
4a34f69ea6 | ||
|
|
fb7e68833c | ||
|
|
486dd09e0b | ||
|
|
054b99a330 | ||
|
|
65c5e044c7 | ||
|
|
11984c7467 | ||
|
|
3946864c8a | ||
|
|
b84037013e | ||
|
|
1dbfc62d75 | ||
|
|
d7d79106c7 | ||
|
|
1138491631 | ||
|
|
71705fa70d | ||
|
|
602814adab | ||
|
|
3a77719c5a | ||
|
|
7e195d0e92 | ||
|
|
e04793401d | ||
|
|
a3fbd18824 | ||
|
|
c6052b8c14 | ||
|
|
c792b5011f | ||
|
|
32aaeca775 | ||
|
|
1593194c63 | ||
|
|
614a7e1e23 | ||
|
|
2ebfeacabc | ||
|
|
f5d8f58a17 | ||
|
|
937daef4a7 | ||
|
|
dd77f14c64 | ||
|
|
c36cbe5a8a | ||
|
|
41b2194f86 | ||
|
|
d1e2e8f583 | ||
|
|
47fe42e1ab | ||
|
|
4c60393854 | ||
|
|
f848215dfc | ||
|
|
dcca581967 | ||
|
|
d475b3384c | ||
|
|
dd7831fe94 | ||
|
|
cc08b11d16 | ||
|
|
8bba753cca | ||
|
|
43d6280d0a | ||
|
|
e5a11a2293 | ||
|
|
f18ef2d144 | ||
|
|
1bb5c511a5 | ||
|
|
d55de57b67 | ||
|
|
a2aaf4dbc6 | ||
|
|
bdf6eee0ae | ||
|
|
8b910bda0c | ||
|
|
24993e3b39 | ||
|
|
11101076a1 | ||
|
|
f838875726 | ||
|
|
28778d6bae | ||
|
|
1132eae56d | ||
|
|
d34e79492d | ||
|
|
ab205b9dc8 | ||
|
|
7dcad95d4f | ||
|
|
8a48223a7b | ||
|
|
d47ae7f620 | ||
|
|
135c9c42bf | ||
|
|
0bf79ac455 | ||
|
|
98998cded6 | ||
|
|
14137b5781 | ||
|
|
a172d96292 | ||
|
|
23ba76bc0e | ||
|
|
61e00a9775 | ||
|
|
d1508cd68d | ||
|
|
9c85b5376d | ||
|
|
3c6f245083 | ||
|
|
f207019ce5 | ||
|
|
bd05aa4e24 | ||
|
|
8dc9d361c2 | ||
|
|
d0e958c71c | ||
|
|
a0bb7c5593 | ||
|
|
7feddd9fc7 | ||
|
|
55969016e9 | ||
|
|
9609f02e3c | ||
|
|
5c7495a194 | ||
|
|
5ee6fc974e | ||
|
|
c2ebea6580 | ||
|
|
12a129ec6d | ||
|
|
f28fe66970 | ||
|
|
123397317c | ||
|
|
dc570c4951 | ||
|
|
22d3628319 | ||
|
|
50c9949d7a | ||
|
|
376817c6d4 | ||
|
|
63fc800057 | ||
|
|
e0d0572b73 | ||
|
|
7fde87c77d | ||
|
|
938c3f65b6 | ||
|
|
2461f79d2a | ||
|
|
499bfcbfd0 | ||
|
|
91410c9bfa |
@@ -2,6 +2,7 @@ language: python
|
||||
python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
before_install:
|
||||
|
||||
4
AUTHORS
4
AUTHORS
@@ -113,3 +113,7 @@ Robin de Rooij
|
||||
Ryan Schmidt
|
||||
Leslie P. Polzer
|
||||
Duncan Keall
|
||||
Alexander Mamay
|
||||
Devin J. Pohly
|
||||
Eduardo Ferro Aldama
|
||||
Jeff Buchbinder
|
||||
|
||||
@@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||
|
||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
|
||||
25
README.md
25
README.md
@@ -67,6 +67,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--source-address IP Client-side IP address to bind to (experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4 (experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6 (experimental)
|
||||
--cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
|
||||
not present) is used for the actual downloading. (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -165,7 +167,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
--dump-intermediate-pages print downloaded pages to debug problems (very verbose)
|
||||
--dump-pages print downloaded pages to debug problems (very verbose)
|
||||
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
-C, --call-home Contact the youtube-dl server for debugging.
|
||||
@@ -226,6 +228,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--embed-subs embed subtitles in the video (only for mp4 videos)
|
||||
--embed-thumbnail embed thumbnail in the audio as cover art
|
||||
--add-metadata write metadata to the video file
|
||||
--metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
||||
%(title)s" matches a title like "Coldplay - Paradise"
|
||||
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
|
||||
fix file if we can, warn otherwise)
|
||||
@@ -402,6 +407,18 @@ A note on the service that they don't host the infringing content, but just link
|
||||
|
||||
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
|
||||
|
||||
### How can I speed up work on my issue?
|
||||
|
||||
(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
|
||||
|
||||
First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
|
||||
|
||||
Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
|
||||
|
||||
If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
|
||||
|
||||
Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ... from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
@@ -501,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
|
||||
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
||||
|
||||
```python
|
||||
from __future__ import unicode_literals
|
||||
import youtube_dl
|
||||
|
||||
ydl_opts = {}
|
||||
@@ -513,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
```python
|
||||
from __future__ import unicode_literals
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@@ -570,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||
|
||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
|
||||
42
devscripts/generate_aes_testdata.py
Normal file
42
devscripts/generate_aes_testdata.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
import subprocess
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import intlist_to_bytes
|
||||
from youtube_dl.aes import aes_encrypt, key_expansion
|
||||
|
||||
secret_msg = b'Secret message goes here'
|
||||
|
||||
|
||||
def hex_str(int_list):
|
||||
return codecs.encode(intlist_to_bytes(int_list), 'hex')
|
||||
|
||||
|
||||
def openssl_encode(algo, key, iv):
|
||||
cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
|
||||
prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
out, _ = prog.communicate(secret_msg)
|
||||
return out
|
||||
|
||||
iv = key = [0x20, 0x15] + 14 * [0]
|
||||
|
||||
r = openssl_encode('aes-128-cbc', key, iv)
|
||||
print('aes_cbc_decrypt')
|
||||
print(repr(r))
|
||||
|
||||
password = key
|
||||
new_key = aes_encrypt(password, key_expansion(password))
|
||||
r = openssl_encode('aes-128-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 16')
|
||||
print(repr(r))
|
||||
|
||||
password = key + 16 * [0]
|
||||
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
|
||||
r = openssl_encode('aes-256-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 32')
|
||||
print(repr(r))
|
||||
@@ -47,6 +47,7 @@
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **Bet**
|
||||
@@ -111,12 +112,14 @@
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **Dump**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **EaglePlatform**
|
||||
- **EbaumsWorld**
|
||||
- **EchoMsk**
|
||||
- **eHow**
|
||||
@@ -144,6 +147,7 @@
|
||||
- **Firstpost**
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Foxgay**
|
||||
- **FoxNews**
|
||||
- **france2.fr:generation-quoi**
|
||||
@@ -161,6 +165,7 @@
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **GiantBomb**
|
||||
@@ -211,6 +216,7 @@
|
||||
- **jpopsuki.tv**
|
||||
- **Jukebox**
|
||||
- **Kaltura**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **keek**
|
||||
@@ -225,6 +231,7 @@
|
||||
- **Letv**
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **LiveLeak**
|
||||
- **livestream**
|
||||
@@ -304,6 +311,7 @@
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -315,6 +323,7 @@
|
||||
- **Ooyala**
|
||||
- **OpenFilm**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@@ -322,10 +331,12 @@
|
||||
- **PBS**
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **Pladform**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
@@ -334,6 +345,7 @@
|
||||
- **PornHubPlaylist**
|
||||
- **Pornotube**
|
||||
- **PornoXO**
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
@@ -359,6 +371,7 @@
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
@@ -409,6 +422,7 @@
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SRMediathek**: Saarländischer Rundfunk
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **streamcloud.eu**
|
||||
@@ -478,6 +492,7 @@
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
@@ -505,6 +520,7 @@
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **Viewster**
|
||||
- **viki**
|
||||
- **vimeo**
|
||||
- **vimeo:album**
|
||||
@@ -551,6 +567,9 @@
|
||||
- **XXXYMovies**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **Yam**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YesJapan**
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
|
||||
@@ -14,6 +14,9 @@ from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import match_filter_func
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
|
||||
class YDL(FakeYDL):
|
||||
@@ -46,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 460, 'url': 'x'},
|
||||
{'ext': 'mp4', 'height': 460, 'url': 'y'},
|
||||
{'ext': 'webm', 'height': 460, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
yie = YoutubeIE(ydl)
|
||||
@@ -60,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720, 'url': 'a'},
|
||||
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@@ -74,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
{'ext': 'mp4', 'height': 720, 'url': '_'},
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@@ -88,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@@ -133,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@@ -167,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_audio(self):
|
||||
formats = [
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@@ -185,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertEqual(downloaded['format_id'], 'audio-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@@ -228,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@@ -337,6 +340,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'G')
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
def test_subtitles(self):
|
||||
def s_formats(lang, autocaption=False):
|
||||
return [{
|
||||
@@ -459,6 +464,73 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||
os.unlink(audiofile)
|
||||
|
||||
def test_match_filter(self):
|
||||
class FilterYDL(YDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(FilterYDL, self).__init__(*args, **kwargs)
|
||||
self.params['simulate'] = True
|
||||
|
||||
def process_info(self, info_dict):
|
||||
super(YDL, self).process_info(info_dict)
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
|
||||
if res is None:
|
||||
self.downloaded_info_dicts.append(info_dict)
|
||||
return res
|
||||
|
||||
first = {
|
||||
'id': '1',
|
||||
'url': TEST_URL,
|
||||
'title': 'one',
|
||||
'extractor': 'TEST',
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
'url': TEST_URL,
|
||||
'title': 'two',
|
||||
'extractor': 'TEST',
|
||||
'duration': 10,
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
def get_videos(filter_=None):
|
||||
ydl = FilterYDL({'match_filter': filter_})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
def f(v):
|
||||
if v['id'] == '1':
|
||||
return None
|
||||
else:
|
||||
return 'Video id is not 1'
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('duration < 30')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description = foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description =? foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
f = match_filter_func('filesize > 5KiB')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
55
test/test_aes.py
Normal file
55
test/test_aes.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
||||
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||
import base64
|
||||
|
||||
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
|
||||
|
||||
|
||||
class TestAES(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.key = self.iv = [0x20, 0x15] + 14 * [0]
|
||||
self.secret_msg = b'Secret message goes here'
|
||||
|
||||
def test_encrypt(self):
|
||||
msg = b'message'
|
||||
key = list(range(16))
|
||||
encrypted = aes_encrypt(bytes_to_intlist(msg), key)
|
||||
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
|
||||
self.assertEqual(decrypted, msg)
|
||||
|
||||
def test_cbc_decrypt(self):
|
||||
data = bytes_to_intlist(
|
||||
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
|
||||
)
|
||||
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_decrypt_text(self):
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||
self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||
self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
||||
|
||||
# https://github.com/rg3/youtube-dl/issues/1930
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
@@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
|
||||
def test_main_exec(self):
|
||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
|
||||
def test_cmdline_umlauts(self):
|
||||
p = subprocess.Popen(
|
||||
[sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'],
|
||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -8,7 +8,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
proxy_name = name
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
||||
return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class TestProxy(unittest.TestCase):
    """Exercise proxy selection in YoutubeDL against two local stub servers."""

    def _start_proxy(self, name):
        """Start a stub server labelled ``name`` on an OS-assigned port.

        Returns a ``(server, port, thread)`` tuple; ``thread`` is a daemon
        thread running ``server.serve_forever``.
        """
        server = compat_http_server.HTTPServer(
            ('localhost', 0), _build_proxy_handler(name))
        port = server.socket.getsockname()[1]
        thread = threading.Thread(target=server.serve_forever)
        thread.daemon = True
        thread.start()
        return server, port, thread

    def setUp(self):
        self.proxy, self.port, self.proxy_thread = self._start_proxy('normal')
        self.cn_proxy, self.cn_port, self.cn_proxy_thread = self._start_proxy('cn')

    def tearDown(self):
        # Stop both serve_forever() loops and release the listening sockets
        # so the servers and their threads do not outlive the test.
        for server, thread in (
                (self.proxy, self.proxy_thread),
                (self.cn_proxy, self.cn_proxy_thread)):
            server.shutdown()
            server.server_close()
            thread.join()

    def test_proxy(self):
        cn_proxy = 'localhost:{0}'.format(self.cn_port)
        ydl = YoutubeDL({
            'proxy': 'localhost:{0}'.format(self.port),
            'cn_verification_proxy': cn_proxy,
        })
        url = 'http://foo.com/bar'

        # A plain request is expected to be answered by the 'normal' server.
        response = ydl.urlopen(url).read().decode('utf-8')
        self.assertEqual(response, 'normal: {0}'.format(url))

        # A request carrying the Ytdl-request-proxy header is expected to be
        # answered by the server named in that header.
        req = compat_urllib_request.Request(url)
        req.add_header('Ytdl-request-proxy', cn_proxy)
        response = ydl.urlopen(req).read().decode('utf-8')
        self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
26
test/test_netrc.py
Normal file
26
test/test_netrc.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
gen_extractors,
|
||||
)
|
||||
|
||||
|
||||
class TestNetRc(unittest.TestCase):
    """Consistency check between login support and netrc configuration."""

    def test_netrc_present(self):
        """Every extractor with a ``_login`` attribute must define ``_NETRC_MACHINE``."""
        login_extractors = (
            ie for ie in gen_extractors() if hasattr(ie, '_login'))
        for extractor in login_extractors:
            has_machine = hasattr(extractor, '_NETRC_MACHINE')
            message = (
                'Extractor %s supports login, but is missing a _NETRC_MACHINE property'
                % extractor.IE_NAME)
            self.assertTrue(has_machine, message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
17
test/test_postprocessors.py
Normal file
17
test/test_postprocessors.py
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||
|
||||
|
||||
class TestMetadataFromTitle(unittest.TestCase):
    """Tests for MetadataFromTitlePP."""

    def test_format_to_regex(self):
        """The title format is expected to become a regex with one named group per field."""
        pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
        # Raw string: '\ ' and '\-' are not valid string escape sequences, so
        # a non-raw literal only works by accident and triggers a warning on
        # newer Python versions. The resulting value is unchanged.
        self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
@@ -26,6 +26,7 @@ from youtube_dl.extractor import (
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
    """Subtitle extraction test for a FunnyOrDie video."""
    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
    IE = FunnyOrDieIE

    def test_allsubtitles(self):
        """Requesting all subtitles should yield exactly the 'en' track."""
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        tracks = self.getSubtitles()
        available_langs = set(tracks.keys())
        self.assertEqual(available_langs, set(['en']))
        english_digest = md5(tracks['en'])
        self.assertEqual(english_digest, 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -17,13 +17,22 @@ IGNORED_FILES = [
|
||||
'buildserver.py',
|
||||
]
|
||||
|
||||
IGNORED_DIRS = [
|
||||
'.git',
|
||||
'.tox',
|
||||
]
|
||||
|
||||
from test.helper import assertRegexpMatches
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
for dirpath, dirnames, filenames in os.walk(rootDir):
|
||||
for ignore_dir in IGNORED_DIRS:
|
||||
if ignore_dir in dirnames:
|
||||
# If we remove the directory from dirnames os.walk won't
|
||||
# recurse into it
|
||||
dirnames.remove(ignore_dir)
|
||||
for basename in filenames:
|
||||
if not basename.endswith('.py'):
|
||||
continue
|
||||
|
||||
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
InAdvancePagedList,
|
||||
@@ -38,6 +39,8 @@ from youtube_dl.utils import (
|
||||
parse_iso8601,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url_path_consecutive_slashes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
@@ -52,6 +55,7 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
render_table,
|
||||
match_str,
|
||||
)
|
||||
@@ -131,6 +135,62 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||
|
||||
def test_sanitize_path(self):
    """Check sanitize_path against a table of (input, expected) pairs.

    The expectations use Windows path rules, so on any other platform the
    test returns without asserting anything.
    """
    if sys.platform != 'win32':
        return

    cases = (
        # Separators and characters replaced by '#'
        ('abc', 'abc'),
        ('abc/def', 'abc\\def'),
        ('abc\\def', 'abc\\def'),
        ('abc|def', 'abc#def'),
        ('<>:"|?*', '#######'),
        ('C:/abc/def', 'C:\\abc\\def'),
        ('C?:/abc/def', 'C##\\abc\\def'),

        # \\?\UNC\ prefixed paths
        ('\\\\?\\UNC\\ComputerName\\abc', '\\\\?\\UNC\\ComputerName\\abc'),
        ('\\\\?\\UNC/ComputerName/abc', '\\\\?\\UNC\\ComputerName\\abc'),

        # \\?\ prefixed drive paths (each input listed once)
        ('\\\\?\\C:\\abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:/abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:\\ab?c\\de:f', '\\\\?\\C:\\ab#c\\de#f'),

        # Output templates and real-world file names
        ('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s',
         'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
        ('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part',
         'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),

        # Trailing dots of a path component
        ('abc/def...', 'abc\\def..#'),
        ('abc.../def', 'abc..#\\def'),
        ('abc.../def...', 'abc..#\\def..#'),

        # Relative path components
        ('../abc', '..\\abc'),
        ('../../abc', '..\\..\\abc'),
        ('./abc', 'abc'),
        ('./../abc', '..\\abc'),
    )
    for path, expected in cases:
        self.assertEqual(sanitize_path(path), expected)
|
||||
|
||||
def test_sanitize_url_path_consecutive_slashes(self):
    """Doubled slashes in the URL path collapse to one; other URLs are returned unchanged."""
    expectations = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//',
         'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/',
         'http://hostname/'),
        ('http://hostname/abc//',
         'http://hostname/abc/'),
    )
    for url, expected in expectations:
        self.assertEqual(sanitize_url_path_consecutive_slashes(url), expected)
|
||||
|
||||
def test_ordered_set(self):
|
||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||
self.assertEqual(orderedSet([]), [])
|
||||
@@ -192,6 +252,17 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||
|
||||
def test_xpath_text(self):
    """xpath_text gives the element text, None for a missing node, and raises when fatal."""
    root = xml.etree.ElementTree.fromstring('''<root>
        <div>
            <p>Foo</p>
        </div>
    </root>''')

    found = xpath_text(root, 'div/p')
    self.assertEqual(found, 'Foo')

    missing = xpath_text(root, 'div/bar')
    self.assertTrue(missing is None)

    # With fatal=True the same missing path must raise instead.
    self.assertRaises(ExtractorError, xpath_text, root, 'div/bar', fatal=True)
|
||||
|
||||
def test_smuggle_url(self):
|
||||
data = {"ö": "ö", "abc": [3]}
|
||||
url = 'https://foo.bar/baz?x=y#a'
|
||||
|
||||
7
tox.ini
7
tox.ini
@@ -1,8 +1,11 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33
|
||||
envlist = py26,py27,py33,py34
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
coverage
|
||||
commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
||||
--exclude test_subtitles.py --exclude test_write_annotations.py
|
||||
--exclude test_youtube_lists.py
|
||||
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
# test.test_download:TestDownload.test_NowVideo
|
||||
|
||||
@@ -54,12 +54,14 @@ from .utils import (
|
||||
MaxDownloadsReached,
|
||||
PagedList,
|
||||
parse_filesize,
|
||||
PerRequestProxyHandler,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
render_table,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
@@ -183,6 +185,8 @@ class YoutubeDL(object):
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
cn_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on Chinese sites. (Experimental)
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fribidi
|
||||
@@ -319,8 +323,10 @@ class YoutubeDL(object):
|
||||
'Set the LC_ALL environment variable to fix this.')
|
||||
self.params['restrictfilenames'] = True
|
||||
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
if isinstance(params.get('outtmpl'), bytes):
|
||||
self.report_warning(
|
||||
'Parameter outtmpl is bytes, but should be a unicode string. '
|
||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
@@ -559,7 +565,7 @@ class YoutubeDL(object):
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
@@ -626,7 +632,7 @@ class YoutubeDL(object):
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
'''
|
||||
|
||||
if ie_key:
|
||||
ies = [self.get_info_extractor(ie_key)]
|
||||
@@ -1082,8 +1088,7 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format = 'best'
|
||||
formats_to_download = []
|
||||
# The -1 is for supporting YoutubeIE
|
||||
if req_format in ('-1', 'all'):
|
||||
if req_format == 'all':
|
||||
formats_to_download = formats
|
||||
else:
|
||||
for rfstr in req_format.split(','):
|
||||
@@ -1210,9 +1215,6 @@ class YoutubeDL(object):
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
@@ -1258,7 +1260,7 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(encodeFilename(filename))
|
||||
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
|
||||
if dn and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
@@ -1762,13 +1764,14 @@ class YoutubeDL(object):
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
proxy_handler = PerRequestProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, ydlh)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
|
||||
@@ -213,6 +213,11 @@ def _real_main(argv=None):
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
# Add the metadata pp first, the other pps will copy it
|
||||
if opts.metafromtitle:
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromTitle',
|
||||
'titleformat': opts.metafromtitle
|
||||
})
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.extractaudio:
|
||||
@@ -364,6 +369,7 @@ def _real_main(argv=None):
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
||||
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text)
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return (boot_info, bootstrap_url)
|
||||
|
||||
@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text)
|
||||
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||
else:
|
||||
metadata = None
|
||||
|
||||
|
||||
@@ -92,6 +92,8 @@ class HttpFD(FileDownloader):
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': resume_len,
|
||||
'total_bytes': resume_len,
|
||||
})
|
||||
return True
|
||||
else:
|
||||
@@ -218,12 +220,6 @@ class HttpFD(FileDownloader):
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'status': 'error',
|
||||
})
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
@@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .bet import BetIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
@@ -106,6 +107,7 @@ from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -116,6 +118,7 @@ from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
from .dropbox import DropboxIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .ehow import EHowIE
|
||||
@@ -150,6 +153,7 @@ from .fktv import (
|
||||
)
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
@@ -174,6 +178,7 @@ from .gameone import (
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .giantbomb import GiantBombIE
|
||||
@@ -228,6 +233,7 @@ from .jove import JoveIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
@@ -244,6 +250,7 @@ from .letv import (
|
||||
LetvTvIE,
|
||||
LetvPlaylistIE
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
@@ -340,6 +347,7 @@ from .npo import (
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -354,6 +362,7 @@ from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -361,9 +370,11 @@ from .pbs import PBSIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
@@ -372,6 +383,7 @@ from .pornhub import (
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
@@ -397,7 +409,7 @@ from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -455,6 +467,7 @@ from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
@@ -527,6 +540,7 @@ from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .ultimedia import UltimediaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
@@ -550,6 +564,7 @@ from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
@@ -606,6 +621,11 @@ from .yahoo import (
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .yam import YamIE
|
||||
from .yandexmusic import (
|
||||
YandexMusicTrackIE,
|
||||
YandexMusicAlbumIE,
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
|
||||
@@ -2,13 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
xpath_text,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
},
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return collection, video
|
||||
return None, None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, episode_path)
|
||||
|
||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
||||
|
||||
try:
|
||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
||||
except ValueError as ve:
|
||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
||||
raise ExtractorError(errmsg, cause=ve)
|
||||
bootstrapped_data = self._parse_json(self._search_regex(
|
||||
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
|
||||
|
||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||
if is_playlist:
|
||||
collections = bootstrappedData['playlists']['collections']
|
||||
collections = bootstrapped_data['playlists']['collections']
|
||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||
video_info = self.find_video_info(collection, episode_path)
|
||||
|
||||
show_title = video_info['showTitle']
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
collections = bootstrappedData['show']['collections']
|
||||
collections = bootstrapped_data['show']['collections']
|
||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||
|
||||
show = bootstrappedData['show']
|
||||
# Video wasn't found in the collections, let's try `slugged_video`.
|
||||
if video_info is None:
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
else:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class AftenpostenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
|
||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
||||
'info_dict': {
|
||||
'id': '21039',
|
||||
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-xs-id="(\d+)"', webpage, 'video id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_xml(
|
||||
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
|
||||
|
||||
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
|
||||
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||
|
||||
103
youtube_dl/extractor/beatportpro.py
Normal file
103
youtube_dl/extractor/beatportpro.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BeatportProIE(InfoExtractor):
    """Extractor for track pages on pro.beatport.com.

    Track data is read from the ``window.Playables`` JSON object embedded in
    the track page; formats are built from the track's ``preview`` entries.
    """
    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
        'md5': 'b3c34d8639a2f6a7f734382358478887',
        'info_dict': {
            'id': '5379371',
            'display_id': 'synesthesia-original-mix',
            'ext': 'mp4',
            'title': 'Froxic - Synesthesia (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
        'info_dict': {
            'id': '3756896',
            'display_id': 'love-and-war-original-mix',
            'ext': 'mp3',
            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
        'md5': 'a1fd8e8046de3950fd039304c186c05f',
        'info_dict': {
            'id': '4991738',
            'display_id': 'birds-original-mix',
            'ext': 'mp4',
            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        track_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        playables = self._parse_json(
            self._search_regex(
                r'window\.Playables\s*=\s*({.+?});', webpage,
                'playables info', flags=re.DOTALL),
            track_id)

        # Track ids in the JSON are compared as integers; convert the URL id
        # once instead of once per candidate track.
        numeric_id = int(track_id)
        track = next(t for t in playables['tracks'] if t['id'] == numeric_id)

        # Title is "<artist>, <artist> - <name>", plus " (<mix>)" when set.
        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
        if track['mix']:
            title += ' (' + track['mix'] + ')'

        formats = []
        for ext, info in track['preview'].items():
            if not info['url']:
                continue
            fmt = {
                'url': info['url'],
                'ext': ext,
                'format_id': ext,
                'vcodec': 'none',
            }
            # mp4 is given the higher preference value of the two known types.
            if ext == 'mp3':
                fmt['preference'] = 0
                fmt['acodec'] = 'mp3'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            elif ext == 'mp4':
                fmt['preference'] = 1
                fmt['acodec'] = 'aac'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            formats.append(fmt)
        self._sort_formats(formats)

        images = []
        for name, info in track['images'].items():
            image_url = info.get('url')
            # The 'dynamic' entry and entries without a URL are not thumbnails.
            if name == 'dynamic' or not image_url:
                continue
            image = {
                'id': name,
                'url': image_url,
                'height': int_or_none(info.get('height')),
                'width': int_or_none(info.get('width')),
            }
            images.append(image)

        return {
            # Apply the fallback before stringifying: compat_str(None) is the
            # truthy string 'None', so the old "compat_str(...) or track_id"
            # could never fall back to the id taken from the URL.
            'id': compat_str(track.get('id') or track_id),
            'display_id': track.get('slug') or display_id,
            'title': title,
            'formats': formats,
            'thumbnails': images,
        }
|
||||
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
|
||||
'tbr': media['bitRate'],
|
||||
'width': media['width'],
|
||||
'height': media['height'],
|
||||
} for media in info['media']]
|
||||
} for media in info['media'] if media.get('mediaPurpose') == 'play']
|
||||
|
||||
if not formats:
|
||||
formats.append({
|
||||
|
||||
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
|
||||
@@ -839,7 +839,7 @@ class InfoExtractor(object):
|
||||
m3u8_id=None):
|
||||
|
||||
formats = [{
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': 'm3u8',
|
||||
@@ -883,8 +883,13 @@ class InfoExtractor(object):
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media else None
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
@@ -1057,6 +1062,9 @@ class InfoExtractor(object):
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def _subtitles_timecode(self, seconds):
|
||||
return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
||||
@@ -23,12 +23,12 @@ from ..utils import (
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
inc,
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -101,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
|
||||
|
||||
key = obfuscate_key(id)
|
||||
|
||||
class Counter:
|
||||
__value = iv
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
return temp
|
||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||
return zlib.decompress(decrypted_data)
|
||||
|
||||
|
||||
@@ -46,13 +46,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
||||
'md5': '392c4b85a60a90dc4792da41ce3144eb',
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
'info_dict': {
|
||||
'id': 'x33vw9',
|
||||
'id': 'x2iuewm',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Amphora Alex and Van .',
|
||||
'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
|
||||
77
youtube_dl/extractor/douyutv.py
Normal file
77
youtube_dl/extractor/douyutv.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
    """Extractor for live rooms on douyutv.com.

    Room metadata is fetched from the site's client room API and every
    advertised RTMP path is turned into a format, with the main 'live'
    path preferred.
    """

    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
    _TEST = {
        'url': 'http://www.douyutv.com/iseven',
        'info_dict': {
            'id': 'iseven',
            'ext': 'flv',
            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:9e525642c25a0a24302869937cf69d17',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': '7师傅',
            'uploader_id': '431925',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the live room addressed by *url*.

        Raises ExtractorError (expected) when the API reports an error or
        when the room is offline.
        """
        video_id = self._match_id(url)

        config = self._download_json(
            'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)

        # Inspect the error flag before touching the payload, so that a
        # reported error is never masked by a failure while reading 'data'.
        # Compare by value: identity checks against int literals are
        # implementation-dependent.
        error_code = config.get('error', 0)
        if error_code != 0:
            raise ExtractorError(
                'Server reported error %i' % error_code, expected=True)

        data = config['data']

        # 1 = live, 2 = offline
        show_status = data.get('show_status')
        if show_status == '2':
            raise ExtractorError(
                'Live stream is offline', expected=True)

        base_url = data['rtmp_url']
        live_path = data['rtmp_live']

        title = self._live_title(data['room_name'])
        description = data.get('show_details')
        thumbnail = data.get('room_src')

        uploader = data.get('nickname')
        uploader_id = data.get('owner_uid')

        # 'rtmp_multi_bitrate' is only used when it is a dict; anything else
        # is replaced by an empty mapping. The main stream is always added
        # under the 'live' key.
        multi_formats = data.get('rtmp_multi_bitrate')
        if not isinstance(multi_formats, dict):
            multi_formats = {}
        multi_formats['live'] = live_path

        formats = [{
            'url': '%s/%s' % (base_url, format_path),
            'format_id': format_id,
            'preference': 1 if format_id == 'live' else 0,
        } for format_id, format_path in multi_formats.items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
            'is_live': True,
        }
|
||||
98
youtube_dl/extractor/eagleplatform.py
Normal file
98
youtube_dl/extractor/eagleplatform.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served by Eagle.Platform players.

    Accepts either a player URL on a ``*.media.eagleplatform.com`` host or
    the internal ``eagleplatform:<host>:<id>`` form used for custom hosts.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '6c2ebeab03b739597ce8d86339d5a905',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
    }]

    def _handle_error(self, response):
        """Raise ExtractorError if *response* carries a non-200 'status'.

        A missing 'status' is treated as 200. The error text is the
        space-joined content of the response's 'errors' list.
        """
        status = int_or_none(response.get('status', 200))
        if status != 200:
            raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON like the base class, then check its status field.

        Extra positional and keyword arguments are forwarded untouched so
        the override stays call-compatible with the inherited method
        (e.g. callers passing ``fatal=False``).
        """
        response = super(EaglePlatformIE, self)._download_json(
            url_or_request, video_id, note, *args, **kwargs)
        # Only a JSON object can carry the 'status'/'errors' fields that
        # _handle_error reads; any other result is handed back as is.
        if isinstance(response, dict):
            self._handle_error(response)
        return response

    def _real_extract(self, url):
        """Return the info dict for the record addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        player_data = self._download_json(
            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)

        # Only the first media entry of the first viewport is used.
        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = media.get('snapshot')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        # 'allow_all' maps to no restriction; any other non-empty value is
        # treated as adults-only. An absent value leaves the limit unknown.
        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        # The 'auto' source is a JSON endpoint whose first 'data' item is
        # passed on as the m3u8 URL.
        m3u8_data = self._download_json(
            media['sources']['secure_m3u8']['auto'],
            video_id, 'Downloading m3u8 JSON')

        formats = self._extract_m3u8_formats(
            m3u8_data['data'][0], video_id,
            'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
|
||||
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_like = self._search_regex(
|
||||
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
|
||||
data = json.loads(json_like)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
|
||||
playlist_id)
|
||||
|
||||
session = str(random.randint(0, 1000000000))
|
||||
mix_id = data['id']
|
||||
track_count = data['tracks_count']
|
||||
duration = data['duration']
|
||||
avg_song_duration = float(duration) / track_count
|
||||
# duration is sometimes negative, use predefined avg duration
|
||||
if avg_song_duration <= 0:
|
||||
avg_song_duration = 300
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
entries = []
|
||||
|
||||
@@ -4,11 +4,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'info_dict': {
|
||||
'id': '652431',
|
||||
'ext': 'mp4',
|
||||
@@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = "-".join(format)
|
||||
flash_vars = compat_parse_qs(self._search_regex(
|
||||
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
|
||||
|
||||
formats = []
|
||||
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
|
||||
for k, vals in flash_vars.items():
|
||||
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
|
||||
if m is not None:
|
||||
formats.append({
|
||||
'format_id': m.group('quality'),
|
||||
'quality': quality(m.group('quality')),
|
||||
'url': vals[0],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'url': video_url,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
41
youtube_dl/extractor/footyroom.py
Normal file
41
youtube_dl/extractor/footyroom.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
    """Playlist extractor for match pages on footyroom.com.

    Each page embeds a ``VideoSelector.load([...])`` JSON array; every
    item whose payload exposes a ``data-config`` URL becomes a Playwire
    playlist entry.
    """

    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
        'info_dict': {
            'id': 'schalke-04-0-2-real-madrid-2015-02',
            'title': 'Schalke 04 0 – 2 Real Madrid',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Build the playlist of Playwire entries found on the page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        selector_json = self._search_regex(
            r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector')
        videos = self._parse_json(selector_json, playlist_id)

        playlist_title = self._og_search_title(webpage)

        entries = []
        for video in videos:
            payload = video.get('payload')
            if payload:
                # Items without a data-config attribute are skipped silently.
                config_url = self._search_regex(
                    r'data-config="([^"]+)"', payload,
                    'playwire url', default=None)
                if config_url:
                    entries.append(self.url_result(config_url, 'Playwire'))

        return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
bitrates.sort()
|
||||
|
||||
formats = []
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||
subtitles[src_lang] = [{
|
||||
'ext': src.split('/')[-1],
|
||||
'url': 'http://www.funnyordie.com%s' % src,
|
||||
}]
|
||||
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
'description': post.get('description'),
|
||||
'thumbnail': post.get('picture'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
38
youtube_dl/extractor/gazeta.py
Normal file
38
youtube_dl/extractor/gazeta.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GazetaIE(InfoExtractor):
    """Extractor for video pages on gazeta.ru.

    The page's embed variant exposes an Eagle.Platform record id, and the
    actual extraction is delegated to the EaglePlatform extractor.
    """

    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
    _TESTS = [{
        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
        'info_dict': {
            'id': '205566',
            'ext': 'mp4',
            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
            'description': 'md5:38617526050bd17b234728e7f9620a71',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page to an ``eagleplatform:`` URL result."""
        match = re.match(self._VALID_URL, url)
        display_id = match.group('id')

        # The player markup is read from the '?p=embed' variant of the page.
        embed_page = self._download_webpage(
            '%s?p=embed' % match.group('url'), display_id,
            'Downloading embed page')

        video_id = self._search_regex(
            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"',
            embed_page, 'video id')

        eagle_url = 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id
        return self.url_result(eagle_url, 'EaglePlatform')
|
||||
@@ -12,6 +12,7 @@ from ..utils import remove_end
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_NETRC_MACHINE = 'gdcvault'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
|
||||
|
||||
@@ -527,6 +527,17 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
},
|
||||
# Libsyn embed
|
||||
{
|
||||
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||
@@ -570,6 +581,45 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# ClipYou (Eagle.Platform) embed (custom URL)
|
||||
{
|
||||
'url': 'http://muz-tv.ru/play/7129/',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
'title': "'O Sole Mio",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 216,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
# Pladform embed
|
||||
{
|
||||
'url': 'http://muz-tv.ru/kinozal/view/7400/',
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosure
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
@@ -967,6 +1017,19 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for NYTimes player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Libsyn player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
@@ -1155,6 +1218,24 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'EaglePlatform')
|
||||
|
||||
# Look for ClipYou (uses Eagle.Platform) embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
|
||||
|
||||
# Look for Pladform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Pladform')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
@@ -1211,10 +1292,16 @@ class GenericIE(InfoExtractor):
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||
webpage)
|
||||
if not found:
|
||||
# Look also in Refresh HTTP header
|
||||
refresh_header = head_response.headers.get('Refresh')
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
self.report_following_redirect(new_url)
|
||||
|
||||
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id="(\d+)"',
|
||||
|
||||
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
|
||||
|
||||
if webpage is not None:
|
||||
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
||||
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
|
||||
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
|
||||
|
||||
return (webpage, None)
|
||||
return webpage, None
|
||||
|
||||
def _real_initialize(self):
|
||||
self.ts = int(time.time() * 1000) # timestamp in millis
|
||||
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
|
||||
swf_referer = None
|
||||
if self.do_playerpage_request:
|
||||
(_, player_objs) = self._get_playerpage(url)
|
||||
if player_objs is not None:
|
||||
if player_objs:
|
||||
swf_referer = self._build_swf_referer(url, player_objs[0])
|
||||
self.to_screen('SWF Referer: %s' % swf_referer)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
'id': '5182',
|
||||
'id': '114765',
|
||||
'ext': 'mp4',
|
||||
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
title = self._html_search_meta('name', webpage)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-src="(/contenu/medias/video.php.*?)"',
|
||||
webpage, 'config URL')
|
||||
config_url = 'http://www.jeuxvideo.com' + config_url
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, 'video ID')
|
||||
r'id=(\d+)',
|
||||
config_url, 'video ID')
|
||||
|
||||
config = self._download_xml(
|
||||
xml_link, title, 'Downloading XML config')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
config = self._download_json(
|
||||
config_url, title, 'Downloading JSON config')
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
formats = [{
|
||||
'url': source['file'],
|
||||
'format_id': source['label'],
|
||||
'resolution': source['label'],
|
||||
} for source in reversed(config['sources'])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': config.find('titre_video').text,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
'thumbnail': config.get('image'),
|
||||
}
|
||||
|
||||
96
youtube_dl/extractor/kanalplay.py
Normal file
96
youtube_dl/extractor/kanalplay.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KanalPlayIE(InfoExtractor):
    """Extractor for kanal5play.se, kanal9play.se and kanal11play.se.

    Video metadata and RTMP streams come from the site's ``getVideo`` API;
    subtitles, when flagged as available, are fetched as JSON and
    converted to SRT-style text.
    """

    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]

    def _fix_subtitles(self, subs):
        """Render the subtitle items as numbered, CRLF-separated cues.

        Each item supplies 'startMillis', 'endMillis' and 'text'; cues are
        numbered from 1.
        """
        cues = []
        for index, item in enumerate(subs, 1):
            start = self._subtitles_timecode(item['startMillis'] / 1000.0)
            end = self._subtitles_timecode(item['endMillis'] / 1000.0)
            cues.append(
                '%s\r\n%s --> %s\r\n%s' % (index, start, end, item['text']))
        return '\r\n\r\n'.join(cues)

    def _get_subtitles(self, channel_id, video_id):
        """Download the subtitles JSON and return it as a subtitles dict.

        The download is non-fatal; an empty dict is returned when nothing
        usable comes back.
        """
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        if not subs:
            return {}
        # NOTE(review): the language key is 'se'; presumably Swedish ('sv')
        # is meant - confirm before changing, users may select on this key.
        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        Raises ExtractorError (expected) when the API lists reasons for
        having no streams.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        channel_id = match.group('channel_id')

        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)

        no_stream_reasons = video.get('reasonsForNoStreams')
        if no_stream_reasons:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(no_stream_reasons)),
                expected=True)

        title = video['title']
        description = video.get('description')
        # 'length' and 'bitrate' are divided by 1000 before being reported.
        duration = float_or_none(video.get('length'), 1000)
        thumbnail = video.get('posterUrl')

        rtmp_base = video['streamBaseUrl']

        formats = []
        for stream in video['streams']:
            formats.append({
                'url': rtmp_base,
                'play_path': stream['source'],
                'ext': 'flv',
                'tbr': float_or_none(stream.get('bitrate'), 1000),
                'rtmp_real_time': True,
            })
        self._sort_formats(formats)

        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)
        else:
            subtitles = {}

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(flashvars.get('duration'))
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, 'video width', default=None))
|
||||
height = int_or_none(self._og_search_property(
|
||||
'video:height', webpage, 'video height', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -7,8 +7,9 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -39,12 +40,20 @@ class LetvIE(InfoExtractor):
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'publish time'
|
||||
]
|
||||
}, {
|
||||
'note': 'This video is available only in Mainland China, thus a proxy is needed',
|
||||
'url': 'http://www.letv.com/ptv/vplay/1118082.html',
|
||||
'md5': 'f80936fbe20fb2f58648e81386ff7927',
|
||||
'info_dict': {
|
||||
'id': '1118082',
|
||||
'ext': 'mp4',
|
||||
'title': '与龙共舞 完整版',
|
||||
'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
|
||||
},
|
||||
'params': {
|
||||
'cn_verification_proxy': 'http://proxy.uku.im:8888'
|
||||
},
|
||||
}]
|
||||
# http://www.letv.com/ptv/vplay/1118082.html
|
||||
# This video is available only in Mainland China
|
||||
|
||||
@staticmethod
|
||||
def urshift(val, n):
|
||||
@@ -76,9 +85,16 @@ class LetvIE(InfoExtractor):
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.letv.com'
|
||||
}
|
||||
play_json_req = compat_urllib_request.Request(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
play_json = self._download_json(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
|
||||
media_id, 'playJson data')
|
||||
play_json_req,
|
||||
media_id, 'Downloading playJson data')
|
||||
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
@@ -114,7 +130,8 @@ class LetvIE(InfoExtractor):
|
||||
|
||||
url_info_dict = {
|
||||
'url': media_url,
|
||||
'ext': determine_ext(dispatch[format_id][1])
|
||||
'ext': determine_ext(dispatch[format_id][1]),
|
||||
'format_id': format_id,
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
@@ -123,7 +140,7 @@ class LetvIE(InfoExtractor):
|
||||
urls.append(url_info_dict)
|
||||
|
||||
publish_time = parse_iso8601(self._html_search_regex(
|
||||
r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False),
|
||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
description = self._html_search_meta('description', page, fatal=False)
|
||||
|
||||
|
||||
59
youtube_dl/extractor/libsyn.py
Normal file
59
youtube_dl/extractor/libsyn.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
    """Extractor for episodes embedded through html5-player.libsyn.com."""

    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
        },
    }

    def _real_extract(self, url):
        """Scrape the embed page at *url* and return the episode info dict."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The pattern accepts both `mediaURL` and `mediaURLLibsyn` variables;
        # the set drops URLs that appear more than once.
        formats = [{
            'url': media_url,
        } for media_url in set(re.findall(
            r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'title')
        episode_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)

        # Only the episode title has a None fallback, so it is the part that
        # may be missing: join the two only when it was found, otherwise the
        # title would end in a literal " - None".
        title = (
            '%s - %s' % (podcast_title, episode_title)
            if episode_title else podcast_title)

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }
|
||||
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -40,6 +41,13 @@ class LivestreamIE(InfoExtractor):
|
||||
'id': '2245590',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'http://new.livestream.com/chess24/tatasteelchess',
|
||||
'info_dict': {
|
||||
'title': 'Tata Steel Chess',
|
||||
'id': '3705884',
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
|
||||
'only_matching': True,
|
||||
@@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor):
|
||||
'view_count': video_data.get('views'),
|
||||
}
|
||||
|
||||
def _extract_event(self, info):
    """Return a playlist result covering the videos of a Livestream event.

    *info* is the event metadata dict; its 'id', 'owner_account_id' and
    'full_name' keys are read. The event feed is fetched page by page,
    lazily, as the returned playlist's entries are consumed.
    """
    event_id = compat_str(info['id'])
    owner_id = compat_str(info['owner_account_id'])
    feed_url = (
        'https://new.livestream.com/api/accounts/{account}/events/{event}/'
        'feed.json'.format(account=owner_id, event=event_id))

    def _iter_entries():
        # Id of the last video on the previous page; None means "first page".
        last_id = None
        page_num = 0
        while True:
            page_num += 1
            if last_id is None:
                page_url = feed_url
            else:
                page_url = '{root}?&id={id}&newer=-1&type=video'.format(
                    root=feed_url, id=last_id)
            feed_items = self._download_json(
                page_url, event_id,
                'Downloading page {0}'.format(page_num))['data']
            # The feed can contain other item types; keep only videos.
            page_videos = [
                item['data'] for item in feed_items
                if item['type'] == 'video']
            if not page_videos:
                return
            for video_data in page_videos:
                yield self._extract_video_info(video_data)
            last_id = page_videos[-1]['id']

    return self.playlist_result(
        _iter_entries(), event_id, info['full_name'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor):
|
||||
result = result and compat_str(vdata['data']['id']) == vid
|
||||
return result
|
||||
|
||||
videos = [self._extract_video_info(video_data['data'])
|
||||
for video_data in info['feed']['data']
|
||||
if is_relevant(video_data, video_id)]
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
return self.playlist_result(
|
||||
videos, '%s' % info['id'], info['full_name'])
|
||||
return self._extract_event(info)
|
||||
else:
|
||||
videos = [self._extract_video_info(video_data['data'])
|
||||
for video_data in info['feed']['data']
|
||||
if is_relevant(video_data, video_id)]
|
||||
if not videos:
|
||||
raise ExtractorError('Cannot find video %s' % video_id)
|
||||
return videos[0]
|
||||
|
||||
@@ -52,6 +52,7 @@ class LRTIE(InfoExtractor):
|
||||
'url': data['streamer'],
|
||||
'play_path': 'mp4:%s' % data['file'],
|
||||
'preference': -1,
|
||||
'rtmp_real_time': True,
|
||||
})
|
||||
else:
|
||||
formats.extend(
|
||||
|
||||
@@ -19,6 +19,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -10,7 +11,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
str_to_int,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
|
||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||
'uploader': 'Daniel Holbach',
|
||||
'uploader_id': 'dholbach',
|
||||
'upload_date': '20111115',
|
||||
'timestamp': 1321359578,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
|
||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||
'info_dict': {
|
||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||
'ext': 'm4a',
|
||||
'title': 'Electric Relaxation vol. 3',
|
||||
'ext': 'mp3',
|
||||
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||
'uploader': 'Daniel Drumz',
|
||||
'uploader': 'Gilles Peterson Worldwide',
|
||||
'uploader_id': 'gillespeterson',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'thumbnail': 're:https?://.*/images/',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_url(self, track_id, template_url):
|
||||
server_count = 30
|
||||
for i in range(server_count):
|
||||
url = template_url % i
|
||||
def _get_url(self, track_id, template_url, server_number):
|
||||
boundaries = (1, 30)
|
||||
for nr in server_numbers(server_number, boundaries):
|
||||
url = template_url % nr
|
||||
try:
|
||||
# We only want to know if the request succeed
|
||||
# don't download the whole file
|
||||
self._request_webpage(
|
||||
HEADRequest(url), track_id,
|
||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
||||
'Checking URL %d/%d ...' % (nr, boundaries[-1]))
|
||||
return url
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
|
||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||
if final_song_url is None:
|
||||
self.to_screen('Trying with m4a extension')
|
||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||
if final_song_url is None:
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
PREFIX = (
|
||||
r'<span class="play-button[^"]*?"'
|
||||
r'm-play-on-spacebar[^>]+'
|
||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||
title = self._html_search_regex(
|
||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
r'/favorites/?">([0-9]+)<'],
|
||||
r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>'],
|
||||
webpage, 'play count', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
||||
|
||||
def server_numbers(first, boundaries):
    """ Server numbers to try in descending order of probable availability.
    Starting from first (i.e. the number of the server hosting the preview file)
    and going further and further up to the higher boundary and down to the
    lower one in an alternating fashion. Namely:

        server_numbers(2, (1, 5))

        # Where the preview server is 2, min number is 1 and max is 5.
        # Yields: 2, 3, 1, 4, 5

    Why not random numbers or increasing sequences? Since from what I've seen,
    full length files seem to be hosted on servers whose number is closer to
    that of the preview; to be confirmed.

    boundaries is an inclusive (lowest, highest) pair; a ValueError is raised
    (on first iteration, since this is a generator) if it does not have
    exactly two elements.
    """
    zip_longest = getattr(itertools, 'zip_longest', None)
    if zip_longest is None:
        # python 2.x
        zip_longest = itertools.izip_longest

    if len(boundaries) != 2:
        raise ValueError("boundaries should be a two-element tuple")
    lowest, highest = boundaries
    highs = range(first + 1, highest + 1)
    lows = range(first - 1, lowest - 1, -1)

    yield first
    for pair in zip_longest(highs, lows):
        for n in pair:
            # zip_longest pads the shorter side with None. Test for None
            # explicitly so that a legitimate server number 0 is not dropped.
            if n is not None:
                yield n
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
|
||||
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'info_dict': {
|
||||
@@ -39,9 +39,26 @@ class NiconicoIE(InfoExtractor):
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||
'info_dict': {
|
||||
'id': 'nm14296458',
|
||||
'ext': 'swf',
|
||||
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||
'description': 'md5:',
|
||||
'uploader': 'りょうた',
|
||||
'uploader_id': '18822557',
|
||||
'upload_date': '20110429',
|
||||
'duration': 209,
|
||||
},
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
# Determine whether the downloader used authentication to download video
|
||||
_AUTHENTICATED = False
|
||||
@@ -76,8 +93,7 @@ class NiconicoIE(InfoExtractor):
|
||||
return True
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Get video webpage. We are not actually interested in it, but need
|
||||
# the cookies in order to be able to download the info webpage
|
||||
@@ -90,7 +106,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if self._AUTHENTICATED:
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
else:
|
||||
# Get external player info
|
||||
|
||||
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
|
||||
if streams:
|
||||
for stream in streams:
|
||||
stream_type = stream.get('type').lower()
|
||||
if stream_type == 'ss':
|
||||
# smooth streaming is not supported
|
||||
if stream_type in ['ss', 'ms']:
|
||||
continue
|
||||
stream_info = self._download_json(
|
||||
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
|
||||
@@ -230,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
|
||||
stream_url = self._download_json(
|
||||
stream_info['stream'], display_id,
|
||||
'Downloading %s URL' % stream_type,
|
||||
transform_source=strip_jsonp)
|
||||
'Unable to download %s URL' % stream_type,
|
||||
transform_source=strip_jsonp, fatal=False)
|
||||
if not stream_url:
|
||||
continue
|
||||
if stream_type == 'hds':
|
||||
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
||||
# f4m downloader downloads only piece of live stream
|
||||
@@ -242,6 +246,7 @@ class NPOLiveIE(NPOBaseIE):
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
'preference': -10,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -14,46 +14,48 @@ from ..utils import (
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
||||
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
||||
'md5': '3471f2a51718195164e88f46bf427668',
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
||||
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||
video_id, 'Downloading media JSON')
|
||||
|
||||
if data['usageRights']['isGeoBlocked']:
|
||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
||||
raise ExtractorError(
|
||||
'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
|
||||
expected=True)
|
||||
|
||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
||||
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
|
||||
|
||||
duration = parse_duration(data.get('duration'))
|
||||
|
||||
images = data.get('images')
|
||||
if images:
|
||||
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class NRKPlaylistIE(InfoExtractor):
    """Extractor for nrk.no article pages that embed several videos."""

    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
        'info_dict': {
            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
            'title': 'Gjenopplev den historiske solformørkelsen',
            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
        'info_dict': {
            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
            'title': 'Rivertonprisen til Karin Fossum',
            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Collect every embedded video id on the page into a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Each match is handed over as an internal `nrk:<id>` URL, with 'NRK'
        # passed to url_result as the extractor key.
        embedded_ids = re.findall(
            r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
            webpage)
        entries = []
        for embedded_id in embedded_ids:
            entries.append(self.url_result('nrk:%s' % embedded_id, 'NRK'))

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
@@ -149,9 +192,6 @@ class NRKTVIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
def _seconds2str(self, s):
|
||||
return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
|
||||
|
||||
def _debug_print(self, txt):
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self.to_screen('[debug] %s' % txt)
|
||||
@@ -168,8 +208,8 @@ class NRKTVIE(InfoExtractor):
|
||||
for pos, p in enumerate(ps):
|
||||
begin = parse_duration(p.get('begin'))
|
||||
duration = parse_duration(p.get('dur'))
|
||||
starttime = self._seconds2str(begin)
|
||||
endtime = self._seconds2str(begin + duration)
|
||||
starttime = self._subtitles_timecode(begin)
|
||||
endtime = self._subtitles_timecode(begin + duration)
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
||||
return {lang: [
|
||||
{'ext': 'ttml', 'url': url},
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = video_data['headline']
|
||||
description = video_data['summary']
|
||||
duration = video_data['duration'] / 1000.0
|
||||
description = video_data.get('summary')
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
|
||||
uploader = video_data['byline']
|
||||
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
||||
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['type'],
|
||||
'vcodec': video['video_codec'],
|
||||
'width': video['width'],
|
||||
'height': video['height'],
|
||||
'filesize': get_file_size(video['fileSize']),
|
||||
'format_id': video.get('type'),
|
||||
'vcodec': video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('fileSize')),
|
||||
} for video in video_data['renditions']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
|
||||
thumbnails = [
|
||||
{
|
||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||
'resolution': '%dx%d' % (image['width'], image['height']),
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data['images']
|
||||
]
|
||||
|
||||
|
||||
@@ -11,6 +11,11 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
strip_jsonp,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
determine_ext,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
|
||||
'description': data['subtitle'],
|
||||
'entries': entries
|
||||
}
|
||||
|
||||
|
||||
class ORFIPTVIE(InfoExtractor):
    """Extractor for story pages on iptv.orf.at."""

    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2267952',
        'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
        'info_dict': {
            'id': '339775',
            'ext': 'flv',
            'title': 'Kreml-Kritiker Nawalny wieder frei',
            'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
            'duration': 84.729,
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20150306',
        },
    }

    def _real_extract(self, url):
        """Resolve the story page to its video id and build the info dict."""
        story_id = self._match_id(url)

        # Always fetch the canonical story URL, whatever form (e.g. '#/')
        # the input URL had.
        story_url = 'http://iptv.orf.at/stories/%s' % story_id
        webpage = self._download_webpage(story_url, story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        api_url = (
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s'
            % video_id)
        # The API response is a list; only its first element is used.
        data = self._download_json(api_url, video_id)[0]

        # The raw duration is scaled down by 1000.
        duration = float_or_none(data['duration'], 1000)

        video = data['sources']['default']
        load_balancer_url = video['loadBalancerUrl']

        # Technical fields copied into the rtmp format entry below.
        common_fields = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }
        thumbnail = video.get('preview')

        rendition = self._download_json(
            load_balancer_url, video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                formats.append(dict(
                    common_fields, url=format_url, format_id=format_id))
                continue
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
            # Any other redirect target is ignored.
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }
|
||||
|
||||
90
youtube_dl/extractor/pladform.py
Normal file
90
youtube_dl/extractor/pladform.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class PladformIE(InfoExtractor):
    """Extractor for videos hosted on pladform.ru (player, swf and catalog URLs)."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # http://muz-tv.ru/kinozal/view/7400/
        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
        'info_dict': {
            'id': '100183293',
            'ext': 'mp4',
            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 694,
            'age_limit': 0,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the getVideo XML and the catalog page, then build the info dict."""
        video_id = self._match_id(url)

        video = self._download_xml(
            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
            video_id)

        # An <error> root element means the service refused the request;
        # its text is reported to the user.
        if video.tag == 'error':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, video.text),
                expected=True)

        quality = qualities(('ld', 'sd', 'hd'))

        formats = []
        for src in video.findall('./src'):
            quality_name = src.get('quality')
            formats.append({
                'url': src.text,
                'format_id': quality_name,
                'quality': quality(quality_name),
            })
        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # Prefer the page's OpenGraph data; fall back to the XML document.
        title = self._og_search_title(webpage, fatal=False)
        if not title:
            title = xpath_text(video, './/title', 'title', fatal=True)

        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)
        if not thumbnail:
            thumbnail = xpath_text(video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
|
||||
78
youtube_dl/extractor/playwire.py
Normal file
78
youtube_dl/extractor/playwire.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PlaywireIE(InfoExtractor):
    """Extractor for Playwire videos (config and cdn URLs)."""

    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
        'md5': 'e6398701e3595888125729eaa2329ed9',
        'info_dict': {
            'id': '3353705',
            'ext': 'mp4',
            'title': 'S04_RM_UCL_Rus',
            'thumbnail': 're:^http://.*\.png$',
            'duration': 145.94,
        },
    }, {
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
        'only_matching': True,
    }, {
        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
        'only_matching': True,
    }, {
        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Load the zeus.json player config and its f4m manifest."""
        match = re.match(self._VALID_URL, url)
        publisher_id = match.group('publisher_id')
        video_id = match.group('id')

        config_url = 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (
            publisher_id, video_id)
        player = self._download_json(config_url, video_id)

        title = player['settings']['title']
        # The raw duration is scaled down by 1000.
        duration = float_or_none(player.get('duration'), 1000)

        content = player['content']
        thumbnail = content.get('poster')
        manifest_url = content['media']['f4m']

        f4m = self._download_xml(manifest_url, video_id)
        base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)

        formats = []
        for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
            media_url = media.get('url')
            if not media_url:
                # Manifest entries without a URL cannot be downloaded.
                continue
            fmt = {
                'url': '%s/%s' % (base_url, media_url),
                'tbr': int_or_none(media.get('bitrate')),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            }
            # With no bitrate or dimensions to rank by, fall back to the
            # '-hd.' marker in the file name.
            if not (fmt['tbr'] or fmt['width'] or fmt['height']):
                fmt['quality'] = 1 if '-hd.' in media_url else 0
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
||||
69
youtube_dl/extractor/primesharetv.py
Normal file
69
youtube_dl/extractor/primesharetv.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class PrimeShareTVIE(InfoExtractor):
    """Extractor for primeshare.tv download pages.

    The landing page carries a hidden form and a countdown; after waiting,
    the form is POSTed back to the same URL and the response contains the
    media URL and the title.
    """
    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'

    _TEST = {
        'url': 'http://primeshare.tv/download/238790B611',
        'md5': 'b92d9bf5461137c36228009f31533fbc',
        'info_dict': {
            'id': '238790B611',
            'ext': 'mp4',
            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Raises ExtractorError (expected) when the page reports that the
        file does not exist.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if '>File not exist<' in webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Collect every hidden <input> as name -> value for the re-submit.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        headers = {
            'Referer': url,
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        # Wait one second longer than the page's countdown (7 if not found).
        wait_time = int(self._search_regex(
            r'var\s+cWaitTime\s*=\s*(\d+)',
            webpage, 'wait time', default=7)) + 1
        self._sleep(wait_time, video_id)

        # The request body must be bytes on Python 3; urlencode() returns text.
        post_data = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post_data, headers)
        video_page = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
            video_page, 'video url')

        # The optional groups match a literal "&nbsp;" entity in the raw HTML.
        title = self._html_search_regex(
            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
            video_page, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'mp4',
        }
|
||||
@@ -127,6 +127,47 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
for s in subs)
|
||||
|
||||
|
||||
class RTVEInfantilIE(InfoExtractor):
    """Extractor for videos in the children's section of rtve.es."""
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
        'md5': '915319587b33720b8e0357caaa6617e6',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
            'duration': 357.958,
        },
    }]

    def _real_extract(self, url):
        """Fetch metadata from the config API and decode the media URL.

        The media URL is obtained by passing the downloaded thumbnail
        "png" payload to ``_decrypt_url``.
        """
        video_id = self._match_id(url)

        config_url = 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
        config = self._download_json(config_url, video_id)
        info = config['page']['items'][0]

        webpage = self._download_webpage(url, video_id)
        # The player element id embeds the internal id used by the png endpoint.
        internal_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')

        png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % internal_id,
            video_id, 'Downloading url information')

        result = {
            'id': video_id,
            'ext': 'mp4',
        }
        video_url = _decrypt_url(png)
        result['title'] = info['title']
        result['url'] = video_url
        result['thumbnail'] = info.get('image')
        result['duration'] = float_or_none(info.get('duration'), scale=1000)
        return result
|
||||
|
||||
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
|
||||
@@ -4,22 +4,87 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .common import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request
|
||||
)
|
||||
from ..utils import sanitize_url_path_consecutive_slashes
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'note': 'This video is available only in Mainland China',
|
||||
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||
'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
|
||||
'md5': '29175c8cadd8b5cc4055001e85d6b372',
|
||||
'info_dict': {
|
||||
'id': '382479172',
|
||||
'ext': 'mp4',
|
||||
'title': 'MV:Far East Movement《The Illest》',
|
||||
},
|
||||
'skip': 'Only available from China',
|
||||
}
|
||||
'params': {
|
||||
'cn_verification_proxy': 'proxy.uku.im:8888'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
'title': '【爱范品】第31期:MWC见不到的奇葩手机',
|
||||
}
|
||||
}, {
|
||||
'note': 'Multipart video',
|
||||
'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
|
||||
'info_dict': {
|
||||
'id': '78910339',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
'duration': 294,
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
'duration': 300,
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
'duration': 150,
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}]
|
||||
}, {
|
||||
'note': 'Video with title containing dash',
|
||||
'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
|
||||
'info_dict': {
|
||||
'id': '78932792',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl testing video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -29,8 +94,14 @@ class SohuIE(InfoExtractor):
|
||||
else:
|
||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||
|
||||
req = compat_urllib_request.Request(base_data_url + vid_id)
|
||||
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
return self._download_json(
|
||||
base_data_url + vid_id, video_id,
|
||||
req, video_id,
|
||||
'Downloading JSON data for %s' % vid_id)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -38,10 +109,8 @@ class SohuIE(InfoExtractor):
|
||||
mytv = mobj.group('mytv') is not None
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
raw_title = self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>',
|
||||
webpage, 'video title')
|
||||
title = raw_title.partition('-')[0].strip()
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
vid = self._html_search_regex(
|
||||
r'var vid ?= ?["\'](\d+)["\']',
|
||||
@@ -77,7 +146,9 @@ class SohuIE(InfoExtractor):
|
||||
% (format_id, i + 1, part_count))
|
||||
|
||||
part_info = part_str.split('|')
|
||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
||||
|
||||
video_url = sanitize_url_path_consecutive_slashes(
|
||||
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
||||
58
youtube_dl/extractor/ssa.py
Normal file
58
youtube_dl/extractor/ssa.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class SSAIE(InfoExtractor):
    """Extractor for film pages on ssa.nls.uk (RTMP streams)."""
    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
    _TEST = {
        'url': 'http://ssa.nls.uk/film/3561',
        'info_dict': {
            'id': '3561',
            'ext': 'flv',
            'title': 'SHETLAND WOOL',
            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
            'duration': 900,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict (RTMP url + play path) for the film at *url*."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Allow optional whitespace after the comma, like the 'file' and
        # 'image' patterns below (the previous "\S*" rejected ", 'rtmp...").
        streamer = self._search_regex(
            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
        # The play path is the configured file name without its extension.
        play_path = self._search_regex(
            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]

        def search_field(field_name, fatal=False):
            """Return the text of the labelled metadata field *field_name*."""
            return self._search_regex(
                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                webpage, field_name.lower(), fatal=fatal)

        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
        description = unescapeHTML(search_field('Description'))
        duration = parse_duration(search_field('Running time'))
        thumbnail = self._search_regex(
            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)

        return {
            'id': video_id,
            'url': streamer,
            'play_path': play_path,
            'ext': 'flv',
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }
|
||||
@@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
|
||||
embed = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed page')
|
||||
|
||||
encoded_data = self._search_regex(
|
||||
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
|
||||
player_data = self._parse_json(self._search_regex(
|
||||
r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
|
||||
data = self._parse_json(
|
||||
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
|
||||
base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||
|
||||
@@ -16,6 +16,7 @@ class TVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)http://(?:www\.)?
|
||||
(?:tvplay\.lv/parraides|
|
||||
tv3play\.lt/programos|
|
||||
play\.tv3\.lt/programos|
|
||||
tv3play\.ee/sisu|
|
||||
tv3play\.se/program|
|
||||
tv6play\.se/program|
|
||||
@@ -45,7 +46,7 @@ class TVPlayIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
|
||||
'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '409229',
|
||||
'ext': 'flv',
|
||||
|
||||
@@ -23,6 +23,8 @@ class TwitchBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'http://usher.twitch.tv'
|
||||
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
||||
_LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
@@ -66,14 +68,14 @@ class TwitchBaseIE(InfoExtractor):
|
||||
'authenticity_token': authenticity_token,
|
||||
'redirect_on_login': '',
|
||||
'embed_form': 'false',
|
||||
'mp_source_action': '',
|
||||
'mp_source_action': 'login-button',
|
||||
'follow': '',
|
||||
'user[login]': username,
|
||||
'user[password]': password,
|
||||
'login': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
@@ -84,6 +86,14 @@ class TwitchBaseIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % m.group('msg').strip(), expected=True)
|
||||
|
||||
def _prefer_source(self, formats):
    """Boost the first format whose id is 'Source', then sort *formats*.

    When no such format exists the list is simply sorted.
    """
    for candidate in formats:
        if candidate['format_id'] == 'Source':
            candidate['preference'] = 10
            break  # only the first match is boosted
    self._sort_formats(formats)
|
||||
|
||||
|
||||
class TwitchItemBaseIE(TwitchBaseIE):
|
||||
def _download_info(self, item, item_id):
|
||||
@@ -139,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
|
||||
|
||||
class TwitchVideoIE(TwitchItemBaseIE):
|
||||
IE_NAME = 'twitch:video'
|
||||
_VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_ITEM_TYPE = 'video'
|
||||
_ITEM_SHORTCUT = 'a'
|
||||
|
||||
@@ -155,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
|
||||
|
||||
class TwitchChapterIE(TwitchItemBaseIE):
|
||||
IE_NAME = 'twitch:chapter'
|
||||
_VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_ITEM_TYPE = 'chapter'
|
||||
_ITEM_SHORTCUT = 'c'
|
||||
|
||||
@@ -174,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
|
||||
|
||||
class TwitchVodIE(TwitchItemBaseIE):
|
||||
IE_NAME = 'twitch:vod'
|
||||
_VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||
_ITEM_TYPE = 'vod'
|
||||
_ITEM_SHORTCUT = 'v'
|
||||
|
||||
@@ -208,6 +218,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
'%s/vod/%s?nauth=%s&nauthsig=%s'
|
||||
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
|
||||
item_id, 'mp4')
|
||||
self._prefer_source(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
@@ -348,21 +359,14 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
'p': random.randint(1000000, 10000000),
|
||||
'player': 'twitchweb',
|
||||
'segment_preference': '4',
|
||||
'sig': access_token['sig'],
|
||||
'token': access_token['token'],
|
||||
'sig': access_token['sig'].encode('utf-8'),
|
||||
'token': access_token['token'].encode('utf-8'),
|
||||
}
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/api/channel/hls/%s.m3u8?%s'
|
||||
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
|
||||
channel_id, 'mp4')
|
||||
|
||||
# prefer the 'source' stream, the others are limited to 30 fps
|
||||
def _sort_source(f):
|
||||
if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
|
||||
return 1
|
||||
return 0
|
||||
formats = sorted(formats, key=_sort_source)
|
||||
self._prefer_source(formats)
|
||||
|
||||
view_count = stream.get('viewers')
|
||||
timestamp = parse_iso8601(stream.get('created_at'))
|
||||
|
||||
104
youtube_dl/extractor/ultimedia.py
Normal file
104
youtube_dl/extractor/ultimedia.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class UltimediaIE(InfoExtractor):
    """Extractor for ultimedia.com video pages (jwplayer inside an iframe)."""
    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
            'thumbnail': 're:^https?://.*\.jpg',
            'upload_date': '20150317',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': "Two - C'est la vie (Clip)",
            'description': 'Two',
            'thumbnail': 're:^https?://.*\.jpg',
            'upload_date': '20150224',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for *url*, or defer to the Youtube extractor.

        Raises ExtractorError (expected) when the iframe page reports the
        video as currently unavailable.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player lives in a "deliver" iframe embedded in the page.
        deliver_url = self._search_regex(
            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
            webpage, 'deliver URL')
        deliver_page = self._download_webpage(
            deliver_url, video_id, 'Downloading iframe page')

        if '>This video is currently not available' in deliver_page:
            raise ExtractorError(
                'Video %s is currently not available' % video_id, expected=True)

        player_json = self._search_regex(
            r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player')
        player = self._parse_json(player_json, video_id)

        rank = qualities(['flash', 'html5'])
        formats = []
        for entry in player['modes']:
            settings = entry.get('config', {})
            media_url = settings.get('file')
            if not media_url:
                continue
            if re.match(r'https?://www\.youtube\.com/.+?', media_url):
                # Hosted on YouTube: hand the URL over to that extractor.
                return self.url_result(media_url, 'Youtube')
            mode_type = entry.get('type')
            formats.append({
                'url': media_url,
                'format_id': mode_type,
                'quality': rank(mode_type),
            })
        self._sort_formats(formats)

        # Prefer the page heading; fall back to the player's JS variable.
        title = self._html_search_regex(
            r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
            webpage, 'title', default=None)
        if not title:
            title = self._search_regex(
                r"var\s+nameVideo\s*=\s*'([^']+)'",
                deliver_page, 'title')
        title = clean_html(title)

        raw_description = self._html_search_regex(
            r'(?s)<span>Description</span>(.+?)</p>', webpage,
            'description', fatal=False)
        description = clean_html(raw_description)

        raw_date = self._search_regex(
            r'Ajouté le\s*<span>([^<]+)', webpage,
            'upload date', fatal=False)
        upload_date = unified_strdate(raw_date)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': player.get('image'),
            'upload_date': upload_date,
            'formats': formats,
        }
|
||||
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.)?videomega\.tv/
|
||||
(?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
||||
(?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
|
||||
'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
|
||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||
'info_dict': {
|
||||
'id': 'QR0HCUHI1661IHUCH0RQ',
|
||||
'id': '4GNA688SU99US886ANG4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'title': 'BigBuckBunny_320x180',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
@@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
|
||||
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
|
||||
req = compat_urllib_request.Request(iframe_url)
|
||||
req.add_header('Referer', url)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
try:
|
||||
escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
|
||||
except IndexError:
|
||||
raise ExtractorError('Unable to extract escaped data')
|
||||
|
||||
playlist = compat_urllib_parse.unquote(escaped_data)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'title')
|
||||
title = re.sub(
|
||||
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
|
||||
video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
video_url = self._search_regex(
|
||||
r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'http_headers': {
|
||||
'Referer': iframe_url,
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default='')
|
||||
@@ -44,13 +41,10 @@ class VidmeIE(InfoExtractor):
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
|
||||
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -64,5 +58,4 @@ class VidmeIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
129
youtube_dl/extractor/viewster.py
Normal file
129
youtube_dl/extractor/viewster.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
    """Extractor for viewster.com movie pages.

    A movie is returned as a playlist with one entry per clip in the
    API's ``play_list`` (movie, trailer, extras, ...).
    """
    _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
    _TESTS = [{
        # movielink, paymethod=fre
        'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
        'playlist': [{
            'md5': '8f9d94b282d80c42b378dffdbb11caf3',
            'info_dict': {
                'id': '1293-19341-000-movie',
                'ext': 'flv',
                'title': "'Hout' (Wood) - Movie",
            },
        }],
        'info_dict': {
            'id': '1293-19341-000',
            'title': "'Hout' (Wood)",
            'description': 'md5:925733185a9242ef96f436937683f33b',
        }
    }, {
        # movielink, paymethod=adv
        'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
        'playlist': [{
            'md5': '77a005453ca7396cbe3d35c9bea30aef',
            'info_dict': {
                'id': '1140-11855-000-movie',
                'ext': 'flv',
                'title': "THE LISTENING PROJECT - Movie",
            },
        }],
        'info_dict': {
            'id': '1140-11855-000',
            'title': "THE LISTENING PROJECT",
            'description': 'md5:714421ae9957e112e672551094bf3b08',
        }
    }, {
        # direct links, no movielink
        'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
        'playlist': [{
            'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
            'info_dict': {
                'id': '1198-56411-000-trailer',
                'ext': 'mp4',
                'title': "Sinister - Trailer",
            },
        }, {
            'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
            'info_dict': {
                'id': '1198-56411-000-behind-scenes',
                'ext': 'mp4',
                'title': "Sinister - Behind Scenes",
            },
        }, {
            'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
            'info_dict': {
                'id': '1198-56411-000-scene-from-movie',
                'ext': 'mp4',
                'title': "Sinister - Scene from movie",
            },
        }],
        'info_dict': {
            'id': '1198-56411-000',
            'title': "Sinister",
            'description': 'md5:014c40b0488848de9683566a42e33372',
        }
    }]

    _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'

    def _real_extract(self, url):
        """Return a playlist result holding every playable clip of the movie."""
        video_id = self._match_id(url)

        request = compat_urllib_request.Request(
            'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
        request.add_header('Accept', self._ACCEPT_HEADER)

        movie = self._download_json(
            request, video_id, 'Downloading movie metadata JSON')

        title = movie.get('title') or movie['original_title']
        description = movie.get('synopsis')
        thumbnail = movie.get('large_artwork') or movie.get('artwork')

        entries = []
        for clip in movie['play_list']:
            entry = None

            # movielink api
            link_request = clip.get('link_request')
            if link_request:
                # Every query parameter is filled from the clip's
                # link_request mapping, including "currency".
                request = compat_urllib_request.Request(
                    'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
                    % link_request)
                request.add_header('Accept', self._ACCEPT_HEADER)

                # Non-fatal: a clip may still offer a direct link below.
                movie_link = self._download_json(
                    request, video_id, 'Downloading movie link JSON', fatal=False)

                if movie_link:
                    formats = self._extract_f4m_formats(
                        movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
                    self._sort_formats(formats)
                    entry = {
                        'formats': formats,
                    }

            # direct link (replaces any movielink entry built above)
            clip_url = clip.get('clip_data', {}).get('url')
            if clip_url:
                entry = {
                    'url': clip_url,
                    'ext': 'mp4',
                }

            if entry:
                entry.update({
                    'id': '%s-%s' % (video_id, clip['canonical_title']),
                    'title': '%s - %s' % (title, clip['title']),
                })
                entries.append(entry)

        playlist = self.playlist_result(entries, video_id, title, description)
        playlist['thumbnail'] = thumbnail
        return playlist
|
||||
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -20,6 +19,7 @@ from ..utils import (
|
||||
RegexNotFoundError,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
|
||||
data = urlencode_postdata({
|
||||
'email': username,
|
||||
'password': password,
|
||||
@@ -140,6 +140,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
|
||||
'uploader_id': 'atencio',
|
||||
'uploader': 'Peter Atencio',
|
||||
'upload_date': '20130927',
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
@@ -176,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = compat_urllib_parse.urlencode({
|
||||
token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
# I didn't manage to use the password with https
|
||||
if url.startswith('https'):
|
||||
pass_url = url.replace('https', 'http')
|
||||
else:
|
||||
pass_url = url
|
||||
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
return self._download_webpage(
|
||||
@@ -223,12 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
orig_url = url
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password:
|
||||
headers['Cookie'] = '%s_password=%s' % (
|
||||
video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
|
||||
url = 'https://player.vimeo.com/video/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
@@ -323,9 +317,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
# Extract upload date
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
|
||||
mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
video_upload_date = unified_strdate(mobj.group(1))
|
||||
|
||||
try:
|
||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
|
||||
@@ -379,7 +373,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
for tt in text_tracks:
|
||||
subtitles[tt['lang']] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://vimeo.com' + tt['url'],
|
||||
'url': 'https://vimeo.com' + tt['url'],
|
||||
}]
|
||||
|
||||
return {
|
||||
@@ -402,11 +396,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
|
||||
IE_NAME = 'vimeo:channel'
|
||||
_VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/channels/tributes',
|
||||
'url': 'https://vimeo.com/channels/tributes',
|
||||
'info_dict': {
|
||||
'id': 'tributes',
|
||||
'title': 'Vimeo Tributes',
|
||||
@@ -435,10 +429,10 @@ class VimeoChannelIE(InfoExtractor):
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', login_form))
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
post = urlencode_postdata(fields)
|
||||
password_path = self._search_regex(
|
||||
r'action="([^"]+)"', login_form, 'password URL')
|
||||
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||
@@ -465,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
for video_id in video_ids]
|
||||
return {'_type': 'playlist',
|
||||
'id': list_id,
|
||||
@@ -476,15 +470,15 @@ class VimeoChannelIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
|
||||
return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
|
||||
|
||||
|
||||
class VimeoUserIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:user'
|
||||
_VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/nkistudio/videos',
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
'info_dict': {
|
||||
'title': 'Nki',
|
||||
'id': 'nkistudio',
|
||||
@@ -495,15 +489,15 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
return self._extract_videos(name, 'http://vimeo.com/%s' % name)
|
||||
return self._extract_videos(name, 'https://vimeo.com/%s' % name)
|
||||
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:album'
|
||||
_VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
|
||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
|
||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/album/2632481',
|
||||
'url': 'https://vimeo.com/album/2632481',
|
||||
'info_dict': {
|
||||
'id': '2632481',
|
||||
'title': 'Staff Favorites: November 2013',
|
||||
@@ -527,14 +521,14 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
||||
return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
|
||||
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
|
||||
IE_NAME = 'vimeo:group'
|
||||
_VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
|
||||
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/groups/rolexawards',
|
||||
'url': 'https://vimeo.com/groups/rolexawards',
|
||||
'info_dict': {
|
||||
'id': 'rolexawards',
|
||||
'title': 'Rolex Awards for Enterprise',
|
||||
@@ -548,13 +542,13 @@ class VimeoGroupsIE(VimeoAlbumIE):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
|
||||
return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
|
||||
|
||||
|
||||
class VimeoReviewIE(InfoExtractor):
|
||||
IE_NAME = 'vimeo:review'
|
||||
IE_DESC = 'Review pages on vimeo'
|
||||
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
@@ -566,7 +560,7 @@ class VimeoReviewIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'note': 'video player needs Referer',
|
||||
'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
|
||||
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
|
||||
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
|
||||
'info_dict': {
|
||||
'id': '91613211',
|
||||
@@ -588,11 +582,11 @@ class VimeoReviewIE(InfoExtractor):
|
||||
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:watchlater'
|
||||
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
|
||||
_VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
|
||||
_LOGIN_REQUIRED = True
|
||||
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/home/watchlater',
|
||||
'url': 'https://vimeo.com/home/watchlater',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -612,7 +606,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
|
||||
|
||||
class VimeoLikesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
|
||||
_VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
|
||||
IE_NAME = 'vimeo:likes'
|
||||
IE_DESC = 'Vimeo user likes'
|
||||
_TEST = {
|
||||
@@ -640,8 +634,8 @@ class VimeoLikesIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
def _get_page(idx):
|
||||
page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
|
||||
self.http_scheme(), user_id, idx + 1)
|
||||
page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
|
||||
user_id, idx + 1)
|
||||
webpage = self._download_webpage(
|
||||
page_url, user_id,
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count))
|
||||
|
||||
@@ -33,14 +33,13 @@ class VineIE(InfoExtractor):
|
||||
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
||||
|
||||
formats = [{
|
||||
'url': data['videoLowURL'],
|
||||
'ext': 'mp4',
|
||||
'format_id': 'low',
|
||||
}, {
|
||||
'url': data['videoUrl'],
|
||||
'ext': 'mp4',
|
||||
'format_id': 'standard',
|
||||
}]
|
||||
'format_id': '%(format)s-%(rate)s' % f,
|
||||
'vcodec': f['format'],
|
||||
'quality': f['rate'],
|
||||
'url': f['videoUrl'],
|
||||
} for f in data['videoUrls'] if f.get('rate')]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
month_by_abbreviation,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,23 +29,45 @@ class YamIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
# An external video hosted on YouTube
|
||||
'url': 'http://mymedia.yam.com/m/3598173',
|
||||
'md5': '0238ceec479c654e8c2f1223755bf3e9',
|
||||
'url': 'http://mymedia.yam.com/m/3599430',
|
||||
'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
|
||||
'info_dict': {
|
||||
'id': 'pJ2Deys283c',
|
||||
'id': 'CNpEoQlrIgA',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150202',
|
||||
'upload_date': '20150306',
|
||||
'uploader': '新莊社大瑜伽社',
|
||||
'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
|
||||
'description': 'md5:11e2e405311633ace874f2e6226c8b17',
|
||||
'uploader_id': '2323agoy',
|
||||
'title': '外婆的澎湖灣KTV-潘安邦',
|
||||
}
|
||||
'title': '20090412陽明山二子坪-1',
|
||||
},
|
||||
'skip': 'Video does not exist',
|
||||
}, {
|
||||
'url': 'http://mymedia.yam.com/m/3598173',
|
||||
'info_dict': {
|
||||
'id': '3598173',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'cause Yam system error',
|
||||
}, {
|
||||
'url': 'http://mymedia.yam.com/m/3599437',
|
||||
'info_dict': {
|
||||
'id': '3599437',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'invalid YouTube URL',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
# Check for errors
|
||||
system_msg = self._html_search_regex(
|
||||
r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
|
||||
default=None)
|
||||
if system_msg:
|
||||
raise ExtractorError(system_msg, expected=True)
|
||||
|
||||
# Is it hosted externally on YouTube?
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<embed src="(http://www.youtube.com/[^"]+)"',
|
||||
|
||||
127
youtube_dl/extractor/yandexmusic.py
Normal file
127
youtube_dl/extractor/yandexmusic.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# coding=utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class YandexMusicBaseIE(InfoExtractor):
    """Shared helpers for the Yandex.Music extractors defined in this module."""

    def _get_track_url(self, storage_dir, track_id):
        """Return the MP3 URL for a track.

        Downloads the track location JSON for ``storage_dir`` and combines
        its ``host``, ``ts``, ``path`` and ``s`` fields into the final URL.
        """
        location = self._download_json(
            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
            % storage_dir,
            track_id, 'Downloading track location JSON')

        # The key embedded in the URL is the MD5 of a fixed string, the path
        # without its first character, and the ``s`` value of the response.
        signed = 'XGRlBW9FXlekgbPrRHuSiA' + location['path'][1:] + location['s']
        digest = hashlib.md5(signed.encode('utf-8')).hexdigest()
        storage_parts = storage_dir.split('.')

        url_template = 'http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
        return url_template % (
            location['host'], digest,
            location['ts'] + location['path'], storage_parts[1])

    def _get_track_info(self, track):
        """Build the info dict for one track JSON object."""
        track_id = track['id']
        info = {'id': track_id, 'ext': 'mp3'}
        info['url'] = self._get_track_url(track['storageDir'], track_id)
        info['title'] = '%s - %s' % (track['artists'][0]['name'], track['title'])
        info['filesize'] = int_or_none(track.get('fileSize'))
        # presumably scales milliseconds down to seconds -- confirm against
        # float_or_none's second parameter
        info['duration'] = float_or_none(track.get('durationMs'), 1000)
        return info
|
||||
|
||||
|
||||
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for single-track URLs matched by ``_VALID_URL``."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
            'filesize': 4628061,
            'duration': 193.04,
        }
    }

    def _real_extract(self, url):
        """Download the track JSON for ``url`` and return its info dict."""
        url_match = re.match(self._VALID_URL, url)
        album_id, track_id = url_match.group('album_id', 'id')

        # The handler takes the pair as "<track id>:<album id>".
        track_json_url = 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id)
        response = self._download_json(
            track_json_url, track_id, 'Downloading track JSON')

        return self._get_track_info(response['track'])
|
||||
|
||||
|
||||
class YandexMusicAlbumIE(YandexMusicBaseIE):
    """Extractor for album URLs; returns the album as a playlist of tracks."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
        },
        'playlist_count': 50,
    }

    def _real_extract(self, url):
        """Download the album JSON for ``url`` and return a playlist result.

        The playlist title is "<first artist> - <album title>", followed by
        " (<year>)" when the album JSON carries a truthy ``year``.
        """
        album_id = self._match_id(url)

        album = self._download_json(
            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
            album_id, 'Downloading album JSON')

        # Walk every volume instead of only the first one, so tracks beyond
        # volume 0 are not dropped.
        # NOTE(review): assumes each item of album['volumes'] is a list of
        # track dicts shaped like the first one -- confirm on a multi-disc album.
        entries = [
            self._get_track_info(track)
            for volume in album['volumes']
            for track in volume]

        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
        year = album.get('year')
        if year:
            title += ' (%s)' % year

        return self.playlist_result(entries, compat_str(album['id']), title)
|
||||
|
||||
|
||||
class YandexMusicPlaylistIE(YandexMusicBaseIE):
    """Extractor for user playlist URLs matched by ``_VALID_URL``."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'Что слушают Enter Shikari',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 6,
    }

    def _real_extract(self, url):
        """Parse the ``Mu`` object embedded in the page and return a playlist result."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # The playlist data sits in a ``var Mu = {...};`` script assignment.
        mu_json = self._search_regex(
            r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player')
        page_state = self._parse_json(mu_json, playlist_id)
        playlist = page_state['pageData']['playlist']

        entries = []
        for track in playlist['tracks']:
            entries.append(self._get_track_info(track))

        return self.playlist_result(
            entries, compat_str(playlist_id),
            playlist['title'], playlist.get('description'))
|
||||
@@ -47,7 +47,8 @@ class YouPornIE(InfoExtractor):
|
||||
|
||||
# Get JSON parameters
|
||||
json_params = self._search_regex(
|
||||
r'var currentVideo = new Video\((.*)\)[,;]',
|
||||
[r'var\s+videoJa?son\s*=\s*({.+?});',
|
||||
r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
|
||||
webpage, 'JSON parameters')
|
||||
try:
|
||||
params = json.loads(json_params)
|
||||
|
||||
@@ -1532,7 +1532,7 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, query)
|
||||
result_code = self._search_regex(
|
||||
r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
|
||||
r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
|
||||
|
||||
part_codes = re.findall(
|
||||
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
|
||||
|
||||
@@ -195,6 +195,12 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_const', const='::', dest='source_address',
|
||||
help='Make all connections via IPv6 (experimental)',
|
||||
)
|
||||
network.add_option(
|
||||
'--cn-verification-proxy',
|
||||
dest='cn_verification_proxy', default=None, metavar='URL',
|
||||
help='Use this proxy to verify the IP address for some Chinese sites. '
|
||||
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
|
||||
)
|
||||
|
||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||
selection.add_option(
|
||||
@@ -557,7 +563,7 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='verbose', default=False,
|
||||
help='print various debugging information')
|
||||
verbosity.add_option(
|
||||
'--dump-intermediate-pages',
|
||||
'--dump-pages', '--dump-intermediate-pages',
|
||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||
help='print downloaded pages to debug problems (very verbose)')
|
||||
verbosity.add_option(
|
||||
@@ -729,6 +735,15 @@ def parseOpts(overrideArguments=None):
|
||||
'--add-metadata',
|
||||
action='store_true', dest='addmetadata', default=False,
|
||||
help='write metadata to the video file')
|
||||
postproc.add_option(
|
||||
'--metadata-from-title',
|
||||
metavar='FORMAT', dest='metafromtitle',
|
||||
help='parse additional metadata like song title / artist from the video title. '
|
||||
'The format syntax is the same as --output, '
|
||||
'the parsed parameters replace existing values. '
|
||||
'Additional templates: %(album), %(artist). '
|
||||
'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
|
||||
'"Coldplay - Paradise"')
|
||||
postproc.add_option(
|
||||
'--xattrs',
|
||||
action='store_true', dest='xattrs', default=False,
|
||||
@@ -779,6 +794,11 @@ def parseOpts(overrideArguments=None):
|
||||
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||
else:
|
||||
command_line_conf = sys.argv[1:]
|
||||
# Workaround for Python 2.x, where argv is a byte list
|
||||
if sys.version_info < (3,):
|
||||
command_line_conf = [
|
||||
a.decode('utf-8', 'replace') for a in command_line_conf]
|
||||
|
||||
if '--ignore-config' in command_line_conf:
|
||||
system_conf = []
|
||||
user_conf = []
|
||||
|
||||
@@ -15,6 +15,7 @@ from .ffmpeg import (
|
||||
)
|
||||
from .xattrpp import XAttrMetadataPP
|
||||
from .execafterdownload import ExecAfterDownloadPP
|
||||
from .metadatafromtitle import MetadataFromTitlePP
|
||||
|
||||
|
||||
def get_postprocessor(key):
|
||||
@@ -34,5 +35,6 @@ __all__ = [
|
||||
'FFmpegPostProcessor',
|
||||
'FFmpegSubtitlesConvertorPP',
|
||||
'FFmpegVideoConvertorPP',
|
||||
'MetadataFromTitlePP',
|
||||
'XAttrMetadataPP',
|
||||
]
|
||||
|
||||
@@ -545,7 +545,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
metadata['title'] = info['title']
|
||||
if info.get('upload_date') is not None:
|
||||
metadata['date'] = info['upload_date']
|
||||
if info.get('uploader') is not None:
|
||||
if info.get('artist') is not None:
|
||||
metadata['artist'] = info['artist']
|
||||
elif info.get('uploader') is not None:
|
||||
metadata['artist'] = info['uploader']
|
||||
elif info.get('uploader_id') is not None:
|
||||
metadata['artist'] = info['uploader_id']
|
||||
@@ -554,6 +556,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
metadata['comment'] = info['description']
|
||||
if info.get('webpage_url') is not None:
|
||||
metadata['purl'] = info['webpage_url']
|
||||
if info.get('album') is not None:
|
||||
metadata['album'] = info['album']
|
||||
|
||||
if not metadata:
|
||||
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
|
||||
|
||||
47
youtube_dl/postprocessor/metadatafromtitle.py
Normal file
47
youtube_dl/postprocessor/metadatafromtitle.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..utils import PostProcessingError
|
||||
|
||||
|
||||
class MetadataFromTitlePPError(PostProcessingError):
    """Raised when a video title does not match the requested title format."""
|
||||
|
||||
|
||||
class MetadataFromTitlePP(PostProcessor):
    """Post-processor that fills info fields by parsing the video title.

    The title format uses ``%(name)s`` placeholders; each placeholder becomes
    a named regex group whose match is stored in ``info[name]``.
    """

    def __init__(self, downloader, titleformat):
        super(MetadataFromTitlePP, self).__init__(downloader)
        self._titleformat = titleformat
        self._titleregex = self.format_to_regex(titleformat)

    def format_to_regex(self, fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        # NOTE: the docstring is a raw string because it contains the
        # backslash sequences "\ " and "\-", which are invalid escapes in a
        # normal string literal.
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<' + match.group(1) + '>.+)'
            lastpos = match.end()
        # Escape whatever literal text follows the last placeholder.
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Parse ``info['title']`` and copy every named group into ``info``.

        Returns ``(True, info)``. Raises MetadataFromTitlePPError when the
        title does not match the regex built from the title format.
        """
        title = info['title']
        match = re.match(self._titleregex, title)
        if match is None:
            raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
        # groupdict() already yields the matched text for each group, so no
        # second match.group() lookup is needed.
        for attribute, value in match.groupdict().items():
            info[attribute] = value
            self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)

        return True, info
|
||||
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
|
||||
raise
|
||||
|
||||
# In case of error, try to remove win32 forbidden chars
|
||||
alt_filename = os.path.join(
|
||||
re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
|
||||
for path_part in os.path.split(filename)
|
||||
)
|
||||
alt_filename = sanitize_path(filename)
|
||||
if alt_filename == filename:
|
||||
raise
|
||||
else:
|
||||
# An exception here should be caught in the caller
|
||||
stream = open(encodeFilename(filename), open_mode)
|
||||
stream = open(encodeFilename(alt_filename), open_mode)
|
||||
return (stream, alt_filename)
|
||||
|
||||
|
||||
@@ -311,6 +308,31 @@ def sanitize_filename(s, restricted=False, is_id=False):
|
||||
return result
|
||||
|
||||
|
||||
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform ``s`` is returned unchanged. On win32 the drive or
    UNC prefix is kept as is, the rest is normalized with os.path.normpath,
    and in every component except '.' and '..' each of the characters
    / < > : " | \\ ? * as well as a trailing dot is replaced with '#'.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # os.path.splitunc() was removed in Python 3.7, so only consult it where
    # it still exists and splitdrive() found no prefix.
    splitunc = getattr(os.path, 'splitunc', None)
    if not drive_or_unc and splitunc is not None:
        drive_or_unc, _ = splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the first component of the remainder; the prefix is re-added below.
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
|
||||
|
||||
|
||||
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    components = list(compat_urlparse.urlparse(url))
    # Index 2 of the parsed components is the path; the other parts stay untouched.
    collapsed_path = re.sub(r'/{2,}', '/', components[2])
    components[2] = collapsed_path
    return compat_urlparse.urlunparse(components)
|
||||
|
||||
|
||||
def orderedSet(iterable):
|
||||
""" Remove all duplicates from the input iterable """
|
||||
res = []
|
||||
@@ -1768,3 +1790,24 @@ def match_filter_func(filter_str):
|
||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||
return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
|
||||
return _match_func
|
||||
|
||||
|
||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a request pick its own proxy.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there; the header is removed before the request goes on.
    """

    def __init__(self, proxies=None):
        def make_default_opener(scheme):
            # Bind the scheme now; '__noproxy__' marks "no proxy configured".
            def opener(r, proxy='__noproxy__', type=scheme, meth=self.proxy_open):
                return meth(r, proxy, type)
            return opener

        # Set default handlers
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, make_default_opener(scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open ``req`` via the per-request proxy if given, else via ``proxy``.

        Returns None when the effective proxy is the '__noproxy__' marker.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy != '__noproxy__':
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
        return None  # No Proxy
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.03.03'
|
||||
__version__ = '2015.03.24'
|
||||
|
||||
Reference in New Issue
Block a user