Compare commits
180 Commits
2015.09.03
...
2015.09.22
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b942db3dc3 | ||
|
|
78f9fb902b | ||
|
|
d8fef8faac | ||
|
|
8ea6bd2802 | ||
|
|
c659022b5c | ||
|
|
8ca2e93e1a | ||
|
|
5600e214c3 | ||
|
|
6400f8ec0f | ||
|
|
c3a4e2ec40 | ||
|
|
e28c794699 | ||
|
|
da9f180835 | ||
|
|
6b8ce312e3 | ||
|
|
de3fc356e1 | ||
|
|
d0fed4ac02 | ||
|
|
7ce50a355c | ||
|
|
9612f23399 | ||
|
|
cccedc1aa4 | ||
|
|
c430802e32 | ||
|
|
cb4e421901 | ||
|
|
8e97596b7b | ||
|
|
92085e7099 | ||
|
|
c6aa838b51 | ||
|
|
9f5e8d16b3 | ||
|
|
82c06a40ac | ||
|
|
4423eba49b | ||
|
|
5b4c54631a | ||
|
|
5a1a2e9454 | ||
|
|
f005f96ea5 | ||
|
|
5e39123b3b | ||
|
|
393ca8c94d | ||
|
|
f817adc468 | ||
|
|
6c91a5a7f5 | ||
|
|
749b09616d | ||
|
|
5de5ab89b4 | ||
|
|
1d67c96640 | ||
|
|
d1c694ea4a | ||
|
|
06368a232a | ||
|
|
8a7bbd1606 | ||
|
|
131d05033b | ||
|
|
1806a75415 | ||
|
|
659ffe204c | ||
|
|
4647fd8910 | ||
|
|
d492dad8f4 | ||
|
|
3368d70dce | ||
|
|
0e1b2566ff | ||
|
|
369e60162e | ||
|
|
d5e7657fe2 | ||
|
|
f84ce1ebaf | ||
|
|
12bc242944 | ||
|
|
88060cce10 | ||
|
|
272e4db5c7 | ||
|
|
6e21cc3b67 | ||
|
|
0391bc8176 | ||
|
|
3b9264a049 | ||
|
|
2b3c254678 | ||
|
|
287be8c615 | ||
|
|
953fed280f | ||
|
|
e2ff3df314 | ||
|
|
31208a07c2 | ||
|
|
ac7a1b0dfb | ||
|
|
c246773599 | ||
|
|
25cd56a715 | ||
|
|
82c18e2a53 | ||
|
|
d5d38d16ae | ||
|
|
e1cbf33573 | ||
|
|
2ffe3bc14b | ||
|
|
d5867276a9 | ||
|
|
f665ef8fc5 | ||
|
|
b264c21302 | ||
|
|
349b3a2ea0 | ||
|
|
87813a8570 | ||
|
|
aab135516b | ||
|
|
141ba36996 | ||
|
|
d434ca5448 | ||
|
|
94e507aea7 | ||
|
|
3ebc121293 | ||
|
|
41ebd6530b | ||
|
|
2ec7b7b79b | ||
|
|
60ed60353b | ||
|
|
586f1cc532 | ||
|
|
73eb13dfc7 | ||
|
|
1721fef28b | ||
|
|
364ca0582e | ||
|
|
133a2b4ac2 | ||
|
|
d85187eb74 | ||
|
|
cc1ac11017 | ||
|
|
73f536439e | ||
|
|
b17e7d9a9b | ||
|
|
2f29b758e0 | ||
|
|
482aa3fecc | ||
|
|
d9c19db340 | ||
|
|
6c4d243de5 | ||
|
|
d1561ef777 | ||
|
|
1072336249 | ||
|
|
75bb5c7028 | ||
|
|
376e1ad081 | ||
|
|
b58a22b963 | ||
|
|
47004d9579 | ||
|
|
12810c9cd3 | ||
|
|
7a459170fa | ||
|
|
3cf0df568a | ||
|
|
b88ebd472e | ||
|
|
64997815c4 | ||
|
|
3ecc527209 | ||
|
|
b1b7d1ffba | ||
|
|
4003bd82b0 | ||
|
|
8801255d7d | ||
|
|
3b18f539a7 | ||
|
|
c67a055d16 | ||
|
|
bc973e06d0 | ||
|
|
aeb3c8a0e8 | ||
|
|
cf33a47df0 | ||
|
|
daeb0f04cd | ||
|
|
97243fe395 | ||
|
|
9dbdb65abe | ||
|
|
9af461de35 | ||
|
|
4d71e200c6 | ||
|
|
8e0bdabed2 | ||
|
|
bca553caac | ||
|
|
a2f42a3baf | ||
|
|
7465222a9c | ||
|
|
e28034c5ac | ||
|
|
12bbd32ad0 | ||
|
|
266e466ee4 | ||
|
|
cf83f532ae | ||
|
|
cd019668dc | ||
|
|
515fc8776f | ||
|
|
c7c0996d8c | ||
|
|
b3e64671cc | ||
|
|
4abe214499 | ||
|
|
e94cb5ae7e | ||
|
|
e213c98df1 | ||
|
|
1639282434 | ||
|
|
be0e5dbd83 | ||
|
|
ad72917274 | ||
|
|
6a3f4c3f82 | ||
|
|
a6420bf50c | ||
|
|
eb387896e9 | ||
|
|
f43c163158 | ||
|
|
673bf566fc | ||
|
|
f95c5e1218 | ||
|
|
f33f32f159 | ||
|
|
8df5ae15d1 | ||
|
|
75b399f455 | ||
|
|
12439dd5ec | ||
|
|
3513d41436 | ||
|
|
cab792abe5 | ||
|
|
8870358b1b | ||
|
|
ee087c79ad | ||
|
|
51f579b635 | ||
|
|
c23c3d7d7d | ||
|
|
4abf617b9c | ||
|
|
3026164b16 | ||
|
|
9dd73ef4a4 | ||
|
|
75c72a1e67 | ||
|
|
08354db47b | ||
|
|
027eb5a6b0 | ||
|
|
f71264490c | ||
|
|
6270239a6d | ||
|
|
1195a38f46 | ||
|
|
66e289bab4 | ||
|
|
52c6f26cab | ||
|
|
dc534b674f | ||
|
|
f30c2e8e98 | ||
|
|
c482b3c69a | ||
|
|
266b0ad676 | ||
|
|
87f70ab39d | ||
|
|
8e636da499 | ||
|
|
22889ab175 | ||
|
|
5d2354f177 | ||
|
|
a41fb80ce1 | ||
|
|
2e2575e213 | ||
|
|
26c61e0809 | ||
|
|
e7a8c3032d | ||
|
|
725d1c58aa | ||
|
|
d14f0c45fc | ||
|
|
615f155a3a | ||
|
|
eba470f2f2 | ||
|
|
061f62da54 | ||
|
|
bfed4813b2 |
3
AUTHORS
3
AUTHORS
@@ -140,3 +140,6 @@ Behrouz Abbasi
|
|||||||
ngld
|
ngld
|
||||||
nyuszika7h
|
nyuszika7h
|
||||||
Shaun Walbridge
|
Shaun Walbridge
|
||||||
|
Lee Jenkins
|
||||||
|
Anssi Hannula
|
||||||
|
Lukáš Lalinský
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
|||||||
- [VIDEO SELECTION](#video-selection)
|
- [VIDEO SELECTION](#video-selection)
|
||||||
- [FAQ](#faq)
|
- [FAQ](#faq)
|
||||||
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
||||||
|
- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
|
||||||
- [BUGS](#bugs)
|
- [BUGS](#bugs)
|
||||||
- [COPYRIGHT](#copyright)
|
- [COPYRIGHT](#copyright)
|
||||||
|
|
||||||
@@ -261,7 +262,7 @@ For example:
|
|||||||
machine youtube login myaccount@gmail.com password my_youtube_password
|
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||||
machine twitch login my_twitch_account_name password my_twitch_password
|
machine twitch login my_twitch_account_name password my_twitch_password
|
||||||
```
|
```
|
||||||
To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration).
|
To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or place it in [configuration file](#configuration).
|
||||||
|
|
||||||
On Windows you may also need to setup `%HOME%` environment variable manually.
|
On Windows you may also need to setup `%HOME%` environment variable manually.
|
||||||
|
|
||||||
@@ -277,8 +278,8 @@ The `-o` option allows users to indicate a template for the output file names. T
|
|||||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||||
- `playlist`: The name or the id of the playlist that contains the video.
|
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||||
|
|
||||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||||
|
|||||||
@@ -122,7 +122,6 @@
|
|||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **divxstage**: DivxStage
|
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **dramafever**
|
- **dramafever**
|
||||||
@@ -195,7 +194,7 @@
|
|||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GoldenMoustache**
|
- **GoldenMoustache**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
|
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
- **Groupon**
|
- **Groupon**
|
||||||
- **Hark**
|
- **Hark**
|
||||||
@@ -286,7 +285,7 @@
|
|||||||
- **Minhateca**
|
- **Minhateca**
|
||||||
- **MinistryGrid**
|
- **MinistryGrid**
|
||||||
- **miomio.tv**
|
- **miomio.tv**
|
||||||
- **mitele.es**
|
- **MiTele**: mitele.es
|
||||||
- **mixcloud**
|
- **mixcloud**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
@@ -309,7 +308,6 @@
|
|||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
- **MusicPlayOn**
|
||||||
- **MusicVault**
|
|
||||||
- **muzu.tv**
|
- **muzu.tv**
|
||||||
- **Mwave**
|
- **Mwave**
|
||||||
- **MySpace**
|
- **MySpace**
|
||||||
@@ -318,7 +316,6 @@
|
|||||||
- **Myvi**
|
- **Myvi**
|
||||||
- **myvideo**
|
- **myvideo**
|
||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **N-JOY**
|
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
- **NationalGeographic**
|
- **NationalGeographic**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
@@ -327,7 +324,9 @@
|
|||||||
- **NBCNews**
|
- **NBCNews**
|
||||||
- **NBCSports**
|
- **NBCSports**
|
||||||
- **NBCSportsVPlayer**
|
- **NBCSportsVPlayer**
|
||||||
- **ndr**: NDR.de - Mediathek
|
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||||
|
- **ndr:embed**
|
||||||
|
- **ndr:embed:base**
|
||||||
- **NDTV**
|
- **NDTV**
|
||||||
- **NerdCubedFeed**
|
- **NerdCubedFeed**
|
||||||
- **Nerdist**
|
- **Nerdist**
|
||||||
@@ -350,12 +349,16 @@
|
|||||||
- **nhl.com:videocenter**: NHL videocenter category
|
- **nhl.com:videocenter**: NHL videocenter category
|
||||||
- **niconico**: ニコニコ動画
|
- **niconico**: ニコニコ動画
|
||||||
- **NiconicoPlaylist**
|
- **NiconicoPlaylist**
|
||||||
|
- **njoy**: N-JOY
|
||||||
|
- **njoy:embed**
|
||||||
- **Noco**
|
- **Noco**
|
||||||
- **Normalboots**
|
- **Normalboots**
|
||||||
- **NosVideo**
|
- **NosVideo**
|
||||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||||
- **novamov**: NovaMov
|
- **novamov**: NovaMov
|
||||||
- **Nowness**
|
- **nowness**
|
||||||
|
- **nowness:playlist**
|
||||||
|
- **nowness:series**
|
||||||
- **NowTV**
|
- **NowTV**
|
||||||
- **nowvideo**: NowVideo
|
- **nowvideo**: NowVideo
|
||||||
- **npo**: npo.nl and ntr.nl
|
- **npo**: npo.nl and ntr.nl
|
||||||
@@ -376,7 +379,6 @@
|
|||||||
- **OnionStudios**
|
- **OnionStudios**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
- **OpenFilm**
|
|
||||||
- **orf:fm4**: radio FM4
|
- **orf:fm4**: radio FM4
|
||||||
- **orf:iptv**: iptv.ORF.at
|
- **orf:iptv**: iptv.ORF.at
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
@@ -531,7 +533,7 @@
|
|||||||
- **techtv.mit.edu**
|
- **techtv.mit.edu**
|
||||||
- **ted**
|
- **ted**
|
||||||
- **TeleBruxelles**
|
- **TeleBruxelles**
|
||||||
- **telecinco.es**
|
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||||
- **Telegraaf**
|
- **Telegraaf**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
@@ -633,6 +635,7 @@
|
|||||||
- **vine:user**
|
- **vine:user**
|
||||||
- **vk**: VK
|
- **vk**: VK
|
||||||
- **vk:uservideos**: VK - User's Videos
|
- **vk:uservideos**: VK - User's Videos
|
||||||
|
- **vlive**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from youtube_dl.utils import get_filesystem_encoding
|
|||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
|
compat_shlex_split,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
@@ -67,5 +68,8 @@ class TestCompat(unittest.TestCase):
|
|||||||
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
||||||
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
||||||
|
|
||||||
|
def test_compat_shlex_split(self):
|
||||||
|
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -57,11 +57,16 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
xpath_attr,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
parse_dfxp_time_expr,
|
parse_dfxp_time_expr,
|
||||||
dfxp2srt,
|
dfxp2srt,
|
||||||
|
cli_option,
|
||||||
|
cli_valueless_option,
|
||||||
|
cli_bool_option,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -264,6 +269,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||||
|
|
||||||
|
def test_xpath_element(self):
|
||||||
|
doc = xml.etree.ElementTree.Element('root')
|
||||||
|
div = xml.etree.ElementTree.SubElement(doc, 'div')
|
||||||
|
p = xml.etree.ElementTree.SubElement(div, 'p')
|
||||||
|
p.text = 'Foo'
|
||||||
|
self.assertEqual(xpath_element(doc, 'div/p'), p)
|
||||||
|
self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
|
||||||
|
self.assertTrue(xpath_element(doc, 'div/bar') is None)
|
||||||
|
self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
|
||||||
|
|
||||||
def test_xpath_text(self):
|
def test_xpath_text(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
<div>
|
<div>
|
||||||
@@ -272,9 +287,25 @@ class TestUtil(unittest.TestCase):
|
|||||||
</root>'''
|
</root>'''
|
||||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||||
|
self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
|
||||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||||
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
||||||
|
|
||||||
|
def test_xpath_attr(self):
|
||||||
|
testxml = '''<root>
|
||||||
|
<div>
|
||||||
|
<p x="a">Foo</p>
|
||||||
|
</div>
|
||||||
|
</root>'''
|
||||||
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
|
||||||
|
self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
|
||||||
|
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
|
||||||
|
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
|
||||||
|
|
||||||
def test_smuggle_url(self):
|
def test_smuggle_url(self):
|
||||||
data = {"ö": "ö", "abc": [3]}
|
data = {"ö": "ö", "abc": [3]}
|
||||||
url = 'https://foo.bar/baz?x=y#a'
|
url = 'https://foo.bar/baz?x=y#a'
|
||||||
@@ -646,6 +677,51 @@ The first line
|
|||||||
'''
|
'''
|
||||||
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
||||||
|
|
||||||
|
def test_cli_option(self):
|
||||||
|
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||||
|
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||||
|
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||||
|
|
||||||
|
def test_cli_valueless_option(self):
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
|
||||||
|
self.assertEqual(cli_valueless_option(
|
||||||
|
{'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
|
||||||
|
|
||||||
|
def test_cli_bool_option(self):
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
|
||||||
|
['--no-check-certificate', 'true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
|
||||||
|
['--no-check-certificate=true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
|
||||||
|
['--check-certificate', 'false'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
|
||||||
|
['--check-certificate=false'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
|
||||||
|
['--check-certificate', 'true'])
|
||||||
|
self.assertEqual(
|
||||||
|
cli_bool_option(
|
||||||
|
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
|
||||||
|
['--check-certificate=true'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ params = get_params({
|
|||||||
|
|
||||||
|
|
||||||
TEST_ID = 'gr51aVj-mLg'
|
TEST_ID = 'gr51aVj-mLg'
|
||||||
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
|
ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
|
||||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ from .utils import (
|
|||||||
version_tuple,
|
version_tuple,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
@@ -284,7 +285,11 @@ class YoutubeDL(object):
|
|||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
self._err_file = sys.stderr
|
self._err_file = sys.stderr
|
||||||
self.params = params
|
self.params = {
|
||||||
|
# Default parameters
|
||||||
|
'nocheckcertificate': False,
|
||||||
|
}
|
||||||
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
if params.get('bidi_workaround', False):
|
if params.get('bidi_workaround', False):
|
||||||
@@ -1939,8 +1944,7 @@ class YoutubeDL(object):
|
|||||||
if os.access(opts_cookiefile, os.R_OK):
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
self.cookiejar.load()
|
self.cookiejar.load()
|
||||||
|
|
||||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||||
self.cookiejar)
|
|
||||||
if opts_proxy is not None:
|
if opts_proxy is not None:
|
||||||
if opts_proxy == '':
|
if opts_proxy == '':
|
||||||
proxies = {}
|
proxies = {}
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ import codecs
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import shlex
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@@ -20,6 +19,7 @@ from .compat import (
|
|||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_print,
|
compat_print,
|
||||||
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
@@ -262,10 +262,10 @@ def _real_main(argv=None):
|
|||||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
external_downloader_args = None
|
external_downloader_args = None
|
||||||
if opts.external_downloader_args:
|
if opts.external_downloader_args:
|
||||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||||
postprocessor_args = None
|
postprocessor_args = None
|
||||||
if opts.postprocessor_args:
|
if opts.postprocessor_args:
|
||||||
postprocessor_args = shlex.split(opts.postprocessor_args)
|
postprocessor_args = compat_shlex_split(opts.postprocessor_args)
|
||||||
match_filter = (
|
match_filter = (
|
||||||
None if opts.match_filter is None
|
None if opts.match_filter is None
|
||||||
else match_filter_func(opts.match_filter))
|
else match_filter_func(opts.match_filter))
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import getpass
|
|||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shlex
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -79,6 +80,11 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
import BaseHTTPServer as compat_http_server
|
import BaseHTTPServer as compat_http_server
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_str = unicode # Python 2
|
||||||
|
except NameError:
|
||||||
|
compat_str = str
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
@@ -99,7 +105,7 @@ except ImportError: # Python 2
|
|||||||
# Is it a string-like object?
|
# Is it a string-like object?
|
||||||
string.split
|
string.split
|
||||||
return b''
|
return b''
|
||||||
if isinstance(string, unicode):
|
if isinstance(string, compat_str):
|
||||||
string = string.encode('utf-8')
|
string = string.encode('utf-8')
|
||||||
bits = string.split(b'%')
|
bits = string.split(b'%')
|
||||||
if len(bits) == 1:
|
if len(bits) == 1:
|
||||||
@@ -149,11 +155,6 @@ except ImportError: # Python 2
|
|||||||
string = string.replace('+', ' ')
|
string = string.replace('+', ' ')
|
||||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||||
|
|
||||||
try:
|
|
||||||
compat_str = unicode # Python 2
|
|
||||||
except NameError:
|
|
||||||
compat_str = str
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
compat_basestring = basestring # Python 2
|
compat_basestring = basestring # Python 2
|
||||||
except NameError:
|
except NameError:
|
||||||
@@ -227,6 +228,17 @@ except ImportError: # Python < 3.3
|
|||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info >= (2, 7, 3):
|
||||||
|
compat_shlex_split = shlex.split
|
||||||
|
else:
|
||||||
|
# Working around shlex issue with unicode strings on some python 2
|
||||||
|
# versions (see http://bugs.python.org/issue1548891)
|
||||||
|
def compat_shlex_split(s, comments=False, posix=True):
|
||||||
|
if isinstance(s, compat_str):
|
||||||
|
s = s.encode('utf-8')
|
||||||
|
return shlex.split(s, comments, posix)
|
||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int:
|
if type(c) is int:
|
||||||
return c
|
return c
|
||||||
@@ -459,6 +471,7 @@ __all__ = [
|
|||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
|
'compat_shlex_split',
|
||||||
'compat_socket_create_connection',
|
'compat_socket_create_connection',
|
||||||
'compat_str',
|
'compat_str',
|
||||||
'compat_subprocess_get_DEVNULL',
|
'compat_subprocess_get_DEVNULL',
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ import subprocess
|
|||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
cli_option,
|
||||||
|
cli_valueless_option,
|
||||||
|
cli_bool_option,
|
||||||
|
cli_configuration_args,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
)
|
)
|
||||||
@@ -46,19 +50,16 @@ class ExternalFD(FileDownloader):
|
|||||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||||
|
|
||||||
def _option(self, command_option, param):
|
def _option(self, command_option, param):
|
||||||
param = self.params.get(param)
|
return cli_option(self.params, command_option, param)
|
||||||
if param is None:
|
|
||||||
return []
|
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
|
||||||
if isinstance(param, bool):
|
return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
|
||||||
return [command_option]
|
|
||||||
return [command_option, param]
|
def _valueless_option(self, command_option, param, expected_value=True):
|
||||||
|
return cli_valueless_option(self.params, command_option, param, expected_value)
|
||||||
|
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
ex_args = self.params.get('external_downloader_args')
|
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
||||||
if ex_args is None:
|
|
||||||
return default
|
|
||||||
assert isinstance(ex_args, list)
|
|
||||||
return ex_args
|
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
@@ -80,6 +81,8 @@ class CurlFD(ExternalFD):
|
|||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
|
cmd += self._option('--proxy', 'proxy')
|
||||||
|
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@@ -102,7 +105,7 @@ class WgetFD(ExternalFD):
|
|||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--bind-address', 'source_address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@@ -121,6 +124,7 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class HlsFD(FileDownloader):
|
|||||||
args = [
|
args = [
|
||||||
encodeArgument(opt)
|
encodeArgument(opt)
|
||||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||||
args.append(encodeFilename(tmpfilename, True))
|
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||||
|
|
||||||
self._debug_cmd(args)
|
self._debug_cmd(args)
|
||||||
|
|
||||||
@@ -92,6 +92,7 @@ class NativeHlsFD(FragmentFD):
|
|||||||
return False
|
return False
|
||||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||||
ctx['dest_stream'].write(down.read())
|
ctx['dest_stream'].write(down.read())
|
||||||
|
down.close()
|
||||||
frags_filenames.append(frag_sanitized)
|
frags_filenames.append(frag_sanitized)
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|||||||
@@ -138,7 +138,6 @@ from .dump import DumpIE
|
|||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
from .divxstage import DivxStageIE
|
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
@@ -347,7 +346,6 @@ from .mtv import (
|
|||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .musicvault import MusicVaultIE
|
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
from .mwave import MwaveIE
|
from .mwave import MwaveIE
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
@@ -368,6 +366,9 @@ from .nbc import (
|
|||||||
from .ndr import (
|
from .ndr import (
|
||||||
NDRIE,
|
NDRIE,
|
||||||
NJoyIE,
|
NJoyIE,
|
||||||
|
NDREmbedBaseIE,
|
||||||
|
NDREmbedIE,
|
||||||
|
NJoyEmbedIE,
|
||||||
)
|
)
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .netzkino import NetzkinoIE
|
from .netzkino import NetzkinoIE
|
||||||
@@ -403,7 +404,11 @@ from .normalboots import NormalbootsIE
|
|||||||
from .nosvideo import NosVideoIE
|
from .nosvideo import NosVideoIE
|
||||||
from .nova import NovaIE
|
from .nova import NovaIE
|
||||||
from .novamov import NovaMovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import (
|
||||||
|
NownessIE,
|
||||||
|
NownessPlaylistIE,
|
||||||
|
NownessSeriesIE,
|
||||||
|
)
|
||||||
from .nowtv import NowTVIE
|
from .nowtv import NowTVIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .npo import (
|
from .npo import (
|
||||||
@@ -433,7 +438,6 @@ from .ooyala import (
|
|||||||
OoyalaIE,
|
OoyalaIE,
|
||||||
OoyalaExternalIE,
|
OoyalaExternalIE,
|
||||||
)
|
)
|
||||||
from .openfilm import OpenFilmIE
|
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
@@ -743,6 +747,7 @@ from .vk import (
|
|||||||
VKIE,
|
VKIE,
|
||||||
VKUserVideosIE,
|
VKUserVideosIE,
|
||||||
)
|
)
|
||||||
|
from .vlive import VLiveIE
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .voicerepublic import VoiceRepublicIE
|
from .voicerepublic import VoiceRepublicIE
|
||||||
from .vporn import VpornIE
|
from .vporn import VpornIE
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class AcademicEarthCourseIE(InfoExtractor):
|
|||||||
'title': 'Laws of Nature',
|
'title': 'Laws of Nature',
|
||||||
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
|
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 3,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
file_els = idoc.findall('.//files/file')
|
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||||
|
|
||||||
for file_el in file_els:
|
for file_el in file_els:
|
||||||
bitrate = file_el.attrib.get('bitrate')
|
bitrate = file_el.attrib.get('bitrate')
|
||||||
|
|||||||
@@ -20,14 +20,14 @@ class AirMozillaIE(InfoExtractor):
|
|||||||
'id': '6x4q2w',
|
'id': '6x4q2w',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||||
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
|
'thumbnail': 're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
|
||||||
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||||
'timestamp': 1422487800,
|
'timestamp': 1422487800,
|
||||||
'upload_date': '20150128',
|
'upload_date': '20150128',
|
||||||
'location': 'SFO Commons',
|
'location': 'SFO Commons',
|
||||||
'duration': 3780,
|
'duration': 3780,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'categories': ['Main'],
|
'categories': ['Main', 'Privacy'],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ class AlJazeeraIE(InfoExtractor):
|
|||||||
'uploader': 'Al Jazeera English',
|
'uploader': 'Al Jazeera English',
|
||||||
},
|
},
|
||||||
'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||||
json_url = self._html_search_regex(
|
json_url = self._html_search_regex(
|
||||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||||
webpage, 'json vp url')
|
webpage, 'json vp url', default=None)
|
||||||
|
if not json_url:
|
||||||
|
iframe_url = self._html_search_regex(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||||
|
webpage, 'iframe url', group='url')
|
||||||
|
json_url = compat_parse_qs(
|
||||||
|
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIASELECTOR_URLS = [
|
||||||
|
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -189,6 +190,12 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
# Skip DASH until supported
|
# Skip DASH until supported
|
||||||
elif transfer_format == 'dash':
|
elif transfer_format == 'dash':
|
||||||
pass
|
pass
|
||||||
|
elif transfer_format == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=supplier, fatal=False)
|
||||||
|
if m3u8_formats:
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
# Direct link
|
# Direct link
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
|
|||||||
@@ -17,16 +17,14 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||||
|
_TESTS = [{
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '214411058091220',
|
'id': '61924494876951776',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hyde Park Civilizace',
|
'title': 'Hyde Park Civilizace',
|
||||||
'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
|
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'duration': 3350,
|
'duration': 3350,
|
||||||
},
|
},
|
||||||
@@ -34,11 +32,10 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '14716',
|
'id': '61924494876844374',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||||
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
||||||
@@ -49,23 +46,52 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video with 18+ caution trailer
|
||||||
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '215562210900007-bogotart',
|
||||||
|
'title': 'Queer: Bogotart',
|
||||||
|
'description': 'Alternativní průvodce současným queer světem',
|
||||||
},
|
},
|
||||||
]
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494876844842',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Varování 18+)',
|
||||||
|
'duration': 10.2,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '61924494877068022',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Queer: Bogotart (Queer)',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 1558.3,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
typ = self._html_search_regex(
|
||||||
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||||
|
episode_id = self._html_search_regex(
|
||||||
|
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'playlist[0][type]': typ,
|
'playlist[0][type]': typ,
|
||||||
@@ -83,7 +109,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlistpage = self._download_json(req, video_id)
|
playlistpage = self._download_json(req, playlist_id)
|
||||||
|
|
||||||
playlist_url = playlistpage['url']
|
playlist_url = playlistpage['url']
|
||||||
if playlist_url == 'error_region':
|
if playlist_url == 'error_region':
|
||||||
@@ -92,33 +118,43 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlist = self._download_json(req, video_id)
|
playlist_title = self._og_search_title(webpage)
|
||||||
|
playlist_description = self._og_search_description(webpage)
|
||||||
|
|
||||||
item = playlist['playlist'][0]
|
playlist = self._download_json(req, playlist_id)['playlist']
|
||||||
|
playlist_len = len(playlist)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for item in playlist:
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item['streamUrls'].items():
|
for format_id, stream_url in item['streamUrls'].items():
|
||||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
item_id = item.get('id') or item['assetId']
|
||||||
description = self._og_search_description(webpage)
|
title = item['title']
|
||||||
|
|
||||||
duration = float_or_none(item.get('duration'))
|
duration = float_or_none(item.get('duration'))
|
||||||
thumbnail = item.get('previewImageUrl')
|
thumbnail = item.get('previewImageUrl')
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
if item.get('type') == 'VOD':
|
||||||
subs = item.get('subtitles')
|
subs = item.get('subtitles')
|
||||||
if subs:
|
if subs:
|
||||||
subtitles = self.extract_subtitles(episode_id, subs)
|
subtitles = self.extract_subtitles(episode_id, subs)
|
||||||
|
|
||||||
return {
|
entries.append({
|
||||||
'id': episode_id,
|
'id': item_id,
|
||||||
'title': title,
|
'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
|
||||||
'description': description,
|
'description': playlist_description if playlist_len == 1 else None,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def _get_subtitles(self, episode_id, subs):
|
def _get_subtitles(self, episode_id, subs):
|
||||||
original_subtitles = self._download_webpage(
|
original_subtitles = self._download_webpage(
|
||||||
|
|||||||
@@ -12,9 +12,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ClubicIE(InfoExtractor):
|
class ClubicIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
||||||
'md5': '1592b694ba586036efac1776b0b43cd3',
|
'md5': '1592b694ba586036efac1776b0b43cd3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -24,7 +24,10 @@ class ClubicIE(InfoExtractor):
|
|||||||
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
|
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
|
||||||
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
|
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|||||||
@@ -516,6 +516,12 @@ class InfoExtractor(object):
|
|||||||
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s. You might want to use --proxy to workaround.' % msg,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
# Methods for following #608
|
# Methods for following #608
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||||
@@ -731,9 +737,10 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
|
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||||
hidden_inputs = {}
|
hidden_inputs = {}
|
||||||
for input in re.findall(r'<input([^>]+)>', html):
|
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
||||||
if not re.search(r'type=(["\'])hidden\1', input):
|
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||||
continue
|
continue
|
||||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||||
if not name:
|
if not name:
|
||||||
@@ -746,7 +753,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _form_hidden_inputs(self, form_id, html):
|
def _form_hidden_inputs(self, form_id, html):
|
||||||
form = self._search_regex(
|
form = self._search_regex(
|
||||||
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||||
html, '%s form' % form_id, group='form')
|
html, '%s form' % form_id, group='form')
|
||||||
return self._hidden_inputs(form)
|
return self._hidden_inputs(form)
|
||||||
|
|
||||||
|
|||||||
@@ -20,16 +20,34 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
|
int_or_none,
|
||||||
remove_end,
|
remove_end,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollBaseIE(InfoExtractor):
|
||||||
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||||
|
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
|
else compat_urllib_request.Request(url_or_request))
|
||||||
|
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||||
|
# similar to https://github.com/rg3/youtube-dl/issues/6797.
|
||||||
|
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
|
||||||
|
# should be imposed or not (from what I can see it just takes the first language
|
||||||
|
# ignoring the priority and requires it to correspond the IP). By the way this causes
|
||||||
|
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||||
|
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||||
|
request.add_header('Accept-Language', '*')
|
||||||
|
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||||
|
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||||
|
|
||||||
|
|
||||||
|
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -257,10 +275,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||||
if not video_description:
|
if not video_description:
|
||||||
video_description = None
|
video_description = None
|
||||||
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
video_upload_date = self._html_search_regex(
|
||||||
|
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
||||||
|
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||||
if video_upload_date:
|
if video_upload_date:
|
||||||
video_upload_date = unified_strdate(video_upload_date)
|
video_upload_date = unified_strdate(video_upload_date)
|
||||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
video_uploader = self._html_search_regex(
|
||||||
|
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
||||||
|
'video_uploader', fatal=False)
|
||||||
|
|
||||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||||
@@ -286,6 +308,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
video_url = stream_info.find('./host').text
|
video_url = stream_info.find('./host').text
|
||||||
video_play_path = stream_info.find('./file').text
|
video_play_path = stream_info.find('./file').text
|
||||||
|
metadata = stream_info.find('./metadata')
|
||||||
|
format_info = {
|
||||||
|
'format': video_format,
|
||||||
|
'format_id': video_format,
|
||||||
|
'height': int_or_none(xpath_text(metadata, './height')),
|
||||||
|
'width': int_or_none(xpath_text(metadata, './width')),
|
||||||
|
}
|
||||||
|
|
||||||
if '.fplive.net/' in video_url:
|
if '.fplive.net/' in video_url:
|
||||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||||
@@ -294,19 +323,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
netloc='v.lvlt.crcdn.net',
|
netloc='v.lvlt.crcdn.net',
|
||||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||||
formats.append({
|
format_info.update({
|
||||||
'url': direct_video_url,
|
'url': direct_video_url,
|
||||||
'format_id': video_format,
|
|
||||||
})
|
})
|
||||||
|
formats.append(format_info)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
formats.append({
|
format_info.update({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'play_path': video_play_path,
|
'play_path': video_play_path,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': video_format,
|
|
||||||
'format_id': video_format,
|
|
||||||
})
|
})
|
||||||
|
formats.append(format_info)
|
||||||
|
|
||||||
subtitles = self.extract_subtitles(video_id, webpage)
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
|
||||||
@@ -322,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollShowPlaylistIE(InfoExtractor):
|
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||||
IE_NAME = "crunchyroll:playlist"
|
IE_NAME = "crunchyroll:playlist"
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
||||||
|
|
||||||
|
|||||||
@@ -44,8 +44,8 @@ class DCNIE(InfoExtractor):
|
|||||||
title = video.get('title_en') or video['title_ar']
|
title = video.get('title_en') or video['title_ar']
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
|
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||||
+ compat_urllib_parse.urlencode({
|
compat_urllib_parse.urlencode({
|
||||||
'id': video['id'],
|
'id': video['id'],
|
||||||
'user_id': video['user_id'],
|
'user_id': video['user_id'],
|
||||||
'signature': video['signature'],
|
'signature': video['signature'],
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .novamov import NovaMovIE
|
|
||||||
|
|
||||||
|
|
||||||
class DivxStageIE(NovaMovIE):
|
|
||||||
IE_NAME = 'divxstage'
|
|
||||||
IE_DESC = 'DivxStage'
|
|
||||||
|
|
||||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
|
|
||||||
|
|
||||||
_HOST = 'www.divxstage.eu'
|
|
||||||
|
|
||||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
|
||||||
_TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
|
|
||||||
_DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
|
|
||||||
'md5': '63969f6eb26533a1968c4d325be63e72',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '57f238e2e5e01',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'youtubedl test video',
|
|
||||||
'description': 'This is a test video for youtubedl.',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -79,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
age_limit = 0 if age_restriction == 'allow_all' else 18
|
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||||
|
|
||||||
m3u8_data = self._download_json(
|
m3u8_data = self._download_json(
|
||||||
media['sources']['secure_m3u8']['auto'],
|
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
|
||||||
video_id, 'Downloading m3u8 JSON')
|
video_id, 'Downloading m3u8 JSON')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -55,10 +56,7 @@ class FC2IE(InfoExtractor):
|
|||||||
'Submit': ' Login ',
|
'Submit': ' Login ',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
@@ -230,6 +231,22 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': False,
|
'skip_download': False,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# redirect in Refresh HTTP header
|
||||||
|
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pO8h3EaFRdo',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
|
||||||
|
'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
|
||||||
|
'upload_date': '20150917',
|
||||||
|
'uploader_id': 'brtvofficial',
|
||||||
|
'uploader': 'Boiler Room',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': False,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||||
@@ -1797,7 +1814,7 @@ class GenericIE(InfoExtractor):
|
|||||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
found = re.search(
|
found = re.search(
|
||||||
@@ -1808,6 +1825,9 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look also in Refresh HTTP header
|
# Look also in Refresh HTTP header
|
||||||
refresh_header = head_response.headers.get('Refresh')
|
refresh_header = head_response.headers.get('Refresh')
|
||||||
if refresh_header:
|
if refresh_header:
|
||||||
|
# In python 2 response HTTP headers are bytestrings
|
||||||
|
if sys.version_info < (3, 0) and isinstance(refresh_header, str):
|
||||||
|
refresh_header = refresh_header.decode('iso-8859-1')
|
||||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||||
|
|||||||
@@ -10,15 +10,16 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
encode_dict,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GorillaVidIE(InfoExtractor):
|
class GorillaVidIE(InfoExtractor):
|
||||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
|
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<host>(?:www\.)?
|
https?://(?P<host>(?:www\.)?
|
||||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
|
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
|
||||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@@ -67,13 +68,22 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://filehoot.com/3ivfabn7573c.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3ivfabn7573c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||||
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
|
url = 'http://%s/%s' % (mobj.group('host'), video_id)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
@@ -87,7 +97,7 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
if countdown:
|
if countdown:
|
||||||
self._sleep(countdown, video_id)
|
self._sleep(countdown, video_id)
|
||||||
|
|
||||||
post = compat_urllib_parse.urlencode(fields)
|
post = compat_urllib_parse.urlencode(encode_dict(fields))
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url, post)
|
req = compat_urllib_request.Request(url, post)
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
@@ -95,7 +105,7 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
|
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
|
||||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||||
|
|||||||
@@ -202,6 +202,7 @@ class KuwoSingerIE(InfoExtractor):
|
|||||||
'title': 'Ali',
|
'title': 'Ali',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 95,
|
'playlist_mincount': 95,
|
||||||
|
'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ class TechTVMITIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
||||||
'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
|
'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '25418',
|
'id': '25418',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MIT DNA Learning Center Set',
|
'title': 'MIT DNA and Protein Sets',
|
||||||
'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
|
'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,8 +33,8 @@ class TechTVMITIE(InfoExtractor):
|
|||||||
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
||||||
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
||||||
|
|
||||||
base_url = self._search_regex(
|
base_url = self._proto_relative_url(self._search_regex(
|
||||||
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
|
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
|
||||||
formats_json = self._search_regex(
|
formats_json = self._search_regex(
|
||||||
r'bitrates: (\[.+?\])', raw_page, 'video formats')
|
r'bitrates: (\[.+?\])', raw_page, 'video formats')
|
||||||
formats_mit = json.loads(formats_json)
|
formats_mit = json.loads(formats_json)
|
||||||
|
|||||||
@@ -1,74 +1,85 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_dict,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
parse_duration,
|
int_or_none,
|
||||||
strip_jsonp,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MiTeleIE(InfoExtractor):
|
class MiTeleIE(InfoExtractor):
|
||||||
IE_NAME = 'mitele.es'
|
IE_DESC = 'mitele.es'
|
||||||
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||||
|
'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0fce117d',
|
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Programa 144 - Tor, la web invisible',
|
|
||||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
|
||||||
'display_id': 'programa-144',
|
'display_id': 'programa-144',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tor, la web invisible',
|
||||||
|
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||||
|
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||||
'duration': 2913,
|
'duration': 2913,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode)
|
|
||||||
embed_data_json = self._search_regex(
|
|
||||||
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
|
||||||
).replace('\'', '"')
|
|
||||||
embed_data = json.loads(embed_data_json)
|
|
||||||
|
|
||||||
domain = embed_data['mediaUrl']
|
webpage = self._download_webpage(url, display_id)
|
||||||
if not domain.startswith('http'):
|
|
||||||
# only happens in telecinco.es videos
|
|
||||||
domain = 'http://' + domain
|
|
||||||
info_url = compat_urlparse.urljoin(
|
|
||||||
domain,
|
|
||||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
|
||||||
)
|
|
||||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
|
||||||
|
|
||||||
video_link = info_el.find('videoUrl/link').text
|
config_url = self._search_regex(
|
||||||
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||||
token_info = self._download_json(
|
|
||||||
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
config = self._download_json(
|
||||||
episode,
|
config_url, display_id, 'Downloading config JSON')
|
||||||
transform_source=strip_jsonp
|
|
||||||
)
|
mmc = self._download_json(
|
||||||
formats = self._extract_m3u8_formats(
|
config['services']['mmc'], display_id, 'Downloading mmc JSON')
|
||||||
token_info['tokenizedUrl'], episode, ext='mp4')
|
|
||||||
|
formats = []
|
||||||
|
for location in mmc['locations']:
|
||||||
|
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||||
|
bas = location.get('bas')
|
||||||
|
loc = location.get('loc')
|
||||||
|
ogn = location.get('ogn')
|
||||||
|
if None in (gat, bas, loc, ogn):
|
||||||
|
continue
|
||||||
|
token_data = {
|
||||||
|
'bas': bas,
|
||||||
|
'icd': loc,
|
||||||
|
'ogn': ogn,
|
||||||
|
'sta': '0',
|
||||||
|
}
|
||||||
|
media = self._download_json(
|
||||||
|
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')),
|
||||||
|
display_id, 'Downloading %s JSON' % location['loc'])
|
||||||
|
file_ = media.get('file')
|
||||||
|
if not file_:
|
||||||
|
continue
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||||
|
display_id, f4m_id=loc))
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-media-id\s*=\s*"([^"]+)"', webpage,
|
||||||
|
'data media id', default=None) or display_id
|
||||||
|
thumbnail = config.get('poster', {}).get('imageUrl')
|
||||||
|
duration = int_or_none(mmc.get('duration'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': embed_data['videoId'],
|
'id': video_id,
|
||||||
'display_id': episode,
|
'display_id': display_id,
|
||||||
'title': info_el.find('title').text,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
'description': get_element_by_attribute('class', 'text', webpage),
|
'description': get_element_by_attribute('class', 'text', webpage),
|
||||||
'thumbnail': info_el.find('thumb').text,
|
'thumbnail': thumbnail,
|
||||||
'duration': parse_duration(info_el.find('duration').text),
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,63 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class MusicVaultIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
|
|
||||||
'md5': '3adcbdb3dcc02d647539e53f284ba171',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1010863',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'uploader_id': 'the-allman-brothers-band',
|
|
||||||
'title': 'Straight from the Heart',
|
|
||||||
'duration': 244,
|
|
||||||
'uploader': 'The Allman Brothers Band',
|
|
||||||
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
|
||||||
'upload_date': '20131219',
|
|
||||||
'location': 'Capitol Theatre (Passaic, NJ)',
|
|
||||||
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
|
|
||||||
'timestamp': int,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'<meta itemprop="thumbnail" content="([^"]+)"',
|
|
||||||
webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
data_div = self._search_regex(
|
|
||||||
r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<h2.*?>(.*?)</h2>', data_div, 'title')
|
|
||||||
location = self._html_search_regex(
|
|
||||||
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
|
|
||||||
|
|
||||||
kaltura_id = self._search_regex(
|
|
||||||
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
|
|
||||||
webpage, 'kaltura ID')
|
|
||||||
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': mobj.group('id'),
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
|
|
||||||
'ie_key': 'Kaltura',
|
|
||||||
'display_id': display_id,
|
|
||||||
'uploader_id': mobj.group('uploader_id'),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': self._html_search_meta('description', webpage),
|
|
||||||
'location': location,
|
|
||||||
'title': title,
|
|
||||||
'uploader': uploader,
|
|
||||||
}
|
|
||||||
@@ -1,130 +1,380 @@
|
|||||||
# encoding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
qualities,
|
qualities,
|
||||||
parse_duration,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NDRBaseIE(InfoExtractor):
|
class NDRBaseIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
return self._extract_embed(webpage, display_id)
|
||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
|
||||||
|
|
||||||
title = self._og_search_title(page).strip()
|
|
||||||
description = self._og_search_description(page)
|
|
||||||
if description:
|
|
||||||
description = description.strip()
|
|
||||||
|
|
||||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
|
|
||||||
if not duration:
|
|
||||||
duration = parse_duration(self._html_search_regex(
|
|
||||||
r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
|
|
||||||
page, 'duration', default=None))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
|
||||||
if mp3_url:
|
|
||||||
formats.append({
|
|
||||||
'url': mp3_url.group('audio'),
|
|
||||||
'format_id': 'mp3',
|
|
||||||
})
|
|
||||||
|
|
||||||
thumbnail = None
|
|
||||||
|
|
||||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page)
|
|
||||||
if video_url:
|
|
||||||
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
|
|
||||||
if thumbnails:
|
|
||||||
quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
|
|
||||||
largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
|
|
||||||
thumbnail = 'http://www.ndr.de' + largest[0]
|
|
||||||
|
|
||||||
for format_id in 'lo', 'hi', 'hq':
|
|
||||||
formats.append({
|
|
||||||
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
raise ExtractorError('No media links available for %s' % video_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NDRIE(NDRBaseIE):
|
class NDRIE(NDRBaseIE):
|
||||||
IE_NAME = 'ndr'
|
IE_NAME = 'ndr'
|
||||||
IE_DESC = 'NDR.de - Mediathek'
|
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||||
|
_TESTS = [{
|
||||||
_TESTS = [
|
# httpVideo, same content id
|
||||||
{
|
|
||||||
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
|
||||||
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
|
||||||
'note': 'Video file',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '25866',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Kartoffeltage in der Lewitz',
|
|
||||||
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
|
||||||
'duration': 166,
|
|
||||||
},
|
|
||||||
'skip': '404 Not found',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||||
'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
|
'md5': '6515bc255dc5c5f8c85bbc38e035a659',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '988',
|
'id': 'hafengeburtstag988',
|
||||||
|
'display_id': 'Party-Poette-und-Parade',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Party, Pötte und Parade',
|
'title': 'Party, Pötte und Parade',
|
||||||
'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
|
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'timestamp': 1431108900,
|
||||||
|
'upload_date': '20150510',
|
||||||
'duration': 3498,
|
'duration': 3498,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
# httpVideo, different content id
|
||||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||||
'note': 'Audio file',
|
'md5': '1043ff203eab307f0c51702ec49e9a71',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '51535',
|
'id': 'osna272',
|
||||||
|
'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
|
||||||
|
'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'timestamp': 1442059200,
|
||||||
|
'upload_date': '20150912',
|
||||||
|
'duration': 510,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# httpAudio, same content id
|
||||||
|
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||||
|
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'audio51535',
|
||||||
|
'display_id': 'La-Valette-entgeht-der-Hinrichtung',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'La Valette entgeht der Hinrichtung',
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||||
|
'uploader': 'ndrinfo',
|
||||||
|
'timestamp': 1290626100,
|
||||||
|
'upload_date': '20140729',
|
||||||
'duration': 884,
|
'duration': 884,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_embed(self, webpage, display_id):
|
||||||
|
embed_url = self._html_search_meta(
|
||||||
|
'embedURL', webpage, 'embed URL', fatal=True)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
timestamp = parse_iso8601(
|
||||||
|
self._search_regex(
|
||||||
|
r'<span itemprop="datePublished" content="([^"]+)">',
|
||||||
|
webpage, 'upload date', fatal=False))
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': embed_url,
|
||||||
|
'display_id': display_id,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': timestamp,
|
||||||
}
|
}
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class NJoyIE(NDRBaseIE):
|
class NJoyIE(NDRBaseIE):
|
||||||
IE_NAME = 'N-JOY'
|
IE_NAME = 'njoy'
|
||||||
_VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
|
IE_DESC = 'N-JOY'
|
||||||
|
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# httpVideo, same content id
|
||||||
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
||||||
'md5': 'cb63be60cd6f9dd75218803146d8dc67',
|
'md5': 'cb63be60cd6f9dd75218803146d8dc67',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2480',
|
'id': 'comedycontest2480',
|
||||||
|
'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Benaissa beim NDR Comedy Contest',
|
'title': 'Benaissa beim NDR Comedy Contest',
|
||||||
'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
|
'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20141129',
|
||||||
'duration': 654,
|
'duration': 654,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# httpVideo, different content id
|
||||||
|
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||||
|
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dockville882',
|
||||||
|
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
||||||
|
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
||||||
|
'uploader': 'njoy',
|
||||||
|
'upload_date': '20150822',
|
||||||
|
'duration': 211,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_embed(self, webpage, display_id):
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'NDREmbedBase',
|
||||||
|
'url': 'ndr:%s' % video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'description': description,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NDREmbedBaseIE(InfoExtractor):
|
||||||
|
IE_NAME = 'ndr:embed:base'
|
||||||
|
_VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'ndr:soundcheck3366',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id') or mobj.group('id_s')
|
||||||
|
|
||||||
|
ppjson = self._download_json(
|
||||||
|
'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
|
||||||
|
|
||||||
|
playlist = ppjson['playlist']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
|
||||||
|
|
||||||
|
for format_id, f in playlist.items():
|
||||||
|
src = f.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(src, None)
|
||||||
|
if ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||||
|
else:
|
||||||
|
quality = f.get('quality')
|
||||||
|
ff = {
|
||||||
|
'url': src,
|
||||||
|
'format_id': quality or format_id,
|
||||||
|
'quality': quality_key(quality),
|
||||||
}
|
}
|
||||||
|
type_ = f.get('type')
|
||||||
|
if type_ and type_.split('/')[0] == 'audio':
|
||||||
|
ff['vcodec'] = 'none'
|
||||||
|
ff['ext'] = ext or 'mp3'
|
||||||
|
formats.append(ff)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
config = playlist['config']
|
||||||
|
|
||||||
|
live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive']
|
||||||
|
title = config['title']
|
||||||
|
if live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
uploader = ppjson.get('config', {}).get('branding')
|
||||||
|
upload_date = ppjson.get('config', {}).get('publicationDate')
|
||||||
|
duration = int_or_none(config.get('duration'))
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'id': thumbnail.get('quality') or thumbnail_id,
|
||||||
|
'url': thumbnail['src'],
|
||||||
|
'preference': quality_key(thumbnail.get('quality')),
|
||||||
|
} for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'is_live': live,
|
||||||
|
'uploader': uploader if uploader != '-' else None,
|
||||||
|
'upload_date': upload_date[0:8] if upload_date else None,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NDREmbedIE(NDREmbedBaseIE):
|
||||||
|
IE_NAME = 'ndr:embed'
|
||||||
|
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||||
|
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ndraktuell28488',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Norddeutschland begrüßt Flüchtlinge',
|
||||||
|
'is_live': False,
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20150907',
|
||||||
|
'duration': 132,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
||||||
|
'md5': '002085c44bae38802d94ae5802a36e78',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'soundcheck3366',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
|
||||||
|
'is_live': False,
|
||||||
|
'uploader': 'ndr2',
|
||||||
|
'upload_date': '20150912',
|
||||||
|
'duration': 3554,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
||||||
|
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'audio51535',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
|
'is_live': False,
|
||||||
|
'uploader': 'ndrinfo',
|
||||||
|
'upload_date': '20140729',
|
||||||
|
'duration': 884,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
|
||||||
|
'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'visite11010',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Visite - die ganze Sendung',
|
||||||
|
'is_live': False,
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20150902',
|
||||||
|
'duration': 3525,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# httpVideoLive
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'livestream217',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
'upload_date': '20150910',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class NJoyEmbedIE(NDREmbedBaseIE):
|
||||||
|
IE_NAME = 'njoy:embed'
|
||||||
|
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
# httpVideo
|
||||||
|
'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
|
||||||
|
'md5': '8483cbfe2320bd4d28a349d62d88bd74',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'doku948',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
||||||
|
'is_live': False,
|
||||||
|
'upload_date': '20150807',
|
||||||
|
'duration': 1011,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# httpAudio
|
||||||
|
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
||||||
|
'md5': 'd989f80f28ac954430f7b8a48197188a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'stefanrichter100',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Interview mit einem Augenzeugen',
|
||||||
|
'is_live': False,
|
||||||
|
'uploader': 'njoy',
|
||||||
|
'upload_date': '20150909',
|
||||||
|
'duration': 140,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# httpAudioLive, no explicit ext
|
||||||
|
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'webradioweltweit100',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
'uploader': 'njoy',
|
||||||
|
'upload_date': '20150810',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|||||||
@@ -16,12 +16,53 @@ from ..utils import (
|
|||||||
|
|
||||||
class NFLIE(InfoExtractor):
|
class NFLIE(InfoExtractor):
|
||||||
IE_NAME = 'nfl.com'
|
IE_NAME = 'nfl.com'
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)
|
||||||
(?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
|
https?://
|
||||||
|
(?P<host>
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
nfl|
|
||||||
|
buffalobills|
|
||||||
|
miamidolphins|
|
||||||
|
patriots|
|
||||||
|
newyorkjets|
|
||||||
|
baltimoreravens|
|
||||||
|
bengals|
|
||||||
|
clevelandbrowns|
|
||||||
|
steelers|
|
||||||
|
houstontexans|
|
||||||
|
colts|
|
||||||
|
jaguars|
|
||||||
|
titansonline|
|
||||||
|
denverbroncos|
|
||||||
|
kcchiefs|
|
||||||
|
raiders|
|
||||||
|
chargers|
|
||||||
|
dallascowboys|
|
||||||
|
giants|
|
||||||
|
philadelphiaeagles|
|
||||||
|
redskins|
|
||||||
|
chicagobears|
|
||||||
|
detroitlions|
|
||||||
|
packers|
|
||||||
|
vikings|
|
||||||
|
atlantafalcons|
|
||||||
|
panthers|
|
||||||
|
neworleanssaints|
|
||||||
|
buccaneers|
|
||||||
|
azcardinals|
|
||||||
|
stlouisrams|
|
||||||
|
49ers|
|
||||||
|
seahawks
|
||||||
|
)\.com|
|
||||||
|
.+?\.clubs\.nfl\.com
|
||||||
|
)
|
||||||
|
)/
|
||||||
(?:.+?/)*
|
(?:.+?/)*
|
||||||
(?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
|
(?P<id>[^/#?&]+)
|
||||||
_TESTS = [
|
'''
|
||||||
{
|
_TESTS = [{
|
||||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -33,8 +74,7 @@ class NFLIE(InfoExtractor):
|
|||||||
'timestamp': 1411337580,
|
'timestamp': 1411337580,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -46,8 +86,7 @@ class NFLIE(InfoExtractor):
|
|||||||
'timestamp': 1388354455,
|
'timestamp': 1388354455,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ap3000000467607',
|
'id': '0ap3000000467607',
|
||||||
@@ -57,12 +96,24 @@ class NFLIE(InfoExtractor):
|
|||||||
'timestamp': 1422850320,
|
'timestamp': 1422850320,
|
||||||
'upload_date': '20150202',
|
'upload_date': '20150202',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
|
||||||
|
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'n-238346',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '10 Days at Gillette',
|
||||||
|
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
|
||||||
|
'timestamp': 1442618809,
|
||||||
|
'upload_date': '20150918',
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}, {
|
||||||
]
|
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def prepend_host(host, url):
|
def prepend_host(host, url):
|
||||||
@@ -95,13 +146,14 @@ class NFLIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||||
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
|
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
|
||||||
default='static/content/static/config/video/config.json'))
|
webpage, 'config URL', default='static/content/static/config/video/config.json',
|
||||||
|
group='config'))
|
||||||
# For articles, the id in the url is not the video id
|
# For articles, the id in the url is not the video id
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
|
r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1',
|
||||||
config = self._download_json(config_url, video_id,
|
webpage, 'video id', default=video_id, group='id')
|
||||||
note='Downloading player config')
|
config = self._download_json(config_url, video_id, 'Downloading player config')
|
||||||
url_template = NFLIE.prepend_host(
|
url_template = NFLIE.prepend_host(
|
||||||
host, '{contentURLTemplate:}'.format(**config))
|
host, '{contentURLTemplate:}'.format(**config))
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@@ -100,10 +101,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'mail': username,
|
'mail': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
}
|
}
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import str_to_int
|
from ..utils import str_to_int
|
||||||
@@ -9,61 +8,93 @@ from ..utils import str_to_int
|
|||||||
|
|
||||||
class NineGagIE(InfoExtractor):
|
class NineGagIE(InfoExtractor):
|
||||||
IE_NAME = '9gag'
|
IE_NAME = '9gag'
|
||||||
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
|
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||||
(?:
|
|
||||||
v/(?P<numid>[0-9]+)|
|
|
||||||
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
"url": "http://9gag.tv/v/1912",
|
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||||
"info_dict": {
|
'info_dict': {
|
||||||
"id": "1912",
|
'id': 'Kk2X5',
|
||||||
"ext": "mp4",
|
'ext': 'mp4',
|
||||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||||
'uploader': 'CompilationChannel',
|
'uploader': 'CompilationChannel',
|
||||||
'upload_date': '20131110',
|
'upload_date': '20131110',
|
||||||
"view_count": int,
|
'view_count': int,
|
||||||
"thumbnail": "re:^https?://",
|
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube']
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'KklwM',
|
'id': 'aKolP3',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'display_id': 'alternate-banned-opening-scene-of-gravity',
|
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||||
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
|
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||||
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
|
'uploader_id': 'rickmereki',
|
||||||
'uploader': 'Krishna Shenoi',
|
'uploader': 'Rick Mereki',
|
||||||
'upload_date': '20140401',
|
'upload_date': '20110803',
|
||||||
'uploader_id': 'krishnashenoi93',
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Vimeo'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://9gag.com/tv/p/KklwM',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://9gag.tv/p/Kk2X5',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_EXTERNAL_VIDEO_PROVIDER = {
|
||||||
|
'1': {
|
||||||
|
'url': '%s',
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
},
|
||||||
|
'2': {
|
||||||
|
'url': 'http://player.vimeo.com/video/%s',
|
||||||
|
'ie_key': 'Vimeo',
|
||||||
|
},
|
||||||
|
'3': {
|
||||||
|
'url': 'http://instagram.com/p/%s',
|
||||||
|
'ie_key': 'Instagram',
|
||||||
|
},
|
||||||
|
'4': {
|
||||||
|
'url': 'http://vine.co/v/%s',
|
||||||
|
'ie_key': 'Vine',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('numid') or mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = mobj.group('display_id') or video_id
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
post_view = json.loads(self._html_search_regex(
|
post_view = self._parse_json(
|
||||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
|
self._search_regex(
|
||||||
|
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||||
|
webpage, 'post view'),
|
||||||
|
display_id)
|
||||||
|
|
||||||
youtube_id = post_view['videoExternalId']
|
ie_key = None
|
||||||
|
source_url = post_view.get('sourceUrl')
|
||||||
|
if not source_url:
|
||||||
|
external_video_id = post_view['videoExternalId']
|
||||||
|
external_video_provider = post_view['videoExternalProvider']
|
||||||
|
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||||
|
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||||
title = post_view['title']
|
title = post_view['title']
|
||||||
description = post_view['description']
|
description = post_view.get('description')
|
||||||
view_count = str_to_int(post_view['externalView'])
|
view_count = str_to_int(post_view.get('externalView'))
|
||||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': youtube_id,
|
'url': source_url,
|
||||||
'ie_key': 'Youtube',
|
'ie_key': ie_key,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
|||||||
@@ -1,19 +1,55 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NownessIE(InfoExtractor):
|
class NownessBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
def _extract_url_result(self, post):
|
||||||
|
if post['type'] == 'video':
|
||||||
|
for media in post['media']:
|
||||||
|
if media['type'] == 'video':
|
||||||
|
video_id = media['content']
|
||||||
|
source = media['source']
|
||||||
|
if source == 'brightcove':
|
||||||
|
player_code = self._download_webpage(
|
||||||
|
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
|
||||||
|
note='Downloading player JavaScript',
|
||||||
|
errnote='Unable to download player JavaScript')
|
||||||
|
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
|
||||||
|
if bc_url is None:
|
||||||
|
raise ExtractorError('Could not find player definition')
|
||||||
|
return self.url_result(bc_url, 'Brightcove')
|
||||||
|
elif source == 'vimeo':
|
||||||
|
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||||
|
elif source == 'youtube':
|
||||||
|
return self.url_result(video_id, 'Youtube')
|
||||||
|
elif source == 'cinematique':
|
||||||
|
# youtube-dl currently doesn't support cinematique
|
||||||
|
# return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
|
||||||
|
pass
|
||||||
|
|
||||||
_TESTS = [
|
def _api_request(self, url, request_path):
|
||||||
{
|
display_id = self._match_id(url)
|
||||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
request = compat_urllib_request.Request(
|
||||||
|
'http://api.nowness.com/api/' + request_path % display_id,
|
||||||
|
headers={
|
||||||
|
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
|
||||||
|
})
|
||||||
|
return display_id, self._download_json(request, display_id)
|
||||||
|
|
||||||
|
|
||||||
|
class NownessIE(NownessBaseIE):
|
||||||
|
IE_NAME = 'nowness'
|
||||||
|
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
|
||||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2520295746001',
|
'id': '2520295746001',
|
||||||
@@ -22,10 +58,9 @@ class NownessIE(InfoExtractor):
|
|||||||
'description': 'Candor: The Art of Gesticulation',
|
'description': 'Candor: The Art of Gesticulation',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'uploader': 'Nowness',
|
'uploader': 'Nowness',
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
|
'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
|
||||||
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3716354522001',
|
'id': '3716354522001',
|
||||||
@@ -34,31 +69,66 @@ class NownessIE(InfoExtractor):
|
|||||||
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'uploader': 'Nowness',
|
'uploader': 'Nowness',
|
||||||
}
|
|
||||||
},
|
},
|
||||||
]
|
}, {
|
||||||
|
# vimeo
|
||||||
|
'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
|
||||||
|
'md5': '9a5a6a8edf806407e411296ab6bc2a49',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '130020913',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
|
||||||
|
'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'upload_date': '20150607',
|
||||||
|
'uploader': 'Cinema Sem Lei',
|
||||||
|
'uploader_id': 'cinemasemlei',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
_, post = self._api_request(url, 'post/getBySlug/%s')
|
||||||
video_id = mobj.group('slug')
|
return self._extract_url_result(post)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
player_url = self._search_regex(
|
|
||||||
r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL')
|
|
||||||
real_id = self._search_regex(
|
|
||||||
r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID')
|
|
||||||
|
|
||||||
player_code = self._download_webpage(
|
class NownessPlaylistIE(NownessBaseIE):
|
||||||
player_url, video_id,
|
IE_NAME = 'nowness:playlist'
|
||||||
note='Downloading player JavaScript',
|
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
|
||||||
errnote='Player download failed')
|
_TEST = {
|
||||||
player_code = player_code.replace("'+d+'", real_id)
|
'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
|
||||||
|
'info_dict': {
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
|
'id': '3286',
|
||||||
if bc_url is None:
|
},
|
||||||
raise ExtractorError('Could not find player definition')
|
'playlist_mincount': 8,
|
||||||
return {
|
|
||||||
'_type': 'url',
|
|
||||||
'url': bc_url,
|
|
||||||
'ie_key': 'Brightcove',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
|
||||||
|
entries = [self._extract_url_result(item) for item in playlist['items']]
|
||||||
|
return self.playlist_result(entries, playlist_id)
|
||||||
|
|
||||||
|
|
||||||
|
class NownessSeriesIE(NownessBaseIE):
|
||||||
|
IE_NAME = 'nowness:series'
|
||||||
|
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.nowness.com/series/60-seconds',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '60',
|
||||||
|
'title': '60 Seconds',
|
||||||
|
'description': 'One-minute wisdom in a new NOWNESS series',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 4,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id, series = self._api_request(url, 'series/getBySlug/%s')
|
||||||
|
entries = [self._extract_url_result(post) for post in series['posts']]
|
||||||
|
series_title = None
|
||||||
|
series_description = None
|
||||||
|
translations = series.get('translations', [])
|
||||||
|
if translations:
|
||||||
|
series_title = translations[0].get('title') or translations[0]['seoTitle']
|
||||||
|
series_description = translations[0].get('seoDescription')
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, compat_str(series['id']), series_title, series_description)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
|
|||||||
IE_NAME = 'nowvideo'
|
IE_NAME = 'nowvideo'
|
||||||
IE_DESC = 'NowVideo'
|
IE_DESC = 'NowVideo'
|
||||||
|
|
||||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
|
||||||
|
|
||||||
_HOST = 'www.nowvideo.ch'
|
_HOST = 'www.nowvideo.ch'
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
@@ -28,6 +29,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video has been blocked',
|
||||||
}, {
|
}, {
|
||||||
# metadataUrl
|
# metadataUrl
|
||||||
'url': 'http://ok.ru/video/63567059965189-0',
|
'url': 'http://ok.ru/video/63567059965189-0',
|
||||||
@@ -72,6 +74,12 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://ok.ru/video/%s' % video_id, video_id)
|
'http://ok.ru/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
|
error = self._search_regex(
|
||||||
|
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
|
||||||
|
webpage, 'error', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
player = self._parse_json(
|
player = self._parse_json(
|
||||||
unescapeHTML(self._search_regex(
|
unescapeHTML(self._search_regex(
|
||||||
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
|
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
|
||||||
|
|||||||
@@ -1,70 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_urllib_parse_unquote_plus
|
|
||||||
from ..utils import (
|
|
||||||
parse_iso8601,
|
|
||||||
parse_age_limit,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class OpenFilmIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.openfilm.com/videos/human-resources-remastered',
|
|
||||||
'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '32736',
|
|
||||||
'display_id': 'human-resources-remastered',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Human Resources (Remastered)',
|
|
||||||
'description': 'Social Engineering in the 20th Century.',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'duration': 7164,
|
|
||||||
'timestamp': 1334756988,
|
|
||||||
'upload_date': '20120418',
|
|
||||||
'uploader_id': '41117',
|
|
||||||
'view_count': int,
|
|
||||||
'age_limit': 0,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
player = compat_urllib_parse_unquote_plus(
|
|
||||||
self._og_search_video_url(webpage))
|
|
||||||
|
|
||||||
video = json.loads(self._search_regex(
|
|
||||||
r'\bp=({.+?})(?:&|$)', player, 'video JSON'))
|
|
||||||
|
|
||||||
video_url = '%s1.mp4' % video['location']
|
|
||||||
video_id = video.get('video_id')
|
|
||||||
display_id = video.get('alias') or display_id
|
|
||||||
title = video.get('title')
|
|
||||||
description = video.get('description')
|
|
||||||
thumbnail = video.get('main_thumb')
|
|
||||||
duration = int_or_none(video.get('duration'))
|
|
||||||
timestamp = parse_iso8601(video.get('dt_published'), ' ')
|
|
||||||
uploader_id = video.get('user_id')
|
|
||||||
view_count = int_or_none(video.get('views_count'))
|
|
||||||
age_limit = parse_age_limit(video.get('age_limit'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'view_count': view_count,
|
|
||||||
'age_limit': age_limit,
|
|
||||||
}
|
|
||||||
@@ -19,7 +19,7 @@ class PlaywireIE(InfoExtractor):
|
|||||||
'id': '3353705',
|
'id': '3353705',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'S04_RM_UCL_Rus',
|
'title': 'S04_RM_UCL_Rus',
|
||||||
'thumbnail': 're:^http://.*\.png$',
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
'duration': 145.94,
|
'duration': 145.94,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ from ..aes import (
|
|||||||
|
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
'md5': '882f488fa1f0026f023f33576004a2ed',
|
'md5': '882f488fa1f0026f023f33576004a2ed',
|
||||||
@@ -34,6 +34,9 @@ class PornHubIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@@ -72,6 +73,18 @@ class RaiIE(InfoExtractor):
|
|||||||
'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
|
'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
|
||||||
'uploader': 'RaiTre',
|
'uploader': 'RaiTre',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||||
|
'md5': '037104d2c14132887e5e4cf114569214',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Il pacco',
|
||||||
|
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||||
|
'uploader': 'RaiTre',
|
||||||
|
'upload_date': '20141221',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -90,11 +103,14 @@ class RaiIE(InfoExtractor):
|
|||||||
relinker_url = self._extract_relinker_url(webpage)
|
relinker_url = self._extract_relinker_url(webpage)
|
||||||
|
|
||||||
if not relinker_url:
|
if not relinker_url:
|
||||||
iframe_path = self._search_regex(
|
iframe_url = self._search_regex(
|
||||||
r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
|
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
|
||||||
|
r'drawMediaRaiTV\(["\'](.+?)["\']'],
|
||||||
webpage, 'iframe')
|
webpage, 'iframe')
|
||||||
|
if not iframe_url.startswith('http'):
|
||||||
|
iframe_url = compat_urlparse.urljoin(url, iframe_url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'%s/%s' % (host, iframe_path), video_id)
|
iframe_url, video_id)
|
||||||
relinker_url = self._extract_relinker_url(webpage)
|
relinker_url = self._extract_relinker_url(webpage)
|
||||||
|
|
||||||
relinker = self._download_json(
|
relinker = self._download_json(
|
||||||
|
|||||||
@@ -6,19 +6,19 @@ from ..compat import compat_urllib_parse_urlparse
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
xpath_attr,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RuutuIE(InfoExtractor):
|
class RuutuIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
'url': 'http://www.ruutu.fi/video/2058907',
|
||||||
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2058907',
|
'id': '2058907',
|
||||||
'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
||||||
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
|
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
|
||||||
@@ -28,14 +28,13 @@ class RuutuIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
|
'url': 'http://www.ruutu.fi/video/2057306',
|
||||||
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2057306',
|
'id': '2057306',
|
||||||
'display_id': 'superpesis-katso-koko-kausi-ruudussa',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Superpesis: katso koko kausi Ruudussa',
|
'title': 'Superpesis: katso koko kausi Ruudussa',
|
||||||
'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
|
'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 40,
|
'duration': 40,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
@@ -44,29 +43,10 @@ class RuutuIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_xml = self._download_xml(
|
||||||
|
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
||||||
video_id = self._search_regex(
|
|
||||||
r'data-media-id="(\d+)"', webpage, 'media id')
|
|
||||||
|
|
||||||
video_xml_url = None
|
|
||||||
|
|
||||||
media_data = self._search_regex(
|
|
||||||
r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
|
|
||||||
'media data', default=None)
|
|
||||||
if media_data:
|
|
||||||
media_json = self._parse_json(media_data, display_id, fatal=False)
|
|
||||||
if media_json:
|
|
||||||
xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
|
|
||||||
if xml_url:
|
|
||||||
video_xml_url = xml_url.replace('{ID}', video_id)
|
|
||||||
|
|
||||||
if not video_xml_url:
|
|
||||||
video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
|
|
||||||
|
|
||||||
video_xml = self._download_xml(video_xml_url, video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
processed_urls = []
|
processed_urls = []
|
||||||
@@ -109,10 +89,9 @@ class RuutuIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
|
||||||
'title': self._og_search_title(webpage),
|
'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
|
||||||
'description': self._og_search_description(webpage),
|
'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
||||||
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ class ShahidIE(InfoExtractor):
|
|||||||
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
|
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '90574',
|
'id': '90574',
|
||||||
'ext': 'm3u8',
|
'ext': 'mp4',
|
||||||
'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
|
'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
|
||||||
'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
|
'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
|
||||||
'duration': 2972,
|
'duration': 2972,
|
||||||
@@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor):
|
|||||||
compat_urllib_parse.urlencode({
|
compat_urllib_parse.urlencode({
|
||||||
'apiKey': 'sh@hid0nlin3',
|
'apiKey': 'sh@hid0nlin3',
|
||||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||||
}).encode('utf-8')),
|
})),
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
video = video[api_vars['playerType']]
|
video = video[api_vars['playerType']]
|
||||||
|
|||||||
@@ -1,24 +1,51 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mitele import MiTeleIE
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
get_element_by_attribute,
|
||||||
|
parse_duration,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelecincoIE(MiTeleIE):
|
class TelecincoIE(InfoExtractor):
|
||||||
IE_NAME = 'telecinco.es'
|
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||||
_VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
|
_VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||||
|
'md5': '5cbef3ad5ef17bf0d21570332d140729',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MDSVID20141015_0058',
|
'id': 'MDSVID20141015_0058',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
||||||
'duration': 662,
|
'duration': 662,
|
||||||
},
|
},
|
||||||
'params': {
|
}, {
|
||||||
# m3u8 download
|
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||||
'skip_download': True,
|
'md5': '0a5b9f3cc8b074f50a0578f823a12694',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDSVID20150916_0128',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '¿Quién es este ex futbolista con el que hablan ...',
|
||||||
|
'duration': 79,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||||
|
'md5': 'ad1bfaaba922dd4a295724b05b68f86a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MDSVID20150513_0220',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#DOYLACARA. Con la trata no hay trato',
|
||||||
|
'duration': 50,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||||
@@ -27,3 +54,41 @@ class TelecincoIE(MiTeleIE):
|
|||||||
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
episode = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, episode)
|
||||||
|
embed_data_json = self._search_regex(
|
||||||
|
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
||||||
|
).replace('\'', '"')
|
||||||
|
embed_data = json.loads(embed_data_json)
|
||||||
|
|
||||||
|
domain = embed_data['mediaUrl']
|
||||||
|
if not domain.startswith('http'):
|
||||||
|
# only happens in telecinco.es videos
|
||||||
|
domain = 'http://' + domain
|
||||||
|
info_url = compat_urlparse.urljoin(
|
||||||
|
domain,
|
||||||
|
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||||
|
)
|
||||||
|
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||||
|
|
||||||
|
video_link = info_el.find('videoUrl/link').text
|
||||||
|
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
||||||
|
token_info = self._download_json(
|
||||||
|
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||||
|
episode,
|
||||||
|
transform_source=strip_jsonp
|
||||||
|
)
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': embed_data['videoId'],
|
||||||
|
'display_id': episode,
|
||||||
|
'title': info_el.find('title').text,
|
||||||
|
'formats': formats,
|
||||||
|
'description': get_element_by_attribute('class', 'text', webpage),
|
||||||
|
'thumbnail': info_el.find('thumb').text,
|
||||||
|
'duration': parse_duration(info_el.find('duration').text),
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,14 +2,12 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
|
||||||
|
|
||||||
class TudouIE(InfoExtractor):
|
class TudouIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
'md5': '140a49ed444bd22f93330985d8475fcb',
|
||||||
@@ -27,41 +25,41 @@ class TudouIE(InfoExtractor):
|
|||||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
||||||
|
|
||||||
def _url_for_id(self, id, quality=None):
|
def _url_for_id(self, video_id, quality=None):
|
||||||
info_url = "http://v2.tudou.com/f?id=" + str(id)
|
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
|
||||||
if quality:
|
if quality:
|
||||||
info_url += '&hd' + quality
|
info_url += '&hd' + quality
|
||||||
webpage = self._download_webpage(info_url, id, "Opening the info webpage")
|
xml_data = self._download_xml(info_url, video_id, "Opening the info XML page")
|
||||||
final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url')
|
final_url = xml_data.text
|
||||||
return final_url
|
return final_url
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
youku_vcode = self._search_regex(
|
||||||
if m and m.group(1):
|
r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
|
||||||
return {
|
if youku_vcode:
|
||||||
'_type': 'url',
|
return self.url_result('youku:' + youku_vcode, ie='Youku')
|
||||||
'url': 'youku:' + m.group(1),
|
|
||||||
'ie_key': 'Youku'
|
|
||||||
}
|
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
|
r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
|
||||||
thumbnail_url = self._search_regex(
|
thumbnail_url = self._search_regex(
|
||||||
r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
|
r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)
|
||||||
|
|
||||||
player_url = self._search_regex(
|
player_url = self._search_regex(
|
||||||
r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
|
r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
|
||||||
webpage, 'player URL', default=self._PLAYER_URL)
|
webpage, 'player URL', default=self._PLAYER_URL)
|
||||||
|
|
||||||
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
segments = self._parse_json(self._search_regex(
|
||||||
segments = json.loads(segs_json)
|
r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
|
||||||
# It looks like the keys are the arguments that have to be passed as
|
# It looks like the keys are the arguments that have to be passed as
|
||||||
# the hd field in the request url, we pick the higher
|
# the hd field in the request url, we pick the higher
|
||||||
# Also, filter non-number qualities (see issue #3643).
|
# Also, filter non-number qualities (see issue #3643).
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
str_to_int,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -12,18 +14,41 @@ class VidmeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vid.me/QNB',
|
'url': 'https://vid.me/QNB',
|
||||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
'md5': 'c62f1156138dc3323902188c5b5a8bd6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'QNB',
|
'id': 'QNB',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Fishing for piranha - the easy way',
|
'title': 'Fishing for piranha - the easy way',
|
||||||
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
||||||
'duration': 119.92,
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'timestamp': 1406313244,
|
'timestamp': 1406313244,
|
||||||
'upload_date': '20140725',
|
'upload_date': '20140725',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'age_limit': 0,
|
||||||
|
'duration': 119.92,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://vid.me/Gc6M',
|
||||||
|
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Gc6M',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1441211642,
|
||||||
|
'upload_date': '20150902',
|
||||||
|
'uploader': 'SunshineM',
|
||||||
|
'uploader_id': '3552827',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 223.72,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# tests uploader field
|
# tests uploader field
|
||||||
@@ -33,63 +58,94 @@ class VidmeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Carver',
|
'title': 'The Carver',
|
||||||
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
|
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
|
||||||
'duration': 97.859999999999999,
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'timestamp': 1433203629,
|
'timestamp': 1433203629,
|
||||||
'upload_date': '20150602',
|
'upload_date': '20150602',
|
||||||
'uploader': 'Thomas',
|
'uploader': 'Thomas',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'uploader_id': '109747',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 97.859999999999999,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
|
# nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
|
||||||
'url': 'https://vid.me/e/Wmur',
|
'url': 'https://vid.me/e/Wmur',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'Wmur',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'naked smoking & stretching',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1430931613,
|
||||||
|
'upload_date': '20150506',
|
||||||
|
'uploader': 'naked-yogi',
|
||||||
|
'uploader_id': '1638622',
|
||||||
|
'age_limit': 18,
|
||||||
|
'duration': 653.26999999999998,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url = url.replace('vid.me/e/', 'vid.me/')
|
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
try:
|
||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
response = self._download_json(
|
||||||
|
'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||||
|
response = self._parse_json(e.cause.read(), video_id)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
error = response.get('error')
|
||||||
description = self._og_search_description(webpage, default='')
|
if error:
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
raise ExtractorError(
|
||||||
timestamp = int_or_none(self._og_search_property(
|
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||||
'updated_time', webpage, fatal=False))
|
|
||||||
width = int_or_none(self._og_search_property(
|
video = response['video']
|
||||||
'video:width', webpage, fatal=False))
|
|
||||||
height = int_or_none(self._og_search_property(
|
formats = [{
|
||||||
'video:height', webpage, fatal=False))
|
'format_id': f.get('type'),
|
||||||
duration = float_or_none(self._html_search_regex(
|
'url': f['uri'],
|
||||||
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
|
'width': int_or_none(f.get('width')),
|
||||||
view_count = str_to_int(self._html_search_regex(
|
'height': int_or_none(f.get('height')),
|
||||||
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
|
} for f in video.get('formats', []) if f.get('uri')]
|
||||||
webpage, 'view count', fatal=False))
|
self._sort_formats(formats)
|
||||||
like_count = str_to_int(self._html_search_regex(
|
|
||||||
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
|
title = video['title']
|
||||||
webpage, 'like count', fatal=False))
|
description = video.get('description')
|
||||||
uploader = self._html_search_regex(
|
thumbnail = video.get('thumbnail_url')
|
||||||
'class="video_author_username"[^>]*>([^<]+)',
|
timestamp = parse_iso8601(video.get('date_created'), ' ')
|
||||||
webpage, 'uploader', default=None)
|
uploader = video.get('user', {}).get('username')
|
||||||
|
uploader_id = video.get('user', {}).get('user_id')
|
||||||
|
age_limit = 18 if video.get('nsfw') is True else 0
|
||||||
|
duration = float_or_none(video.get('duration'))
|
||||||
|
view_count = int_or_none(video.get('view_count'))
|
||||||
|
like_count = int_or_none(video.get('likes_count'))
|
||||||
|
comment_count = int_or_none(video.get('comment_count'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'age_limit': age_limit,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'uploader': uploader,
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
program = mobj.group('program')
|
program = mobj.group('program')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, program)
|
|
||||||
|
|
||||||
page_id = mobj.group('page')
|
page_id = mobj.group('page')
|
||||||
if page_id:
|
if page_id:
|
||||||
page_id = int(page_id)
|
page_id = int(page_id)
|
||||||
start_page = page_id
|
start_page = page_id
|
||||||
last_page = start_page + 1
|
|
||||||
playlist_id = '%s-page%d' % (program, page_id)
|
playlist_id = '%s-page%d' % (program, page_id)
|
||||||
else:
|
else:
|
||||||
start_page = 0
|
start_page = 0
|
||||||
last_page = int(self._search_regex(
|
|
||||||
r'videos\?page=(\d+)">laatste</a>',
|
|
||||||
webpage, 'last page', default=0)) + 1
|
|
||||||
playlist_id = program
|
playlist_id = program
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for current_page_id in range(start_page, last_page):
|
for current_page_id in itertools.count(start_page):
|
||||||
current_page = self._download_webpage(
|
current_page = self._download_webpage(
|
||||||
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
|
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
|
||||||
program,
|
program,
|
||||||
'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
|
'Downloading page %d' % (current_page_id + 1))
|
||||||
page_entries = [
|
page_entries = [
|
||||||
self.url_result('http://www.vier.be' + video_url, 'Vier')
|
self.url_result('http://www.vier.be' + video_url, 'Vier')
|
||||||
for video_url in re.findall(
|
for video_url in re.findall(
|
||||||
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
|
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
|
||||||
entries.extend(page_entries)
|
entries.extend(page_entries)
|
||||||
|
if page_id or '>Meer<' not in current_page:
|
||||||
|
break
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id)
|
return self.playlist_result(entries, playlist_id)
|
||||||
|
|||||||
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
@@ -16,14 +18,14 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ViewsterIE(InfoExtractor):
|
class ViewsterIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# movie, Type=Movie
|
# movie, Type=Movie
|
||||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||||
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
|
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1140-11855-000',
|
'id': '1140-11855-000',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'The listening Project',
|
'title': 'The listening Project',
|
||||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||||
'timestamp': 1214870400,
|
'timestamp': 1214870400,
|
||||||
@@ -33,10 +35,10 @@ class ViewsterIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# series episode, Type=Episode
|
# series episode, Type=Episode
|
||||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||||
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
|
'md5': '9243079a8531809efe1b089db102c069',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1284-19427-001',
|
'id': '1284-19427-001',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'The World and a Wall',
|
'title': 'The World and a Wall',
|
||||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||||
'timestamp': 1428192000,
|
'timestamp': 1428192000,
|
||||||
@@ -61,6 +63,14 @@ class ViewsterIE(InfoExtractor):
|
|||||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 16,
|
'playlist_mincount': 16,
|
||||||
|
}, {
|
||||||
|
# geo restricted series
|
||||||
|
'url': 'https://www.viewster.com/serie/1280-18794-002/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# geo restricted video
|
||||||
|
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||||
@@ -74,8 +84,8 @@ class ViewsterIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
# Get 'api_token' cookie
|
# Get 'api_token' cookie
|
||||||
self._request_webpage(HEADRequest(url), video_id)
|
self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
|
||||||
cookies = self._get_cookies(url)
|
cookies = self._get_cookies('http://www.viewster.com/')
|
||||||
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
|
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
|
||||||
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
@@ -85,10 +95,16 @@ class ViewsterIE(InfoExtractor):
|
|||||||
entry_id = info.get('Id') or info['id']
|
entry_id = info.get('Id') or info['id']
|
||||||
|
|
||||||
# unfinished serie has no Type
|
# unfinished serie has no Type
|
||||||
if info.get('Type') in ['Serie', None]:
|
if info.get('Type') in ('Serie', None):
|
||||||
|
try:
|
||||||
episodes = self._download_json(
|
episodes = self._download_json(
|
||||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||||
video_id, 'Downloading series JSON')
|
video_id, 'Downloading series JSON')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
else:
|
||||||
|
raise
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||||
@@ -98,7 +114,7 @@ class ViewsterIE(InfoExtractor):
|
|||||||
return self.playlist_result(entries, video_id, title, description)
|
return self.playlist_result(entries, video_id, title, description)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
|
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||||
@@ -120,9 +136,22 @@ class ViewsterIE(InfoExtractor):
|
|||||||
fatal=False # m3u8 sometimes fail
|
fatal=False # m3u8 sometimes fail
|
||||||
))
|
))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
format_id = media.get('Bitrate')
|
||||||
|
f = {
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
})
|
'format_id': 'mp4-%s' % format_id,
|
||||||
|
'height': int_or_none(media.get('Height')),
|
||||||
|
'width': int_or_none(media.get('Width')),
|
||||||
|
'preference': 1,
|
||||||
|
}
|
||||||
|
if format_id and not f['height']:
|
||||||
|
f['height'] = int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
|
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
synopsis = info.get('Synopsis', {})
|
synopsis = info.get('Synopsis', {})
|
||||||
|
|||||||
86
youtube_dl/extractor/vlive.py
Normal file
86
youtube_dl/extractor/vlive.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hmac
|
||||||
|
from hashlib import sha1
|
||||||
|
from base64 import b64encode
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext
|
||||||
|
)
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
|
class VLiveIE(InfoExtractor):
    """Extractor for vlive.tv video pages (``www.`` and ``m.`` hosts)."""

    IE_NAME = 'vlive'
    # www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
    # The host alternation is non-capturing: only the named ``id`` group is used.
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://m.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': '[V] Girl\'s Day\'s Broadcast',
            'creator': 'Girl\'s Day',
        },
    }
    # Key used for the HMAC-SHA1 signature sent with the playinfo request.
    _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, metadata) for a vlive video URL.

        Raises ExtractorError when the playinfo response does not report
        'success' or carries no 'result'.
        """
        video_id = self._match_id(url)

        # Always fetch the mobile page, whichever host the input URL used.
        webpage = self._download_webpage(
            'http://m.vlive.tv/video/%s' % video_id,
            video_id, note='Download video page')

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        # The creator is optional metadata: a markup change must not abort
        # extraction when the formats themselves are still available.
        creator = self._html_search_regex(
            r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator',
            fatal=False)

        # The signature covers the first 255 characters of the request URL
        # (including the trailing '?') followed by the millisecond timestamp,
        # so the URL text must not be altered before signing.
        url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
        msgpad = '%.0f' % (time() * 1000)
        md = b64encode(
            hmac.new(self._SECRET.encode('ascii'),
                     (url[:255] + msgpad).encode('ascii'), sha1).digest()
        )
        url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
        playinfo = self._download_json(url, video_id, 'Downloading video json')

        if playinfo.get('message', '') != 'success':
            raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))

        result = playinfo.get('result')
        if not result:
            raise ExtractorError('No videos found.')

        formats = []
        for vid in result.get('videos', {}).get('list', []):
            bitrate = vid.get('bitrate', {})
            formats.append({
                'url': vid['source'],
                'ext': 'mp4',
                'abr': bitrate.get('audio'),
                'vbr': bitrate.get('video'),
                'format_id': vid['encodingOption']['name'],
                'height': vid.get('height'),
                'width': vid.get('width'),
            })
        self._sort_formats(formats)

        # One subtitle entry per caption language; the extension falls back
        # to 'vtt' when it cannot be determined from the caption URL.
        subtitles = {}
        for caption in result.get('captions', {}).get('list', []):
            subtitles[caption['language']] = [
                {'ext': determine_ext(caption['source'], default_ext='vtt'),
                 'url': caption['source']}]

        return {
            'id': video_id,
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||||
@@ -19,25 +19,25 @@ class WashingtonPostIE(InfoExtractor):
|
|||||||
'title': 'Sinkhole of bureaucracy',
|
'title': 'Sinkhole of bureaucracy',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': '79132cc09ec5309fa590ae46e4cc31bc',
|
'md5': 'b9be794ceb56c7267d410a13f99d801a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
|
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Breaking Points: The Paper Mine',
|
'title': 'Breaking Points: The Paper Mine',
|
||||||
'duration': 1287,
|
'duration': 1290,
|
||||||
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
||||||
'uploader': 'The Washington Post',
|
'uploader': 'The Washington Post',
|
||||||
'timestamp': 1395527908,
|
'timestamp': 1395527908,
|
||||||
'upload_date': '20140322',
|
'upload_date': '20140322',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'md5': 'e1d5734c06865cc504ad99dc2de0d443',
|
'md5': '1fff6a689d8770966df78c8cb6c8c17c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
|
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The town bureaucracy sustains',
|
'title': 'The town bureaucracy sustains',
|
||||||
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
||||||
'duration': 2217,
|
'duration': 2220,
|
||||||
'timestamp': 1395528005,
|
'timestamp': 1395528005,
|
||||||
'upload_date': '20140322',
|
'upload_date': '20140322',
|
||||||
'uploader': 'The Washington Post',
|
'uploader': 'The Washington Post',
|
||||||
|
|||||||
@@ -1,40 +1,33 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class WimpIE(InfoExtractor):
|
class WimpIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
|
_VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.wimp.com/maruexhausted/',
|
'url': 'http://www.wimp.com/maruexhausted/',
|
||||||
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
|
'md5': 'ee21217ffd66d058e8b16be340b74883',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'maruexhausted',
|
'id': 'maruexhausted',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Maru is exhausted.',
|
'title': 'Maru is exhausted.',
|
||||||
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# youtube video
|
|
||||||
'url': 'http://www.wimp.com/clowncar/',
|
'url': 'http://www.wimp.com/clowncar/',
|
||||||
|
'md5': '4e2986c793694b55b37cf92521d12bb4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cG4CEr2aiSg',
|
'id': 'clowncar',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Basset hound clown car...incredible!',
|
'title': 'It\'s like a clown car.',
|
||||||
'description': 'md5:8d228485e0719898c017203f900b3a35',
|
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
|
||||||
'uploader': 'Gretchen Hoey',
|
|
||||||
'uploader_id': 'gretchenandjeff1',
|
|
||||||
'upload_date': '20140303',
|
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
|
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class XuiteIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Audio
|
# Audio
|
||||||
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
||||||
'md5': '63a42c705772aa53fd4c1a0027f86adf',
|
'md5': 'e79284c87b371424885448d11f6398c8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3860914',
|
'id': '3860914',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ class YahooIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||||
'md5': '67010fdf3a08d290e060a4dd96baa07b',
|
'md5': '88e209b417f173d86186bef6e4d1f160',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
|
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -144,6 +144,17 @@ class YahooIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://tw.news.yahoo.com/-100120367.html',
|
'url': 'https://tw.news.yahoo.com/-100120367.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
|
||||||
|
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
|
||||||
|
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Communitary - Community Episode 1: Ladders',
|
||||||
|
'description': 'md5:8fc39608213295748e1e289807838c97',
|
||||||
|
'duration': 1646,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -171,6 +182,19 @@ class YahooIE(InfoExtractor):
|
|||||||
if nbc_sports_url:
|
if nbc_sports_url:
|
||||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
# Query result is often embedded in webpage as JSON. Sometimes explicit requests
|
||||||
|
# to video API results in a failure with geo restriction reason therefore using
|
||||||
|
# embedded query result when present sounds reasonable.
|
||||||
|
config_json = self._search_regex(
|
||||||
|
r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
|
||||||
|
webpage, 'videoplayer applet', default=None)
|
||||||
|
if config_json:
|
||||||
|
config = self._parse_json(config_json, display_id, fatal=False)
|
||||||
|
if config:
|
||||||
|
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
|
||||||
|
if sapi:
|
||||||
|
return self._extract_info(display_id, sapi, webpage)
|
||||||
|
|
||||||
items_json = self._search_regex(
|
items_json = self._search_regex(
|
||||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||||
default=None)
|
default=None)
|
||||||
@@ -190,22 +214,10 @@ class YahooIE(InfoExtractor):
|
|||||||
video_id = info['id']
|
video_id = info['id']
|
||||||
return self._get_info(video_id, display_id, webpage)
|
return self._get_info(video_id, display_id, webpage)
|
||||||
|
|
||||||
def _get_info(self, video_id, display_id, webpage):
|
def _extract_info(self, display_id, query, webpage):
|
||||||
region = self._search_regex(
|
info = query['query']['results']['mediaObj'][0]
|
||||||
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
|
|
||||||
webpage, 'region', fatal=False, default='US')
|
|
||||||
data = compat_urllib_parse.urlencode({
|
|
||||||
'protocol': 'http',
|
|
||||||
'region': region,
|
|
||||||
})
|
|
||||||
query_url = (
|
|
||||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
|
||||||
'{id}?{data}'.format(id=video_id, data=data))
|
|
||||||
query_result = self._download_json(
|
|
||||||
query_url, display_id, 'Downloading video info')
|
|
||||||
|
|
||||||
info = query_result['query']['results']['mediaObj'][0]
|
|
||||||
meta = info.get('meta')
|
meta = info.get('meta')
|
||||||
|
video_id = info.get('id')
|
||||||
|
|
||||||
if not meta:
|
if not meta:
|
||||||
msg = info['status'].get('msg')
|
msg = info['status'].get('msg')
|
||||||
@@ -231,6 +243,9 @@ class YahooIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
if s.get('format') == 'm3u8_playlist':
|
||||||
|
format_info['protocol'] = 'm3u8_native'
|
||||||
|
format_info['ext'] = 'mp4'
|
||||||
format_url = compat_urlparse.urljoin(host, path)
|
format_url = compat_urlparse.urljoin(host, path)
|
||||||
format_info['url'] = format_url
|
format_info['url'] = format_url
|
||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
@@ -264,6 +279,21 @@ class YahooIE(InfoExtractor):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_info(self, video_id, display_id, webpage):
    """Download the stream query result for video_id and hand it to _extract_info.

    The region is read from the webpage (falling back to 'US') and sent,
    together with protocol 'http', as query parameters of the streams request.
    """
    region = self._search_regex(
        r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
        webpage, 'region', fatal=False, default='US')
    query_string = compat_urllib_parse.urlencode({
        'protocol': 'http',
        'region': region,
    })
    query_url = 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
    query_url += '{id}?{data}'.format(id=video_id, data=query_string)
    query_result = self._download_json(
        query_url, display_id, 'Downloading video info')
    return self._extract_info(display_id, query_result, webpage)
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = 'Yahoo screen search'
|
IE_DESC = 'Yahoo screen search'
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
encode_dict,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
@@ -111,10 +112,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'hl': 'en_US',
|
'hl': 'en_US',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
|
||||||
# chokes on unicode
|
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
|
||||||
|
|
||||||
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
@@ -147,8 +145,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'TrustDevice': 'on',
|
'TrustDevice': 'on',
|
||||||
})
|
})
|
||||||
|
|
||||||
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
|
tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
|
||||||
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
|
|
||||||
|
|
||||||
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||||
tfa_results = self._download_webpage(
|
tfa_results = self._download_webpage(
|
||||||
@@ -1657,6 +1654,9 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
channel_page = self._download_webpage(
|
channel_page = self._download_webpage(
|
||||||
url + '?view=57', channel_id,
|
url + '?view=57', channel_id,
|
||||||
'Downloading channel page', fatal=False)
|
'Downloading channel page', fatal=False)
|
||||||
|
if channel_page is False:
|
||||||
|
channel_playlist_id = False
|
||||||
|
else:
|
||||||
channel_playlist_id = self._html_search_meta(
|
channel_playlist_id = self._html_search_meta(
|
||||||
'channelId', channel_page, 'channel id', default=None)
|
'channelId', channel_page, 'channel id', default=None)
|
||||||
if not channel_playlist_id:
|
if not channel_playlist_id:
|
||||||
@@ -1838,8 +1838,8 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||||
IE_NAME = 'youtube:show'
|
IE_NAME = 'youtube:show'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.youtube.com/show/airdisasters',
|
'url': 'https://www.youtube.com/show/airdisasters',
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 5,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'airdisasters',
|
'id': 'airdisasters',
|
||||||
'title': 'Air Disasters',
|
'title': 'Air Disasters',
|
||||||
@@ -1850,7 +1850,7 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
playlist_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, playlist_id, 'Downloading show webpage')
|
'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
|
||||||
# There's one playlist for each season of the show
|
# There's one playlist for each season of the show
|
||||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||||
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||||
@@ -1973,6 +1973,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
|||||||
annotation_id=annotation_[^&]+|
|
annotation_id=annotation_[^&]+|
|
||||||
x-yt-cl=[0-9]+|
|
x-yt-cl=[0-9]+|
|
||||||
hl=[^&]*|
|
hl=[^&]*|
|
||||||
|
t=[0-9]+
|
||||||
)?
|
)?
|
||||||
|
|
|
|
||||||
attribution_link\?a=[^&]+
|
attribution_link\?a=[^&]+
|
||||||
@@ -1995,6 +1996,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/watch?t=2372',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
import optparse
|
import optparse
|
||||||
import shlex
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .downloader.external import list_external_downloaders
|
from .downloader.external import list_external_downloaders
|
||||||
@@ -11,6 +10,7 @@ from .compat import (
|
|||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_shlex_split,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
@@ -28,7 +28,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
try:
|
try:
|
||||||
res = []
|
res = []
|
||||||
for l in optionf:
|
for l in optionf:
|
||||||
res += shlex.split(l, comments=True)
|
res += compat_shlex_split(l, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import os
|
|||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
|
cli_configuration_args,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -61,11 +62,7 @@ class PostProcessor(object):
|
|||||||
self._downloader.report_warning(errnote)
|
self._downloader.report_warning(errnote)
|
||||||
|
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
pp_args = self._downloader.params.get('postprocessor_args')
|
return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
|
||||||
if pp_args is None:
|
|
||||||
return default
|
|
||||||
assert isinstance(pp_args, list)
|
|
||||||
return pp_args
|
|
||||||
|
|
||||||
|
|
||||||
class AudioConversionError(PostProcessingError):
|
class AudioConversionError(PostProcessingError):
|
||||||
|
|||||||
@@ -135,7 +135,10 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
|
|
||||||
files_cmd = []
|
files_cmd = []
|
||||||
for path in input_paths:
|
for path in input_paths:
|
||||||
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
|
files_cmd.extend([
|
||||||
|
encodeArgument('-i'),
|
||||||
|
encodeFilename(self._ffmpeg_filename_argument(path), True)
|
||||||
|
])
|
||||||
cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
|
cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
|
||||||
files_cmd +
|
files_cmd +
|
||||||
[encodeArgument(o) for o in opts] +
|
[encodeArgument(o) for o in opts] +
|
||||||
@@ -155,10 +158,10 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
||||||
|
|
||||||
def _ffmpeg_filename_argument(self, fn):
|
def _ffmpeg_filename_argument(self, fn):
|
||||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
# Always use 'file:' because the filename may contain ':' (ffmpeg
|
||||||
if fn.startswith('-'):
|
# interprets that as a protocol) or can start with '-' (-- is broken in
|
||||||
return './' + fn
|
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
|
||||||
return fn
|
return 'file:' + fn
|
||||||
|
|
||||||
|
|
||||||
class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ def write_json_file(obj, fn):
|
|||||||
if sys.version_info >= (2, 7):
|
if sys.version_info >= (2, 7):
|
||||||
def find_xpath_attr(node, xpath, key, val=None):
|
def find_xpath_attr(node, xpath, key, val=None):
|
||||||
""" Find the xpath xpath[@key=val] """
|
""" Find the xpath xpath[@key=val] """
|
||||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
assert re.match(r'^[a-zA-Z_-]+$', key)
|
||||||
if val:
|
if val:
|
||||||
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||||
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
|
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
|
||||||
@@ -176,12 +176,12 @@ def xpath_with_ns(path, ns_map):
|
|||||||
return '/'.join(replaced)
|
return '/'.join(replaced)
|
||||||
|
|
||||||
|
|
||||||
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
if sys.version_info < (2, 7): # Crazy 2.6
|
if sys.version_info < (2, 7): # Crazy 2.6
|
||||||
xpath = xpath.encode('ascii')
|
xpath = xpath.encode('ascii')
|
||||||
|
|
||||||
n = node.find(xpath)
|
n = node.find(xpath)
|
||||||
if n is None or n.text is None:
|
if n is None:
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
@@ -189,9 +189,37 @@ def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
|||||||
raise ExtractorError('Could not find XML element %s' % name)
|
raise ExtractorError('Could not find XML element %s' % name)
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
return n
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
|
n = xpath_element(node, xpath, name, fatal=fatal, default=default)
|
||||||
|
if n is None or n == default:
|
||||||
|
return n
|
||||||
|
if n.text is None:
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
elif fatal:
|
||||||
|
name = xpath if name is None else name
|
||||||
|
raise ExtractorError('Could not find XML element\'s text %s' % name)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
return n.text
|
return n.text
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute ``key`` of the element found by find_xpath_attr(node, xpath, key).

    When no such element is found: return ``default`` if one was given,
    otherwise raise ExtractorError if ``fatal`` is true, otherwise return None.
    ``name`` replaces the generated ``xpath[@key]`` label in the error message.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
|
||||||
|
|
||||||
|
|
||||||
def get_element_by_id(id, html):
|
def get_element_by_id(id, html):
|
||||||
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
||||||
return get_element_by_attribute("id", id, html)
|
return get_element_by_attribute("id", id, html)
|
||||||
@@ -591,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
|||||||
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
||||||
# https://github.com/rg3/youtube-dl/issues/6727)
|
# https://github.com/rg3/youtube-dl/issues/6727)
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
kwargs['strict'] = True
|
kwargs[b'strict'] = True
|
||||||
hc = http_class(*args, **kwargs)
|
hc = http_class(*args, **kwargs)
|
||||||
source_address = ydl_handler._params.get('source_address')
|
source_address = ydl_handler._params.get('source_address')
|
||||||
if source_address is not None:
|
if source_address is not None:
|
||||||
@@ -720,7 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
gz = io.BytesIO(self.deflate(resp.read()))
|
gz = io.BytesIO(self.deflate(resp.read()))
|
||||||
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
|
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/6457).
|
||||||
if 300 <= resp.code < 400:
|
if 300 <= resp.code < 400:
|
||||||
location = resp.headers.get('Location')
|
location = resp.headers.get('Location')
|
||||||
if location:
|
if location:
|
||||||
@@ -754,6 +783,30 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
|||||||
req, **kwargs)
|
req, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor subclass whose HTTPS hooks reuse the HTTP ones."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response unchanged.

        The Set-Cookie percent-encoding workaround below is currently
        disabled (kept commented out), so no header rewriting happens here.
        """
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests and responses go through the same cookie handling as HTTP.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||||
""" Return a UNIX timestamp from the given date """
|
""" Return a UNIX timestamp from the given date """
|
||||||
|
|
||||||
@@ -1583,6 +1636,10 @@ def urlencode_postdata(*args, **kargs):
|
|||||||
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def encode_dict(d, encoding='utf-8'):
    """Return a new dict with every key and value of d passed through .encode(encoding)."""
    encoded = {}
    for key, value in d.items():
        encoded_key = key.encode(encoding)
        encoded[encoded_key] = value.encode(encoding)
    return encoded
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
etree_iter = xml.etree.ElementTree.Element.iter
|
etree_iter = xml.etree.ElementTree.Element.iter
|
||||||
except AttributeError: # Python <=2.6
|
except AttributeError: # Python <=2.6
|
||||||
@@ -1923,6 +1980,32 @@ def dfxp2srt(dfxp_data):
|
|||||||
return ''.join(out)
|
return ''.join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def cli_option(params, command_option, param):
    """Build the argument list for a command-line option that takes a value.

    Returns [command_option, value] when params holds a non-None value
    under the key ``param``, and an empty list otherwise.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
|
||||||
|
|
||||||
|
|
||||||
|
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a command-line option carrying a boolean.

    The boolean stored in params under ``param`` is rendered as
    ``true_value`` or ``false_value``. With a ``separator`` the result is a
    single argument (option + separator + value); without one it is the
    two-element list [command_option, value].

    Returns an empty list when the parameter is not set (None), so that an
    unconfigured option contributes no arguments.
    """
    flag = params.get(param)
    if flag is None:
        # An absent option used to trip the assertion below; emit nothing instead.
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
|
||||||
|
|
||||||
|
|
||||||
|
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra arguments stored in params under ``param``.

    When nothing is stored (None), ``default`` is returned as-is (the same
    object, not a copy). A stored value must be a list.
    """
    configured = params.get(param)
    if configured is not None:
        assert isinstance(configured, list)
        return configured
    return default
|
||||||
|
|
||||||
|
|
||||||
class ISO639Utils(object):
|
class ISO639Utils(object):
|
||||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||||
_lang_map = {
|
_lang_map = {
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.09.03'
|
__version__ = '2015.09.22'
|
||||||
|
|||||||
Reference in New Issue
Block a user