Compare commits
120 Commits
2014.01.30
...
2014.02.10
Author | SHA1 | Date | |
---|---|---|---|
![]() |
2e20bba708 | ||
![]() |
e70dc1d14b | ||
![]() |
026fcc0495 | ||
![]() |
81c2f20b53 | ||
![]() |
1afe753462 | ||
![]() |
524c2c716a | ||
![]() |
b542d4bbd7 | ||
![]() |
17968e444c | ||
![]() |
2e3fd9ec2f | ||
![]() |
d6a283b025 | ||
![]() |
9766538124 | ||
![]() |
98dbee8681 | ||
![]() |
e421491b3b | ||
![]() |
6828d37c41 | ||
![]() |
bf5f610099 | ||
![]() |
8b7f73404a | ||
![]() |
85cacb2f51 | ||
![]() |
b3fa3917e2 | ||
![]() |
082c6c867a | ||
![]() |
03fcf1ab57 | ||
![]() |
3b00dea5eb | ||
![]() |
8bc6c8e3c0 | ||
![]() |
79bc27b53a | ||
![]() |
84dd703199 | ||
![]() |
c6fdba23a6 | ||
![]() |
b19fe521a9 | ||
![]() |
c1e672d121 | ||
![]() |
f4371f4784 | ||
![]() |
d914d9d187 | ||
![]() |
845d14d377 | ||
![]() |
4a9540b6d2 | ||
![]() |
9f31be7000 | ||
![]() |
41fa1b627d | ||
![]() |
c0c4e66b29 | ||
![]() |
cd8662de22 | ||
![]() |
3587159614 | ||
![]() |
d67cc9fa7c | ||
![]() |
bf3a2fe923 | ||
![]() |
e9ea0bf123 | ||
![]() |
63424b6233 | ||
![]() |
0bf35c5cf5 | ||
![]() |
95c29381eb | ||
![]() |
94c4abce7f | ||
![]() |
f2dffe55f8 | ||
![]() |
46a073bfac | ||
![]() |
df872ec4e7 | ||
![]() |
5de90176d9 | ||
![]() |
dcf3eec47a | ||
![]() |
e9e4f30d26 | ||
![]() |
83cebd73d4 | ||
![]() |
1df4229bd7 | ||
![]() |
3c995527e9 | ||
![]() |
7c62b568a2 | ||
![]() |
ccf9114e84 | ||
![]() |
d8061908bb | ||
![]() |
211e17dd43 | ||
![]() |
6cb38a9994 | ||
![]() |
fa7df757a7 | ||
![]() |
8c82077619 | ||
![]() |
e5d1f9e50a | ||
![]() |
7ee50ae7b5 | ||
![]() |
de563c9da0 | ||
![]() |
50451f2a18 | ||
![]() |
9bc70948e1 | ||
![]() |
5dc733f071 | ||
![]() |
bc4850908c | ||
![]() |
20650c8654 | ||
![]() |
56dced2670 | ||
![]() |
eef726c04b | ||
![]() |
acf1555d76 | ||
![]() |
22e7f1a6ec | ||
![]() |
3c49325658 | ||
![]() |
bb1cd2bea1 | ||
![]() |
fdf1f8d4ce | ||
![]() |
117c8c6b97 | ||
![]() |
5cef4ff09b | ||
![]() |
91264ce572 | ||
![]() |
c79ef8e1ae | ||
![]() |
58d915df51 | ||
![]() |
7881a64499 | ||
![]() |
90159f5561 | ||
![]() |
99877772d0 | ||
![]() |
b0268cb6ce | ||
![]() |
4edff4cfa8 | ||
![]() |
1eac553e7e | ||
![]() |
9d3ac7444d | ||
![]() |
588128d054 | ||
![]() |
8e93b9b9aa | ||
![]() |
b4bcffefa3 | ||
![]() |
2b39af9b4f | ||
![]() |
23fe495feb | ||
![]() |
b5dbe89bba | ||
![]() |
dbe80ca7ad | ||
![]() |
009a3408f5 | ||
![]() |
b58e3c8918 | ||
![]() |
56b6faf91e | ||
![]() |
7ac1f877a7 | ||
![]() |
d55433bbfd | ||
![]() |
f0ce2bc1c5 | ||
![]() |
c3bc00b90e | ||
![]() |
ff6b7b049b | ||
![]() |
f46359121f | ||
![]() |
37c1525c17 | ||
![]() |
c85e4cf7b4 | ||
![]() |
c66dcda287 | ||
![]() |
6d845922ab | ||
![]() |
2949cbe036 | ||
![]() |
c3309a7774 | ||
![]() |
7aed837595 | ||
![]() |
0eb799bae9 | ||
![]() |
4baff4a4ae | ||
![]() |
45d7bc2f8b | ||
![]() |
c0c2ddddcd | ||
![]() |
a96ed91610 | ||
![]() |
c1206423c4 | ||
![]() |
659aa21ba1 | ||
![]() |
efd02e858a | ||
![]() |
3bf8bc7f37 | ||
![]() |
c81a855b0f | ||
![]() |
4f879a5be0 |
12
README.md
12
README.md
@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
from google videos for youtube-dl "large
|
||||
apple". By default (with value "auto")
|
||||
youtube-dl guesses.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
|
||||
# BUILD INSTRUCTIONS
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
|
||||
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
* zip
|
||||
* nosetests
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||
|
||||
# BUGS
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
PBSIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_youtube_playlist_matching(self):
|
||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist(u'PL63F0C78739B09958')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
# Top tracks
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||
@@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
@@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
@@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||
|
||||
def test_pbs(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2350
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -22,6 +22,7 @@ import socket
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
from youtube_dl.utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_HTTPError,
|
||||
@@ -110,7 +111,7 @@ def generator(test_case):
|
||||
ydl.download([test_case['url']])
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
raise
|
||||
|
||||
if try_num == RETRIES:
|
||||
|
@@ -34,6 +34,8 @@ from youtube_dl.extractor import (
|
||||
KhanAcademyIE,
|
||||
EveryonesMixtapeIE,
|
||||
RutubeChannelIE,
|
||||
GoogleSearchIE,
|
||||
GenericIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -229,6 +231,24 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], '1409')
|
||||
self.assertTrue(len(result['entries']) >= 34)
|
||||
|
||||
def test_multiple_brightcove_videos(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2283
|
||||
dl = FakeYDL()
|
||||
ie = GenericIE(dl)
|
||||
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
|
||||
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||
self.assertEqual(len(result['entries']), 3)
|
||||
|
||||
def test_GoogleSearch(self):
|
||||
dl = FakeYDL()
|
||||
ie = GoogleSearchIE(dl)
|
||||
result = ie.extract('gvsearch15:python language')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'python language')
|
||||
self.assertEqual(result['title'], 'python language')
|
||||
self.assertTrue(len(result['entries']) == 15)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -10,9 +10,11 @@ from test.helper import FakeYDL, md5
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
BlipTVIE,
|
||||
YoutubeIE,
|
||||
DailymotionIE,
|
||||
TEDIE,
|
||||
VimeoIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -202,5 +204,80 @@ class TestTedSubtitles(BaseTestSubtitles):
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://blip.tv/a/a-6603250'
|
||||
IE = BlipTVIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
||||
|
||||
|
||||
class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vimeo.com/76979871'
|
||||
IE = VimeoIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||
self.url = 'http://vimeo.com/56015672'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = u'''<root>
|
||||
|
@@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||
|
||||
def test_youtube_playlist_noplaylist(self):
|
||||
@@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(result['_type'], 'url')
|
||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
@@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
|
||||
@@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
dl = FakeYDL()
|
||||
@@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_toplist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeTopListIE(dl)
|
||||
|
@@ -27,6 +27,12 @@ _TESTS = [
|
||||
85,
|
||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
u'js',
|
||||
90,
|
||||
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
@@ -18,6 +18,7 @@
|
||||
# code is documented here:
|
||||
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
|
||||
|
||||
# This specific script just adds the downloaded folder to the end of the $PATH,
|
||||
# which allows the contained youtube-dl executable to be found.
|
||||
export PATH=${PATH}:$(dirname $0)
|
||||
# This specific script just aliases youtube-dl to the python script that this
|
||||
# library provides. This requires updating the PYTHONPATH to ensure that the
|
||||
# full set of code can be located.
|
||||
alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
|
||||
|
@@ -41,6 +41,7 @@ __authors__ = (
|
||||
'Chris Gahan',
|
||||
'Saimadhav Heblikar',
|
||||
'Mike Col',
|
||||
'Andreas Schmitz',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -100,6 +101,43 @@ def parseOpts(overrideArguments=None):
|
||||
optionf.close()
|
||||
return res
|
||||
|
||||
def _readUserConf():
|
||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
if xdg_config_home:
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||
if not os.path.isfile(userConfFile):
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||
else:
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||
if not os.path.isfile(userConfFile):
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile, None)
|
||||
|
||||
if userConf is None:
|
||||
appdata_dir = os.environ.get('appdata')
|
||||
if appdata_dir:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = []
|
||||
|
||||
return userConf
|
||||
|
||||
def _format_option_string(option):
|
||||
''' ('-o', '--option') -> -o, --format METAVAR'''
|
||||
|
||||
@@ -203,6 +241,11 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||
general.add_option(
|
||||
'--ignore-config',
|
||||
action='store_true',
|
||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||
|
||||
|
||||
selection.add_option(
|
||||
'--playlist-start',
|
||||
@@ -457,44 +500,18 @@ def parseOpts(overrideArguments=None):
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||
else:
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
|
||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
if xdg_config_home:
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||
if not os.path.isfile(userConfFile):
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||
else:
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||
if not os.path.isfile(userConfFile):
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile, None)
|
||||
|
||||
if userConf is None:
|
||||
appdata_dir = os.environ.get('appdata')
|
||||
if appdata_dir:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = []
|
||||
|
||||
commandLineConf = sys.argv[1:]
|
||||
if '--ignore-config' in commandLineConf:
|
||||
systemConf = []
|
||||
userConf = []
|
||||
else:
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
if '--ignore-config' in systemConf:
|
||||
userConf = []
|
||||
else:
|
||||
userConf = _readUserConf()
|
||||
argv = systemConf + userConf + commandLineConf
|
||||
|
||||
opts, args = parser.parse_args(argv)
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||
|
@@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
app = info_dict.get('app', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
|
||||
@@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if app is not None:
|
||||
basic_args += ['--app', app]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
|
@@ -15,6 +15,7 @@ from .arte import (
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
@@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
@@ -54,12 +56,14 @@ from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elpais import ElPaisIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
@@ -96,10 +100,12 @@ from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
@@ -109,13 +115,16 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
@@ -125,6 +134,7 @@ from .mit import TechTVMITIE, MITIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mooshare import MooshareIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVIggyIE,
|
||||
@@ -136,11 +146,14 @@ from .myvideo import MyVideoIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .nfb import NFBIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .novamov import NovamovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
@@ -198,6 +211,7 @@ from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
@@ -228,6 +242,7 @@ from .vimeo import (
|
||||
from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .wat import WatIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
|
217
youtube_dl/extractor/bbccouk.py
Normal file
217
youtube_dl/extractor/bbccouk.py
Normal file
@@ -0,0 +1,217 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
|
||||
'info_dict': {
|
||||
'id': 'p01q7wz4',
|
||||
'ext': 'flv',
|
||||
'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
|
||||
'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
|
||||
'duration': 1936,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
vbr = int(media.get('bitrate'))
|
||||
vcodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int(media.get('width'))
|
||||
height = int(media.get('height'))
|
||||
file_size = int(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
|
||||
formats = []
|
||||
abr = int(media.get('bitrate'))
|
||||
acodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_captions(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||
srt = ''
|
||||
for pos, p in enumerate(ps):
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
|
||||
p.text.strip() if p.text is not None else '')
|
||||
subtitles[lang] = srt
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
group_id = mobj.group('id')
|
||||
|
||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||
'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
return
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -1,128 +1,137 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(InfoExtractor):
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
"""Information extractor for blip.tv"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'file': '5779306.mov',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'upload_date': '20111205',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'uploader': 'Comic Book Resources - CBR TV',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
}
|
||||
}
|
||||
|
||||
def report_direct_download(self, title):
|
||||
"""Report information extraction."""
|
||||
self.to_screen('%s: Direct download detected' % title)
|
||||
}, {
|
||||
# https://github.com/rg3/youtube-dl/pull/2274
|
||||
'note': 'Video with subtitles',
|
||||
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||
'info_dict': {
|
||||
'id': '6586561',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Red vs. Blue',
|
||||
'description': 'One-Zero-One',
|
||||
'upload_date': '20130614',
|
||||
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
|
||||
# See https://github.com/rg3/youtube-dl/issues/857
|
||||
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||
if embed_mobj:
|
||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
|
||||
video_id = self._search_regex(
|
||||
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||
|
||||
if '?' in url:
|
||||
cchar = '&'
|
||||
else:
|
||||
cchar = '?'
|
||||
|
||||
cchar = '&' if '?' in url else '?'
|
||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||
request = compat_urllib_request.Request(json_url)
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
self.report_extraction(mobj.group(1))
|
||||
urlh = self._request_webpage(request, None, False,
|
||||
'unable to download video info webpage')
|
||||
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
|
||||
json_data = self._download_json(request, video_id=presumptive_id)
|
||||
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
formats = []
|
||||
if 'additionalMedia' in data:
|
||||
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
|
||||
if not int(f['media_width']): # filter m3u8
|
||||
continue
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': f['role'],
|
||||
'width': int(f['media_width']),
|
||||
'height': int(f['media_height']),
|
||||
})
|
||||
else:
|
||||
video_id = compat_str(data['item_id'])
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
subtitles = {}
|
||||
formats = []
|
||||
if 'additionalMedia' in data:
|
||||
for f in data['additionalMedia']:
|
||||
if f.get('file_type_srt') == 1:
|
||||
LANGS = {
|
||||
'english': 'en',
|
||||
}
|
||||
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = f['url']
|
||||
continue
|
||||
if not int(f['media_width']): # filter m3u8
|
||||
continue
|
||||
formats.append({
|
||||
'url': data['media']['url'],
|
||||
'width': int(data['media']['width']),
|
||||
'height': int(data['media']['height']),
|
||||
'url': f['url'],
|
||||
'format_id': f['role'],
|
||||
'width': int(f['media_width']),
|
||||
'height': int(f['media_height']),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': data['media']['url'],
|
||||
'width': int(data['media']['width']),
|
||||
'height': int(data['media']['height']),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
self._sort_formats(formats)
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': compat_str(data['item_id']),
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
'formats': formats,
|
||||
}
|
||||
except (ValueError, KeyError) as err:
|
||||
raise ExtractorError('Unable to parse video information: %s' % repr(err))
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
'formats': formats,
|
||||
'subtitles': video_subtitles,
|
||||
}
|
||||
|
||||
def _download_subtitle_url(self, sub_lang, url):
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
||||
return self._download_webpage(req, None, note=False)
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
"""Information Extractor for blip.tv users."""
|
||||
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||
_PAGE_SIZE = 12
|
||||
IE_NAME = 'blip.tv:user'
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract username
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
username = mobj.group(1)
|
||||
|
||||
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
||||
@@ -131,7 +140,6 @@ class BlipTVUserIE(InfoExtractor):
|
||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||
page_base = page_base % mobj.group(1)
|
||||
|
||||
|
||||
# Download video ids using BlipTV Ajax calls. Result size per
|
||||
# query is limited (currently to 12 videos) so we need to query
|
||||
# page by page until there are no video ids - it means we got
|
||||
@@ -142,8 +150,8 @@ class BlipTVUserIE(InfoExtractor):
|
||||
|
||||
while True:
|
||||
url = page_base + "&page=" + str(pagenum)
|
||||
page = self._download_webpage(url, username,
|
||||
'Downloading video ids from page %d' % pagenum)
|
||||
page = self._download_webpage(
|
||||
url, username, 'Downloading video ids from page %d' % pagenum)
|
||||
|
||||
# Extract video identifiers
|
||||
ids_in_page = []
|
||||
@@ -167,4 +175,4 @@ class BlipTVUserIE(InfoExtractor):
|
||||
|
||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
||||
return [self.playlist_result(url_entries, playlist_title=username)]
|
||||
|
@@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
ooyala_url = self._twitter_search_player(webpage)
|
||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
||||
embed_code = self._search_regex(
|
||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||
'embed code')
|
||||
return OoyalaIE._build_url_result(embed_code)
|
||||
|
@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
"""Try to extract the brightcove url from the wepbage, returns None
|
||||
"""Try to extract the brightcove url from the webpage, returns None
|
||||
if it can't be found
|
||||
"""
|
||||
urls = cls._extract_brightcove_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_urls(cls, webpage):
|
||||
"""Return a list of all Brightcove URLs from the webpage """
|
||||
|
||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||
if url_m:
|
||||
return url_m.group(1)
|
||||
return [url_m.group(1)]
|
||||
|
||||
m_brightcove = re.search(
|
||||
matches = re.findall(
|
||||
r'''(?sx)<object
|
||||
(?:
|
||||
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
|
||||
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
|
||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||
).+?</object>''',
|
||||
webpage)
|
||||
if m_brightcove is not None:
|
||||
return cls._build_brighcove_url(m_brightcove.group())
|
||||
else:
|
||||
return None
|
||||
return [cls._build_brighcove_url(m) for m in matches]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class ChilloutzoneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
|
||||
'md5': 'a76f3457e813ea0037e5244f509e66d1',
|
||||
'info_dict': {
|
||||
'id': 'enemene-meck-alle-katzen-weg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Enemene Meck - Alle Katzen weg',
|
||||
'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
|
||||
},
|
||||
}, {
|
||||
'note': 'Video hosted at YouTube',
|
||||
'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
|
||||
'info_dict': {
|
||||
'id': '1YVQaAgHyRU',
|
||||
'ext': 'mp4',
|
||||
'title': '16 Photos Taken 1 Second Before Disaster',
|
||||
'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
|
||||
'uploader': 'BuzzFeedVideo',
|
||||
'uploader_id': 'BuzzFeedVideo',
|
||||
'upload_date': '20131105',
|
||||
},
|
||||
}, {
|
||||
'note': 'Video hosted at Vimeo',
|
||||
'url': 'http://www.chilloutzone.net/video/icon-blending.html',
|
||||
'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
|
||||
'info_dict': {
|
||||
'id': '85523671',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Sunday Times - Icons',
|
||||
'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
|
||||
'uploader': 'Us',
|
||||
'uploader_id': 'usfilms',
|
||||
'upload_date': '20140131'
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
base64_video_info = self._html_search_regex(
|
||||
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||
decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
|
||||
video_info_dict = json.loads(decoded_video_info)
|
||||
|
||||
# get video information from dict
|
||||
video_url = video_info_dict['mediaUrl']
|
||||
description = clean_html(video_info_dict.get('description'))
|
||||
title = video_info_dict['title']
|
||||
native_platform = video_info_dict['nativePlatform']
|
||||
native_video_id = video_info_dict['nativeVideoId']
|
||||
source_priority = video_info_dict['sourcePriority']
|
||||
|
||||
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
||||
if native_platform is None:
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||
webpage, 'fallback video URL', default=None)
|
||||
if youtube_url is not None:
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
||||
# the own CDN
|
||||
if source_priority == 'native':
|
||||
if native_platform == 'youtube':
|
||||
return self.url_result(native_video_id, ie='Youtube')
|
||||
if native_platform == 'vimeo':
|
||||
return self.url_result(
|
||||
'http://vimeo.com/' + native_video_id, ie='Vimeo')
|
||||
|
||||
if not video_url:
|
||||
raise ExtractorError('No video found')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
@@ -1,12 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
|
@@ -4,6 +4,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
@@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
'file': '6902724.mp4',
|
||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||
'info_dict': {
|
||||
'id': '6902724',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comic-Con Cosplay Catastrophe',
|
||||
'description': 'Fans get creative this year at San Diego. Too',
|
||||
'description': 'Fans get creative this year',
|
||||
'age_limit': 13,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
'file': '3505939.mp4',
|
||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||
'info_dict': {
|
||||
'id': '3505939',
|
||||
'ext': 'mp4',
|
||||
'title': 'Font Conference',
|
||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
'description': 'This video wasn\'t long enough,',
|
||||
'age_limit': 10,
|
||||
'duration': 179,
|
||||
},
|
||||
},
|
||||
# embedded youtube video
|
||||
@@ -38,7 +42,7 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||
'uploader': 'Funnyplox TV',
|
||||
'uploader_id': 'funnyploxtv',
|
||||
'description': 'md5:b20fc87608e2837596bbc8df85a3c34d',
|
||||
'description': 'md5:11812366244110c3523968aa74f02521',
|
||||
'upload_date': '20140128',
|
||||
},
|
||||
'params': {
|
||||
@@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(vdata.get('duration'), 1000)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': vdata['title'],
|
||||
@@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'thumbnail': vdata.get('thumbnail'),
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -271,8 +271,11 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note=u'Downloading JSON metadata',
|
||||
errnote=u'Unable to download JSON metadata'):
|
||||
errnote=u'Unable to download JSON metadata',
|
||||
transform_source=None):
|
||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
return json.loads(json_string)
|
||||
except ValueError as ve:
|
||||
@@ -399,7 +402,7 @@ class InfoExtractor(object):
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
||||
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
|
@@ -1,49 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
|
||||
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
|
||||
IE_DESC = 'C-SPAN'
|
||||
_TEST = {
|
||||
'url': 'http://www.c-spanvideo.org/program/HolderonV',
|
||||
'file': '315139.mp4',
|
||||
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||
'info_dict': {
|
||||
'id': '315139',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||
},
|
||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
prog_name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, prog_name)
|
||||
video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
|
||||
page_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._html_search_regex(
|
||||
[
|
||||
# The full description
|
||||
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
|
||||
# If the description is small enough the other div is not
|
||||
# present, otherwise this is a stripped version
|
||||
r'<p class=\'initial\'>(.*?)</p>'
|
||||
],
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
|
||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||
data_json = self._download_webpage(
|
||||
info_url, video_id, 'Downloading video info')
|
||||
data = json.loads(data_json)
|
||||
data = self._download_json(info_url, video_id)
|
||||
|
||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
||||
|
||||
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||
video_id)
|
||||
|
||||
def find_string(s):
|
||||
return find_xpath_attr(doc, './/string', 'name', s).text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': find_string('title'),
|
||||
'url': url,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': find_string('poster'),
|
||||
}
|
||||
|
58
youtube_dl/extractor/elpais.py
Normal file
58
youtube_dl/extractor/elpais.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ElPaisIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
|
||||
IE_DESC = 'El País'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
|
||||
'md5': '98406f301f19562170ec071b83433d55',
|
||||
'info_dict': {
|
||||
'id': 'tiempo-nuevo-recetas-viejas',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tiempo nuevo, recetas viejas',
|
||||
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
|
||||
'upload_date': '20140206',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
|
||||
video_suffix = self._search_regex(
|
||||
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
|
||||
fatal=False)
|
||||
thumbnail = (
|
||||
None if thumbnail_suffix is None
|
||||
else prefix + thumbnail_suffix)
|
||||
title = self._html_search_regex(
|
||||
'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
|
||||
webpage, 'title')
|
||||
date_str = self._search_regex(
|
||||
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
||||
webpage, 'upload date', fatal=False)
|
||||
upload_date = (None if date_str is None else unified_strdate(date_str))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
38
youtube_dl/extractor/firstpost.py
Normal file
38
youtube_dl/extractor/firstpost.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FirstpostIE(InfoExtractor):
|
||||
IE_NAME = 'Firstpost.com'
|
||||
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
|
||||
'md5': 'ee9114957692f01fb1263ed87039112a',
|
||||
'info_dict': {
|
||||
'id': '1025403',
|
||||
'ext': 'mp4',
|
||||
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
|
||||
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'<div.*?name="div_video".*?flashvars="([^"]+)">',
|
||||
webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is not None:
|
||||
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
self.to_screen('Brightcove video detected.')
|
||||
surl = smuggle_url(bc_url, {'Referer': url})
|
||||
return self.url_result(surl, 'Brightcove')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||
'ie_key': 'Brightcove'
|
||||
} for bc_url in bc_urls]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
@@ -8,32 +10,42 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoogleSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = u'Google Video search'
|
||||
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
|
||||
IE_DESC = 'Google Video search'
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = u'video.google:search'
|
||||
IE_NAME = 'video.google:search'
|
||||
_SEARCH_KEY = 'gvsearch'
|
||||
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
|
||||
entries = []
|
||||
res = {
|
||||
'_type': 'playlist',
|
||||
'id': query,
|
||||
'entries': []
|
||||
'title': query,
|
||||
}
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
|
||||
webpage = self._download_webpage(result_url, u'gvsearch:' + query,
|
||||
note='Downloading result page ' + str(pagenum))
|
||||
for pagenum in itertools.count():
|
||||
result_url = (
|
||||
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
|
||||
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
|
||||
|
||||
for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
|
||||
e = {
|
||||
webpage = self._download_webpage(
|
||||
result_url, 'gvsearch:' + query,
|
||||
note='Downloading result page ' + str(pagenum + 1))
|
||||
|
||||
for hit_idx, mobj in enumerate(re.finditer(
|
||||
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||
|
||||
# Skip playlists
|
||||
if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': mobj.group(1)
|
||||
}
|
||||
res['entries'].append(e)
|
||||
})
|
||||
|
||||
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
|
||||
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
|
||||
res['entries'] = entries[:n]
|
||||
return res
|
||||
|
@@ -1,39 +1,36 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
"""Information Extractor for Ina.fr"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
||||
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
u'file': u'I12055569.mp4',
|
||||
u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
u'info_dict': {
|
||||
u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
|
||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
'info_dict': {
|
||||
'id': 'I12055569',
|
||||
'ext': 'mp4',
|
||||
'title': 'François Hollande "Je crois que c\'est clair"',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self,url):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
|
||||
video_extension = 'mp4'
|
||||
webpage = self._download_webpage(mrss_url, video_id)
|
||||
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
|
||||
info_doc = self._download_xml(mrss_url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
|
||||
webpage, u'video URL')
|
||||
video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
|
||||
|
||||
video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
|
||||
webpage, u'title')
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'title': video_title,
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': info_doc.find('.//title').text,
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||
_TEST = {
|
||||
"name": "InfoQ",
|
||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||
@@ -26,9 +26,9 @@ class InfoQIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id=url)
|
||||
self.report_extraction(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
||||
@@ -50,6 +50,6 @@ class InfoQIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
||||
'description': video_description,
|
||||
}
|
||||
|
85
youtube_dl/extractor/iprima.py
Normal file
85
youtube_dl/extractor/iprima.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from random import random
|
||||
from math import floor
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_request
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'info_dict': {
|
||||
'id': '39152',
|
||||
'ext': 'flv',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
|
||||
floor(random()*1073741824),
|
||||
floor(random()*1073741824))
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_'+zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
elif format_id == 'hq':
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/'+filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
@@ -14,15 +14,16 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/53141',
|
||||
'file': '53141.mp4',
|
||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||
'info_dict': {
|
||||
'id': '53141',
|
||||
'ext': 'mp4',
|
||||
'title': 'Иван Васильевич меняет профессию',
|
||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||
'duration': 5498,
|
||||
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
||||
# Serial's serie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'file': '74791.mp4',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'info_dict': {
|
||||
'id': '74791',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
||||
class IviCompilationIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru compilations'
|
||||
IE_NAME = 'ivi:compilation'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||
|
49
youtube_dl/extractor/jadorecettepub.py
Normal file
49
youtube_dl/extractor/jadorecettepub.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class JadoreCettePubIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
|
||||
'md5': '401286a06067c70b44076044b66515de',
|
||||
'info_dict': {
|
||||
'id': 'jLMja3tr7a4',
|
||||
'ext': 'mp4',
|
||||
'title': 'La pire utilisation de Star Wars',
|
||||
'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
|
||||
webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
|
||||
fatal=False)
|
||||
real_url = self._search_regex(
|
||||
r'\[/postlink\](.*)endofvid', webpage, 'video URL')
|
||||
video_id = YoutubeIE.extract_id(real_url)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': real_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
'id': '5182',
|
||||
'ext': 'mp4',
|
||||
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
webpage, u'config URL')
|
||||
webpage, 'config URL')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, u'video ID')
|
||||
xml_link, 'video ID')
|
||||
|
||||
config = self._download_xml(
|
||||
xml_link, title, u'Downloading XML config')
|
||||
xml_link, title, 'Downloading XML config')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
IE_NAME = 'kontrtube'
|
||||
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
||||
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
||||
'md5': '975a991a4926c9a85f383a736a2e6b80',
|
||||
'info_dict': {
|
||||
'id': '2678',
|
||||
'ext': 'mp4',
|
||||
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
|
||||
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
|
||||
'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
|
||||
'duration': 270,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
|
||||
'video title')
|
||||
description = self._html_search_meta('description', webpage, 'video description')
|
||||
|
||||
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||
webpage)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
|
||||
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
|
||||
'view count', fatal=False)
|
||||
view_count = int(view_count) if view_count is not None else None
|
||||
|
||||
comment_count = None
|
||||
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
|
||||
fatal=False)
|
||||
if comment_str.startswith('комментариев нет'):
|
||||
comment_count = 0
|
||||
else:
|
||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||
if mobj:
|
||||
comment_count = int(mobj.group('total'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
63
youtube_dl/extractor/lifenews.py
Normal file
63
youtube_dl/extractor/lifenews.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://lifenews.ru/news/126342',
|
||||
'file': '126342.mp4',
|
||||
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||
'info_dict': {
|
||||
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
||||
'upload_date': '20140130',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'upload_date': unified_strdate(upload_date),
|
||||
}
|
@@ -4,9 +4,6 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
|
56
youtube_dl/extractor/m6.py
Normal file
56
youtube_dl/extractor/m6.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class M6IE(InfoExtractor):
|
||||
IE_NAME = 'm6'
|
||||
_VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
|
||||
'md5': '242994a87de2c316891428e0176bcb77',
|
||||
'info_dict': {
|
||||
'id': '11323908',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
|
||||
'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
|
||||
'duration': 100,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
|
||||
'Downloading video RSS')
|
||||
|
||||
title = rss.find('./channel/item/title').text
|
||||
description = rss.find('./channel/item/description').text
|
||||
thumbnail = rss.find('./channel/item/visuel_clip_big').text
|
||||
duration = int(rss.find('./channel/item/duration').text)
|
||||
view_count = int(rss.find('./channel/item/nombre_vues').text)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'sd', 'hq', 'hd']:
|
||||
video_url = rss.find('./channel/item/url_video_%s' % format_id)
|
||||
if video_url is None:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url.text,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
114
youtube_dl/extractor/mooshare.py
Normal file
114
youtube_dl/extractor/mooshare.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class MooshareIE(InfoExtractor):
|
||||
IE_NAME = 'mooshare'
|
||||
IE_DESC = 'Mooshare.biz'
|
||||
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://mooshare.biz/8dqtk4bjbp8g',
|
||||
'md5': '4e14f9562928aecd2e42c6f341c8feba',
|
||||
'info_dict': {
|
||||
'id': '8dqtk4bjbp8g',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comedy Football 2011 - (part 1-2)',
|
||||
'duration': 893,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://mooshare.biz/aipjtoc4g95j',
|
||||
'info_dict': {
|
||||
'id': 'aipjtoc4g95j',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orange Caramel Dashing Through the Snow',
|
||||
'duration': 212,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
if re.search(r'>Video Not Found or Deleted<', page) is not None:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
|
||||
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
|
||||
|
||||
download_form = {
|
||||
'op': 'download1',
|
||||
'id': video_id,
|
||||
'hash': hash_key,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||
time.sleep(5)
|
||||
|
||||
video_page = self._download_webpage(request, video_id, 'Downloading video page')
|
||||
|
||||
thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
|
||||
duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
|
||||
duration = int(duration_str) if duration_str is not None else None
|
||||
|
||||
formats = []
|
||||
|
||||
# SD video
|
||||
mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'format_id': 'sd',
|
||||
'format': 'SD',
|
||||
})
|
||||
|
||||
# HD video
|
||||
mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'format_id': 'hd',
|
||||
'format': 'HD',
|
||||
})
|
||||
|
||||
# rtmp video
|
||||
mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('rtmpurl'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'rtmp',
|
||||
'format': 'HD',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
title_el = find_xpath_attr(
|
||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:video_title')
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//title')
|
||||
if title_el.text is None:
|
||||
title_el = None
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||
|
||||
title = title_el.text
|
||||
if title is None:
|
||||
raise ExtractorError('Could not find video title')
|
||||
|
89
youtube_dl/extractor/ndr.py
Normal file
89
youtube_dl/extractor/ndr.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Mediathek'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
# video
|
||||
{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
|
||||
'md5': '20eba151ff165f386643dad9c1da08f7',
|
||||
'info_dict': {
|
||||
'id': '19925',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hallo Niedersachsen ',
|
||||
'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
|
||||
'duration': 1722,
|
||||
},
|
||||
},
|
||||
# audio
|
||||
{
|
||||
'url': 'http://www.ndr.de/903/audio191719.html',
|
||||
'md5': '41ed601768534dd18a9ae34d84798129',
|
||||
'info_dict': {
|
||||
'id': '191719',
|
||||
'ext': 'mp3',
|
||||
'title': '"Es war schockierend"',
|
||||
'description': 'md5:ed7ff8364793545021a6355b97e95f10',
|
||||
'duration': 112,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
title = self._og_search_title(page)
|
||||
description = self._og_search_description(page)
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
|
||||
page)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
|
||||
formats = []
|
||||
|
||||
mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
if mp3_url:
|
||||
formats.append({
|
||||
'url': mp3_url.group('audio'),
|
||||
'format_id': 'mp3',
|
||||
})
|
||||
|
||||
thumbnail = None
|
||||
|
||||
video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
if video_url:
|
||||
thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
|
||||
page, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.ndr.de' + thumbnail
|
||||
for format_id in ['lo', 'hi', 'hq']:
|
||||
formats.append({
|
||||
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -4,18 +4,18 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||
'file': '549479.mp3',
|
||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||
'info_dict': {
|
||||
"title": "B7 - BusMode",
|
||||
"uploader": "Burn7",
|
||||
'id': '549479',
|
||||
'ext': 'mp3',
|
||||
'title': 'B7 - BusMode',
|
||||
'uploader': 'Burn7',
|
||||
}
|
||||
}
|
||||
|
||||
|
93
youtube_dl/extractor/nfb.py
Normal file
93
youtube_dl/extractor/nfb.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
IE_NAME = 'nfb'
|
||||
IE_DESC = 'National Film Board of Canada'
|
||||
_VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||
'info_dict': {
|
||||
'id': 'qallunaat_why_white_people_are_funny',
|
||||
'ext': 'mp4',
|
||||
'title': 'Qallunaat! Why White People Are Funny ',
|
||||
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
|
||||
'duration': 3128,
|
||||
'uploader': 'Mark Sandiford',
|
||||
'uploader_id': 'mark-sandiford',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
|
||||
|
||||
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||
page, 'director id', fatal=False)
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
|
||||
config = self._download_xml(request, video_id, 'Downloading player config XML')
|
||||
|
||||
title = None
|
||||
description = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
formats = []
|
||||
|
||||
def extract_thumbnail(media):
|
||||
thumbnails = {}
|
||||
for asset in media.findall('assets/asset'):
|
||||
thumbnails[asset.get('quality')] = asset.find('default/url').text
|
||||
if not thumbnails:
|
||||
return None
|
||||
if 'high' in thumbnails:
|
||||
return thumbnails['high']
|
||||
return list(thumbnails.values())[0]
|
||||
|
||||
for media in config.findall('./player/stream/media'):
|
||||
if media.get('type') == 'posterImage':
|
||||
thumbnail = extract_thumbnail(media)
|
||||
elif media.get('type') == 'video':
|
||||
duration = int(media.get('duration'))
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
formats = [{
|
||||
'url': x.find('default/streamerURI').text + '/',
|
||||
'play_path': x.find('default/url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': x.get('quality'),
|
||||
} for x in media.findall('assets/asset')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
}
|
61
youtube_dl/extractor/normalboots.py
Normal file
61
youtube_dl/extractor/normalboots.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
class NormalbootsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
|
||||
u'file': u'home-alone-games-jontron.mp4',
|
||||
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
|
||||
u'info_dict': {
|
||||
u'title': u'Home Alone Games - JonTron - NormalBoots',
|
||||
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
|
||||
u'uploader': u'JonTron',
|
||||
u'upload_date': u'20140125',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
}
|
||||
|
||||
if url[:4] != 'http':
|
||||
url = 'http://' + url
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_title = self._og_search_title(webpage)
|
||||
video_description = self._og_search_description(webpage)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||
webpage, 'uploader')
|
||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||
webpage, 'date')
|
||||
video_upload_date = unified_strdate(raw_upload_date)
|
||||
video_upload_date = unified_strdate(raw_upload_date)
|
||||
|
||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
|
||||
|
||||
info['url'] = video_url
|
||||
info['title'] = video_title
|
||||
info['description'] = video_description
|
||||
info['thumbnail'] = video_thumbnail
|
||||
info['uploader'] = video_uploader
|
||||
info['upload_date'] = video_upload_date
|
||||
|
||||
return info
|
@@ -1,34 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Player
|
||||
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
||||
)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
u'file': u'2365006249.mp4',
|
||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
u'info_dict': {
|
||||
u'title': u'A More Perfect Union',
|
||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
u'duration': 3190,
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
webpage, 'player URL')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
if not display_id:
|
||||
display_id = player_id
|
||||
if player_id:
|
||||
player_page = self._download_webpage(
|
||||
url, display_id, note='Downloading player page',
|
||||
errnote='Could not download player page')
|
||||
video_id = self._search_regex(
|
||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
info =json.loads(info_page)
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
||||
info = self._download_json(info_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
||||
|
@@ -6,8 +6,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -16,9 +14,10 @@ class RBMARadioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||
_TEST = {
|
||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||
'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||
'info_dict': {
|
||||
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||
'ext': 'mp3',
|
||||
"uploader_id": "ford-lopatin",
|
||||
"location": "Spain",
|
||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||
@@ -42,7 +41,6 @@ class RBMARadioIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||
|
||||
video_url = data['akamai_url'] + '&cbr=256'
|
||||
url_parts = compat_urllib_parse_urlparse(video_url)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
|
||||
IE_NAME = '220.ro'
|
||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
||||
u'file': u'LYV6doKo7f.mp4',
|
||||
u'md5': u'03af18b73a07b4088753930db7a34add',
|
||||
u'info_dict': {
|
||||
u"title": u"Luati-le Banii sez 4 ep 1",
|
||||
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
||||
"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
||||
'file': 'LYV6doKo7f.mp4',
|
||||
'md5': '03af18b73a07b4088753930db7a34add',
|
||||
'info_dict': {
|
||||
"title": "Luati-le Banii sez 4 ep 1",
|
||||
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
flashVars_str = self._search_regex(
|
||||
r'<param name="flashVars" value="([^"]+)"',
|
||||
webpage, u'flashVars')
|
||||
webpage, 'flashVars')
|
||||
flashVars = compat_parse_qs(flashVars_str)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
|
||||
'description': clean_html(flashVars['desc'][0]),
|
||||
'thumbnail': flashVars['preview'][0],
|
||||
}
|
||||
return info
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
u'file': u'25665706.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Managing Scale and Complexity',
|
||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
||||
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
'info_dict': {
|
||||
'id': '25665706',
|
||||
'ext': 'mp4',
|
||||
'title': 'Managing Scale and Complexity',
|
||||
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
slideshare_obj = self._search_regex(
|
||||
r'var slideshare_object = ({.*?}); var user_info =',
|
||||
webpage, u'slideshare object')
|
||||
webpage, 'slideshare object')
|
||||
info = json.loads(slideshare_obj)
|
||||
if info['slideshow']['type'] != u'video':
|
||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
if info['slideshow']['type'] != 'video':
|
||||
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
|
||||
doc = info['doc']
|
||||
bucket = info['jsplayer']['video_bucket']
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
@@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'thumbnail': info['slideshow']['pin_image_url'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,34 +1,36 @@
|
||||
import re
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = u'southparkstudios.com'
|
||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
IE_NAME = 'southparkstudios.com'
|
||||
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Bat Daded',
|
||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||
'info_dict': {
|
||||
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bat Daded',
|
||||
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class SouthparkDeIE(SouthParkStudiosIE):
|
||||
IE_NAME = u'southpark.de'
|
||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||
IE_NAME = 'southpark.de'
|
||||
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'The Government Won\'t Respect My Privacy',
|
||||
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||
'info_dict': {
|
||||
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Government Won\'t Respect My Privacy',
|
||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
},
|
||||
}]
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -6,20 +8,20 @@ from .common import InfoExtractor
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
u'file': u'1259285.mp4',
|
||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
||||
u'info_dict': {
|
||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
||||
}
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'file': '1259285.mp4',
|
||||
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||
'info_dict': {
|
||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
u'file': u'1309159.mp4',
|
||||
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
||||
u'info_dict': {
|
||||
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
||||
}
|
||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
'file': '1309159.mp4',
|
||||
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
|
||||
'info_dict': {
|
||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||
|
||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
|
||||
idoc = self._download_xml(
|
||||
xml_url, video_id,
|
||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||
note='Downloading XML', errnote='Failed to download XML')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format_id': n.tag.rpartition('type')[2],
|
||||
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||
'width': int(n.find('./width').text),
|
||||
'height': int(n.find('./height').text),
|
||||
'abr': int(n.find('./audiobitrate').text),
|
||||
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
return info
|
||||
|
@@ -1,36 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class StatigramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
||||
_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://statigr.am/p/522207370455279102_24101272',
|
||||
u'file': u'522207370455279102_24101272.mp4',
|
||||
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
|
||||
u'info_dict': {
|
||||
u'uploader_id': u'aguynamedpatrick',
|
||||
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
||||
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
||||
'info_dict': {
|
||||
'id': '522207370455279102_24101272',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'aguynamedpatrick',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
html_title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'@([^ ]+)', title, u'uploader name', fatal=False)
|
||||
ext = 'mp4'
|
||||
r'@([^ ]+)', title, 'uploader name', fatal=False)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id' : uploader_id
|
||||
}]
|
||||
'uploader_id': uploader_id
|
||||
}
|
||||
|
@@ -62,10 +62,13 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
subtitles[sub_lang] = subtitle
|
||||
return subtitles
|
||||
|
||||
def _download_subtitle_url(self, sub_lang, url):
|
||||
return self._download_webpage(url, None, note=False)
|
||||
|
||||
def _request_subtitle_url(self, sub_lang, url):
|
||||
""" makes the http request for the subtitle """
|
||||
try:
|
||||
sub = self._download_webpage(url, None, note=False)
|
||||
sub = self._download_subtitle_url(sub_lang, url)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
@@ -79,7 +82,11 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
returns {sub_lang: url} or {} if not available
|
||||
Must be redefined by the subclasses
|
||||
"""
|
||||
pass
|
||||
|
||||
# By default, allow implementations to simply pass in the result
|
||||
assert isinstance(webpage, dict), \
|
||||
'_get_available_subtitles not implemented'
|
||||
return webpage
|
||||
|
||||
def _get_available_automatic_caption(self, video_id, webpage):
|
||||
"""
|
||||
|
@@ -1,22 +1,23 @@
|
||||
#coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class ThisAVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
||||
_TEST = {
|
||||
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
|
||||
u"file": u"47734.flv",
|
||||
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
|
||||
u"info_dict": {
|
||||
u"title": u"高樹マリア - Just fit",
|
||||
u"uploader": u"dj7970",
|
||||
u"uploader_id": u"dj7970"
|
||||
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
|
||||
'md5': '0480f1ef3932d901f0e0e719f188f19b',
|
||||
'info_dict': {
|
||||
'id': '47734',
|
||||
'ext': 'flv',
|
||||
'title': '高樹マリア - Just fit',
|
||||
'uploader': 'dj7970',
|
||||
'uploader_id': 'dj7970'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
|
||||
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
|
||||
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
|
||||
uploader = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||
webpage, u'uploader name', fatal=False)
|
||||
webpage, 'uploader name', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||
webpage, u'uploader id', fatal=False)
|
||||
webpage, 'uploader id', fatal=False)
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': uploader,
|
||||
|
50
youtube_dl/extractor/tinypic.py
Normal file
50
youtube_dl/extractor/tinypic.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from youtube_dl.utils import ExtractorError
|
||||
|
||||
|
||||
class TinyPicIE(InfoExtractor):
|
||||
IE_NAME = 'tinypic'
|
||||
IE_DESC = 'tinypic.com videos'
|
||||
_VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
||||
'md5': '609b74432465364e72727ebc6203f044',
|
||||
'info_dict': {
|
||||
'id': '6xw7tc',
|
||||
'ext': 'flv',
|
||||
'title': 'shadow phenomenon weird',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
|
||||
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
file_id = mobj.group('fileid')
|
||||
server_id = mobj.group('serverid')
|
||||
|
||||
KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
|
||||
keywords = self._html_search_meta('keywords', webpage, 'title')
|
||||
title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
|
||||
|
||||
video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
|
||||
thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
|
||||
|
||||
return {
|
||||
'id': file_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title
|
||||
}
|
@@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,25 +11,25 @@ from ..utils import (
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
|
||||
IE_NAME = u'tou.tv'
|
||||
IE_NAME = 'tou.tv'
|
||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.tou.tv/30-vies/S04E41',
|
||||
u'file': u'30-vies_S04E41.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'30 vies Saison 4 / Épisode 41',
|
||||
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
u'age_limit': 8,
|
||||
u'uploader': u'Groupe des Nouveaux Médias',
|
||||
u'duration': 1296,
|
||||
u'upload_date': u'20131118',
|
||||
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||
'file': '30-vies_S04E41.mp4',
|
||||
'info_dict': {
|
||||
'title': '30 vies Saison 4 / Épisode 41',
|
||||
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
'age_limit': 8,
|
||||
'uploader': 'Groupe des Nouveaux Médias',
|
||||
'duration': 1296,
|
||||
'upload_date': '20131118',
|
||||
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True, # Requires rtmpdump
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
},
|
||||
u'skip': 'Only available in Canada'
|
||||
'skip': 'Only available in Canada'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mediaId = self._search_regex(
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
|
||||
|
||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_doc = self._download_xml(
|
||||
streams_url, video_id, note=u'Downloading stream list')
|
||||
streams_url, video_id, note='Downloading stream list')
|
||||
|
||||
video_url = next(n.text
|
||||
for n in streams_doc.findall('.//choice/url')
|
||||
if u'//ad.doubleclick' not in n.text)
|
||||
if '//ad.doubleclick' not in n.text)
|
||||
if video_url.endswith('/Unavailable.flv'):
|
||||
raise ExtractorError(
|
||||
u'Access to this video is blocked from outside of Canada',
|
||||
'Access to this video is blocked from outside of Canada',
|
||||
expected=True)
|
||||
|
||||
duration_str = self._html_search_meta(
|
||||
'video:duration', webpage, u'duration')
|
||||
'video:duration', webpage, 'duration')
|
||||
duration = int(duration_str) if duration_str else None
|
||||
upload_date_str = self._html_search_meta(
|
||||
'video:release_date', webpage, u'upload date')
|
||||
'video:release_date', webpage, 'upload date')
|
||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||
|
||||
return {
|
||||
|
@@ -1,17 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TrailerAddictIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||
u'file': u'76184.mp4',
|
||||
u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
|
||||
u'info_dict': {
|
||||
u"title": u"Prince Avalanche Trailer",
|
||||
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
|
||||
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||
'md5': '41365557f3c8c397d091da510e73ceb4',
|
||||
'info_dict': {
|
||||
'id': '76184',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prince Avalanche Trailer',
|
||||
'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,9 +26,15 @@ class TrailerAddictIE(InfoExtractor):
|
||||
|
||||
title = self._search_regex(r'<title>(.+?)</title>',
|
||||
webpage, 'video title').replace(' - Trailer Addict','')
|
||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
||||
webpage, 'Views Count')
|
||||
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
||||
view_count_str = self._search_regex(
|
||||
r'<span class="views_n">([0-9,.]+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
view_count = (
|
||||
None if view_count_str is None
|
||||
else int(view_count_str.replace(',', '')))
|
||||
video_id = self._search_regex(
|
||||
r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
|
||||
webpage, 'video id')
|
||||
|
||||
# Presence of (no)watchplus function indicates HD quality is available
|
||||
if re.search(r'function (no)?watchplus()', webpage):
|
||||
@@ -39,14 +49,16 @@ class TrailerAddictIE(InfoExtractor):
|
||||
info_webpage, 'Download url').replace('%3F','?')
|
||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||
info_webpage, 'thumbnail url')
|
||||
ext = final_url.split('.')[-1].split('?')[0]
|
||||
|
||||
return [{
|
||||
'id' : video_id,
|
||||
'url' : final_url,
|
||||
'ext' : ext,
|
||||
'title' : title,
|
||||
'thumbnail' : thumbnail_url,
|
||||
'description' : self._og_search_description(webpage),
|
||||
'view_count' : view_count,
|
||||
}]
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ from ..aes import (
|
||||
)
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
u'file': u'229795.mp4',
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -10,48 +12,48 @@ from ..utils import (
|
||||
|
||||
class UstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
||||
IE_NAME = u'ustream'
|
||||
IE_NAME = 'ustream'
|
||||
_TEST = {
|
||||
u'url': u'http://www.ustream.tv/recorded/20274954',
|
||||
u'file': u'20274954.flv',
|
||||
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
|
||||
u'info_dict': {
|
||||
u"uploader": u"Young Americans for Liberty",
|
||||
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
|
||||
}
|
||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||
'file': '20274954.flv',
|
||||
'md5': '088f151799e8f572f84eb62f17d73e5c',
|
||||
'info_dict': {
|
||||
"uploader": "Young Americans for Liberty",
|
||||
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('videoID')
|
||||
|
||||
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
|
||||
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||
webpage, u'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
return info
|
||||
|
||||
class UstreamChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
||||
IE_NAME = u'ustream:channel'
|
||||
IE_NAME = 'ustream:channel'
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
|
@@ -1,3 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,45 +13,44 @@ from ..utils import (
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
|
||||
"""Information Extractor for Vbox7"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
|
||||
_VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://vbox7.com/play:249bb972c2',
|
||||
u'file': u'249bb972c2.flv',
|
||||
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
|
||||
u'info_dict': {
|
||||
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
|
||||
}
|
||||
'url': 'http://vbox7.com/play:249bb972c2',
|
||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||
'info_dict': {
|
||||
'id': '249bb972c2',
|
||||
'ext': 'flv',
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self,url):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
||||
new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
|
||||
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
||||
redirect_page, 'redirect location')
|
||||
redirect_url = urlh.geturl() + new_location
|
||||
webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
|
||||
webpage = self._download_webpage(redirect_url, video_id,
|
||||
'Downloading redirect page')
|
||||
|
||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'title').split('/')[0].strip()
|
||||
webpage, 'title').split('/')[0].strip()
|
||||
|
||||
ext = "flv"
|
||||
info_url = "http://vbox7.com/play/magare.do"
|
||||
data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
|
||||
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
||||
info_request = compat_urllib_request.Request(info_url, data)
|
||||
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
|
||||
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
|
||||
if info_response is None:
|
||||
raise ExtractorError(u'Unable to extract the media url')
|
||||
raise ExtractorError('Unable to extract the media url')
|
||||
(final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}]
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
import datetime
|
||||
|
||||
@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
|
||||
vevo:)
|
||||
(?P<id>[^&?#]+)'''
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody to Die For",
|
||||
u"duration": 230.12,
|
||||
u"width": 1920,
|
||||
u"height": 1080,
|
||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
'file': 'GB1101300280.mp4',
|
||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||
'info_dict': {
|
||||
"upload_date": "20130624",
|
||||
"uploader": "Hurts",
|
||||
"title": "Somebody to Die For",
|
||||
"duration": 230.12,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
}
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
|
||||
if version['version'] > last_version['version']:
|
||||
last_version = version
|
||||
if last_version['version'] == -1:
|
||||
raise ExtractorError(u'Unable to extract last version of the video')
|
||||
raise ExtractorError('Unable to extract last version of the video')
|
||||
|
||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||
formats = []
|
||||
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
|
||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': u'SMIL_' + m.group('cbr'),
|
||||
'format_id': 'SMIL_' + m.group('cbr'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
||||
video_info = json.loads(info_json)['video']
|
||||
video_info = self._download_json(json_url, video_id)['video']
|
||||
|
||||
formats = self._formats_from_json(video_info)
|
||||
try:
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
smil_xml = self._download_webpage(smil_url, video_id,
|
||||
u'Downloading SMIL info')
|
||||
'Downloading SMIL info')
|
||||
formats.extend(self._formats_from_smil(smil_xml))
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError):
|
||||
raise
|
||||
self._downloader.report_warning(
|
||||
u'Cannot download SMIL information, falling back to JSON ..')
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'formats': formats,
|
||||
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
|
||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||
'duration': video_info['duration'],
|
||||
}
|
||||
|
||||
return info
|
||||
|
@@ -6,10 +6,10 @@ import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
clean_html,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
@@ -19,7 +19,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class VimeoIE(InfoExtractor):
|
||||
class VimeoIE(SubtitlesInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
@@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||
'note': 'Video with subtitles',
|
||||
'info_dict': {
|
||||
'id': '76979871',
|
||||
'ext': 'mp4',
|
||||
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||
'upload_date': '20131015',
|
||||
'uploader_id': 'staff',
|
||||
'uploader': 'Vimeo Staff',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
@@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
|
||||
if len(formats) == 0:
|
||||
raise ExtractorError('No known codec found')
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
if text_tracks:
|
||||
for tt in text_tracks:
|
||||
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
|
||||
|
||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': video_upload_date,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'formats': formats,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'subtitles': video_subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,18 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VineIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
|
||||
_TEST = {
|
||||
u'url': u'https://vine.co/v/b9KOOWX7HUx',
|
||||
u'file': u'b9KOOWX7HUx.mp4',
|
||||
u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
|
||||
u'info_dict': {
|
||||
u"uploader": u"Jack Dorsey",
|
||||
u"title": u"Chicken."
|
||||
}
|
||||
'url': 'https://vine.co/v/b9KOOWX7HUx',
|
||||
'md5': '2f36fed6235b16da96ce9b4dc890940d',
|
||||
'info_dict': {
|
||||
'id': 'b9KOOWX7HUx',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Jack Dorsey',
|
||||
'title': 'Chicken.',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -24,17 +27,17 @@ class VineIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
||||
webpage, u'video URL')
|
||||
video_url = self._html_search_meta('twitter:player:stream', webpage,
|
||||
'video URL')
|
||||
|
||||
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': uploader,
|
||||
}]
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
80
youtube_dl/extractor/vube.py
Normal file
80
youtube_dl/extractor/vube.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class VubeIE(InfoExtractor):
|
||||
IE_NAME = 'vube'
|
||||
IE_DESC = 'Vube.com'
|
||||
_VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'f81dcf6d0448e3291f54380181695821',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
|
||||
'uploader': 'Chiara.Grispo',
|
||||
'uploader_id': '1u3hX0znhP',
|
||||
'upload_date': '20140103',
|
||||
'duration': 170.56
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json('http://vube.com/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
public_id = video['public_id']
|
||||
|
||||
formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
||||
'height': int(fmt['height']),
|
||||
'abr': int(fmt['audio_bitrate']),
|
||||
'vbr': int(fmt['video_bitrate']),
|
||||
'format_id': fmt['media_resolution_id']
|
||||
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed']
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = video['thumbnail_src']
|
||||
if thumbnail.startswith('//'):
|
||||
thumbnail = 'http:' + thumbnail
|
||||
uploader = video['user_alias']
|
||||
uploader_id = video['user_url_id']
|
||||
upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d')
|
||||
duration = video['duration']
|
||||
view_count = video['raw_view_count']
|
||||
like_count = video['total_likes']
|
||||
dislike_count= video['total_hates']
|
||||
|
||||
comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id,
|
||||
video_id, 'Downloading video comment JSON')
|
||||
|
||||
comment_count = comment['total']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
@@ -34,6 +34,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
write_json_file,
|
||||
uppercase_escape,
|
||||
)
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
@@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||
(?:www\.)?pwnyoutube\.com|
|
||||
(?:www\.)?pwnyoutube\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
@@ -502,7 +503,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return a % b
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
if m:
|
||||
fname = m.group('func')
|
||||
if fname not in functions:
|
||||
@@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@classmethod
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group(2)
|
||||
@@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
mobj = re.search(self._NEXT_URL_RE, url)
|
||||
if mobj:
|
||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||
video_id = self._extract_id(url)
|
||||
video_id = self.extract_id(url)
|
||||
|
||||
# Get video webpage
|
||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
@@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com playlists'
|
||||
_VALID_URL = r"""(?:
|
||||
_VALID_URL = r"""(?x)(?:
|
||||
(?:https?://)?
|
||||
(?:\w+\.)?
|
||||
youtube\.com/
|
||||
@@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
||||
(
|
||||
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
|
||||
# Top tracks, they can also include dots
|
||||
|(?:MC)[\w\.]*
|
||||
)
|
||||
.*
|
||||
|
|
||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||
@@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||
IE_NAME = u'youtube:playlist'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
# Download all channel pages using the json-based channel_ajax query
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
|
||||
page = json.loads(page)
|
||||
|
||||
page = self._download_json(
|
||||
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||
transform_source=uppercase_escape)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
video_ids.extend(ids_in_page)
|
||||
|
||||
|
@@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
|
||||
def unified_strdate(date_str):
|
||||
"""Return a string with the date in the format YYYYMMDD"""
|
||||
upload_date = None
|
||||
#Replace commas
|
||||
date_str = date_str.replace(',',' ')
|
||||
date_str = date_str.replace(',', ' ')
|
||||
# %z (UTC offset) is only supported in python>=3.2
|
||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
||||
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
|
||||
format_expressions = [
|
||||
'%d %B %Y',
|
||||
'%B %d %Y',
|
||||
@@ -771,11 +772,12 @@ def unified_strdate(date_str):
|
||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||
'%Y-%m-%dT%H:%M:%S',
|
||||
'%Y-%m-%dT%H:%M',
|
||||
]
|
||||
for expression in format_expressions:
|
||||
try:
|
||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
except:
|
||||
except ValueError:
|
||||
pass
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
@@ -1212,3 +1214,9 @@ class PagedList(object):
|
||||
if end == nextfirstid:
|
||||
break
|
||||
return res
|
||||
|
||||
|
||||
def uppercase_escape(s):
|
||||
return re.sub(
|
||||
r'\\U([0-9a-fA-F]{8})',
|
||||
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.01.30.2'
|
||||
__version__ = '2014.02.10'
|
||||
|
Reference in New Issue
Block a user