Compare commits

2014.01.30 ... 2014.02.06 (87 commits)

df872ec4e7, 5de90176d9, dcf3eec47a, e9e4f30d26, 83cebd73d4, 1df4229bd7,
3c995527e9, 7c62b568a2, ccf9114e84, d8061908bb, 211e17dd43, 6cb38a9994,
fa7df757a7, 8c82077619, e5d1f9e50a, 7ee50ae7b5, de563c9da0, 50451f2a18,
9bc70948e1, 5dc733f071, bc4850908c, 20650c8654, 56dced2670, eef726c04b,
acf1555d76, 22e7f1a6ec, 3c49325658, bb1cd2bea1, fdf1f8d4ce, 117c8c6b97,
5cef4ff09b, 91264ce572, c79ef8e1ae, 58d915df51, 7881a64499, 90159f5561,
99877772d0, b0268cb6ce, 4edff4cfa8, 1eac553e7e, 9d3ac7444d, 588128d054,
8e93b9b9aa, b4bcffefa3, 2b39af9b4f, 23fe495feb, b5dbe89bba, dbe80ca7ad,
009a3408f5, b58e3c8918, 56b6faf91e, 7ac1f877a7, d55433bbfd, f0ce2bc1c5,
c3bc00b90e, ff6b7b049b, f46359121f, 37c1525c17, c85e4cf7b4, c66dcda287,
6d845922ab, 2949cbe036, c3309a7774, 7aed837595, 0eb799bae9, 4baff4a4ae,
45d7bc2f8b, c0c2ddddcd, a96ed91610, c1206423c4, 659aa21ba1, efd02e858a,
3bf8bc7f37, 8ccda826d5, b9381e43c2, fcdea2666d, c4db377cbb, 90dc5e8693,
c81a855b0f, c8d8ec8567, 4f879a5be0, 1a0648b4a9, 3c1b4669d0, 24b3d5e538,
ab083b08ab, 89acb96927, d1b30713fb
README.md (12 changed lines)
@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
                                      from google videos for youtube-dl "large
                                      apple". By default (with value "auto")
                                      youtube-dl guesses.
+    --ignore-config                  Do not read configuration files. When given
+                                     in the global configuration file /etc
+                                     /youtube-dl.conf: do not read the user
+                                     configuration in ~/.config/youtube-dl.conf
+                                     (%APPDATA%/youtube-dl/config.txt on
+                                     Windows)
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
 
 To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 
-# BUILD INSTRUCTIONS
+# DEVELOPER INSTRUCTIONS
 
 Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
 * zip
 * nosetests
 
+### Adding support for a new site
+
+If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+
 # BUGS
 
 Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
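The firstpost.py and elpais.py modules added later in this comparison follow exactly the pattern that new section describes. As a rough sketch, a minimal new extractor could look like the following; the site name, URL pattern and markup regex are hypothetical placeholders, only the helper methods are real InfoExtractor API.

```python
# Hypothetical skeleton of a new extractor module; _download_webpage,
# _html_search_regex and the _og_search_* helpers are existing API, while
# the site, URL scheme and <source> regex are made up for illustration.
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class ExampleSiteIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?example-site\.invalid/videos/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Fetch the page and scrape the direct media URL out of the markup.
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', webpage, 'video URL')

        # Return the fields documented in youtube_dl/extractor/common.py.
        return {
            'id': video_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }
```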
test/test_download.py

@@ -22,6 +22,7 @@ import socket
 
 import youtube_dl.YoutubeDL
 from youtube_dl.utils import (
+    compat_http_client,
     compat_str,
     compat_urllib_error,
     compat_HTTPError,
@@ -110,7 +111,7 @@ def generator(test_case):
                 ydl.download([test_case['url']])
             except (DownloadError, ExtractorError) as err:
                 # Check if the exception is not a network related one
-                if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
+                if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                     raise
 
                 if try_num == RETRIES:
test/test_playlists.py

@@ -34,6 +34,8 @@ from youtube_dl.extractor import (
     KhanAcademyIE,
     EveryonesMixtapeIE,
     RutubeChannelIE,
+    GoogleSearchIE,
+    GenericIE,
 )
 
 
@@ -229,6 +231,24 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], '1409')
         self.assertTrue(len(result['entries']) >= 34)
 
+    def test_multiple_brightcove_videos(self):
+        # https://github.com/rg3/youtube-dl/issues/2283
+        dl = FakeYDL()
+        ie = GenericIE(dl)
+        result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
+        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
+        self.assertEqual(len(result['entries']), 3)
+
+    def test_GoogleSearch(self):
+        dl = FakeYDL()
+        ie = GoogleSearchIE(dl)
+        result = ie.extract('gvsearch15:python language')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'python language')
+        self.assertEqual(result['title'], 'python language')
+        self.assertTrue(len(result['entries']) == 15)
+
 if __name__ == '__main__':
     unittest.main()
test/test_subtitles.py

@@ -10,9 +10,11 @@ from test.helper import FakeYDL, md5
 
 
 from youtube_dl.extractor import (
+    BlipTVIE,
     YoutubeIE,
     DailymotionIE,
     TEDIE,
+    VimeoIE,
 )
 
 
@@ -202,5 +204,80 @@ class TestTedSubtitles(BaseTestSubtitles):
         for lang in langs:
             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 
+
+class TestBlipTVSubtitles(BaseTestSubtitles):
+    url = 'http://blip.tv/a/a-6603250'
+    IE = BlipTVIE
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_allsubtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
+
+
+class TestVimeoSubtitles(BaseTestSubtitles):
+    url = 'http://vimeo.com/76979871'
+    IE = VimeoIE
+
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
+        self.url = 'http://vimeo.com/56015672'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+
 if __name__ == '__main__':
     unittest.main()
test/test_youtube_signature.py

@@ -27,6 +27,12 @@ _TESTS = [
         85,
         u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
+        u'js',
+        90,
+        u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
+    ),
 ]
youtube-dl.plugin.zsh (new file, 24 lines)

@@ -0,0 +1,24 @@
+# This allows the youtube-dl command to be installed in ZSH using antigen.
+# Antigen is a bundle manager. It allows you to enhance the functionality of
+# your zsh session by installing bundles and themes easily.
+
+# Antigen documentation:
+# http://antigen.sharats.me/
+# https://github.com/zsh-users/antigen
+
+# Install youtube-dl:
+# antigen bundle rg3/youtube-dl
+# Bundles installed by antigen are available for use immediately.
+
+# Update youtube-dl (and all other antigen bundles):
+# antigen update
+
+# The antigen command will download the git repository to a folder and then
+# execute an enabling script (this file). The complete process for loading the
+# code is documented here:
+# https://github.com/zsh-users/antigen#notes-on-writing-plugins
+
+# This specific script just aliases youtube-dl to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
youtube_dl/__init__.py

@@ -100,6 +100,43 @@ def parseOpts(overrideArguments=None):
         optionf.close()
         return res
 
+    def _readUserConf():
+        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+        if xdg_config_home:
+            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
+            if not os.path.isfile(userConfFile):
+                userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
+        else:
+            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+            if not os.path.isfile(userConfFile):
+                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+        userConf = _readOptions(userConfFile, None)
+
+        if userConf is None:
+            appdata_dir = os.environ.get('appdata')
+            if appdata_dir:
+                userConf = _readOptions(
+                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
+                    default=None)
+                if userConf is None:
+                    userConf = _readOptions(
+                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+                        default=None)
+
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                default=None)
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                default=None)
+
+        if userConf is None:
+            userConf = []
+
+        return userConf
+
     def _format_option_string(option):
         ''' ('-o', '--option') -> -o, --format METAVAR'''
 
@@ -203,6 +240,11 @@ def parseOpts(overrideArguments=None):
     general.add_option('--default-search',
             dest='default_search', metavar='PREFIX',
             help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+    general.add_option(
+        '--ignore-config',
+        action='store_true',
+        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+
 
     selection.add_option(
         '--playlist-start',
@@ -457,44 +499,18 @@ def parseOpts(overrideArguments=None):
         if opts.verbose:
             write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
-        systemConf = _readOptions('/etc/youtube-dl.conf')
-
-        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
-        if xdg_config_home:
-            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
-        else:
-            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        userConf = _readOptions(userConfFile, None)
-
-        if userConf is None:
-            appdata_dir = os.environ.get('appdata')
-            if appdata_dir:
-                userConf = _readOptions(
-                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
-                    default=None)
-                if userConf is None:
-                    userConf = _readOptions(
-                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
-                        default=None)
-
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
-                default=None)
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
-                default=None)
-
-        if userConf is None:
-            userConf = []
-
         commandLineConf = sys.argv[1:]
+        if '--ignore-config' in commandLineConf:
+            systemConf = []
+            userConf = []
+        else:
+            systemConf = _readOptions('/etc/youtube-dl.conf')
+            if '--ignore-config' in systemConf:
+                userConf = []
+            else:
+                userConf = _readUserConf()
         argv = systemConf + userConf + commandLineConf
 
         opts, args = parser.parse_args(argv)
         if opts.verbose:
             write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
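Read together, the new _readUserConf() helper and the --ignore-config branches boil down to a simple precedence rule. The following condensed sketch is not the shipped code, just the same decision table written as one standalone function:

```python
# Condensed sketch of the precedence the new option handling implements;
# command-line arguments are appended last, so they always win over
# configuration files.
def effective_argv(system_conf, user_conf, command_line):
    if '--ignore-config' in command_line:
        return command_line                    # no config file is read at all
    if '--ignore-config' in system_conf:
        return system_conf + command_line      # only the user config is skipped
    return system_conf + user_conf + command_line
```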
youtube_dl/extractor/__init__.py

@@ -54,12 +54,14 @@ from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
+from .elpais import ElPaisIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
+from .firstpost import FirstpostIE
 from .fktv import (
     FKTVIE,
     FKTVPosteckeIE,
@@ -96,6 +98,7 @@ from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
 from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import IPrimaIE
 from .ivi import (
     IviIE,
     IviCompilationIE
@@ -110,12 +113,14 @@ from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .la7 import LA7IE
+from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE, LivestreamOriginalIE
 from .lynda import (
     LyndaIE,
     LyndaCourseIE
 )
+from .m6 import M6IE
 from .macgamestore import MacGameStoreIE
 from .malemotion import MalemotionIE
 from .mdr import MDRIE
@@ -125,6 +130,7 @@ from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
+from .mooshare import MooshareIE
 from .mtv import (
     MTVIE,
     MTVIggyIE,
@@ -141,6 +147,7 @@ from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
+from .normalboots import NormalbootsIE
 from .novamov import NovamovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
@@ -198,6 +205,7 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
+from .tinypic import TinyPicIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
@@ -228,6 +236,7 @@ from .vimeo import (
 from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
+from .vube import VubeIE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
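Each new module only becomes reachable through an import line like the ones above. A quick sanity check, assuming the gen_extractors() helper that youtube_dl.extractor exposed at the time, is to list the registered IE names:

```python
# Assumes youtube_dl.extractor.gen_extractors() exists with this behaviour;
# it enumerates one instance of every *IE class imported in __init__.py.
from youtube_dl.extractor import gen_extractors

ie_names = [ie.IE_NAME for ie in gen_extractors()]
# True once "from .firstpost import FirstpostIE" is present in __init__.py
print('Firstpost.com' in ie_names)
```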
youtube_dl/extractor/bliptv.py

@@ -1,128 +1,137 @@
 from __future__ import unicode_literals
 
 import datetime
-import json
 import re
-import socket
 
 from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
-    compat_http_client,
     compat_str,
-    compat_urllib_error,
     compat_urllib_request,
-    ExtractorError,
     unescapeHTML,
 )
 
 
-class BlipTVIE(InfoExtractor):
+class BlipTVIE(SubtitlesInfoExtractor):
     """Information extractor for blip.tv"""
 
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
+    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
-        'file': '5779306.mov',
         'md5': 'c6934ad0b6acf2bd920720ec888eb812',
         'info_dict': {
+            'id': '5779306',
+            'ext': 'mov',
             'upload_date': '20111205',
             'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
             'uploader': 'Comic Book Resources - CBR TV',
             'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
         }
-    }
-
-    def report_direct_download(self, title):
-        """Report information extraction."""
-        self.to_screen('%s: Direct download detected' % title)
+    }, {
+        # https://github.com/rg3/youtube-dl/pull/2274
+        'note': 'Video with subtitles',
+        'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
+        'md5': '309f9d25b820b086ca163ffac8031806',
+        'info_dict': {
+            'id': '6586561',
+            'ext': 'mp4',
+            'uploader': 'Red vs. Blue',
+            'description': 'One-Zero-One',
+            'upload_date': '20130614',
+            'title': 'Red vs. Blue Season 11 Episode 1',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        presumptive_id = mobj.group('presumptive_id')
 
         # See https://github.com/rg3/youtube-dl/issues/857
-        embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
+        embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
         if embed_mobj:
             info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
             info_page = self._download_webpage(info_url, embed_mobj.group(1))
-            video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
+            video_id = self._search_regex(
+                r'data-episode-id="([0-9]+)', info_page, 'video_id')
             return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
 
-        if '?' in url:
-            cchar = '&'
-        else:
-            cchar = '?'
+        cchar = '&' if '?' in url else '?'
         json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
         request = compat_urllib_request.Request(json_url)
         request.add_header('User-Agent', 'iTunes/10.6.1')
-        self.report_extraction(mobj.group(1))
-        urlh = self._request_webpage(request, None, False,
-            'unable to download video info webpage')
-
-        try:
-            json_code_bytes = urlh.read()
-            json_code = json_code_bytes.decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
-
-        try:
-            json_data = json.loads(json_code)
-            if 'Post' in json_data:
-                data = json_data['Post']
-            else:
-                data = json_data
+        json_data = self._download_json(request, video_id=presumptive_id)
 
-            upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
-            formats = []
-            if 'additionalMedia' in data:
-                for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
-                    if not int(f['media_width']):  # filter m3u8
-                        continue
-                    formats.append({
-                        'url': f['url'],
-                        'format_id': f['role'],
-                        'width': int(f['media_width']),
-                        'height': int(f['media_height']),
-                    })
-            else:
+        if 'Post' in json_data:
+            data = json_data['Post']
+        else:
+            data = json_data
+
+        video_id = compat_str(data['item_id'])
+        upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
+        subtitles = {}
+        formats = []
+        if 'additionalMedia' in data:
+            for f in data['additionalMedia']:
+                if f.get('file_type_srt') == 1:
+                    LANGS = {
+                        'english': 'en',
+                    }
+                    lang = f['role'].rpartition('-')[-1].strip().lower()
+                    langcode = LANGS.get(lang, lang)
+                    subtitles[langcode] = f['url']
+                    continue
+                if not int(f['media_width']):  # filter m3u8
+                    continue
                 formats.append({
-                    'url': data['media']['url'],
-                    'width': int(data['media']['width']),
-                    'height': int(data['media']['height']),
+                    'url': f['url'],
+                    'format_id': f['role'],
+                    'width': int(f['media_width']),
+                    'height': int(f['media_height']),
                 })
+        else:
+            formats.append({
+                'url': data['media']['url'],
+                'width': int(data['media']['width']),
+                'height': int(data['media']['height']),
+            })
+        self._sort_formats(formats)
 
-            self._sort_formats(formats)
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, subtitles)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
 
-            return {
-                'id': compat_str(data['item_id']),
-                'uploader': data['display_name'],
-                'upload_date': upload_date,
-                'title': data['title'],
-                'thumbnail': data['thumbnailUrl'],
-                'description': data['description'],
-                'user_agent': 'iTunes/10.6.1',
-                'formats': formats,
-            }
-        except (ValueError, KeyError) as err:
-            raise ExtractorError('Unable to parse video information: %s' % repr(err))
+        return {
+            'id': video_id,
+            'uploader': data['display_name'],
+            'upload_date': upload_date,
+            'title': data['title'],
+            'thumbnail': data['thumbnailUrl'],
+            'description': data['description'],
+            'user_agent': 'iTunes/10.6.1',
+            'formats': formats,
+            'subtitles': video_subtitles,
+        }
+
+    def _download_subtitle_url(self, sub_lang, url):
+        # For some weird reason, blip.tv serves a video instead of subtitles
+        # when we request with a common UA
+        req = compat_urllib_request.Request(url)
+        req.add_header('Youtubedl-user-agent', 'youtube-dl')
+        return self._download_webpage(req, None, note=False)
 
 
 class BlipTVUserIE(InfoExtractor):
-    """Information Extractor for blip.tv users."""
-
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
     _PAGE_SIZE = 12
     IE_NAME = 'blip.tv:user'
 
     def _real_extract(self, url):
-        # Extract username
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-
         username = mobj.group(1)
 
         page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
@@ -131,7 +140,6 @@ class BlipTVUserIE(InfoExtractor):
         mobj = re.search(r'data-users-id="([^"]+)"', page)
         page_base = page_base % mobj.group(1)
-
         # Download video ids using BlipTV Ajax calls. Result size per
         # query is limited (currently to 12 videos) so we need to query
         # page by page until there are no video ids - it means we got
@@ -142,8 +150,8 @@ class BlipTVUserIE(InfoExtractor):
 
         while True:
             url = page_base + "&page=" + str(pagenum)
-            page = self._download_webpage(url, username,
-                                          'Downloading video ids from page %d' % pagenum)
+            page = self._download_webpage(
+                url, username, 'Downloading video ids from page %d' % pagenum)
 
             # Extract video identifiers
             ids_in_page = []
@@ -167,4 +175,4 @@ class BlipTVUserIE(InfoExtractor):
 
         urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
         url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
-        return [self.playlist_result(url_entries, playlist_title = username)]
+        return [self.playlist_result(url_entries, playlist_title=username)]
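The interesting part of the new BlipTVIE code is how a subtitle entry's 'role' field is turned into a language code before the URL is stored. A small standalone illustration follows; the role string is an assumed example of the format, not a value taken from a verified API response:

```python
# Mirrors the mapping logic in the new subtitle branch of BlipTVIE;
# 'Subtitles-english' is an assumed example value for f['role'].
LANGS = {
    'english': 'en',
}

def subtitle_langcode(role):
    # Take the text after the last '-', normalize it, then map known names.
    lang = role.rpartition('-')[-1].strip().lower()
    return LANGS.get(lang, lang)

print(subtitle_langcode('Subtitles-english'))  # en
print(subtitle_langcode('Subtitles-Deutsch'))  # deutsch (unknown names pass through)
```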
youtube_dl/extractor/brightcove.py

@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
 
     @classmethod
     def _extract_brightcove_url(cls, webpage):
-        """Try to extract the brightcove url from the wepbage, returns None
+        """Try to extract the brightcove url from the webpage, returns None
         if it can't be found
         """
+        urls = cls._extract_brightcove_urls(webpage)
+        return urls[0] if urls else None
+
+    @classmethod
+    def _extract_brightcove_urls(cls, webpage):
+        """Return a list of all Brightcove URLs from the webpage """
+
         url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
         if url_m:
-            return url_m.group(1)
+            return [url_m.group(1)]
 
-        m_brightcove = re.search(
+        matches = re.findall(
             r'''(?sx)<object
             (?:
-                [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
             ).+?</object>''',
             webpage)
-        if m_brightcove is not None:
-            return cls._build_brighcove_url(m_brightcove.group())
-        else:
-            return None
+        return [cls._build_brighcove_url(m) for m in matches]
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
youtube_dl/extractor/cliphunter.py

@@ -1,12 +1,9 @@
 from __future__ import unicode_literals
 
 import re
-import string
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 translation_table = {
     'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
youtube_dl/extractor/collegehumor.py

@@ -4,6 +4,7 @@ import json
 import re
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class CollegeHumorIE(InfoExtractor):
@@ -11,24 +12,45 @@ class CollegeHumorIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
-        'file': '6902724.mp4',
         'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
         'info_dict': {
+            'id': '6902724',
+            'ext': 'mp4',
             'title': 'Comic-Con Cosplay Catastrophe',
-            'description': 'Fans get creative this year at San Diego. Too',
+            'description': 'Fans get creative this year',
             'age_limit': 13,
         },
     },
     {
         'url': 'http://www.collegehumor.com/video/3505939/font-conference',
-        'file': '3505939.mp4',
         'md5': '72fa701d8ef38664a4dbb9e2ab721816',
         'info_dict': {
+            'id': '3505939',
+            'ext': 'mp4',
             'title': 'Font Conference',
-            'description': 'This video wasn\'t long enough, so we made it double-spaced.',
+            'description': 'This video wasn\'t long enough,',
             'age_limit': 10,
+            'duration': 179,
         },
-    }]
+    },
+    # embedded youtube video
+    {
+        'url': 'http://www.collegehumor.com/embed/6950457',
+        'info_dict': {
+            'id': 'W5gMp3ZjYg4',
+            'ext': 'mp4',
+            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
+            'uploader': 'Funnyplox TV',
+            'uploader_id': 'funnyploxtv',
+            'description': 'md5:11812366244110c3523968aa74f02521',
+            'upload_date': '20140128',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Youtube'],
+    },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -38,6 +60,12 @@ class CollegeHumorIE(InfoExtractor):
         data = json.loads(self._download_webpage(
             jsonUrl, video_id, 'Downloading info JSON'))
         vdata = data['video']
+        if vdata.get('youtubeId') is not None:
+            return {
+                '_type': 'url',
+                'url': vdata['youtubeId'],
+                'ie_key': 'Youtube',
+            }
 
         AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
         rating = vdata.get('rating')
@@ -49,7 +77,7 @@ class CollegeHumorIE(InfoExtractor):
         PREFS = {'high_quality': 2, 'low_quality': 0}
         formats = []
         for format_key in ('mp4', 'webm'):
-            for qname, qurl in vdata[format_key].items():
+            for qname, qurl in vdata.get(format_key, {}).items():
                 formats.append({
                     'format_id': format_key + '_' + qname,
                     'url': qurl,
@@ -58,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
+        duration = int_or_none(vdata.get('duration'), 1000)
+
         return {
             'id': video_id,
             'title': vdata['title'],
@@ -65,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
             'thumbnail': vdata.get('thumbnail'),
             'formats': formats,
             'age_limit': age_limit,
+            'duration': duration,
         }
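The new embedded-YouTube branch returns a 'url'-type result, which tells the framework to re-dispatch extraction to another extractor. It is the literal form of the dictionary that InfoExtractor.url_result() builds, the same helper the blip.tv code in this comparison uses. A standalone sketch of that dictionary shape:

```python
# Sketch of the delegation dictionary; InfoExtractor.url_result() in
# common.py builds an equivalent structure from its arguments.
def url_result(url, ie=None):
    result = {
        '_type': 'url',
        'url': url,
    }
    if ie is not None:
        result['ie_key'] = ie
    return result

# Equivalent to the dict returned when vdata.get('youtubeId') is set:
# url_result(vdata['youtubeId'], ie='Youtube')
```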
youtube_dl/extractor/common.py

@@ -399,7 +399,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
         property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [
youtube_dl/extractor/cspan.py

@@ -1,49 +1,60 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     unescapeHTML,
+    find_xpath_attr,
 )
 
 
 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
+    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
     IE_DESC = 'C-SPAN'
     _TEST = {
-        'url': 'http://www.c-spanvideo.org/program/HolderonV',
-        'file': '315139.mp4',
+        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
         'md5': '8e44ce11f0f725527daccc453f553eb0',
         'info_dict': {
+            'id': '315139',
+            'ext': 'mp4',
             'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
-            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
+            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
         'skip': 'Regularly fails on travis, for unknown reasons',
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        prog_name = mobj.group('name')
-        webpage = self._download_webpage(url, prog_name)
-        video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
+        page_id = mobj.group('id')
+        webpage = self._download_webpage(url, page_id)
+        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
 
-        title = self._html_search_regex(
-            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._html_search_regex(
+            [
+                # The full description
+                r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
+                # If the description is small enough the other div is not
+                # present, otherwise this is a stripped version
+                r'<p class=\'initial\'>(.*?)</p>'
+            ],
+            webpage, 'description', flags=re.DOTALL)
 
         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
-        data_json = self._download_webpage(
-            info_url, video_id, 'Downloading video info')
-        data = json.loads(data_json)
+        data = self._download_json(info_url, video_id)
 
         url = unescapeHTML(data['video']['files'][0]['path']['#text'])
 
+        doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
+                                 video_id)
+
+        def find_string(s):
+            return find_xpath_attr(doc, './/string', 'name', s).text
+
         return {
             'id': video_id,
-            'title': title,
+            'title': find_string('title'),
             'url': url,
             'description': description,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': find_string('poster'),
         }
youtube_dl/extractor/elpais.py (new file, 58 lines)

@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class ElPaisIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
+    IE_DESCR = 'El País'
+
+    _TEST = {
+        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
+        'md5': '98406f301f19562170ec071b83433d55',
+        'info_dict': {
+            'id': 'tiempo-nuevo-recetas-viejas',
+            'ext': 'mp4',
+            'title': 'Tiempo nuevo, recetas viejas',
+            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
+            'upload_date': '20140206',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        prefix = self._html_search_regex(
+            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+        video_suffix = self._search_regex(
+            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+        video_url = prefix + video_suffix
+        thumbnail_suffix = self._search_regex(
+            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
+            fatal=False)
+        thumbnail = (
+            None if thumbnail_suffix is None
+            else prefix + thumbnail_suffix)
+        title = self._html_search_regex(
+            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            webpage, 'title')
+        date_str = self._search_regex(
+            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
+            webpage, 'upload date', fatal=False)
+        upload_date = (None if date_str is None else unified_strdate(date_str))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
youtube_dl/extractor/firstpost.py (new file, 38 lines)

@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FirstpostIE(InfoExtractor):
+    IE_NAME = 'Firstpost.com'
+    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
+
+    _TEST = {
+        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
+        'md5': 'ee9114957692f01fb1263ed87039112a',
+        'info_dict': {
+            'id': '1025403',
+            'ext': 'mp4',
+            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
+            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
+            webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
youtube_dl/extractor/francetv.py

@@ -1,4 +1,7 @@
 # encoding: utf-8
+
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -30,7 +33,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
 
 
 class PluzzIE(FranceTVBaseInfoExtractor):
-    IE_NAME = u'pluzz.francetv.fr'
+    IE_NAME = 'pluzz.francetv.fr'
     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
 
     # Can't use tests, videos expire in 7 days
@@ -44,17 +47,17 @@ class PluzzIE(FranceTVBaseInfoExtractor):
 
 
 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
-    IE_NAME = u'francetvinfo.fr'
+    IE_NAME = 'francetvinfo.fr'
     _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
 
     _TEST = {
-        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
-        u'file': u'84981923.mp4',
-        u'info_dict': {
-            u'title': u'Soir 3',
+        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+        'file': '84981923.mp4',
+        'info_dict': {
+            'title': 'Soir 3',
         },
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
         },
     }
 
@@ -62,13 +65,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
-        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
+        video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
         return self._extract_video(video_id)
 
 
 class FranceTVIE(FranceTVBaseInfoExtractor):
-    IE_NAME = u'francetv'
-    IE_DESC = u'France 2, 3, 4, 5 and Ô'
+    IE_NAME = 'francetv'
+    IE_DESC = 'France 2, 3, 4, 5 and Ô'
     _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
         (?:
             emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
@@ -78,73 +81,73 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
     _TESTS = [
         # france2
         {
-            u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
-            u'file': u'75540104.mp4',
-            u'info_dict': {
-                u'title': u'13h15, le samedi...',
-                u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+            'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+            'file': '75540104.mp4',
+            'info_dict': {
+                'title': '13h15, le samedi...',
+                'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
             },
-            u'params': {
+            'params': {
                 # m3u8 download
-                u'skip_download': True,
+                'skip_download': True,
             },
         },
         # france3
         {
-            u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
-            u'info_dict': {
-                u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
-                u'ext': u'flv',
-                u'title': u'Le scandale du prix des médicaments',
-                u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+            'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+            'info_dict': {
+                'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+                'ext': 'flv',
+                'title': 'Le scandale du prix des médicaments',
+                'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
             },
-            u'params': {
+            'params': {
                 # rtmp download
-                u'skip_download': True,
+                'skip_download': True,
             },
         },
         # france4
         {
-            u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-            u'info_dict': {
-                u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-                u'ext': u'flv',
-                u'title': u'Hero Corp Making of - Extrait 1',
-                u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+            'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+            'info_dict': {
+                'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+                'ext': 'flv',
+                'title': 'Hero Corp Making of - Extrait 1',
+                'description': 'md5:c87d54871b1790679aec1197e73d650a',
            },
-            u'params': {
+            'params': {
                 # rtmp download
-                u'skip_download': True,
+                'skip_download': True,
             },
         },
         # france5
         {
-            u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
-            u'info_dict': {
-                u'id': u'92837968',
-                u'ext': u'mp4',
-                u'title': u'C à dire ?!',
-                u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+            'info_dict': {
+                'id': '92837968',
+                'ext': 'mp4',
+                'title': 'C à dire ?!',
+                'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
             },
-            u'params': {
+            'params': {
                 # m3u8 download
-                u'skip_download': True,
+                'skip_download': True,
             },
         },
         # franceo
         {
-            u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
-            u'info_dict': {
-                u'id': u'92327925',
-                u'ext': u'mp4',
-                u'title': u'Infô-Afrique',
-                u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+            'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+            'info_dict': {
+                'id': '92327925',
+                'ext': 'mp4',
+                'title': 'Infô-Afrique',
+                'description': 'md5:ebf346da789428841bee0fd2a935ea55',
             },
-            u'params': {
+            'params': {
                 # m3u8 download
-                u'skip_download': True,
+                'skip_download': True,
             },
-            u'skip': u'The id changes frequently',
+            'skip': 'The id changes frequently',
         },
     ]
 
@@ -160,26 +163,26 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
                     '\.fr/\?id-video=([^"/&]+)'),
                 (r'<a class="video" id="ftv_player_(.+?)"'),
             ]
-            video_id = self._html_search_regex(id_res, webpage, u'video ID')
+            video_id = self._html_search_regex(id_res, webpage, 'video ID')
         else:
             video_id = mobj.group('id')
         return self._extract_video(video_id)
 
 
 class GenerationQuoiIE(InfoExtractor):
-    IE_NAME = u'france2.fr:generation-quoi'
+    IE_NAME = 'france2.fr:generation-quoi'
     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
 
     _TEST = {
-        u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
-        u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
-        u'info_dict': {
-            u'title': u'Génération Quoi - Garde à Vous',
-            u'uploader': u'Génération Quoi',
+        'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+        'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
+        'info_dict': {
+            'title': 'Génération Quoi - Garde à Vous',
+            'uploader': 'Génération Quoi',
        },
-        u'params': {
+        'params': {
             # It uses Dailymotion
-            u'skip_download': True,
+            'skip_download': True,
        },
     }
 
@@ -194,20 +197,20 @@ class GenerationQuoiIE(InfoExtractor):
 
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
-    IE_NAME = u'culturebox.francetvinfo.fr'
+    IE_NAME = 'culturebox.francetvinfo.fr'
     _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'EV_6785',
|
'id': 'EV_6785',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Einstein on the beach au Théâtre du Châtelet',
|
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||||
u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,5 +218,5 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
|
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
@@ -38,18 +38,6 @@ class GenericIE(InfoExtractor):
 'title': 'R\u00e9gis plante sa Jeep',
 }
 },
-# embedded vimeo video
-{
-'add_ie': ['Vimeo'],
-'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
-'file': '22444065.mp4',
-'md5': '2903896e23df39722c33f015af0666e2',
-'info_dict': {
-'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
-'uploader_id': 'skillsmatter',
-'uploader': 'Skills Matter',
-}
-},
 # bandcamp page with custom domain
 {
 'add_ie': ['Bandcamp'],
@@ -246,11 +234,21 @@ class GenericIE(InfoExtractor):
 r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

 # Look for BrightCove:
-bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
-if bc_url is not None:
+if bc_urls:
 self.to_screen('Brightcove video detected.')
-surl = smuggle_url(bc_url, {'Referer': url})
-return self.url_result(surl, 'Brightcove')
+entries = [{
+'_type': 'url',
+'url': smuggle_url(bc_url, {'Referer': url}),
+'ie_key': 'Brightcove'
+} for bc_url in bc_urls]
+
+return {
+'_type': 'playlist',
+'title': video_title,
+'id': video_id,
+'entries': entries,
+}

 # Look for embedded (iframe) Vimeo player
 mobj = re.search(
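The hunk above turns the single Brightcove result into a playlist of url entries, each carrying the referring page through smuggle_url. A minimal sketch of that hand-off, assuming the smuggle_url/unsmuggle_url pair from youtube_dl.utils (the page and player URLs below are made up):

# Illustration only: pass extra data (here a Referer) along with a URL.
from youtube_dl.utils import smuggle_url, unsmuggle_url

page_url = 'http://example.com/article'          # hypothetical referring page
bc_urls = ['http://c.brightcove.com/viewer/1',   # hypothetical Brightcove player URLs
           'http://c.brightcove.com/viewer/2']

entries = [{
    '_type': 'url',
    'url': smuggle_url(bc_url, {'Referer': page_url}),
    'ie_key': 'Brightcove',
} for bc_url in bc_urls]

# The receiving extractor can later unpack the smuggled data:
real_url, data = unsmuggle_url(entries[0]['url'], default={})
assert data['Referer'] == page_url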
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import itertools
 import re

@@ -8,32 +10,42 @@ from ..utils import (


 class GoogleSearchIE(SearchInfoExtractor):
-IE_DESC = u'Google Video search'
+IE_DESC = 'Google Video search'
-_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
 _MAX_RESULTS = 1000
-IE_NAME = u'video.google:search'
+IE_NAME = 'video.google:search'
 _SEARCH_KEY = 'gvsearch'

 def _get_n_results(self, query, n):
 """Get a specified number of results for a query"""

+entries = []
 res = {
 '_type': 'playlist',
 'id': query,
-'entries': []
+'title': query,
 }

-for pagenum in itertools.count(1):
+for pagenum in itertools.count():
-result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
-webpage = self._download_webpage(result_url, u'gvsearch:' + query,
-note='Downloading result page ' + str(pagenum))
+result_url = (
+'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
+% (compat_urllib_parse.quote_plus(query), pagenum * 10))

-for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
-e = {
+webpage = self._download_webpage(
+result_url, 'gvsearch:' + query,
+note='Downloading result page ' + str(pagenum + 1))
+
+for hit_idx, mobj in enumerate(re.finditer(
+r'<h3 class="r"><a href="([^"]+)"', webpage)):
+
+# Skip playlists
+if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
+continue
+
+entries.append({
 '_type': 'url',
 'url': mobj.group(1)
-}
+})
-res['entries'].append(e)

-if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
+if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
+res['entries'] = entries[:n]
 return res
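The rewritten _get_n_results collects hits page by page and only returns once it has enough, truncating the entry list to the requested count. The same accumulate-and-truncate pattern in isolation (fetch_page is a hypothetical stand-in for downloading one result page of ten hits; the real loop also stops when the page has no "pnnext" link):

import itertools

def fetch_page(pagenum):
    # Hypothetical: one Google Video result page with ten hits.
    return ['hit-%d-%d' % (pagenum, i) for i in range(10)]

def collect(n):
    entries = []
    for pagenum in itertools.count():
        entries.extend(fetch_page(pagenum))
        if len(entries) >= n:
            return entries[:n]

print(len(collect(25)))   # 25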
@@ -1,39 +1,36 @@
+# encoding: utf-8
+from __future__ import unicode_literals

 import re

 from .common import InfoExtractor


 class InaIE(InfoExtractor):
-"""Information Extractor for Ina.fr"""
-_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
+_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
 _TEST = {
-u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
-u'file': u'I12055569.mp4',
-u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
-u'info_dict': {
-u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
+'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
+'info_dict': {
+'id': 'I12055569',
+'ext': 'mp4',
+'title': 'François Hollande "Je crois que c\'est clair"',
 }
 }

-def _real_extract(self,url):
+def _real_extract(self, url):
 mobj = re.match(self._VALID_URL, url)

 video_id = mobj.group('id')
-mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
+mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
-video_extension = 'mp4'
-webpage = self._download_webpage(mrss_url, video_id)
+info_doc = self._download_xml(mrss_url, video_id)

 self.report_extraction(video_id)

-video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
-webpage, u'video URL')
-
-video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
-webpage, u'title')
-
-return [{
-'id': video_id,
-'url': video_url,
-'ext': video_extension,
-'title': video_title,
-}]
+video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
+
+return {
+'id': video_id,
+'url': video_url,
+'title': info_doc.find('.//title').text,
+}
@@ -10,7 +10,7 @@ from ..utils import (


 class InfoQIE(InfoExtractor):
-_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
+_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
 _TEST = {
 "name": "InfoQ",
 "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
@@ -26,9 +26,9 @@ class InfoQIE(InfoExtractor):

 def _real_extract(self, url):
 mobj = re.match(self._VALID_URL, url)
+video_id = mobj.group('id')
+
-webpage = self._download_webpage(url, video_id=url)
-self.report_extraction(url)
+webpage = self._download_webpage(url, video_id)

 # Extract video URL
 encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
@@ -50,6 +50,6 @@ class InfoQIE(InfoExtractor):
 'id': video_id,
 'url': video_url,
 'title': video_title,
 'ext': extension, # Extension is always(?) mp4, but seems to be flv
 'description': video_description,
 }
youtube_dl/extractor/iprima.py (new file, 85 lines)
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import re
from random import random
from math import floor

from .common import InfoExtractor
from ..utils import compat_urllib_request


class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'

_TESTS = [{
'url': 'http://play.iprima.cz/particka/particka-92',
'info_dict': {
'id': '39152',
'ext': 'flv',
'title': 'Partička (92)',
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
},
'params': {
'skip_download': True,
},
},
]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')

webpage = self._download_webpage(url, video_id)

player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
floor(random()*1073741824),
floor(random()*1073741824))

req = compat_urllib_request.Request(player_url)
req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id)

base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')

if zoneGEO != '0':
base_url = base_url.replace('token', 'token_'+zoneGEO)

formats = []
for format_id in ['lq', 'hq', 'hd']:
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')

if filename == 'null':
continue

real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')

if format_id == 'lq':
quality = 0
elif format_id == 'hq':
quality = 1
elif format_id == 'hd':
quality = 2
filename = 'hq/'+filename

formats.append({
'format_id': format_id,
'url': base_url,
'quality': quality,
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
'rtmp_live': True,
'ext': 'flv',
})

self._sort_formats(formats)

return {
'id': real_id,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
}
youtube_dl/extractor/lifenews.py (new file, 63 lines)
@@ -0,0 +1,63 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import unified_strdate


class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'

_TEST = {
'url': 'http://lifenews.ru/news/126342',
'file': '126342.mp4',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
'info_dict': {
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
'upload_date': '20140130',
}
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')

video_url = self._html_search_regex(
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')

thumbnail = self._html_search_regex(
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')

title = self._og_search_title(webpage)
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]

description = self._og_search_description(webpage)

view_count = self._html_search_regex(
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
comment_count = self._html_search_regex(
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')

upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')

return {
'id': video_id,
'url': video_url,
'thumbnail': thumbnail,
'title': title,
'description': description,
'view_count': view_count,
'comment_count': comment_count,
'upload_date': unified_strdate(upload_date),
}
@@ -4,14 +4,11 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-ExtractorError,
-)


 class LiveLeakIE(InfoExtractor):
 _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
-_TEST = {
+_TESTS = [{
 'url': 'http://www.liveleak.com/view?i=757_1364311680',
 'file': '757_1364311680.mp4',
 'md5': '0813c2430bea7a46bf13acf3406992f4',
@@ -20,7 +17,17 @@ class LiveLeakIE(InfoExtractor):
 'uploader': 'ljfriel2',
 'title': 'Most unlucky car accident'
 }
-}
+},
+{
+'url': 'http://www.liveleak.com/view?i=f93_1390833151',
+'file': 'f93_1390833151.mp4',
+'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+'info_dict': {
+'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
+'uploader': 'ARD_Stinkt',
+'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+}
+}]

 def _real_extract(self, url):
 mobj = re.match(self._VALID_URL, url)
@@ -28,7 +35,11 @@ class LiveLeakIE(InfoExtractor):
 video_id = mobj.group('video_id')
 webpage = self._download_webpage(url, video_id)
 sources_raw = self._search_regex(
-r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
+r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
+if sources_raw is None:
+sources_raw = '[{ %s}]' % (
+self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
+
 sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
 sources = json.loads(sources_json)
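Whether it comes from the sources: block or from the new single-file fallback, the extracted JavaScript literal still has unquoted keys, so the existing re.sub turns it into JSON before json.loads. A small worked example of that conversion (the player snippet is hypothetical):

import json
import re

sources_raw = '[{ label: "SD", file: "http://cdn.example.com/video.mp4"}]'   # hypothetical player JS
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)               # quote the bare keys
sources = json.loads(sources_json)
print(sources[0]['file'])   # http://cdn.example.com/video.mp4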
youtube_dl/extractor/m6.py (new file, 56 lines)
@@ -0,0 +1,56 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class M6IE(InfoExtractor):
IE_NAME = 'm6'
_VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'

_TEST = {
'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
'md5': '242994a87de2c316891428e0176bcb77',
'info_dict': {
'id': '11323908',
'ext': 'mp4',
'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
'duration': 100,
}
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS')

title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
thumbnail = rss.find('./channel/item/visuel_clip_big').text
duration = int(rss.find('./channel/item/duration').text)
view_count = int(rss.find('./channel/item/nombre_vues').text)

formats = []
for format_id in ['lq', 'sd', 'hq', 'hd']:
video_url = rss.find('./channel/item/url_video_%s' % format_id)
if video_url is None:
continue
formats.append({
'url': video_url.text,
'format_id': format_id,
})

return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
}
@@ -16,7 +16,8 @@ class MalemotionIE(InfoExtractor):
 'info_dict': {
 "title": "Bien dur",
 "age_limit": 18,
-}
+},
+'skip': 'This video has been deleted.'
 }

 def _real_extract(self, url):
youtube_dl/extractor/mooshare.py (new file, 114 lines)
@@ -0,0 +1,114 @@
from __future__ import unicode_literals

import re
import time

from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_request,
compat_urllib_parse,
)


class MooshareIE(InfoExtractor):
IE_NAME = 'mooshare'
IE_DESC = 'Mooshare.biz'
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'

_TESTS = [
{
'url': 'http://mooshare.biz/8dqtk4bjbp8g',
'md5': '4e14f9562928aecd2e42c6f341c8feba',
'info_dict': {
'id': '8dqtk4bjbp8g',
'ext': 'mp4',
'title': 'Comedy Football 2011 - (part 1-2)',
'duration': 893,
},
},
{
'url': 'http://mooshare.biz/aipjtoc4g95j',
'info_dict': {
'id': 'aipjtoc4g95j',
'ext': 'mp4',
'title': 'Orange Caramel Dashing Through the Snow',
'duration': 212,
},
'params': {
# rtmp download
'skip_download': True,
}
}
]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

page = self._download_webpage(url, video_id, 'Downloading page')

if re.search(r'>Video Not Found or Deleted<', page) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)

hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

download_form = {
'op': 'download1',
'id': video_id,
'hash': hash_key,
}

request = compat_urllib_request.Request(
'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

self.to_screen('%s: Waiting for timeout' % video_id)
time.sleep(5)

video_page = self._download_webpage(request, video_id, 'Downloading video page')

thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
duration = int(duration_str) if duration_str is not None else None

formats = []

# SD video
mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('url'),
'format_id': 'sd',
'format': 'SD',
})

# HD video
mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('url'),
'format_id': 'hd',
'format': 'HD',
})

# rtmp video
mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('rtmpurl'),
'play_path': mobj.group('playpath'),
'rtmp_live': False,
'ext': 'mp4',
'format_id': 'rtmp',
'format': 'HD',
})

return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
@@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
 title_el = find_xpath_attr(
 itemdoc, './/{http://search.yahoo.com/mrss/}category',
 'scheme', 'urn:mtvn:video_title')
-if title_el is None:
-title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
 if title_el is None:
 title_el = itemdoc.find('.//title')
+if title_el.text is None:
+title_el = None
+if title_el is None:
+title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')

 title = title_el.text
 if title is None:
 raise ExtractorError('Could not find video title')
@@ -119,7 +122,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
 if mgid.endswith('.swf'):
 mgid = mgid[:-4]
 except RegexNotFoundError:
-mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
+mgid = self._search_regex(
+[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+webpage, u'mgid')
 return self._get_videos_info(mgid)
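The mgid fallback now passes a list of patterns to _search_regex, which in this tree tries them in order and uses the first match. A rough plain-re equivalent of that behaviour, for illustration only (the markup string is made up):

import re

def first_match(patterns, text):
    # Return the first capture group among the patterns that matches.
    for pattern in patterns:
        mobj = re.search(pattern, text)
        if mobj:
            return mobj.group(1)
    return None

page = 'swfobject.embedSWF("http://media.example.com/player.swf?uri=mgid:cms:video:1234", ...)'  # hypothetical
print(first_match([r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], page))
# mgid:cms:video:1234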
@@ -4,18 +4,18 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import determine_ext


 class NewgroundsIE(InfoExtractor):
-_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
+_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
 _TEST = {
 'url': 'http://www.newgrounds.com/audio/listen/549479',
-'file': '549479.mp3',
 'md5': 'fe6033d297591288fa1c1f780386f07a',
 'info_dict': {
-"title": "B7 - BusMode",
-"uploader": "Burn7",
+'id': '549479',
+'ext': 'mp3',
+'title': 'B7 - BusMode',
+'uploader': 'Burn7',
 }
 }
youtube_dl/extractor/normalboots.py (new file, 61 lines)
@@ -0,0 +1,61 @@
import re

from .common import InfoExtractor

from ..utils import (
ExtractorError,
unified_strdate,
)

class NormalbootsIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
_TEST = {
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
u'file': u'home-alone-games-jontron.mp4',
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
u'info_dict': {
u'title': u'Home Alone Games - JonTron - NormalBoots',
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
u'uploader': u'JonTron',
u'upload_date': u'20140125',
}
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('videoid')

info = {
'id': video_id,
'uploader': None,
'upload_date': None,
}

if url[:4] != 'http':
url = 'http://' + url

webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
video_thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
webpage, 'uploader')
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date')
video_upload_date = unified_strdate(raw_upload_date)
video_upload_date = unified_strdate(raw_upload_date)

player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
player_page = self._download_webpage(player_url, video_id)
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')

info['url'] = video_url
info['title'] = video_title
info['description'] = video_description
info['thumbnail'] = video_thumbnail
info['uploader'] = video_uploader
info['upload_date'] = video_upload_date

return info
@@ -1,34 +1,68 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor


 class PBSIE(InfoExtractor):
-_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
+_VALID_URL = r'''(?x)https?://
+(?:
+# Direct video URL
+video\.pbs\.org/video/(?P<id>[0-9]+)/? |
+# Article with embedded player
+(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
+# Player
+video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+)
+'''
+
 _TEST = {
-u'url': u'http://video.pbs.org/video/2365006249/',
-u'file': u'2365006249.mp4',
-u'md5': 'ce1888486f0908d555a8093cac9a7362',
-u'info_dict': {
-u'title': u'A More Perfect Union',
-u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
-u'duration': 3190,
+'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+'md5': 'ce1888486f0908d555a8093cac9a7362',
+'info_dict': {
+'id': '2365006249',
+'ext': 'mp4',
+'title': 'A More Perfect Union',
+'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+'duration': 3190,
 },
 }

 def _real_extract(self, url):
 mobj = re.match(self._VALID_URL, url)
-video_id = mobj.group('id')
+
+presumptive_id = mobj.group('presumptive_id')
+display_id = presumptive_id
+if presumptive_id:
+webpage = self._download_webpage(url, display_id)
+url = self._search_regex(
+r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+webpage, 'player URL')
+mobj = re.match(self._VALID_URL, url)
+
+player_id = mobj.group('player_id')
+if not display_id:
+display_id = player_id
+if player_id:
+player_page = self._download_webpage(
+url, display_id, note='Downloading player page',
+errnote='Could not download player page')
+video_id = self._search_regex(
+r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
+else:
+video_id = mobj.group('id')
+display_id = video_id
+
 info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
-info_page = self._download_webpage(info_url, video_id)
-info =json.loads(info_page)
-return {'id': video_id,
+info = self._download_json(info_url, display_id)
+return {
+'id': video_id,
 'title': info['title'],
 'url': info['alternate_encoding']['url'],
 'ext': 'mp4',
 'description': info['program'].get('description'),
 'thumbnail': info.get('image_url'),
 'duration': info.get('duration'),
 }
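The rewritten PBSIE resolves three URL shapes: a direct video URL already carries the numeric id, an article page is fetched to locate the partner player iframe, and a player URL is fetched to read the numeric id out of the embed markup. A quick check of which named group each form populates, reusing the _VALID_URL from the hunk above (the partnerplayer id in the third URL is made up):

import re

PBS_VALID_URL = r'''(?x)https?://
    (?:
        video\.pbs\.org/video/(?P<id>[0-9]+)/? |
        (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
        video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
    )
'''

for url in (
    'http://video.pbs.org/video/2365006249/',
    'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
    'http://video.pbs.org/partnerplayer/abc123/',
):
    groups = re.match(PBS_VALID_URL, url).groupdict()
    print(dict((k, v) for k, v in groups.items() if v))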
@@ -6,8 +6,6 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-compat_urllib_parse_urlparse,
-
 ExtractorError,
 )

@@ -16,9 +14,10 @@ class RBMARadioIE(InfoExtractor):
 _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
 _TEST = {
 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
-'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
 'info_dict': {
+'id': 'ford-lopatin-live-at-primavera-sound-2011',
+'ext': 'mp3',
 "uploader_id": "ford-lopatin",
 "location": "Spain",
 "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
@@ -42,7 +41,6 @@ class RBMARadioIE(InfoExtractor):
 raise ExtractorError('Invalid JSON: ' + str(e))

 video_url = data['akamai_url'] + '&cbr=256'
-url_parts = compat_urllib_parse_urlparse(video_url)

 return {
 'id': video_id,
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
 IE_NAME = '220.ro'
 _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
 _TEST = {
-u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
-u'file': u'LYV6doKo7f.mp4',
+'file': 'LYV6doKo7f.mp4',
-u'md5': u'03af18b73a07b4088753930db7a34add',
+'md5': '03af18b73a07b4088753930db7a34add',
-u'info_dict': {
+'info_dict': {
-u"title": u"Luati-le Banii sez 4 ep 1",
+"title": "Luati-le Banii sez 4 ep 1",
-u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
 }
 }

@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
 webpage = self._download_webpage(url, video_id)
 flashVars_str = self._search_regex(
 r'<param name="flashVars" value="([^"]+)"',
-webpage, u'flashVars')
+webpage, 'flashVars')
 flashVars = compat_parse_qs(flashVars_str)

-info = {
+return {
 '_type': 'video',
 'id': video_id,
 'ext': 'mp4',
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
 'description': clean_html(flashVars['desc'][0]),
 'thumbnail': flashVars['preview'][0],
 }
-return info
@@ -1,34 +1,36 @@
-import re
+from __future__ import unicode_literals

 from .mtv import MTVServicesInfoExtractor


 class SouthParkStudiosIE(MTVServicesInfoExtractor):
-IE_NAME = u'southparkstudios.com'
+IE_NAME = 'southparkstudios.com'
-_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

 _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

 _TESTS = [{
-u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
-u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
-u'info_dict': {
-u'title': u'Bat Daded',
-u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+'info_dict': {
+'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
+'ext': 'mp4',
+'title': 'Bat Daded',
+'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
 },
 }]


 class SouthparkDeIE(SouthParkStudiosIE):
-IE_NAME = u'southpark.de'
+IE_NAME = 'southpark.de'
-_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
 _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'

 _TESTS = [{
-u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
-u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
-u'info_dict': {
-u'title': u'The Government Won\'t Respect My Privacy',
-u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
+'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+'info_dict': {
+'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
+'ext': 'mp4',
+'title': 'The Government Won\'t Respect My Privacy',
+'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
 },
 }]
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -6,20 +8,20 @@ from .common import InfoExtractor
 class SpiegelIE(InfoExtractor):
 _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
 _TESTS = [{
-u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
-u'file': u'1259285.mp4',
+'file': '1259285.mp4',
-u'md5': u'2c2754212136f35fb4b19767d242f66e',
+'md5': '2c2754212136f35fb4b19767d242f66e',
-u'info_dict': {
+'info_dict': {
-u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
+'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
-}
+},
 },
 {
-u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
-u'file': u'1309159.mp4',
+'file': '1309159.mp4',
-u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
+'md5': 'f2cdf638d7aa47654e251e1aee360af1',
-u'info_dict': {
+'info_dict': {
-u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
+'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
-}
+},
 }]

 def _real_extract(self, url):
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
 webpage = self._download_webpage(url, video_id)

 video_title = self._html_search_regex(
-r'<div class="module-title">(.*?)</div>', webpage, u'title')
+r'<div class="module-title">(.*?)</div>', webpage, 'title')

-xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
 idoc = self._download_xml(
 xml_url, video_id,
-note=u'Downloading XML', errnote=u'Failed to download XML')
+note='Downloading XML', errnote='Failed to download XML')

 formats = [
 {
 'format_id': n.tag.rpartition('type')[2],
-'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
 'width': int(n.find('./width').text),
 'height': int(n.find('./height').text),
 'abr': int(n.find('./audiobitrate').text),
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):

 self._sort_formats(formats)

-info = {
+return {
 'id': video_id,
 'title': video_title,
 'duration': duration,
 'formats': formats,
 }
-return info
@@ -1,36 +1,38 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class StatigramIE(InfoExtractor):
-_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
 _TEST = {
-u'url': u'http://statigr.am/p/522207370455279102_24101272',
-u'file': u'522207370455279102_24101272.mp4',
-u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
-u'info_dict': {
-u'uploader_id': u'aguynamedpatrick',
-u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+'url': 'http://statigr.am/p/522207370455279102_24101272',
+'md5': '6eb93b882a3ded7c378ee1d6884b1814',
+'info_dict': {
+'id': '522207370455279102_24101272',
+'ext': 'mp4',
+'uploader_id': 'aguynamedpatrick',
+'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
 },
 }

 def _real_extract(self, url):
 mobj = re.match(self._VALID_URL, url)
-video_id = mobj.group(1)
+video_id = mobj.group('id')
 webpage = self._download_webpage(url, video_id)
 html_title = self._html_search_regex(
 r'<title>(.+?)</title>',
-webpage, u'title')
+webpage, 'title')
 title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
 uploader_id = self._html_search_regex(
-r'@([^ ]+)', title, u'uploader name', fatal=False)
+r'@([^ ]+)', title, 'uploader name', fatal=False)
-ext = 'mp4'

-return [{
+return {
 'id': video_id,
 'url': self._og_search_video_url(webpage),
-'ext': ext,
-'title': title,
+'title': title,
 'thumbnail': self._og_search_thumbnail(webpage),
-'uploader_id' : uploader_id
+'uploader_id': uploader_id
-}]
+}
@@ -62,10 +62,13 @@ class SubtitlesInfoExtractor(InfoExtractor):
 subtitles[sub_lang] = subtitle
 return subtitles

+def _download_subtitle_url(self, sub_lang, url):
+return self._download_webpage(url, None, note=False)
+
 def _request_subtitle_url(self, sub_lang, url):
 """ makes the http request for the subtitle """
 try:
-sub = self._download_webpage(url, None, note=False)
+sub = self._download_subtitle_url(sub_lang, url)
 except ExtractorError as err:
 self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
 return
@@ -79,7 +82,11 @@ class SubtitlesInfoExtractor(InfoExtractor):
 returns {sub_lang: url} or {} if not available
 Must be redefined by the subclasses
 """
-pass
+# By default, allow implementations to simply pass in the result
+assert isinstance(webpage, dict), \
+'_get_available_subtitles not implemented'
+return webpage

 def _get_available_automatic_caption(self, video_id, webpage):
 """
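The two additions above give subclasses an override point for fetching a single subtitle file and let _get_available_subtitles hand back a ready-made {lang: url} mapping. A hypothetical subclass using both hooks might look like this (SomeSiteIE, the example.com URL and the lang query parameter are illustrative only, not part of youtube-dl):

from youtube_dl.extractor.subtitles import SubtitlesInfoExtractor

class SomeSiteIE(SubtitlesInfoExtractor):
    def _get_available_subtitles(self, video_id, webpage):
        # Return the language-to-URL mapping directly.
        return {'en': 'http://example.com/subs/%s.srt' % video_id}

    def _download_subtitle_url(self, sub_lang, url):
        # Customise how one subtitle file is downloaded, e.g. add a per-language hint.
        return self._download_webpage(url + '?lang=' + sub_lang, None, note=False)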
@@ -1,22 +1,23 @@
 #coding: utf-8
+from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
-from ..utils import (
-determine_ext,
-)
+from ..utils import determine_ext

 class ThisAVIE(InfoExtractor):
 _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
 _TEST = {
-u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
-u"file": u"47734.flv",
-u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
-u"info_dict": {
-u"title": u"高樹マリア - Just fit",
-u"uploader": u"dj7970",
-u"uploader_id": u"dj7970"
+'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
+'md5': '0480f1ef3932d901f0e0e719f188f19b',
+'info_dict': {
+'id': '47734',
+'ext': 'flv',
+'title': '高樹マリア - Just fit',
+'uploader': 'dj7970',
+'uploader_id': 'dj7970'
 }
 }

@@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):

 video_id = mobj.group('id')
 webpage = self._download_webpage(url, video_id)
-title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
+title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
 video_url = self._html_search_regex(
-r"addVariable\('file','([^']+)'\);", webpage, u'video url')
+r"addVariable\('file','([^']+)'\);", webpage, 'video url')
 uploader = self._html_search_regex(
 r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
-webpage, u'uploader name', fatal=False)
+webpage, 'uploader name', fatal=False)
 uploader_id = self._html_search_regex(
 r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
-webpage, u'uploader id', fatal=False)
+webpage, 'uploader id', fatal=False)
 ext = determine_ext(video_url)

 return {
-'_type': 'video',
 'id': video_id,
 'url': video_url,
 'uploader': uploader,
50 youtube_dl/extractor/tinypic.py (new file)
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from youtube_dl.utils import ExtractorError
+
+
+class TinyPicIE(InfoExtractor):
+    IE_NAME = 'tinypic'
+    IE_DESC = 'tinypic.com videos'
+    _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+
+    _TEST = {
+        'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
+        'md5': '609b74432465364e72727ebc6203f044',
+        'info_dict': {
+            'id': '6xw7tc',
+            'ext': 'flv',
+            'title': 'shadow phenomenon weird',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id, 'Downloading page')
+
+        mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
+            '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
+        if mobj is None:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        file_id = mobj.group('fileid')
+        server_id = mobj.group('serverid')
+
+        KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
+        keywords = self._html_search_meta('keywords', webpage, 'title')
+        title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
+
+        video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
+        thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
+
+        return {
+            'id': file_id,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'title': title
+        }
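For reference, the extraction above reduces to two regex captures feeding two URL templates. A minimal standalone sketch of that step, with an invented page fragment standing in for the real player page:

import re

# Invented fragment shaped like the markup the regex in the extractor targets.
sample_page = 'fo.addVariable("file", "6xw7tc");\n    fo.addVariable("s", "5");'

m = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
              r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', sample_page)
if m:
    file_id, server_id = m.group('fileid'), m.group('serverid')
    # The server id and file id are enough to build both the video and thumbnail URLs.
    video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
    thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
    print(video_url, thumbnail)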
youtube_dl/extractor/toutv.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,25 +11,25 @@ from ..utils import (


 class TouTvIE(InfoExtractor):
-    IE_NAME = u'tou.tv'
+    IE_NAME = 'tou.tv'
     _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'

     _TEST = {
-        u'url': u'http://www.tou.tv/30-vies/S04E41',
-        u'file': u'30-vies_S04E41.mp4',
-        u'info_dict': {
-            u'title': u'30 vies Saison 4 / Épisode 41',
-            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
-            u'age_limit': 8,
-            u'uploader': u'Groupe des Nouveaux Médias',
-            u'duration': 1296,
-            u'upload_date': u'20131118',
-            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
+        'url': 'http://www.tou.tv/30-vies/S04E41',
+        'file': '30-vies_S04E41.mp4',
+        'info_dict': {
+            'title': '30 vies Saison 4 / Épisode 41',
+            'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
+            'age_limit': 8,
+            'uploader': 'Groupe des Nouveaux Médias',
+            'duration': 1296,
+            'upload_date': '20131118',
+            'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
         },
-        u'params': {
-            u'skip_download': True,  # Requires rtmpdump
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
         },
-        u'skip': 'Only available in Canada'
+        'skip': 'Only available in Canada'
     }

     def _real_extract(self, url):
@@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

         mediaId = self._search_regex(
-            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
+            r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')

-        streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
+        streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
         streams_doc = self._download_xml(
-            streams_url, video_id, note=u'Downloading stream list')
+            streams_url, video_id, note='Downloading stream list')

         video_url = next(n.text
                          for n in streams_doc.findall('.//choice/url')
-                         if u'//ad.doubleclick' not in n.text)
+                         if '//ad.doubleclick' not in n.text)
         if video_url.endswith('/Unavailable.flv'):
             raise ExtractorError(
-                u'Access to this video is blocked from outside of Canada',
+                'Access to this video is blocked from outside of Canada',
                 expected=True)

         duration_str = self._html_search_meta(
-            'video:duration', webpage, u'duration')
+            'video:duration', webpage, 'duration')
         duration = int(duration_str) if duration_str else None
         upload_date_str = self._html_search_meta(
-            'video:release_date', webpage, u'upload date')
+            'video:release_date', webpage, 'upload date')
         upload_date = unified_strdate(upload_date_str) if upload_date_str else None

         return {
youtube_dl/extractor/traileraddict.py
@@ -1,17 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class TrailerAddictIE(InfoExtractor):
+    _WORKING = False
     _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
     _TEST = {
-        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
-        u'file': u'76184.mp4',
-        u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
-        u'info_dict': {
-            u"title": u"Prince Avalanche Trailer",
-            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
+        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
+        'md5': '41365557f3c8c397d091da510e73ceb4',
+        'info_dict': {
+            'id': '76184',
+            'ext': 'mp4',
+            'title': 'Prince Avalanche Trailer',
+            'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
         }
     }

@@ -22,9 +26,15 @@ class TrailerAddictIE(InfoExtractor):

         title = self._search_regex(r'<title>(.+?)</title>',
                 webpage, 'video title').replace(' - Trailer Addict','')
-        view_count = self._search_regex(r'Views: (.+?)<br />',
-                webpage, 'Views Count')
-        video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
+        view_count_str = self._search_regex(
+            r'<span class="views_n">([0-9,.]+)</span>',
+            webpage, 'view count', fatal=False)
+        view_count = (
+            None if view_count_str is None
+            else int(view_count_str.replace(',', '')))
+        video_id = self._search_regex(
+            r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
+            webpage, 'video id')

         # Presence of (no)watchplus function indicates HD quality is available
         if re.search(r'function (no)?watchplus()', webpage):
@@ -39,14 +49,16 @@ class TrailerAddictIE(InfoExtractor):
                 info_webpage, 'Download url').replace('%3F','?')
         thumbnail_url = self._search_regex(r'&image=(.+?)&',
                 info_webpage, 'thumbnail url')
-        ext = final_url.split('.')[-1].split('?')[0]

-        return [{
-            'id' : video_id,
-            'url' : final_url,
-            'ext' : ext,
-            'title' : title,
-            'thumbnail' : thumbnail_url,
-            'description' : self._og_search_description(webpage),
-            'view_count' : view_count,
-        }]
+        description = self._html_search_regex(
+            r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
+            webpage, 'description', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': final_url,
+            'title': title,
+            'thumbnail': thumbnail_url,
+            'description': description,
+            'view_count': view_count,
+        }
youtube_dl/extractor/ustream.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re

@@ -10,48 +12,48 @@ from ..utils import (

 class UstreamIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
-    IE_NAME = u'ustream'
+    IE_NAME = 'ustream'
     _TEST = {
-        u'url': u'http://www.ustream.tv/recorded/20274954',
-        u'file': u'20274954.flv',
-        u'md5': u'088f151799e8f572f84eb62f17d73e5c',
-        u'info_dict': {
-            u"uploader": u"Young Americans for Liberty",
-            u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
-        }
+        'url': 'http://www.ustream.tv/recorded/20274954',
+        'file': '20274954.flv',
+        'md5': '088f151799e8f572f84eb62f17d73e5c',
+        'info_dict': {
+            "uploader": "Young Americans for Liberty",
+            "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
+        },
     }

     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')

-        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
+        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)

         self.report_extraction(video_id)

         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
-            webpage, u'title')
+            webpage, 'title')

         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
+            webpage, 'uploader', fatal=False, flags=re.DOTALL)

         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
-            webpage, u'thumbnail', fatal=False)
+            webpage, 'thumbnail', fatal=False)

-        info = {
-            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
-            'title': video_title,
-            'uploader': uploader,
-            'thumbnail': thumbnail,
-        }
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+        }


 class UstreamChannelIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
-    IE_NAME = u'ustream:channel'
+    IE_NAME = 'ustream:channel'

     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
youtube_dl/extractor/vbox7.py
@@ -1,3 +1,6 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -10,45 +13,44 @@ from ..utils import (


 class Vbox7IE(InfoExtractor):
-    """Information Extractor for Vbox7"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
+    _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
     _TEST = {
-        u'url': u'http://vbox7.com/play:249bb972c2',
-        u'file': u'249bb972c2.flv',
-        u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
-        u'info_dict': {
-            u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
-        }
+        'url': 'http://vbox7.com/play:249bb972c2',
+        'md5': '99f65c0c9ef9b682b97313e052734c3f',
+        'info_dict': {
+            'id': '249bb972c2',
+            'ext': 'flv',
+            'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+        },
     }

-    def _real_extract(self,url):
+    def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

         redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
+        new_location = self._search_regex(r'window\.location = \'(.*)\';',
+            redirect_page, 'redirect location')
         redirect_url = urlh.geturl() + new_location
-        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
+        webpage = self._download_webpage(redirect_url, video_id,
+            'Downloading redirect page')

         title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'title').split('/')[0].strip()
+            webpage, 'title').split('/')[0].strip()

-        ext = "flv"
         info_url = "http://vbox7.com/play/magare.do"
-        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
+        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
         info_request = compat_urllib_request.Request(info_url, data)
         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
+        info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
         if info_response is None:
-            raise ExtractorError(u'Unable to extract the media url')
+            raise ExtractorError('Unable to extract the media url')
         (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))

-        return [{
+        return {
             'id': video_id,
             'url': final_url,
-            'ext': ext,
+            'ext': 'flv',
             'title': title,
             'thumbnail': thumbnail_url,
-        }]
+        }
youtube_dl/extractor/vevo.py
@@ -1,5 +1,6 @@
+from __future__ import unicode_literals
+
 import re
-import json
 import xml.etree.ElementTree
 import datetime

@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
         vevo:)
         (?P<id>[^&?#]+)'''
     _TESTS = [{
-        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
-        u'file': u'GB1101300280.mp4',
-        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
-        u'info_dict': {
-            u"upload_date": u"20130624",
-            u"uploader": u"Hurts",
-            u"title": u"Somebody to Die For",
-            u"duration": 230.12,
-            u"width": 1920,
-            u"height": 1080,
+        'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+        'file': 'GB1101300280.mp4',
+        "md5": "06bea460acb744eab74a9d7dcb4bfd61",
+        'info_dict': {
+            "upload_date": "20130624",
+            "uploader": "Hurts",
+            "title": "Somebody to Die For",
+            "duration": 230.12,
+            "width": 1920,
+            "height": 1080,
         }
     }]
     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
             if version['version'] > last_version['version']:
                 last_version = version
         if last_version['version'] == -1:
-            raise ExtractorError(u'Unable to extract last version of the video')
+            raise ExtractorError('Unable to extract last version of the video')

         renditions = xml.etree.ElementTree.fromstring(last_version['data'])
         formats = []
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
                 format_url = self._SMIL_BASE_URL + m.group('path')
                 formats.append({
                     'url': format_url,
-                    'format_id': u'SMIL_' + m.group('cbr'),
+                    'format_id': 'SMIL_' + m.group('cbr'),
                     'vcodec': m.group('vcodec'),
                     'acodec': m.group('acodec'),
                     'vbr': int(m.group('vbr')),
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
         video_id = mobj.group('id')

         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
-        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-        video_info = json.loads(info_json)['video']
+        video_info = self._download_json(json_url, video_id)['video']

         formats = self._formats_from_json(video_info)
         try:
             smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
                 self._SMIL_BASE_URL, video_id, video_id.lower())
             smil_xml = self._download_webpage(smil_url, video_id,
-                u'Downloading SMIL info')
+                'Downloading SMIL info')
             formats.extend(self._formats_from_smil(smil_xml))
         except ExtractorError as ee:
             if not isinstance(ee.cause, compat_HTTPError):
                 raise
             self._downloader.report_warning(
-                u'Cannot download SMIL information, falling back to JSON ..')
+                'Cannot download SMIL information, falling back to JSON ..')

         timestamp_ms = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
         upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
-        info = {
+        return {
             'id': video_id,
             'title': video_info['title'],
             'formats': formats,
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
             'uploader': video_info['mainArtists'][0]['artistName'],
             'duration': video_info['duration'],
         }
-
-        return info
youtube_dl/extractor/vimeo.py
@@ -6,10 +6,10 @@ import re
 import itertools

 from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
-
     clean_html,
     get_element_by_attribute,
     ExtractorError,
@@ -19,7 +19,7 @@ from ..utils import (
 )


-class VimeoIE(InfoExtractor):
+class VimeoIE(SubtitlesInfoExtractor):
     """Information extractor for vimeo.com."""

     # _VALID_URL matches Vimeo URLs
@@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
                 'videopassword': 'youtube-dl',
             },
         },
+        {
+            'url': 'http://vimeo.com/76979871',
+            'md5': '3363dd6ffebe3784d56f4132317fd446',
+            'note': 'Video with subtitles',
+            'info_dict': {
+                'id': '76979871',
+                'ext': 'mp4',
+                'title': 'The New Vimeo Player (You Know, For Videos)',
+                'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+                'upload_date': '20131015',
+                'uploader_id': 'staff',
+                'uploader': 'Vimeo Staff',
+            }
+        },
     ]

     def _login(self):
@@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
         if len(formats) == 0:
             raise ExtractorError('No known codec found')

+        subtitles = {}
+        text_tracks = config['request'].get('text_tracks')
+        if text_tracks:
+            for tt in text_tracks:
+                subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
+
+        video_subtitles = self.extract_subtitles(video_id, subtitles)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
         return {
             'id': video_id,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'upload_date': video_upload_date,
             'title': video_title,
             'thumbnail': video_thumbnail,
             'description': video_description,
             'formats': formats,
             'webpage_url': url,
             'view_count': view_count,
             'like_count': like_count,
             'comment_count': comment_count,
+            'subtitles': video_subtitles,
         }
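The subtitle support added above is just a language-to-URL mapping pulled out of the player config before the usual extract_subtitles() call. A small standalone sketch of that mapping step, using a made-up config dict in place of the one the extractor parses from the page:

# Hypothetical player-config fragment; the real one comes from the Vimeo page.
config = {
    'request': {
        'text_tracks': [
            {'lang': 'en', 'url': '/texttrack/1234.vtt'},
            {'lang': 'fr', 'url': '/texttrack/5678.vtt'},
        ],
    },
}

subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
    for tt in text_tracks:
        # Track URLs in the config are site-relative, hence the prefix.
        subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']

print(subtitles)
# {'en': 'http://vimeo.com/texttrack/1234.vtt', 'fr': 'http://vimeo.com/texttrack/5678.vtt'}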
youtube_dl/extractor/vine.py
@@ -1,18 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class VineIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
     _TEST = {
-        u'url': u'https://vine.co/v/b9KOOWX7HUx',
-        u'file': u'b9KOOWX7HUx.mp4',
-        u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
-        u'info_dict': {
-            u"uploader": u"Jack Dorsey",
-            u"title": u"Chicken."
-        }
+        'url': 'https://vine.co/v/b9KOOWX7HUx',
+        'md5': '2f36fed6235b16da96ce9b4dc890940d',
+        'info_dict': {
+            'id': 'b9KOOWX7HUx',
+            'ext': 'mp4',
+            'uploader': 'Jack Dorsey',
+            'title': 'Chicken.',
+        },
     }

     def _real_extract(self, url):
@@ -24,17 +27,17 @@ class VineIE(InfoExtractor):

         self.report_extraction(video_id)

-        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
-            webpage, u'video URL')
+        video_url = self._html_search_meta('twitter:player:stream', webpage,
+            'video URL')

         uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
+            webpage, 'uploader', fatal=False, flags=re.DOTALL)

-        return [{
+        return {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': self._og_search_title(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'uploader': uploader,
-        }]
+        }
80 youtube_dl/extractor/vube.py (new file)
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+import datetime
+
+from .common import InfoExtractor
+
+
+class VubeIE(InfoExtractor):
+    IE_NAME = 'vube'
+    IE_DESC = 'Vube.com'
+    _VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'
+
+    _TEST = {
+        'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
+        'md5': 'f81dcf6d0448e3291f54380181695821',
+        'info_dict': {
+            'id': 'YL2qNPkqon',
+            'ext': 'mp4',
+            'title': 'Chiara Grispo - Price Tag by Jessie J',
+            'description': 'md5:8ea652a1f36818352428cb5134933313',
+            'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
+            'uploader': 'Chiara.Grispo',
+            'uploader_id': '1u3hX0znhP',
+            'upload_date': '20140103',
+            'duration': 170.56
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        video = self._download_json('http://vube.com/api/v2/video/%s' % video_id,
+            video_id, 'Downloading video JSON')
+
+        public_id = video['public_id']
+
+        formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
+                    'height': int(fmt['height']),
+                    'abr': int(fmt['audio_bitrate']),
+                    'vbr': int(fmt['video_bitrate']),
+                    'format_id': fmt['media_resolution_id']
+                    } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed']
+
+        self._sort_formats(formats)
+
+        title = video['title']
+        description = video.get('description')
+        thumbnail = video['thumbnail_src']
+        if thumbnail.startswith('//'):
+            thumbnail = 'http:' + thumbnail
+        uploader = video['user_alias']
+        uploader_id = video['user_url_id']
+        upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d')
+        duration = video['duration']
+        view_count = video['raw_view_count']
+        like_count = video['total_likes']
+        dislike_count= video['total_hates']
+
+        comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id,
+            video_id, 'Downloading video comment JSON')
+
+        comment_count = comment['total']
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
+        }
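The format list in the new extractor is a single comprehension over the API payload that keeps only finished transcodes. A standalone sketch of that filtering, with an invented payload whose key names follow the code above:

sample_video = {
    'public_id': 'YL2qNPkqon',
    'mtm': [
        {'media_resolution_id': 'hd', 'height': '720', 'audio_bitrate': '128',
         'video_bitrate': '2000', 'transcoding_status': 'processed'},
        {'media_resolution_id': 'sd', 'height': '360', 'audio_bitrate': '96',
         'video_bitrate': '700', 'transcoding_status': 'pending'},
    ],
}

public_id = sample_video['public_id']
formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
            'height': int(fmt['height']),
            'abr': int(fmt['audio_bitrate']),
            'vbr': int(fmt['video_bitrate']),
            'format_id': fmt['media_resolution_id'],
            } for fmt in sample_video['mtm'] if fmt['transcoding_status'] == 'processed']

# Only the 'processed' rendition survives the filter; the pending one is dropped.
print([f['format_id'] for f in formats])  # ['hd']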
@@ -5,7 +5,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
-    unescapeHTML,
     ExtractorError,
 )

youtube_dl/extractor/youtube.py
@@ -502,7 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             return a % b

         m = re.match(
-            r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
         if m:
             fname = m.group('func')
             if fname not in functions:
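The only change here widens the character class for function names, so that calls whose names contain '$' (common in obfuscated player JavaScript) are matched as well. A quick standalone check of the new pattern, with a made-up expression string for illustration:

import re

expr = 'z$c(a,1)'  # hypothetical call as it might appear in player JavaScript
m = re.match(r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
if m:
    print(m.group('func'), m.group('args'))  # z$c a,1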
youtube_dl/utils.py
@@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_request = http_request
     https_response = http_response

+
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
     upload_date = None
     #Replace commas
     date_str = date_str.replace(',',' ')
     # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
     format_expressions = [
         '%d %B %Y',
         '%B %d %Y',
@@ -771,11 +772,12 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M',
     ]
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
-        except:
+        except ValueError:
             pass
     if upload_date is None:
         timetuple = email.utils.parsedate_tz(date_str)
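Together, the widened offset regex and the extra format string let unified_strdate handle ISO-style timestamps that carry a colon-separated UTC offset or omit the seconds field. A standalone sketch of the same normalization (the sample date strings are illustrative):

import datetime
import re

def to_yyyymmdd(date_str):
    # Strip a trailing UTC offset such as '+01:00' or ' -0500', per the new regex above.
    date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
    for expression in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M'):
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    return None

print(to_yyyymmdd('2014-02-06T19:30:00+01:00'))  # 20140206
print(to_yyyymmdd('2014-02-06T19:30'))           # 20140206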
youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.01.30.1'
+__version__ = '2014.02.06.1'