Compare commits
43 Commits
2014.03.24
...
2014.03.29
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c5850eeb4 | ||
|
|
bd3e077a2d | ||
|
|
7e70ac36b3 | ||
|
|
2cc0082dc0 | ||
|
|
056b56688a | ||
|
|
b17418313f | ||
|
|
e9a6fd6a68 | ||
|
|
bf30f3bd9d | ||
|
|
330edf2d84 | ||
|
|
43f775e4ca | ||
|
|
8f6562448c | ||
|
|
263f4b514b | ||
|
|
f0da3f1ef9 | ||
|
|
cb3ac1c610 | ||
|
|
8efd15f477 | ||
|
|
d26ebe990f | ||
|
|
28acf5500a | ||
|
|
214c22c704 | ||
|
|
8cdafb47b9 | ||
|
|
0dae5083f1 | ||
|
|
4c89bbd22c | ||
|
|
e2b06e76c1 | ||
|
|
e9c076c317 | ||
|
|
6c072e7d25 | ||
|
|
ac6c104871 | ||
|
|
69c01a9f68 | ||
|
|
e55213ce35 | ||
|
|
24a2aac445 | ||
|
|
98acdc895b | ||
|
|
bd3b5b8b10 | ||
|
|
9a90636805 | ||
|
|
6a66ae96ed | ||
|
|
2c8a4ba6b5 | ||
|
|
ad8915b729 | ||
|
|
34cbc7ee8d | ||
|
|
a59e40a1ea | ||
|
|
ad0a75db6b | ||
|
|
1d0e49e1c7 | ||
|
|
b4461b6ebe | ||
|
|
80959224fe | ||
|
|
865cbf4fc5 | ||
|
|
196f061cac | ||
|
|
99b380c33b |
3
Makefile
3
Makefile
@@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude '__pycache' \
|
||||
--exclude '.git' \
|
||||
--exclude 'testdata' \
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl \
|
||||
bin devscripts test youtube_dl docs \
|
||||
CHANGELOG LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
youtube-dl
|
||||
|
||||
@@ -169,6 +169,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate do not download the video and do not write
|
||||
anything to disk
|
||||
--skip-download do not download the video
|
||||
@@ -180,7 +181,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--get-duration simulate, quiet but print video length
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
-j, --dump-json simulate, quiet but print JSON information
|
||||
-j, --dump-json simulate, quiet but print JSON information.
|
||||
See --output for a description of available
|
||||
keys.
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
|
||||
@@ -143,5 +143,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||
|
||||
def test_ComedyCentralShows(self):
|
||||
self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -94,6 +94,7 @@ class YoutubeDL(object):
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
no_warnings: Do not print out anything for warnings.
|
||||
forceurl: Force printing final URL.
|
||||
forcetitle: Force printing title.
|
||||
forceid: Force printing ID.
|
||||
@@ -376,6 +377,8 @@ class YoutubeDL(object):
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
if self.params.get('no_warnings'):
|
||||
return
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
|
||||
@@ -51,6 +51,7 @@ __authors__ = (
|
||||
'David Wagner',
|
||||
'Juan C. Olivares',
|
||||
'Mattias Harrysson',
|
||||
'phaer',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -364,6 +365,10 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option(
|
||||
'--no-warnings',
|
||||
dest='no_warnings', action='store_true', default=False,
|
||||
help='Ignore warnings')
|
||||
verbosity.add_option('-s', '--simulate',
|
||||
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
|
||||
verbosity.add_option('--skip-download',
|
||||
@@ -391,7 +396,7 @@ def parseOpts(overrideArguments=None):
|
||||
help='simulate, quiet but print output format', default=False)
|
||||
verbosity.add_option('-j', '--dump-json',
|
||||
action='store_true', dest='dumpjson',
|
||||
help='simulate, quiet but print JSON information', default=False)
|
||||
help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
|
||||
verbosity.add_option('--newline',
|
||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||
verbosity.add_option('--no-progress',
|
||||
@@ -708,6 +713,7 @@ def _real_main(argv=None):
|
||||
'password': opts.password,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
|
||||
@@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
if 'http_referer' in info_dict:
|
||||
headers['Referer'] = info_dict['http_referer']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
|
||||
@@ -177,6 +177,8 @@ from .normalboots import NormalbootsIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .ntv import NTVIE
|
||||
from .oe1 import OE1IE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
@@ -208,7 +210,6 @@ from .rutv import RUTVIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
@@ -257,13 +258,13 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
|
||||
@@ -16,9 +16,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||
"playlist": [
|
||||
{
|
||||
"file": "manofsteel-trailer4.mov",
|
||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||
"info_dict": {
|
||||
"id": "manofsteel-trailer4",
|
||||
"ext": "mov",
|
||||
"duration": 111,
|
||||
"title": "Trailer 4",
|
||||
"upload_date": "20130523",
|
||||
@@ -26,9 +27,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
"file": "manofsteel-trailer3.mov",
|
||||
"md5": "b8017b7131b721fb4e8d6f49e1df908c",
|
||||
"info_dict": {
|
||||
"id": "manofsteel-trailer3",
|
||||
"ext": "mov",
|
||||
"duration": 182,
|
||||
"title": "Trailer 3",
|
||||
"upload_date": "20130417",
|
||||
@@ -36,9 +38,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
"file": "manofsteel-trailer.mov",
|
||||
"md5": "d0f1e1150989b9924679b441f3404d48",
|
||||
"info_dict": {
|
||||
"id": "manofsteel-trailer",
|
||||
"ext": "mov",
|
||||
"duration": 148,
|
||||
"title": "Trailer",
|
||||
"upload_date": "20121212",
|
||||
@@ -46,15 +49,16 @@ class AppleTrailersIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
"file": "manofsteel-teaser.mov",
|
||||
"md5": "5fe08795b943eb2e757fa95cb6def1cb",
|
||||
"info_dict": {
|
||||
"id": "manofsteel-teaser",
|
||||
"ext": "mov",
|
||||
"duration": 93,
|
||||
"title": "Teaser",
|
||||
"upload_date": "20120721",
|
||||
"uploader_id": "wb",
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -65,16 +69,16 @@ class AppleTrailersIE(InfoExtractor):
|
||||
movie = mobj.group('movie')
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
|
||||
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
def _clean_json(m):
|
||||
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = u'<html>' + s + u'</html>'
|
||||
s = '<html>' + s + u'</html>'
|
||||
return s
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
@@ -82,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
for li in doc.findall('./div/ul/li'):
|
||||
on_click = li.find('.//a').attrib['onClick']
|
||||
trailer_info_json = self._search_regex(self._JSON_RE,
|
||||
on_click, u'trailer info')
|
||||
on_click, 'trailer info')
|
||||
trailer_info = json.loads(trailer_info_json)
|
||||
title = trailer_info['title']
|
||||
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
||||
@@ -98,8 +102,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
first_url = trailer_info['url']
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
|
||||
settings = json.loads(settings_json)
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
@@ -107,7 +110,6 @@ class AppleTrailersIE(InfoExtractor):
|
||||
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_url),
|
||||
'format': format['type'],
|
||||
'width': format['width'],
|
||||
'height': int(format['height']),
|
||||
|
||||
@@ -11,22 +11,24 @@ from ..utils import (
|
||||
|
||||
|
||||
class AUEngineIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
||||
'file': 'lfvlytY6.mp4',
|
||||
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
|
||||
'info_dict': {
|
||||
'id': 'lfvlytY6',
|
||||
'ext': 'mp4',
|
||||
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
||||
title = title.strip()
|
||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||
links = map(compat_urllib_parse.unquote, links)
|
||||
@@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
|
||||
elif '/videos/' in link:
|
||||
video_url = link
|
||||
if not video_url:
|
||||
raise ExtractorError(u'Could not find video URL')
|
||||
raise ExtractorError('Could not find video URL')
|
||||
ext = '.' + determine_ext(video_url)
|
||||
if ext == title[-len(ext):]:
|
||||
title = title[:-len(ext)]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
|
||||
}
|
||||
|
||||
@@ -1,22 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
|
||||
},
|
||||
u'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
u'skip_download': True,
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
embed_code = self._search_regex(
|
||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||
'embed code')
|
||||
return OoyalaIE._build_url_result(embed_code)
|
||||
f4m_url = self._search_regex(
|
||||
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||
'f4m url')
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
return {
|
||||
'id': name.split('-')[-1],
|
||||
'title': title,
|
||||
'url': f4m_url,
|
||||
'ext': 'flv',
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
|
||||
video_id = mobj.groupdict().get('id')
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||
doc = self._download_xml(info_url,video_id,
|
||||
u'Downloading video info')
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||
u'info_dict': {
|
||||
u'id': u'4629301',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Brick Briscoe',
|
||||
u'duration': 612,
|
||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||
'info_dict': {
|
||||
'id': '4629301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brick Briscoe',
|
||||
'duration': 612,
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
js_player = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||
video_id, u'Downlaoding player')
|
||||
video_id, 'Downlaoding player')
|
||||
# it includes a required token
|
||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
|
||||
|
||||
pdoc = self._download_xml(
|
||||
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||
video_id, u'Downloading video info',
|
||||
video_id, 'Downloading video info',
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
track_doc = pdoc.find('trackList/track')
|
||||
|
||||
@@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -32,31 +32,32 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(InfoExtractor):
|
||||
IE_DESC = 'The Daily Show / Colbert Report'
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow or :colbert
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||
|(https?://)?(www\.)?
|
||||
(?P<showname>thedailyshow|colbertnation)\.com/
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(full-episodes/(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
||||
$"""
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||
$'''
|
||||
_TEST = {
|
||||
'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'file': '422212.mp4',
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
"upload_date": "20121214",
|
||||
"description": "Kristen Stewart",
|
||||
"uploader": "thedailyshow",
|
||||
"title": "thedailyshow-kristen-stewart part 1"
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow-kristen-stewart part 1',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,11 +80,6 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
@staticmethod
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
|
||||
@@ -99,9 +95,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
|
||||
if mobj.group('shortname'):
|
||||
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
||||
url = 'http://www.thedailyshow.com/full-episodes/'
|
||||
url = 'http://thedailyshow.cc.com/full-episodes/'
|
||||
else:
|
||||
url = 'http://www.colbertnation.com/full-episodes/'
|
||||
url = 'http://thecolbertreport.cc.com/full-episodes/'
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
assert mobj is not None
|
||||
|
||||
@@ -120,9 +116,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
|
||||
self.report_extraction(epTitle)
|
||||
webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -130,71 +126,86 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = mobj.group('episode')
|
||||
epTitle = mobj.group('episode').rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||
idoc = self._download_xml(indexUrl, epTitle,
|
||||
'Downloading show index',
|
||||
'unable to download episode index')
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
|
||||
|
||||
results = []
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
itemEls = idoc.findall('.//item')
|
||||
for partNum,itemEl in enumerate(itemEls):
|
||||
mediaId = itemEl.findall('./guid')[0].text
|
||||
shortMediaId = mediaId.split(':')[-1]
|
||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||
officialTitle = itemEl.findall('./title')[0].text
|
||||
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||
cdoc = self._download_xml(configUrl, epTitle,
|
||||
'Downloading configuration for %s' % shortMediaId)
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = float_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('.//guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
if len(turls) == 0:
|
||||
self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
|
||||
continue
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'format_id': format,
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url,
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
|
||||
results.append({
|
||||
'id': shortMediaId,
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': showId,
|
||||
'upload_date': officialDate,
|
||||
'title': effTitle,
|
||||
'thumbnail': None,
|
||||
'description': compat_str(officialTitle),
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
})
|
||||
|
||||
return results
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
@@ -1,23 +1,25 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EHowIE(InfoExtractor):
|
||||
IE_NAME = u'eHow'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
|
||||
IE_NAME = 'eHow'
|
||||
_VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
|
||||
u'file': u'12245069.flv',
|
||||
u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
|
||||
u'info_dict': {
|
||||
u"title": u"Hardwood Flooring Basics",
|
||||
u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
|
||||
u"uploader": u"Erick Nathan"
|
||||
'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
|
||||
'md5': '9809b4e3f115ae2088440bcb4efbf371',
|
||||
'info_dict': {
|
||||
'id': '12245069',
|
||||
'ext': 'flv',
|
||||
'title': 'Hardwood Flooring Basics',
|
||||
'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
|
||||
'uploader': 'Erick Nathan',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||
webpage, u'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
|
||||
webpage, u'uploader')
|
||||
webpage, 'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
uploader = self._html_search_meta('uploader', webpage)
|
||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||
ext = determine_ext(final_url)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from ..utils import (
|
||||
from .brightcove import BrightcoveIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .smotri import SmotriIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -212,6 +213,21 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
# smotri embed
|
||||
{
|
||||
'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
|
||||
'md5': 'ec40048448e9284c9a1de77bb188108b',
|
||||
'info_dict': {
|
||||
'id': 'v27008541fad',
|
||||
'ext': 'mp4',
|
||||
'title': 'Крым и Севастополь вошли в состав России',
|
||||
'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
|
||||
'duration': 900,
|
||||
'upload_date': '20140318',
|
||||
'uploader': 'rbctv_2012_4',
|
||||
'uploader_id': 'rbctv_2012_4',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -547,6 +563,11 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
|
||||
# Look for embedded smotri.com player
|
||||
smotri_url = SmotriIE._extract_url(webpage)
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
class MooshareIE(InfoExtractor):
|
||||
IE_NAME = 'mooshare'
|
||||
IE_DESC = 'Mooshare.biz'
|
||||
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
_VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -18,57 +16,54 @@ from ..utils import (
|
||||
|
||||
|
||||
class NiconicoIE(InfoExtractor):
|
||||
IE_NAME = u'niconico'
|
||||
IE_DESC = u'ニコニコ動画'
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
|
||||
u'file': u'sm22312215.mp4',
|
||||
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
|
||||
u'info_dict': {
|
||||
u'title': u'Big Buck Bunny',
|
||||
u'uploader': u'takuya0301',
|
||||
u'uploader_id': u'2698420',
|
||||
u'upload_date': u'20131123',
|
||||
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'uploader': 'takuya0301',
|
||||
'uploader_id': '2698420',
|
||||
'upload_date': '20131123',
|
||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
},
|
||||
u'params': {
|
||||
u'username': u'ydl.niconico@gmail.com',
|
||||
u'password': u'youtube-dl',
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = True
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return False
|
||||
# Login is required
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
u'mail': username,
|
||||
u'password': password,
|
||||
'mail': username,
|
||||
'password': password,
|
||||
}
|
||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||
# chokes on unicode
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||
request = compat_urllib_request.Request(
|
||||
u'https://secure.nicovideo.jp/secure/login', login_data)
|
||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||
login_results = self._download_webpage(
|
||||
request, u'', note=u'Logging in', errnote=u'Unable to log in')
|
||||
request, None, note='Logging in', errnote='Unable to log in')
|
||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
video_info = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
note=u'Downloading video info page')
|
||||
note='Downloading video info page')
|
||||
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
video_id, u'Downloading flv info')
|
||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
video_id, 'Downloading flv info')
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
@@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
|
||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||
try:
|
||||
user_info = self._download_xml(
|
||||
url, video_id, note=u'Downloading user information')
|
||||
url, video_id, note='Downloading user information')
|
||||
video_uploader = user_info.find('.//nickname').text
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_real_url,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'format': video_format,
|
||||
'thumbnail': video_thumbnail,
|
||||
'id': video_id,
|
||||
'url': video_real_url,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'format': video_format,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'view_count': video_view_count,
|
||||
'view_count': video_view_count,
|
||||
'webpage_url': video_webpage_url,
|
||||
}
|
||||
|
||||
157
youtube_dl/extractor/ntv.py
Normal file
157
youtube_dl/extractor/ntv.py
Normal file
@@ -0,0 +1,157 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
||||
class NTVIE(InfoExtractor):
    """Information extractor for videos published on ntv.ru.

    The page URL does not carry the real video id, so the page is downloaded
    first, the numeric id is located with ``_VIDEO_ID_REGEXES`` and the
    per-video XML document is then fetched to build the RTMP formats.
    """
    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '750783',
                'ext': 'flv',
                'title': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
                'description': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
                'duration': 28,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Patterns tried in order against the downloaded page; group 1 of the
    # first one that matches is the numeric id used to request the video XML.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    def _real_extract(self, url):
        """Return the info dict for the ntv.ru page at *url*.

        Raises ExtractorError when none of ``_VIDEO_ID_REGEXES`` matches the
        downloaded page.
        """
        # The URL path is only used for progress/error messages; the real
        # id is extracted from the page below.
        display_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, display_id, 'Downloading page')

        video_id = None
        for pattern in self._VIDEO_ID_REGEXES:
            id_match = re.search(pattern, page)
            if id_match:
                video_id = id_match.group(1)
                break

        if video_id is None:
            raise ExtractorError('No media links available for %s' % display_id)

        player = self._download_xml(
            'http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
        description = unescapeHTML(player.find('./data/description').text)

        video = player.find('./data/video')
        # The id reported by the XML replaces the one scraped from the page.
        video_id = video.find('./id').text
        thumbnail = video.find('./splash').text
        duration = int(video.find('./totaltime').text)
        view_count = int(video.find('./views').text)
        puid22 = video.find('./puid22').text

        # <puid22> selects the RTMP application; unknown values fall back to
        # the application registered for '4'.
        apps = {
            '4': 'video1',
            '7': 'video2',
        }
        app = apps.get(puid22, apps['4'])

        formats = []
        # Each variant is described by a pair of sibling elements,
        # <PREFIXfile> and <PREFIXsize>, with PREFIX in '', 'hi', 'webm'.
        for format_id in ['', 'hi', 'webm']:
            file_node = video.find('./%sfile' % format_id)
            if file_node is None:
                continue
            size_node = video.find('./%ssize' % format_id)
            # A missing or empty size element must not abort extraction of
            # an otherwise usable format; report the size as unknown instead.
            if size_node is not None and size_node.text:
                filesize = int(size_node.text)
            else:
                filesize = None
            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': file_node.text,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': filesize,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
||||
40
youtube_dl/extractor/oe1.py
Normal file
40
youtube_dl/extractor/oe1.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
# audios on oe1.orf.at are only available for 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
||||
class OE1IE(InfoExtractor):
    """Information extractor for radio programmes on oe1.orf.at."""
    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme page at *url*."""
        show_id = re.match(self._VALID_URL, url).group('id')

        # The "konsole" endpoint returns the programme metadata as JSON.
        console_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(console_url, show_id)['item']

        broadcast_str = '%s %s' % (item['day_label'], item['time'])
        broadcast = datetime.datetime.strptime(
            broadcast_str, '%d.%m.%Y %H:%M')
        # strptime yields a naive datetime; utctimetuple() leaves its fields
        # untouched, so timegm() interprets the broadcast time as UTC.
        # NOTE(review): the site presumably reports local time - confirm.
        unix_timestamp = calendar.timegm(broadcast.utctimetuple())

        return {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp
        }
|
||||
@@ -1,24 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
    """Information extractor for tv.slashdot.org embed pages.

    The page only wraps an Ooyala player, so extraction is delegated to the
    Ooyala extractor.
    """
    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'add_ie': ['Ooyala'],
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the page's Ooyala script URL."""
        embed_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, embed_id)
        # The first <script src="..."> on the page is taken as the player URL.
        player_src = self._search_regex(
            r'<script src="(.*?)"', page, 'ooyala url')
        return self.url_result(player_src, 'Ooyala')
|
||||
@@ -13,22 +13,24 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'file': 'v261036632ab.mp4',
|
||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
||||
'info_dict': {
|
||||
'id': 'v261036632ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'катастрофа с камер видеонаблюдения',
|
||||
'uploader': 'rbc2008',
|
||||
'uploader_id': 'rbc08',
|
||||
@@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
|
||||
# real video id 57591
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
||||
'file': 'v57591cb20.flv',
|
||||
'md5': '830266dfc21f077eac5afd1883091bcd',
|
||||
'info_dict': {
|
||||
'id': 'v57591cb20',
|
||||
'ext': 'flv',
|
||||
'title': 'test',
|
||||
'uploader': 'Support Photofile@photofile',
|
||||
'uploader_id': 'support-photofile',
|
||||
@@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
|
||||
# video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
'file': 'v1390466a13c.mp4',
|
||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||
'info_dict': {
|
||||
'id': 'v1390466a13c',
|
||||
'ext': 'mp4',
|
||||
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
'uploader': 'timoxa40',
|
||||
'uploader_id': 'timoxa40',
|
||||
@@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
|
||||
# age limit + video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
'file': 'v15408898bcf.flv',
|
||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||
'info_dict': {
|
||||
'id': 'v15408898bcf',
|
||||
'ext': 'flv',
|
||||
'title': 'этот ролик не покажут по ТВ',
|
||||
'uploader': 'zzxxx',
|
||||
'uploader_id': 'ueggb',
|
||||
@@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
|
||||
'params': {
|
||||
'videopassword': '333'
|
||||
}
|
||||
}
|
||||
},
|
||||
# swf player
|
||||
{
|
||||
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||
'md5': '4d47034979d9390d14acdf59c4935bc2',
|
||||
'info_dict': {
|
||||
'id': 'v9188090500',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shakira - Don\'t Bother',
|
||||
'uploader': 'HannahL',
|
||||
'uploader_id': 'lisaha95',
|
||||
'upload_date': '20090331',
|
||||
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
|
||||
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_SUCCESS = 0
|
||||
@@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
|
||||
_PASSWORD_DETECTED = 2
|
||||
_VIDEO_NOT_FOUND = 3
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
|
||||
<div\s+class="video_image">[^<]+</div>\s*
|
||||
<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
|
||||
if mobj is not None:
|
||||
return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
|
||||
|
||||
def _search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
@@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
video_page_url = 'http://' + mobj.group('url')
|
||||
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
|
||||
|
||||
# Warning if video is unavailable
|
||||
@@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'duration': video_duration,
|
||||
'view_count': video_view_count,
|
||||
'view_count': int_or_none(video_view_count),
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
'video_page_url': video_page_url
|
||||
}
|
||||
|
||||
@@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
|
|
||||
(?P<type_watch>watch)/[^/]+/[^/]+
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
/(?P<name>[\w-]+) # Here goes the name and then ".html"
|
||||
.*)$
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||
'info_dict': {
|
||||
@@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': '226f4fb9c62380d11b7995efa4c87994',
|
||||
'info_dict': {
|
||||
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||
}
|
||||
}]
|
||||
|
||||
_FORMATS_PREFERENCE = {
|
||||
'low': 1,
|
||||
@@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
name = m.group('name')
|
||||
if m.group('type_talk'):
|
||||
return self._talk_info(url, name)
|
||||
elif m.group('type_watch'):
|
||||
return self._watch_info(url, name)
|
||||
else:
|
||||
return self._playlist_videos_info(url, name)
|
||||
|
||||
@@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
def _watch_info(self, url, name):
    """Extract a single video from a ted.com "watch" page.

    The player configuration is read from the JSON stored in the page's
    ``data-config`` attribute; title and description are scraped from the
    HTML.  *name* is used as the video id.
    """
    page = self._download_webpage(url, name)

    raw_config = self._html_search_regex(
        r"data-config='([^']+)", page, 'config')
    player_config = json.loads(raw_config)
    media_url = player_config['video']['url']
    # The image entry is optional; a missing one yields a None thumbnail.
    image_info = player_config.get('image', {})
    thumb_url = image_info.get('url')

    page_title = self._html_search_regex(
        r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", page, 'title')
    # Not fatal: the search is allowed to come back without a description.
    about = self._html_search_regex(
        r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
        page, 'description', fatal=False)

    return {
        'id': name,
        'url': media_url,
        'title': page_title,
        'thumbnail': thumb_url,
        'description': about,
    }
|
||||
|
||||
61
youtube_dl/extractor/urort.py
Normal file
61
youtube_dl/extractor/urort.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class UrortIE(InfoExtractor):
    """Information extractor for band pages on urort.p3.no.

    A band page is returned as a playlist with one entry per track.
    """
    IE_DESC = 'NRK P3 Urørt'
    _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'

    _TEST = {
        'url': 'https://urort.p3.no/#!/Band/Gerilja',
        'md5': '5ed31a924be8a05e47812678a86e127b',
        'info_dict': {
            'id': '33124-4',
            'ext': 'mp3',
            'title': 'The Bomb',
            'thumbnail': 're:^https?://.+\.jpg',
            'like_count': int,
            'uploader': 'Gerilja',
            'uploader_id': 'Gerilja',
            'upload_date': '20100323',
        },
        'params': {
            'matchtitle': '^The Bomb$',  # To test, we want just one video
        }
    }

    def _real_extract(self, url):
        """Return a playlist info dict with every track of the band at *url*."""
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        # The track list is requested with a filter on the band's URL name;
        # the filter expression has to be percent-encoded.
        fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
        songs = self._download_json(json_url, playlist_id)

        # No debug output here: printing songs[0] polluted stdout and raised
        # IndexError for bands without any tracks.
        entries = [{
            'id': '%d-%s' % (s['BandId'], s['$id']),
            'title': s['Title'],
            'url': s['TrackUrl'],
            'ext': 'mp3',
            'uploader_id': playlist_id,
            'uploader': s.get('BandName', playlist_id),
            'like_count': s.get('LikeCount'),
            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
            'upload_date': unified_strdate(s.get('Released')),
        } for s in songs]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_id,
            'entries': entries,
        }
|
||||
@@ -1,38 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
    """Information extractor for vice.com pages.

    The videos are served through Ooyala, so the extractor only locates the
    Ooyala URL and hands it over to the Ooyala extractor.
    """
    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
        u'info_dict': {
            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result for the Ooyala video embedded at *url*.

        Raises an expected ExtractorError when neither lookup finds a video.
        """
        page_name = re.match(self._VALID_URL, url).group('name')
        page = self._download_webpage(url, page_name)
        try:
            # Preferred source: the Open Graph video URL.
            player_url = self._og_search_video_url(page)
        except ExtractorError:
            # Fallback: the embed code passed to the Ooyala player.
            try:
                code = self._search_regex(
                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', page,
                    u'ooyala embed code')
                player_url = OoyalaIE._url_for_embed_code(code)
            except ExtractorError:
                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
        return self.url_result(player_url, ie='Ooyala')
|
||||
|
||||
@@ -1181,6 +1181,10 @@ def int_or_none(v, scale=1):
|
||||
return v if v is None else (int(v) // scale)
|
||||
|
||||
|
||||
def float_or_none(v, scale=1):
    """Convert *v* to a float divided by *scale*; pass ``None`` through."""
    if v is None:
        return None
    return float(v) / scale
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
if s is None:
|
||||
return None
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.03.24.5'
|
||||
__version__ = '2014.03.29'
|
||||
|
||||
Reference in New Issue
Block a user