Compare commits

..

20 Commits

Author SHA1 Message Date
Philipp Hagemeister
8c5850eeb4 release 2014.03.29 2014-03-29 14:01:53 +01:00
Philipp Hagemeister
bd3e077a2d Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-29 14:01:19 +01:00
Jaime Marquínez Ferrándiz
7e70ac36b3 [bloomberg] Fix extraction (fixes #2154)
Stop using the OoyalaIE, extract the f4m url instead.
2014-03-29 11:55:12 +01:00
Philipp Hagemeister
2cc0082dc0 Credit @phaer for OE1 (#2646) 2014-03-29 10:11:32 +01:00
Sergey M․
056b56688a [ntv] Simplify 2014-03-29 15:55:03 +07:00
Philipp Hagemeister
b17418313f [oe1] Simplify (#2646) 2014-03-28 23:23:58 +01:00
Philipp Hagemeister
e9a6fd6a68 Merge remote-tracking branch 'phaer/add-oe1-support' 2014-03-28 23:21:58 +01:00
Philipp Hagemeister
bf30f3bd9d release 2014.03.28 2014-03-28 23:14:54 +01:00
Philipp Hagemeister
330edf2d84 Mention where to find keys in --dump-json (Fixes #2648) 2014-03-28 23:13:03 +01:00
Philipp Hagemeister
43f775e4ca [comedycentral] Duration can now be a float (Fixes #2647) 2014-03-28 23:06:34 +01:00
Sergey M․
8f6562448c [ntv] Move app guess outside formats loop 2014-03-28 23:09:56 +07:00
Sergey M․
263f4b514b [ntv] Add support for ntv.ru (Closes #2581) 2014-03-28 23:01:08 +07:00
phaer
f0da3f1ef9 [oe1] Add support for oe1.orf.at. 2014-03-28 17:57:25 +02:00
Sergey M․
cb3ac1c610 [smotri] Modernize and add support for emdebbed videos (Closes #2585) 2014-03-28 19:58:49 +07:00
Sergey M․
8efd15f477 [canalplus] Fix video id extraction (Closes #2645) 2014-03-28 18:47:15 +07:00
Jaime Marquínez Ferrándiz
d26ebe990f [ehow] Modernize 2014-03-27 21:23:02 +01:00
Jaime Marquínez Ferrándiz
28acf5500a [appletrailers] Modernize 2014-03-27 21:10:51 +01:00
Jaime Marquínez Ferrándiz
214c22c704 [niconico] Modernize 2014-03-27 21:01:09 +01:00
Sergey M․
8cdafb47b9 [mooshare] Add support for URLs starting with 'www' 2014-03-27 19:08:35 +07:00
Philipp Hagemeister
0dae5083f1 [urort] Add date 2014-03-27 02:56:23 +01:00
17 changed files with 376 additions and 108 deletions

View File

@@ -181,7 +181,9 @@ which means you can modify it, redistribute it or use it however you like.
--get-duration simulate, quiet but print video length
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information
-j, --dump-json simulate, quiet but print JSON information.
See --output for a description of available
keys.
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar

View File

@@ -51,6 +51,7 @@ __authors__ = (
'David Wagner',
'Juan C. Olivares',
'Mattias Harrysson',
'phaer',
)
__license__ = 'Public Domain'
@@ -395,7 +396,7 @@ def parseOpts(overrideArguments=None):
help='simulate, quiet but print output format', default=False)
verbosity.add_option('-j', '--dump-json',
action='store_true', dest='dumpjson',
help='simulate, quiet but print JSON information', default=False)
help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress',

View File

@@ -177,6 +177,8 @@ from .normalboots import NormalbootsIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .ntv import NTVIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .parliamentliveuk import ParliamentLiveUKIE

View File

@@ -16,9 +16,10 @@ class AppleTrailersIE(InfoExtractor):
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
"playlist": [
{
"file": "manofsteel-trailer4.mov",
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
"info_dict": {
"id": "manofsteel-trailer4",
"ext": "mov",
"duration": 111,
"title": "Trailer 4",
"upload_date": "20130523",
@@ -26,9 +27,10 @@ class AppleTrailersIE(InfoExtractor):
},
},
{
"file": "manofsteel-trailer3.mov",
"md5": "b8017b7131b721fb4e8d6f49e1df908c",
"info_dict": {
"id": "manofsteel-trailer3",
"ext": "mov",
"duration": 182,
"title": "Trailer 3",
"upload_date": "20130417",
@@ -36,9 +38,10 @@ class AppleTrailersIE(InfoExtractor):
},
},
{
"file": "manofsteel-trailer.mov",
"md5": "d0f1e1150989b9924679b441f3404d48",
"info_dict": {
"id": "manofsteel-trailer",
"ext": "mov",
"duration": 148,
"title": "Trailer",
"upload_date": "20121212",
@@ -46,15 +49,16 @@ class AppleTrailersIE(InfoExtractor):
},
},
{
"file": "manofsteel-teaser.mov",
"md5": "5fe08795b943eb2e757fa95cb6def1cb",
"info_dict": {
"id": "manofsteel-teaser",
"ext": "mov",
"duration": 93,
"title": "Teaser",
"upload_date": "20120721",
"uploader_id": "wb",
},
}
},
]
}
@@ -65,16 +69,16 @@ class AppleTrailersIE(InfoExtractor):
movie = mobj.group('movie')
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
s = re.sub(self._JSON_RE, _clean_json, s)
s = u'<html>' + s + u'</html>'
s = '<html>' + s + u'</html>'
return s
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
@@ -82,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick']
trailer_info_json = self._search_regex(self._JSON_RE,
on_click, u'trailer info')
on_click, 'trailer info')
trailer_info = json.loads(trailer_info_json)
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@@ -98,8 +102,7 @@ class AppleTrailersIE(InfoExtractor):
first_url = trailer_info['url']
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
settings = json.loads(settings_json)
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
formats = []
for format in settings['metadata']['sizes']:
@@ -107,7 +110,6 @@ class AppleTrailersIE(InfoExtractor):
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'ext': determine_ext(format_url),
'format': format['type'],
'width': format['width'],
'height': int(format['height']),

View File

@@ -1,22 +1,21 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_TEST = {
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
u'info_dict': {
u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
},
u'params': {
# Requires ffmpeg (m3u8 manifest)
u'skip_download': True,
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
'md5': '7bf08858ff7c203c870e8a6190e221e5',
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
},
}
@@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
embed_code = self._search_regex(
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
'embed code')
return OoyalaIE._build_url_result(embed_code)
f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url')
title = re.sub(': Video$', '', self._og_search_title(webpage))
return {
'id': name.split('-')[-1],
'title': title,
'url': f4m_url,
'ext': 'flv',
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
video_id = mobj.groupdict().get('id')
if video_id is None:
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
doc = self._download_xml(info_url,video_id,
u'Downloading video info')

View File

@@ -8,7 +8,7 @@ from ..utils import (
compat_str,
compat_urllib_parse,
ExtractorError,
int_or_none,
float_or_none,
unified_strdate,
)
@@ -159,7 +159,7 @@ class ComedyCentralShowsIE(InfoExtractor):
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
duration = int_or_none(content.attrib.get('duration'))
duration = float_or_none(content.attrib.get('duration'))
mediagen_url = content.attrib['url']
guid = itemEl.find('.//guid').text.rpartition(':')[-1]

View File

@@ -1,23 +1,25 @@
from __future__ import unicode_literals
import re
from ..utils import (
compat_urllib_parse,
determine_ext
)
from .common import InfoExtractor
class EHowIE(InfoExtractor):
IE_NAME = u'eHow'
_VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
IE_NAME = 'eHow'
_VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
u'file': u'12245069.flv',
u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
u'info_dict': {
u"title": u"Hardwood Flooring Basics",
u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
u"uploader": u"Erick Nathan"
'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
'md5': '9809b4e3f115ae2088440bcb4efbf371',
'info_dict': {
'id': '12245069',
'ext': 'flv',
'title': 'Hardwood Flooring Basics',
'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
'uploader': 'Erick Nathan',
}
}
@@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
webpage, u'video URL')
final_url = compat_urllib_parse.unquote(video_url)
uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
webpage, u'uploader')
webpage, 'video URL')
final_url = compat_urllib_parse.unquote(video_url)
uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '')
ext = determine_ext(final_url)
return {
'_type': 'video',
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'id': video_id,
'url': final_url,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
'uploader': uploader,
}

View File

@@ -25,6 +25,7 @@ from ..utils import (
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
class GenericIE(InfoExtractor):
@@ -212,6 +213,21 @@ class GenericIE(InfoExtractor):
'skip_download': 'Requires rtmpdump'
}
},
# smotri embed
{
'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
'md5': 'ec40048448e9284c9a1de77bb188108b',
'info_dict': {
'id': 'v27008541fad',
'ext': 'mp4',
'title': 'Крым и Севастополь вошли в состав России',
'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
'duration': 900,
'upload_date': '20140318',
'uploader': 'rbctv_2012_4',
'uploader_id': 'rbctv_2012_4',
},
},
]
def report_download_webpage(self, video_id):
@@ -547,6 +563,11 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
# Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage)
if smotri_url:
return self.url_result(smotri_url, 'Smotri')
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:

View File

@@ -14,7 +14,7 @@ from ..utils import (
class MooshareIE(InfoExtractor):
IE_NAME = 'mooshare'
IE_DESC = 'Mooshare.biz'
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
_VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
_TESTS = [
{

View File

@@ -1,12 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
@@ -18,57 +16,54 @@ from ..utils import (
class NiconicoIE(InfoExtractor):
IE_NAME = u'niconico'
IE_DESC = u'ニコニコ動画'
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
_TEST = {
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
u'file': u'sm22312215.mp4',
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
u'info_dict': {
u'title': u'Big Buck Bunny',
u'uploader': u'takuya0301',
u'uploader_id': u'2698420',
u'upload_date': u'20131123',
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
'id': 'sm22312215',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
},
u'params': {
u'username': u'ydl.niconico@gmail.com',
u'password': u'youtube-dl',
'params': {
'username': 'ydl.niconico@gmail.com',
'password': 'youtube-dl',
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_NETRC_MACHINE = 'niconico'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = True
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
# Login is required
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Log in
login_form_strs = {
u'mail': username,
u'password': password,
'mail': username,
'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
request = compat_urllib_request.Request(
u'https://secure.nicovideo.jp/secure/login', login_data)
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, u'', note=u'Logging in', errnote=u'Unable to log in')
request, None, note='Logging in', errnote='Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
self._downloader.report_warning('unable to log in: bad username or password')
return False
return True
@@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):
video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
note='Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, u'Downloading flv info')
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, 'Downloading flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
@@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info = self._download_xml(
url, video_id, note=u'Downloading user information')
url, video_id, note='Downloading user information')
video_uploader = user_info.find('.//nickname').text
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
except ExtractorError as err:
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
}

157
youtube_dl/extractor/ntv.py Normal file
View File

@@ -0,0 +1,157 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unescapeHTML
)
class NTVIE(InfoExtractor):
    """Information extractor for videos published on ntv.ru.

    The page URL does not carry the real video id, so the page is downloaded
    and scanned with ``_VIDEO_ID_REGEXES``.  The numeric id found there is
    used to fetch the player XML from ``http://www.ntv.ru/vi<id>/``, from
    which the metadata and the RTMP formats are built.
    """

    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '750783',
                'ext': 'flv',
                'title': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ',
                'description': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ',
                'duration': 28,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Patterns tried in order against the downloaded page; the first one that
    # matches supplies the numeric video id in group 1.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Raises ExtractorError when none of ``_VIDEO_ID_REGEXES`` matches the
        downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        # Path part of the URL; only used for messages until the real id
        # has been found in the page.
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        for pattern in self._VIDEO_ID_REGEXES:
            mobj = re.search(pattern, page)
            if mobj:
                break
        else:
            raise ExtractorError('No media links available for %s' % video_id)

        video_id = mobj.group(1)

        player = self._download_xml(
            'http://www.ntv.ru/vi%s/' % video_id, video_id,
            'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
        description = unescapeHTML(player.find('./data/description').text)

        video = player.find('./data/video')
        # The id reported by the player XML replaces the one scraped from
        # the page.
        video_id = video.find('./id').text
        thumbnail = video.find('./splash').text
        duration = int(video.find('./totaltime').text)
        view_count = int(video.find('./views').text)
        puid22 = video.find('./puid22').text

        # Map the puid22 value to the RTMP application name; unknown values
        # fall back to the application used for '4'.
        apps = {
            '4': 'video1',
            '7': 'video2',
        }
        app = apps.get(puid22, apps['4'])

        formats = []
        # Each variant is described by a "<prefix>file" element and an
        # accompanying "<prefix>size" element.
        for format_id in ['', 'hi', 'webm']:
            file_el = video.find('./%sfile' % format_id)
            if file_el is None:
                continue
            size_el = video.find('./%ssize' % format_id)
            # The size element is not guaranteed to accompany every file
            # element; report an unknown size instead of crashing.
            if size_el is not None and size_el.text:
                filesize = int(size_el.text)
            else:
                filesize = None
            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': file_el.text,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': filesize,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }

View File

@@ -0,0 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
import calendar
import datetime
import re
from .common import InfoExtractor
# audios on oe1.orf.at are only available for 7 days, so we can't
# add tests.
class OE1IE(InfoExtractor):
    """Information extractor for programme pages on oe1.orf.at."""

    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme referenced by *url*.

        The metadata comes from the JSON document served at
        ``http://oe1.orf.at/programm/<id>/konsole``.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        console_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(console_url, show_id)['item']

        # The date and the time of day arrive as two separate strings; they
        # are joined and parsed into a naive datetime, which timegm() then
        # interprets as UTC.
        aired = '%s %s' % (item['day_label'], item['time'])
        aired_dt = datetime.datetime.strptime(aired, '%d.%m.%Y %H:%M')

        return {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': calendar.timegm(aired_dt.utctimetuple()),
        }

View File

@@ -13,22 +13,24 @@ from ..utils import (
compat_urllib_request,
ExtractorError,
url_basename,
int_or_none,
)
class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com'
IE_NAME = 'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri'
_TESTS = [
# real video id 2610366
{
'url': 'http://smotri.com/video/view/?id=v261036632ab',
'file': 'v261036632ab.mp4',
'md5': '2a7b08249e6f5636557579c368040eb9',
'info_dict': {
'id': 'v261036632ab',
'ext': 'mp4',
'title': 'катастрофа с камер видеонаблюдения',
'uploader': 'rbc2008',
'uploader_id': 'rbc08',
@@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
# real video id 57591
{
'url': 'http://smotri.com/video/view/?id=v57591cb20',
'file': 'v57591cb20.flv',
'md5': '830266dfc21f077eac5afd1883091bcd',
'info_dict': {
'id': 'v57591cb20',
'ext': 'flv',
'title': 'test',
'uploader': 'Support Photofile@photofile',
'uploader_id': 'support-photofile',
@@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
# video-password
{
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
'file': 'v1390466a13c.mp4',
'md5': 'f6331cef33cad65a0815ee482a54440b',
'info_dict': {
'id': 'v1390466a13c',
'ext': 'mp4',
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
'uploader': 'timoxa40',
'uploader_id': 'timoxa40',
@@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
# age limit + video-password
{
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
'file': 'v15408898bcf.flv',
'md5': '91e909c9f0521adf5ee86fbe073aad70',
'info_dict': {
'id': 'v15408898bcf',
'ext': 'flv',
'title': 'этот ролик не покажут по ТВ',
'uploader': 'zzxxx',
'uploader_id': 'ueggb',
@@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
'params': {
'videopassword': '333'
}
}
},
# swf player
{
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
'md5': '4d47034979d9390d14acdf59c4935bc2',
'info_dict': {
'id': 'v9188090500',
'ext': 'mp4',
'title': 'Shakira - Don\'t Bother',
'uploader': 'HannahL',
'uploader_id': 'lisaha95',
'upload_date': '20090331',
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
},
},
]
_SUCCESS = 0
@@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
_PASSWORD_DETECTED = 2
_VIDEO_NOT_FOUND = 3
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
webpage)
if mobj is not None:
return mobj.group('url')
mobj = re.search(
r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
<div\s+class="video_image">[^<]+</div>\s*
<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
if mobj is not None:
return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
def _search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
@@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
# Warning if video is unavailable
@@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'duration': video_duration,
'view_count': video_view_count,
'view_count': int_or_none(video_view_count),
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
}

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unified_strdate,
)
@@ -24,6 +25,7 @@ class UrortIE(InfoExtractor):
'like_count': int,
'uploader': 'Gerilja',
'uploader_id': 'Gerilja',
'upload_date': '20100323',
},
'params': {
'matchtitle': '^The Bomb$', # To test, we want just one video
@@ -37,6 +39,7 @@ class UrortIE(InfoExtractor):
fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
songs = self._download_json(json_url, playlist_id)
print(songs[0])
entries = [{
'id': '%d-%s' % (s['BandId'], s['$id']),
@@ -47,6 +50,7 @@ class UrortIE(InfoExtractor):
'uploader': s.get('BandName', playlist_id),
'like_count': s.get('LikeCount'),
'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
'upload_date': unified_strdate(s.get('Released')),
} for s in songs]
return {

View File

@@ -1181,6 +1181,10 @@ def int_or_none(v, scale=1):
return v if v is None else (int(v) // scale)
def float_or_none(v, scale=1):
    """Convert *v* to a float divided by *scale*, passing None through.

    Returns None when *v* is None; otherwise ``float(v) / scale``.
    """
    if v is None:
        return None
    return float(v) / scale
def parse_duration(s):
if s is None:
return None

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.27.1'
__version__ = '2014.03.29'