Compare commits

...

10 Commits

Author SHA1 Message Date
Philipp Hagemeister
747373d4ae release 2014.03.21.1 2014-03-21 01:00:27 +01:00
Philipp Hagemeister
18d367c0a5 Remove legacy InfoExtractors file 2014-03-21 01:00:06 +01:00
Philipp Hagemeister
a1a530b067 [pbs] Add support for video ratings 2014-03-21 00:59:51 +01:00
Philipp Hagemeister
cb9722cb3f [viki] Modernize 2014-03-21 00:53:18 +01:00
Philipp Hagemeister
773c0b4bb8 [pbs] Add support for widget URLs (Fixes #2594) 2014-03-21 00:46:32 +01:00
Philipp Hagemeister
23c322a531 release 2014.03.21 2014-03-21 00:37:23 +01:00
Philipp Hagemeister
7e8c0af004 Add --prefer-insecure option (Fixes #2364) 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
d2983ccb25 [ninegag] Modernize and remove unused import 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
f24e9833dc [youporn] Modernize 2014-03-21 00:37:10 +01:00
Sergey M․
bc2bdf5709 [kontrtube] Modernize 2014-03-20 23:05:57 +07:00
13 changed files with 92 additions and 64 deletions

View File

@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
an empty string (--proxy "") for direct an empty string (--proxy "") for direct
connection connection
--no-check-certificate Suppress HTTPS certificate validation. --no-check-certificate Suppress HTTPS certificate validation.
--prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
--cache-dir DIR Location in the filesystem where youtube-dl --cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information can store some downloaded information
permanently. By default $XDG_CACHE_HOME permanently. By default $XDG_CACHE_HOME

View File

@@ -141,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_pbs(self): def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350 # https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -1,4 +0,0 @@
# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
from .extractor.common import InfoExtractor, SearchInfoExtractor
from .extractor import gen_extractors, get_info_extractor

View File

@@ -148,6 +148,8 @@ class YoutubeDL(object):
again. again.
cookiefile: File name where cookies should be read from and dumped to. cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text bidi_workaround: Work around buggy terminals without bidirectional text

View File

@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
'--proxy', dest='proxy', default=None, metavar='URL', '--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option( general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
action='store_true', action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
selection.add_option( selection.add_option(
'--playlist-start', '--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int, dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -756,6 +758,7 @@ def _real_main(argv=None):
'download_archive': download_archive_fn, 'download_archive': download_archive_fn,
'cookiefile': opts.cookiefile, 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy, 'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout, 'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround, 'bidi_workaround': opts.bidi_workaround,

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none
class KontrTubeIE(InfoExtractor): class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, title = self._html_search_regex(
'video title') r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description') description = self._html_search_meta('description', webpage, 'video description')
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', mobj = re.search(
webpage) r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, view_count = self._html_search_regex(
'view count', fatal=False) r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
view_count = int(view_count) if view_count is not None else None
comment_count = None comment_count = None
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', comment_str = self._html_search_regex(
fatal=False) r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'): if comment_str.startswith('комментариев нет'):
comment_count = 0 comment_count = 0
else: else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str) mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj: if mobj:
comment_count = int(mobj.group('total')) comment_count = mobj.group('total')
return { return {
'id': video_id, 'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
'title': title, 'title': title,
'description': description, 'description': description,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': int_or_none(view_count),
'comment_count': comment_count, 'comment_count': int_or_none(comment_count),
} }

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):
_TEST = { _TEST = {
"url": "http://9gag.tv/v/1912", "url": "http://9gag.tv/v/1912",
"file": "1912.mp4",
"info_dict": { "info_dict": {
"id": "1912",
"ext": "mp4",
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome", "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
"view_count": int, "view_count": int,

View File

@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
US_RATINGS,
)
class PBSIE(InfoExtractor): class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
# Article with embedded player # Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player # Player
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/ video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
) )
''' '''
@@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id) info = self._download_json(info_url, display_id)
rating_str = info.get('rating')
if rating_str is not None:
rating_str = rating_str.rpartition('-')[2]
age_limit = US_RATINGS.get(rating_str)
return { return {
'id': video_id, 'id': video_id,
'title': info['title'], 'title': info['title'],
@@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
'description': info['program'].get('description'), 'description': info['program'].get('description'),
'thumbnail': info.get('image_url'), 'thumbnail': info.get('image_url'),
'duration': info.get('duration'), 'duration': info.get('duration'),
'age_limit': age_limit,
} }

View File

@@ -1,29 +1,33 @@
from __future__ import unicode_literals
import re import re
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
US_RATINGS,
) )
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor): class VikiIE(SubtitlesInfoExtractor):
IE_NAME = u'viki' IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = { _TEST = {
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14', 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
u'file': u'1023585v.mp4', 'md5': 'a21454021c2646f5433514177e2caa5f',
u'md5': u'a21454021c2646f5433514177e2caa5f', 'info_dict': {
u'info_dict': { 'id': '1023585v',
u'title': u'Heirs Episode 14', 'ext': 'mp4',
u'uploader': u'SBS', 'title': 'Heirs Episode 14',
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e', 'uploader': 'SBS',
u'upload_date': u'20131121', 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'age_limit': 13, 'upload_date': '20131121',
'age_limit': 13,
}, },
u'skip': u'Blocked in the US', 'skip': 'Blocked in the US',
} }
def _real_extract(self, url): def _real_extract(self, url):
@@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
rating_str = self._html_search_regex( rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage, r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
u'rating information', default='').strip() 'rating information', default='').strip()
RATINGS = { age_limit = US_RATINGS.get(rating_str)
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}
age_limit = RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage( info_webpage = self._download_webpage(
info_url, video_id, note=u'Downloading info page') info_url, video_id, note='Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage): if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError( raise ExtractorError(
u'Video %s is blocked from your location.' % video_id, 'Video %s is blocked from your location.' % video_id,
expected=True) expected=True)
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL') r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
upload_date_str = self._html_search_regex( upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, u'upload date') r'"created_at":"([^"]+)"', info_webpage, 'upload date')
upload_date = ( upload_date = (
unified_strdate(upload_date_str) unified_strdate(upload_date_str)
if upload_date_str is not None if upload_date_str is not None

View File

@@ -1,3 +1,6 @@
from __future__ import unicode_literals
import json import json
import re import re
import sys import sys
@@ -17,24 +20,25 @@ from ..aes import (
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = { _TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4', 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', 'info_dict': {
u'info_dict': { 'id': '505835',
u"upload_date": u"20101221", 'ext': 'mp4',
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 'upload_date': '20101221',
u"uploader": u"Ask Dan And Jennifer", 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?", 'uploader': 'Ask Dan And Jennifer',
u"age_limit": 18, 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
'age_limit': 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url') url = mobj.group('proto') + 'www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
# Get JSON parameters # Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
try: try:
params = json.loads(json_params) params = json.loads(json_params)
except: except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page # Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE, download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, u'download list').strip() webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">' LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html) links = re.findall(LINK_RE, download_list_html)
@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
resolution = format_parts[0] resolution = format_parts[0]
height = int(resolution[:-len('p')]) height = int(resolution[:-len('p')])
bitrate = int(format_parts[1][:-len('k')]) bitrate = int(format_parts[1][:-len('k')])
format = u'-'.join(format_parts) + u'-' + dn format = '-'.join(format_parts) + '-' + dn
formats.append({ formats.append({
'url': video_url, 'url': video_url,

View File

@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
def _real_extract(self, url): def _real_extract(self, url):
proto = (
u'http' if self._downloader.params.get('prefer_insecure', False)
else u'https')
# Extract original video URL from URL with redirection, like age verification, using next_url parameter # Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url) mobj = re.search(self._NEXT_URL_RE, url)
if mobj: if mobj:
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url) video_id = self.extract_id(url)
# Get video webpage # Get video webpage
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
video_webpage = self._download_webpage(url, video_id) video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL # Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asv': 3, 'asv': 3,
'sts':'1588', 'sts':'1588',
}) })
video_info_url = 'https://www.youtube.com/get_video_info?' + data video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id, video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False, note=False,
errnote='unable to download video info webpage') errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
age_gate = False age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type)) % (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id, video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False, note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': video_duration, 'duration': video_duration,
'age_limit': 18 if age_gate else 0, 'age_limit': 18 if age_gate else 0,
'annotations': video_annotations, 'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'dislike_count': dislike_count, 'dislike_count': dislike_count,

View File

@@ -1289,3 +1289,12 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
return getpass.getpass(prompt, *args, **kwargs) return getpass.getpass(prompt, *args, **kwargs)
else: else:
compat_getpass = getpass.getpass compat_getpass = getpass.getpass
US_RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.20' __version__ = '2014.03.21.1'