Compare commits

..

18 Commits

Author SHA1 Message Date
Philipp Hagemeister
671c0f151d release 2013.12.04 2013-12-04 14:19:07 +01:00
Philipp Hagemeister
27dcce1904 [youtube] Resolve URLs in comments 2013-12-04 14:18:49 +01:00
Jaime Marquínez Ferrándiz
84db81815a Move common code for extractors based on MTV services to a new base class
Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
2013-12-03 14:58:24 +01:00
Jaime Marquínez Ferrándiz
fb7abb31af Remove the compatibility code used before the new format system was implemented 2013-12-03 14:31:20 +01:00
Philipp Hagemeister
ce93879a9b [daum] Fix real video ID extraction 2013-12-03 14:16:58 +01:00
Philipp Hagemeister
938384c587 [redtube] Fix search for title 2013-12-03 14:08:16 +01:00
Philipp Hagemeister
e9d8e302aa [xhamster] Change test checksum 2013-12-03 14:06:16 +01:00
Jaime Marquínez Ferrándiz
cb7fb54600 Change the ie_name of YoutubeSearchDateIE
It produced a duplicate entry when listing the extractors with '--list-extractors' and generated noise in the commit log when generating the supported sites webpage (like in 09f355f73b)
2013-12-03 13:55:25 +01:00
Philipp Hagemeister
cf6758d204 Document disabling proxy (#1882) 2013-12-03 13:33:07 +01:00
Philipp Hagemeister
731e3dde29 release 2013.12.03 2013-12-03 13:13:09 +01:00
Philipp Hagemeister
a0eaa341e1 [configuration] Undo code breakage 2013-12-03 13:11:20 +01:00
Philipp Hagemeister
fb27c2295e Correct configuration file locations 2013-12-03 13:09:48 +01:00
Philipp Hagemeister
1b753cb334 Add Windows configuration file locations (#1881) 2013-12-03 13:04:02 +01:00
Philipp Hagemeister
36a826a50d Clarify --download-archive help (#1757) 2013-12-03 11:54:52 +01:00
Philipp Hagemeister
8796857429 Credit @dstftw for smotri IE 2013-12-02 17:43:22 +01:00
Philipp Hagemeister
aaebed13a8 [smotri] Simplify 2013-12-02 17:08:17 +01:00
Philipp Hagemeister
25939ffe56 Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl 2013-12-02 15:56:35 +01:00
dst
5270d8cb13 Added extractors for smotri.com 2013-12-02 20:10:19 +07:00
24 changed files with 397 additions and 143 deletions

View File

@@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
--list-extractors List all supported extractors and the URLs they
would handle
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
empty string (--proxy "") for direct connection
--no-check-certificate Suppress HTTPS certificate validation.
--cache-dir DIR Location in the filesystem where youtube-dl can
store downloaded information permanently. By
@@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like.
--dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not present in the archive
--download-archive FILE Download only videos not listed in the archive
file. Record the IDs of all downloaded videos in
it.
@@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE

View File

@@ -22,7 +22,9 @@ from youtube_dl.extractor import (
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
)
@@ -119,6 +121,24 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_community(self):
    """Extract a smotri.com community page and check the playlist metadata."""
    community_url = 'http://smotri.com/community/video/kommuna'
    extractor = SmotriCommunityIE(FakeYDL())
    playlist = extractor.extract(community_url)
    self.assertIsPlaylist(playlist)
    # The playlist id is the community slug taken from the URL.
    self.assertEqual(playlist['id'], u'kommuna')
    self.assertEqual(playlist['title'], u'КПРФ')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 4)
def test_smotri_user(self):
    """Extract a smotri.com user page and check the playlist metadata."""
    user_url = 'http://smotri.com/user/inspector'
    extractor = SmotriUserIE(FakeYDL())
    playlist = extractor.extract(user_url)
    self.assertIsPlaylist(playlist)
    # The playlist id is the user slug taken from the URL.
    self.assertEqual(playlist['id'], u'inspector')
    self.assertEqual(playlist['title'], u'Inspector')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 9)
if __name__ == '__main__':
unittest.main()

View File

@@ -36,6 +36,7 @@ __authors__ = (
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
'Sergey M.',
)
__license__ = 'Public Domain'
@@ -80,11 +81,11 @@ from .PostProcessor import (
def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes):
def _readOptions(filename_bytes, default=[]):
try:
optionf = open(filename_bytes)
except IOError:
return [] # silently skip if file is not present
return default # silently skip if file is not present
try:
res = []
for l in optionf:
@@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None):
general.add_option('--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions',
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option(
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -223,7 +226,7 @@ def parseOpts(overrideArguments=None):
default=None, type=int)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -418,6 +421,8 @@ def parseOpts(overrideArguments=None):
if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -427,8 +432,31 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)

View File

@@ -121,6 +121,11 @@ from .rutube import RutubeIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
)
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import (

View File

@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
})
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = {
playlist.append({
'_type': 'video',
'id': video_id,
'title': title,
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
'upload_date': upload_date,
'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)',
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['ext']
playlist.append(info)
})
return {
'_type': 'playlist',

View File

@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
for f in formats:
f['ext'] = determine_ext(f['url'])
info = {
return {
'_type': 'video',
'id': video_id,
'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
'description': description,
'uploader': uploader,
'upload_date': upload_date,
'thumbnail': data.get('misc', {}).get('image'),
}
thumbnail = data.get('misc', {}).get('image')
if thumbnail:
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag
from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
@@ -11,7 +11,7 @@ from ..utils import (
)
class ComedyCentralIE(MTVIE):
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
@@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
})
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
results.append({
'id': shortMediaId,
'formats': formats,
'uploader': showId,
@@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'title': effTitle,
'thumbnail': None,
'description': compat_str(officialTitle),
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
results.append(info)
})
return results

View File

@@ -364,7 +364,8 @@ class InfoExtractor(object):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False)

View File

@@ -28,7 +28,8 @@ class DaumIE(InfoExtractor):
video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
@@ -56,7 +57,7 @@ class DaumIE(InfoExtractor):
'format_id': profile,
})
info = {
return {
'id': video_id,
'title': info.find('TITLE').text,
'formats': formats,
@@ -65,6 +66,3 @@ class DaumIE(InfoExtractor):
'duration': int(info.find('DURATION').text),
'upload_date': info.find('REGDTTM').text[:8],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor):
return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey)
info = {
return {
'_type': 'video',
'id': video_id,
'title': video_title,
@@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor):
'uploader': video_uploader,
'upload_date': upload_date,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -44,13 +44,10 @@ class FazIE(InfoExtractor):
})
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
info = {
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
'format_id': q,
})
info = {
return {
'id': data_video['guid'],
'title': compat_urllib_parse.unquote(data_video['title']),
'formats': formats,
'description': get_meta_content('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,13 +1,11 @@
import re
from .mtv import MTVIE, _media_xml_tag
from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVIE):
"""
Gametrailers use the same videos system as MTVIE, it just changes the feed
url, where the uri is and the method to get the thumbnails.
"""
class GametrailersIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@@ -17,15 +15,9 @@ class GametrailersIE(MTVIE):
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

View File

@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, u'description', flags=re.DOTALL)
info = {
return {
'id': video_id,
'title': clip.find('title').text,
'formats': formats,
'description': description,
'duration': int(clip.find('duration').text),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -10,35 +10,8 @@ from ..utils import (
def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
return base + m.group('finalid')
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
else:
description = None
info = {
return {
'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page),
'id': video_id,
@@ -101,11 +79,6 @@ class MTVIE(InfoExtractor):
'description': description,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
@@ -113,6 +86,39 @@ class MTVIE(InfoExtractor):
u'Downloading info')
return [self._get_video_info(item) for item in idoc.findall('.//item')]
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')

View File

@@ -56,7 +56,7 @@ class NaverIE(InfoExtractor):
'height': int(format_el.find('height').text),
})
info = {
return {
'id': video_id,
'title': info.find('Subject').text,
'formats': formats,
@@ -65,6 +65,3 @@ class NaverIE(InfoExtractor):
'upload_date': info.find('WriteDate').text.replace('.', ''),
'view_count': int(info.find('PlayCount').text),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
video_title = self._html_search_regex(
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title')
# No self-labeling, but they describe themselves as

View File

@@ -0,0 +1,252 @@
# encoding: utf-8
import re
import json
import hashlib
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError
)
class SmotriIE(InfoExtractor):
    """Information extractor for single videos on smotri.com.

    The video URL is read from the ``vt.php`` JSON endpoint; the remaining
    metadata (title, description, thumbnail, dates, uploader, ...) is scraped
    from the regular video web page.  Password-protected videos are retried
    with the MD5 of the ``videopassword`` downloader parameter, and the
    age-confirmation interstitial is followed automatically.
    """

    IE_DESC = u'Smotri.com'
    IE_NAME = u'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'

    _TESTS = [
        # real video id 2610366
        {
            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
            u'file': u'v261036632ab.mp4',
            u'md5': u'2a7b08249e6f5636557579c368040eb9',
            u'info_dict': {
                u'title': u'катастрофа с камер видеонаблюдения',
                u'uploader': u'rbc2008',
                u'uploader_id': u'rbc08',
                u'upload_date': u'20131118',
                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
            u'file': u'v57591cb20.flv',
            u'md5': u'830266dfc21f077eac5afd1883091bcd',
            u'info_dict': {
                u'title': u'test',
                u'uploader': u'Support Photofile@photofile',
                u'uploader_id': u'support-photofile',
                u'upload_date': u'20070704',
                u'description': u'test, видео test',
                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
            u'file': u'v1390466a13c.mp4',
            u'md5': u'f6331cef33cad65a0815ee482a54440b',
            u'info_dict': {
                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                u'uploader': u'timoxa40',
                u'uploader_id': u'timoxa40',
                u'upload_date': u'20100404',
                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
            },
            u'params': {
                u'videopassword': u'qwerty',
            },
        },
        # age limit + video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
            u'file': u'v15408898bcf.flv',
            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
            u'info_dict': {
                u'title': u'этот ролик не покажут по ТВ',
                u'uploader': u'zzxxx',
                u'uploader_id': u'ueggb',
                u'upload_date': u'20101001',
                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                u'age_limit': 18,
                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
            },
            u'params': {
                u'videopassword': u'333'
            }
        }
    ]

    # Values of the 'status' field in the vt.php JSON response, as they are
    # compared against in _real_extract.
    _SUCCESS = 0
    _PASSWORD_NOT_VERIFIED = 1
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3

    def _search_meta(self, name, html, display_name=None):
        """Non-fatally look up ``<meta itemprop="NAME" content="..." />``.

        Returns whatever the non-fatal ``_html_search_regex`` yields; callers
        must be prepared for a missing (falsy) result.
        """
        if display_name is None:
            display_name = name
        # NOTE: the original had a second, unreachable
        # ``return self._html_search_meta(...)`` after this statement; it was
        # dead code and has been removed without changing behavior.
        return self._html_search_regex(
            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
            html, display_name, fatal=False)

    @staticmethod
    def _clean_description(description):
        """Strip the site's boilerplate prefix/suffix from a description."""
        if not description:
            # The meta tag lookup is non-fatal, so there may be nothing here.
            return description
        END_TEXT = u' на сайте Smotri.com'
        if description.endswith(END_TEXT):
            description = description[:-len(END_TEXT)]
        START_TEXT = u'Смотреть онлайн ролик '
        if description.startswith(START_TEXT):
            description = description[len(START_TEXT):]
        return description

    @staticmethod
    def _parse_upload_date(upload_date_str):
        """Convert ``YYYY.MM.DDT...`` into ``YYYYMMDD``; None if unparsable."""
        if not upload_date_str:
            return None
        date_m = re.search(
            r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T',
            upload_date_str)
        if not date_m:
            return None
        return date_m.group('year') + date_m.group('month') + date_m.group('day')

    @staticmethod
    def _parse_duration(duration_str):
        """Convert ``T..H..M..S`` into a number of seconds; None if unparsable."""
        if not duration_str:
            return None
        duration_m = re.search(
            r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S',
            duration_str)
        if not duration_m:
            return None
        return (
            int(duration_m.group('hours')) * 60 * 60 +
            int(duration_m.group('minutes')) * 60 +
            int(duration_m.group('seconds')))

    def _real_extract(self, url):
        """Return the info dictionary for the smotri.com video at ``url``.

        Raises ExtractorError when the video does not exist, when a required
        video password is missing or wrong, or when the JSON endpoint reports
        an unexpected status.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        real_video_id = mobj.group('realvideoid')

        # Download video JSON data
        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
        video_json = json.loads(video_json_page)

        status = video_json['status']
        if status == self._VIDEO_NOT_FOUND:
            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
        elif status == self._PASSWORD_DETECTED:
            # The video is protected by a password, retry with
            # video-password set
            video_password = self._downloader.params.get('videopassword', None)
            if not video_password:
                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
            video_json = json.loads(video_json_page)
            status = video_json['status']
            if status == self._PASSWORD_NOT_VERIFIED:
                raise ExtractorError(u'Video password is invalid', expected=True)

        if status != self._SUCCESS:
            raise ExtractorError(u'Unexpected status value %s' % status)

        # Extract the URL of the video
        video_url = video_json['file_data']

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        video_page_url = 'http://' + mobj.group('url')
        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')

        # Adult content
        adult_content = u'EroConfirmText">' in video_page
        if adult_content:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
                video_page, u'confirm string')
            confirm_url = video_page_url + '&confirm=%s' % confirm_string
            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')

        # Extract the rest of meta data
        video_title = self._search_meta(u'name', video_page, u'title')
        if not video_title:
            # Fall back to the last path component of the media URL
            video_title = video_url.rsplit('/', 1)[-1]

        video_description = self._clean_description(
            self._search_meta(u'description', video_page))
        video_thumbnail = self._search_meta(u'thumbnail', video_page)
        video_upload_date = self._parse_upload_date(
            self._search_meta(u'uploadDate', video_page, u'upload date'))
        video_duration = self._parse_duration(
            self._search_meta(u'duration', video_page))

        video_uploader = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)

        video_uploader_id = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)

        view_count_str = self._html_search_regex(
            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
        # The pattern only captures digits, so the conversion cannot fail;
        # report the count as a number like the other extractors do.
        video_view_count = int(view_count_str) if view_count_str else None

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            # 'duration' is the key the other extractors use;
            # 'video_duration' is kept for backward compatibility.
            'duration': video_duration,
            'video_duration': video_duration,
            'view_count': video_view_count,
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
class SmotriCommunityIE(InfoExtractor):
    """Playlist extractor for the videos of a smotri.com community.

    The community's RSS export is downloaded and every item link is handed
    over to the 'Smotri' extractor.
    """

    IE_DESC = u'Smotri.com community videos'
    IE_NAME = u'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Build a playlist result from the community's RSS feed."""
        community_id = re.match(self._VALID_URL, url).group('communityid')

        feed_url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
        feed = self._download_xml(feed_url, community_id, u'Downloading community RSS')

        # One playlist entry per <link> of every feed item
        entries = []
        for link_node in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_node.text, 'Smotri'))

        # The community title is quoted inside the channel description
        channel_description = feed.find('./channel/description').text
        community_title = self._html_search_regex(
            u'^Видео сообщества "([^"]+)"$', channel_description, u'community title')

        return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor):
    """Playlist extractor for the videos uploaded by a smotri.com user.

    The user's RSS export is downloaded and every item link is handed over
    to the 'Smotri' extractor.
    """

    IE_DESC = u'Smotri.com user videos'
    IE_NAME = u'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Build a playlist result from the user's RSS feed."""
        user_id = re.match(self._VALID_URL, url).group('userid')

        feed_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
        feed = self._download_xml(feed_url, user_id, u'Downloading user RSS')

        # One playlist entry per <link> of every feed item
        entries = []
        for link_node in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_node.text, 'Smotri'))

        # The nickname follows a fixed prefix in the channel description
        channel_description = feed.find('./channel/description').text
        user_nickname = self._html_search_regex(
            u'^Видео режиссера (.*)$', channel_description,
            u'user nickname')

        return self.playlist_result(entries, user_id, user_nickname)

View File

@@ -1,15 +1,14 @@
import re
from .mtv import MTVIE, _media_xml_tag
from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVIE):
class SouthParkStudiosIE(MTVServicesInfoExtractor):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
# Overwrite MTVIE properties we don't want
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
},
}]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url')

View File

@@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor):
for fnode in format_doc.findall('./formats/format')
]
info = {
return {
'_type': 'video',
'id': video_id,
'formats': formats,
@@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor):
'thumbnail': thumbnail,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor):
r"thumbnail\s*:\s*'([^']*)'",
webpage, u'thumbnail', fatal=False)
info = {
return {
'_type': 'video',
'id': video_id,
'title': title,
@@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor):
'duration': duration,
'formats': formats,
}
# TODO: Remove when #980 has been merged
info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
info.update(info['formats'][-1])
return info

View File

@@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor):
{
u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
u'file': u'2221348.flv',
u'md5': u'e767b9475de189320f691f49c679c4c7',
u'md5': u'970a94178ca4118c5aa3aaea21211b81',
u'info_dict': {
u"upload_date": u"20130914",
u"uploader_id": u"jojo747400",

View File

@@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag",
u"upload_date": u"20121002",
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
@@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = u'YouTube.com searches, newest videos first'

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.02'
__version__ = '2013.12.04'