release 2013.12.04

[youtube] Resolve URLs in comments
Move common code for extractors based on MTV services to a new base class
2013-12-04 14:19:07 +01:00 · 2013-12-04 14:18:49 +01:00 · 2013-12-03 14:58:24 +01:00 · 2013-12-03 14:31:20 +01:00 · 2013-12-03 14:16:58 +01:00 · 2013-12-03 14:08:16 +01:00
24 changed files with 397 additions and 143 deletions
--- a/README.md
+++ b/README.md
@@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
    --list-extractors          List all supported extractors and the URLs they
                               would handle
    --extractor-descriptions   Output descriptions of all supported extractors
-    --proxy URL                Use the specified HTTP/HTTPS proxy
+    --proxy URL                Use the specified HTTP/HTTPS proxy. Pass in an
+                               empty string (--proxy "") for direct connection
    --no-check-certificate     Suppress HTTPS certificate validation.
    --cache-dir DIR            Location in the filesystem where youtube-dl can
                               store downloaded information permanently. By
@@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like.
    --dateafter DATE           download only videos uploaded after this date
    --no-playlist              download only the currently playing video
    --age-limit YEARS          download only videos suitable for the given age
-    --download-archive FILE    Download only videos not present in the archive
+    --download-archive FILE    Download only videos not listed in the archive
                               file. Record the IDs of all downloaded videos in
                               it.

@@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.

 # CONFIGURATION

-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.

 # OUTPUT TEMPLATE

--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -22,7 +22,9 @@ from youtube_dl.extractor import (
    LivestreamIE,
    NHLVideocenterIE,
    BambuserChannelIE,
-    BandcampAlbumIE
+    BandcampAlbumIE,
+    SmotriCommunityIE,
+    SmotriUserIE
 )


@@ -119,6 +121,24 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], u'Nightmare Night EP')
        self.assertTrue(len(result['entries']) >= 4)
+        
+    def test_smotri_community(self):
+        dl = FakeYDL()
+        ie = SmotriCommunityIE(dl)
+        result = ie.extract('http://smotri.com/community/video/kommuna')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'kommuna')
+        self.assertEqual(result['title'], u'КПРФ')
+        self.assertTrue(len(result['entries']) >= 4)
+        
+    def test_smotri_user(self):
+        dl = FakeYDL()
+        ie = SmotriUserIE(dl)
+        result = ie.extract('http://smotri.com/user/inspector')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'inspector')
+        self.assertEqual(result['title'], u'Inspector')
+        self.assertTrue(len(result['entries']) >= 9)

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -36,6 +36,7 @@ __authors__  = (
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
+    'Sergey M.',
 )

 __license__ = 'Public Domain'
@@ -80,11 +81,11 @@ from .PostProcessor import (


 def parseOpts(overrideArguments=None):
-    def _readOptions(filename_bytes):
+    def _readOptions(filename_bytes, default=[]):
        try:
            optionf = open(filename_bytes)
        except IOError:
-            return [] # silently skip if file is not present
+            return default  # silently skip if file is not present
        try:
            res = []
            for l in optionf:
@@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None):
    general.add_option('--extractor-descriptions',
            action='store_true', dest='list_extractor_descriptions',
            help='Output descriptions of all supported extractors', default=False)
-    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
+    general.add_option(
+        '--proxy', dest='proxy', default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -223,7 +226,7 @@ def parseOpts(overrideArguments=None):
                         default=None, type=int)
    selection.add_option('--download-archive', metavar='FILE',
                         dest='download_archive',
-                         help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
+                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')


    authentication.add_option('-u', '--username',
@@ -418,6 +421,8 @@ def parseOpts(overrideArguments=None):
        if opts.verbose:
            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
    else:
+        systemConf = _readOptions('/etc/youtube-dl.conf')
+
        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
        if xdg_config_home:
            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -427,8 +432,31 @@ def parseOpts(overrideArguments=None):
            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
            if not os.path.isfile(userConfFile):
                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        systemConf = _readOptions('/etc/youtube-dl.conf')
-        userConf = _readOptions(userConfFile)
+        userConf = _readOptions(userConfFile, None)
+
+        if userConf is None:
+            appdata_dir = os.environ.get('appdata')
+            if appdata_dir:
+                userConf = _readOptions(
+                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
+                    default=None)
+                if userConf is None:
+                    userConf = _readOptions(
+                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+                        default=None)
+
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                default=None)
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                default=None)
+
+        if userConf is None:
+            userConf = []
+
        commandLineConf = sys.argv[1:]
        argv = systemConf + userConf + commandLineConf
        opts, args = parser.parse_args(argv)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -121,6 +121,11 @@ from .rutube import RutubeIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
+from .smotri import (
+    SmotriIE,
+    SmotriCommunityIE,
+    SmotriUserIE,
+)
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import (
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
                })
            formats = sorted(formats, key=lambda f: (f['height'], f['width']))

-            info = {
+            playlist.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
-            }
-            # TODO: Remove when #980 has been merged
-            info['url'] = formats[-1]['url']
-            info['ext'] = formats[-1]['ext']
-
-            playlist.append(info)
+            })

        return {
            '_type': 'playlist',
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
        for f in formats:
            f['ext'] = determine_ext(f['url'])

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
+            'thumbnail': data.get('misc', {}).get('image'),
        }
-        thumbnail = data.get('misc', {}).get('image')
-        if thumbnail:
-            info['thumbnail'] = thumbnail
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,7 +1,7 @@
 import re

 from .common import InfoExtractor
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
@@ -11,7 +11,7 @@ from ..utils import (
 )


-class ComedyCentralIE(MTVIE):
+class ComedyCentralIE(MTVServicesInfoExtractor):
    _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'

@@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []
-
-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                })

            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
-            info = {
+            results.append({
                'id': shortMediaId,
                'formats': formats,
                'uploader': showId,
@@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
                'title': effTitle,
                'thumbnail': None,
                'description': compat_str(officialTitle),
-            }
-
-            # TODO: Remove when #980 has been merged
-            info.update(info['formats'][-1])
-
-            results.append(info)
+            })

        return results
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -364,7 +364,8 @@ class InfoExtractor(object):
        if display_name is None:
            display_name = name
        return self._html_search_regex(
-            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
+            r'''(?ix)<meta
+                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=False)

--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -28,7 +28,8 @@ class DaumIE(InfoExtractor):
        video_id = mobj.group(1)
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
-        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
+        full_id = self._search_regex(
+            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info = self._download_xml(
@@ -56,7 +57,7 @@ class DaumIE(InfoExtractor):
                'format_id': profile,
            })

-        info = {
+        return {
            'id': video_id,
            'title': info.find('TITLE').text,
            'formats': formats,
@@ -65,6 +66,3 @@ class DaumIE(InfoExtractor):
            'duration': int(info.find('DURATION').text),
            'upload_date': info.find('REGDTTM').text[:8],
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor):
            return (qidx, prefer_http, format['video_bitrate'])
        formats.sort(key=_sortkey)

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
@@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor):
            'uploader': video_uploader,
            'upload_date': upload_date,
        }
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -44,13 +44,10 @@ class FazIE(InfoExtractor):
            })

        descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
-        info = {
+        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': config.find('STILL/STILL_BIG').text,
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
                'format_id': q,
            })

-        info = {
+        return {
            'id': data_video['guid'],
            'title': compat_urllib_parse.unquote(data_video['title']),
            'formats': formats,
            'description': get_meta_content('description', webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,13 +1,11 @@
 import re

-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor

-class GametrailersIE(MTVIE):
-    """
-    Gametrailers use the same videos system as MTVIE, it just changes the feed
-    url, where the uri is and the method to get the thumbnails.
-    """
+
+class GametrailersIE(MTVServicesInfoExtractor):
    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
+
    _TEST = {
        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@@ -17,15 +15,9 @@ class GametrailersIE(MTVIE):
            u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
        },
    }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []

    _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'

-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

-        info = {
+        return {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -10,35 +10,8 @@ from ..utils import (
 def _media_xml_tag(tag):
    return '{http://search.yahoo.com/mrss/}%s' % tag

-class MTVIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
-
-    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
-
-    _TESTS = [
-        {
-            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            u'file': u'853555.mp4',
-            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
-            u'info_dict': {
-                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
-                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
-            },
-        },
-        {
-            u'add_ie': ['Vevo'],
-            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            u'file': u'USCJY1331283.mp4',
-            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
-            u'info_dict': {
-                u'title': u'Everything Has Changed',
-                u'upload_date': u'20130606',
-                u'uploader': u'Taylor Swift',
-            },
-            u'skip': u'VEVO is only available in some countries',
-        },
-    ]

+class MTVServicesInfoExtractor(InfoExtractor):
    @staticmethod
    def _id_from_uri(uri):
        return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
        return base + m.group('finalid')

    def _get_thumbnail_url(self, uri, itemdoc):
-        return 'http://mtv.mtvnimages.com/uri/' + uri
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        thumb_node = itemdoc.find(search_path)
+        if thumb_node is None:
+            return None
+        else:
+            return thumb_node.attrib['url']

    def _extract_video_formats(self, metadataXml):
        if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
        else:
            description = None

-        info = {
+        return {
            'title': itemdoc.find('title').text,
            'formats': self._extract_video_formats(mediagen_page),
            'id': video_id,
@@ -101,11 +79,6 @@ class MTVIE(InfoExtractor):
            'description': description,
        }

-        # TODO: Remove when #980 has been merged
-        info.update(info['formats'][-1])
-
-        return info
-
    def _get_videos_info(self, uri):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})
@@ -113,6 +86,39 @@ class MTVIE(InfoExtractor):
                                         u'Downloading info')
        return [self._get_video_info(item) for item in idoc.findall('.//item')]

+
+class MTVIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
+
+    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+            u'file': u'853555.mp4',
+            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
+            u'info_dict': {
+                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
+                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+            },
+        },
+        {
+            u'add_ie': ['Vevo'],
+            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
+            u'file': u'USCJY1331283.mp4',
+            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
+            u'info_dict': {
+                u'title': u'Everything Has Changed',
+                u'upload_date': u'20130606',
+                u'uploader': u'Taylor Swift',
+            },
+            u'skip': u'VEVO is only available in some countries',
+        },
+    ]
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        return 'http://mtv.mtvnimages.com/uri/' + uri
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -56,7 +56,7 @@ class NaverIE(InfoExtractor):
                'height': int(format_el.find('height').text),
            })

-        info = {
+        return {
            'id': video_id,
            'title': info.find('Subject').text,
            'formats': formats,
@@ -65,6 +65,3 @@ class NaverIE(InfoExtractor):
            'upload_date': info.find('WriteDate').text.replace('.', ''),
            'view_count': int(info.find('PlayCount').text),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')

        video_title = self._html_search_regex(
-            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
            webpage, u'title')

        # No self-labeling, but they describe themselves as
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -0,0 +1,252 @@
+# encoding: utf-8
+
+import re
+import json
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    ExtractorError
+)
+
+
+class SmotriIE(InfoExtractor):
+    IE_DESC = u'Smotri.com'
+    IE_NAME = u'smotri'
+    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
+
+    _TESTS = [
+        # real video id 2610366
+        {
+            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
+            u'file': u'v261036632ab.mp4',
+            u'md5': u'2a7b08249e6f5636557579c368040eb9',
+            u'info_dict': {
+                u'title': u'катастрофа с камер видеонаблюдения',
+                u'uploader': u'rbc2008',
+                u'uploader_id': u'rbc08',
+                u'upload_date': u'20131118',
+                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
+                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
+            },
+        },
+        # real video id 57591
+        {
+            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
+            u'file': u'v57591cb20.flv',
+            u'md5': u'830266dfc21f077eac5afd1883091bcd',
+            u'info_dict': {
+                u'title': u'test',
+                u'uploader': u'Support Photofile@photofile',
+                u'uploader_id': u'support-photofile',
+                u'upload_date': u'20070704',
+                u'description': u'test, видео test',
+                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
+            },
+        },
+        # video-password
+        {
+            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
+            u'file': u'v1390466a13c.mp4',
+            u'md5': u'f6331cef33cad65a0815ee482a54440b',
+            u'info_dict': {
+                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+                u'uploader': u'timoxa40',
+                u'uploader_id': u'timoxa40',
+                u'upload_date': u'20100404',
+                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
+                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+            },
+            u'params': {
+                u'videopassword': u'qwerty',
+            },
+        },
+        # age limit + video-password
+        {
+            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
+            u'file': u'v15408898bcf.flv',
+            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
+            u'info_dict': {
+                u'title': u'этот ролик не покажут по ТВ',
+                u'uploader': u'zzxxx',
+                u'uploader_id': u'ueggb',
+                u'upload_date': u'20101001',
+                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
+                u'age_limit': 18,
+                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
+            },
+            u'params': {
+                u'videopassword': u'333'
+            }
+        }
+    ]
+    
+    _SUCCESS = 0
+    _PASSWORD_NOT_VERIFIED = 1
+    _PASSWORD_DETECTED = 2
+    _VIDEO_NOT_FOUND = 3
+
+    def _search_meta(self, name, html, display_name=None):
+        if display_name is None:
+            display_name = name
+        return self._html_search_regex(
+            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
+            html, display_name, fatal=False)
+        return self._html_search_meta(name, html, display_name)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        real_video_id = mobj.group('realvideoid')
+
+        # Download video JSON data
+        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
+        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
+        video_json = json.loads(video_json_page)
+        
+        status = video_json['status']
+        if status == self._VIDEO_NOT_FOUND:
+            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+        elif status == self._PASSWORD_DETECTED:  # The video is protected by a password, retry with
+                                                # video-password set
+            video_password = self._downloader.params.get('videopassword', None)
+            if not video_password:
+                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
+            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
+            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
+            video_json = json.loads(video_json_page)
+            status = video_json['status']
+            if status == self._PASSWORD_NOT_VERIFIED:
+                raise ExtractorError(u'Video password is invalid', expected=True)
+        
+        if status != self._SUCCESS:
+            raise ExtractorError(u'Unexpected status value %s' % status)
+        
+        # Extract the URL of the video
+        video_url = video_json['file_data']
+        
+        # Video JSON does not provide enough meta data
+        # We will extract some from the video web page instead
+        video_page_url = 'http://' + mobj.group('url')
+        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
+        
+        # Adult content
+        if re.search(u'EroConfirmText">', video_page) is not None:
+            self.report_age_confirmation()
+            confirm_string = self._html_search_regex(
+                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
+                video_page, u'confirm string')
+            confirm_url = video_page_url + '&confirm=%s' % confirm_string
+            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
+            adult_content = True
+        else:
+            adult_content = False
+        
+        # Extract the rest of meta data
+        video_title = self._search_meta(u'name', video_page, u'title')
+        if not video_title:
+            video_title = video_url.rsplit('/', 1)[-1]
+
+        video_description = self._search_meta(u'description', video_page)
+        END_TEXT = u' на сайте Smotri.com'
+        if video_description.endswith(END_TEXT):
+            video_description = video_description[:-len(END_TEXT)]
+        START_TEXT = u'Смотреть онлайн ролик '
+        if video_description.startswith(START_TEXT):
+            video_description = video_description[len(START_TEXT):]
+        video_thumbnail = self._search_meta(u'thumbnail', video_page)
+
+        upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
+        upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
+        video_upload_date = (
+            (
+                upload_date_m.group('year') +
+                upload_date_m.group('month') +
+                upload_date_m.group('day')
+            )
+            if upload_date_m else None
+        )
+        
+        duration_str = self._search_meta(u'duration', video_page)
+        duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
+        video_duration = (
+            (
+                (int(duration_m.group('hours')) * 60 * 60) +
+                (int(duration_m.group('minutes')) * 60) +
+                int(duration_m.group('seconds'))
+            )
+            if duration_m else None
+        )
+        
+        video_uploader = self._html_search_regex(
+            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
+            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
+        
+        video_uploader_id = self._html_search_regex(
+            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
+            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
+        
+        video_view_count = self._html_search_regex(
+            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
+            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+                
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': video_description,
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'video_duration': video_duration,
+            'view_count': video_view_count,
+            'age_limit': 18 if adult_content else 0,
+            'video_page_url': video_page_url
+        }
+
+
+class SmotriCommunityIE(InfoExtractor):
+    IE_DESC = u'Smotri.com community videos'
+    IE_NAME = u'smotri:community'
+    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        community_id = mobj.group('communityid')
+
+        url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
+        rss = self._download_xml(url, community_id, u'Downloading community RSS')
+
+        entries = [self.url_result(video_url.text, 'Smotri')
+                   for video_url in rss.findall('./channel/item/link')]
+
+        description_text = rss.find('./channel/description').text
+        community_title = self._html_search_regex(
+            u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
+
+        return self.playlist_result(entries, community_id, community_title)
+
+
+class SmotriUserIE(InfoExtractor):
+    IE_DESC = u'Smotri.com user videos'
+    IE_NAME = u'smotri:user'
+    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('userid')
+
+        url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
+        rss = self._download_xml(url, user_id, u'Downloading user RSS')
+
+        entries = [self.url_result(video_url.text, 'Smotri')
+                   for video_url in rss.findall('./channel/item/link')]
+
+        description_text = rss.find('./channel/description').text
+        user_nickname = self._html_search_regex(
+            u'^Видео режиссера (.*)$', description_text,
+            u'user nickname')
+
+        return self.playlist_result(entries, user_id, user_nickname)
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -1,15 +1,14 @@
 import re

-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor


-class SouthParkStudiosIE(MTVIE):
+class SouthParkStudiosIE(MTVServicesInfoExtractor):
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

-    # Overwrite MTVIE properties we don't want
    _TESTS = [{
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
        },
    }]

-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        thumb_node = itemdoc.find(search_path)
-        if thumb_node is None:
-            return None
-        else:
-            return thumb_node.attrib['url']
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        url = u'http://www.' + mobj.group(u'url')
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor):
            for fnode in format_doc.findall('./formats/format')
        ]

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
@@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor):
            'thumbnail': thumbnail,
        }

-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor):
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
@@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor):
            'duration': duration,
            'formats': formats,
        }
-
-        # TODO: Remove when #980 has been merged
-        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
-        info.update(info['formats'][-1])
-
-        return info
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor):
    {
        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
        u'file': u'2221348.flv',
-        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'md5': u'970a94178ca4118c5aa3aaea21211b81',
        u'info_dict': {
            u"upload_date": u"20130914",
            u"uploader_id": u"jojo747400",
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
@@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
+            video_description = re.sub(r'''(?x)
+                <a\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    title="([^"]+)"\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    class="yt-uix-redirect-link"\s*>
+                [^<]+
+                </a>
+            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
        return self.playlist_result(videos, query)

 class YoutubeSearchDateIE(YoutubeSearchIE):
+    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.12.02'
+__version__ = '2013.12.04'
Author	SHA1	Message	Date
Philipp Hagemeister	671c0f151d	release 2013.12.04	2013-12-04 14:19:07 +01:00
Philipp Hagemeister	27dcce1904	[youtube] Resolve URLs in comments	2013-12-04 14:18:49 +01:00
Jaime Marquínez Ferrándiz	84db81815a	Move common code for extractors based in MTV services to a new base class. Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)	2013-12-03 14:58:24 +01:00
Jaime Marquínez Ferrándiz	fb7abb31af	Remove the compatibility code used before the new format system was implemented	2013-12-03 14:31:20 +01:00
Philipp Hagemeister	ce93879a9b	[daum] Fix real video ID extraction	2013-12-03 14:16:58 +01:00
Philipp Hagemeister	938384c587	[redtube] Fix search for title	2013-12-03 14:08:16 +01:00
Philipp Hagemeister	e9d8e302aa	[xhamster] Change test checksum	2013-12-03 14:06:16 +01:00
Jaime Marquínez Ferrándiz	cb7fb54600	Change the ie_name of YoutubeSearchDateIE. It produced a duplicate entry when listing the extractors with '--list-extractors' and generates noise in the commit log when generating the supported sites webpage (like in `09f355f73b`)	2013-12-03 13:55:25 +01:00
Philipp Hagemeister	cf6758d204	Document disabling proxy (#1882)	2013-12-03 13:33:07 +01:00
Philipp Hagemeister	731e3dde29	release 2013.12.03	2013-12-03 13:13:09 +01:00
Philipp Hagemeister	a0eaa341e1	[configuration] Undo code breakage	2013-12-03 13:11:20 +01:00
Philipp Hagemeister	fb27c2295e	Correct configuration file locations	2013-12-03 13:09:48 +01:00
Philipp Hagemeister	1b753cb334	Add Windows configuration file locations (#1881)	2013-12-03 13:04:02 +01:00
Philipp Hagemeister	36a826a50d	Clarify --download-archive help (#1757)	2013-12-03 11:54:52 +01:00
Philipp Hagemeister	8796857429	Credit @dstftw for smotri IE	2013-12-02 17:43:22 +01:00
Philipp Hagemeister	aaebed13a8	[smotri] Simplify	2013-12-02 17:08:17 +01:00
Philipp Hagemeister	25939ffe56	Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl	2013-12-02 15:56:35 +01:00
dst	5270d8cb13	Added extractors for smotri.com	2013-12-02 20:10:19 +07:00