[shared] Add extractor (Closes #3312)

2014-07-24 21:12:45 +07:00
parent 4192b51c7c
commit 916c145217
2 changed files with 58 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -259,6 +259,7 @@ from .savefrom import SaveFromIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .shared import SharedIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -0,0 +1,57 @@
 from __future__ import unicode_literals
 import re
 import base64
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_request,
    compat_urllib_parse,
    int_or_none,
 )
 class SharedIE(InfoExtractor):
    """Information extractor for videos hosted on shared.sx.

    The landing page only carries a form made of hidden inputs; the page
    holding the actual media URL is obtained by POSTing that form back to
    the same address.
    """

    # Accept both plain and TLS links; the id is 10 digits/lowercase letters.
    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Big Buck Bunny Trailer',
        },
    }

    def _real_extract(self, url):
        """Extract the info dict for the video at *url*.

        Returns a dict with the keys 'id', 'url', 'ext', 'filesize',
        'title' and 'thumbnail' ('filesize' and 'thumbnail' may be None).

        Raises ExtractorError (marked as expected) when the landing page
        reports that the file does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        if '>File does not exist<' in page:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        # Collect every hidden input of the landing page as name -> value.
        download_form = dict(re.findall(
            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))

        # The request body has to be bytes: Python 3's urllib refuses a
        # text body ("POST data should be bytes").
        post_data = compat_urllib_parse.urlencode(download_form).encode('utf-8')
        request = compat_urllib_request.Request(url, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video_page = self._download_webpage(
            request, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'data-url="([^"]+)"', video_page, 'video URL')

        # The title is mandatory, so a missing meta tag must surface as a
        # regular extraction error instead of handing None to b64decode.
        encoded_title = self._html_search_meta(
            'full:title', page, 'title', fatal=True)
        # The meta value is base64; encode it to bytes first because
        # b64decode only accepts text input from Python 3.3 onwards.
        title = base64.b64decode(encoded_title.encode('utf-8')).decode('utf-8')

        filesize = int_or_none(self._html_search_meta(
            'full:size', page, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
            r'data-poster="([^"]+)"', video_page, 'thumbnail',
            fatal=False, default=None)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
            'thumbnail': thumbnail,
        }