Merge remote-tracking branch 'upstream/master'
This commit is contained in:
		@@ -14,9 +14,9 @@ tests = [
 | 
			
		||||
    # 89 
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
 | 
			
		||||
     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
 | 
			
		||||
    # 88
 | 
			
		||||
    # 88 - vflapUV9V 2013/08/28
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
 | 
			
		||||
     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
 | 
			
		||||
    # 87
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
 | 
			
		||||
 
 | 
			
		||||
@@ -63,6 +63,17 @@ class FileDownloader(object):
 | 
			
		||||
        converted = float(bytes) / float(1024 ** exponent)
 | 
			
		||||
        return '%.2f%s' % (converted, suffix)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
    def format_seconds(seconds):
        """Format a duration given in seconds as 'MM:SS' or 'HH:MM:SS'.

        Returns 'MM:SS' when the duration is shorter than one hour,
        'HH:MM:SS' otherwise, and the placeholder '--:--:--' when the
        duration exceeds 99 hours (it would not fit the two-digit hours
        field).
        """
        (mins, secs) = divmod(seconds, 60)
        # Rebind mins to the remainder so that the minutes field never
        # exceeds 59 once an hours field is present.
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def calc_percent(byte_counter, data_len):
 | 
			
		||||
        if data_len is None:
 | 
			
		||||
@@ -78,14 +89,7 @@ class FileDownloader(object):
 | 
			
		||||
            return '--:--'
 | 
			
		||||
        rate = float(current) / dif
 | 
			
		||||
        eta = int((float(total) - float(current)) / rate)
 | 
			
		||||
        (eta_mins, eta_secs) = divmod(eta, 60)
 | 
			
		||||
        (eta_hours, eta_mins) = divmod(eta_mins, 60)
 | 
			
		||||
        if eta_hours > 99:
 | 
			
		||||
            return '--:--:--'
 | 
			
		||||
        if eta_hours == 0:
 | 
			
		||||
            return '%02d:%02d' % (eta_mins, eta_secs)
 | 
			
		||||
        else:
 | 
			
		||||
            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
 | 
			
		||||
        return FileDownloader.format_seconds(eta)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def calc_speed(start, now, bytes):
 | 
			
		||||
@@ -234,12 +238,14 @@ class FileDownloader(object):
 | 
			
		||||
        """Report it was impossible to resume download."""
 | 
			
		||||
        self.to_screen(u'[download] Unable to resume')
 | 
			
		||||
 | 
			
		||||
    def report_finish(self):
 | 
			
		||||
    def report_finish(self, data_len_str, tot_time):
 | 
			
		||||
        """Report download finished."""
 | 
			
		||||
        if self.params.get('noprogress', False):
 | 
			
		||||
            self.to_screen(u'[download] Download completed')
 | 
			
		||||
        else:
 | 
			
		||||
            self.to_screen(u'')
 | 
			
		||||
            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
 | 
			
		||||
            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
 | 
			
		||||
                (clear_line, data_len_str, self.format_seconds(tot_time)))
 | 
			
		||||
 | 
			
		||||
    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
@@ -542,7 +548,7 @@ class FileDownloader(object):
 | 
			
		||||
            self.report_error(u'Did not get any data blocks')
 | 
			
		||||
            return False
 | 
			
		||||
        stream.close()
 | 
			
		||||
        self.report_finish()
 | 
			
		||||
        self.report_finish(data_len_str, (time.time() - start))
 | 
			
		||||
        if data_len is not None and byte_counter != data_len:
 | 
			
		||||
            raise ContentTooShortError(byte_counter, int(data_len))
 | 
			
		||||
        self.try_rename(tmpfilename, filename)
 | 
			
		||||
 
 | 
			
		||||
@@ -430,6 +430,10 @@ def _real_main(argv=None):
 | 
			
		||||
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 | 
			
		||||
    https_handler = make_HTTPS_handler(opts)
 | 
			
		||||
    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
 | 
			
		||||
    # Delete the default user-agent header, which would otherwise apply in
 | 
			
		||||
    # cases where our custom HTTP handler doesn't come into play
 | 
			
		||||
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 | 
			
		||||
    opener.addheaders =[]
 | 
			
		||||
    compat_urllib_request.install_opener(opener)
 | 
			
		||||
    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
from .appletrailers import AppleTrailersIE
 | 
			
		||||
from .addanime import AddAnimeIE
 | 
			
		||||
from .archiveorg import ArchiveOrgIE
 | 
			
		||||
from .ard import ARDIE
 | 
			
		||||
from .arte import ArteTvIE
 | 
			
		||||
@@ -9,6 +11,7 @@ from .brightcove import BrightcoveIE
 | 
			
		||||
from .c56 import C56IE
 | 
			
		||||
from .canalplus import CanalplusIE
 | 
			
		||||
from .canalc2 import Canalc2IE
 | 
			
		||||
from .cnn import CNNIE
 | 
			
		||||
from .collegehumor import CollegeHumorIE
 | 
			
		||||
from .comedycentral import ComedyCentralIE
 | 
			
		||||
from .condenast import CondeNastIE
 | 
			
		||||
@@ -53,6 +56,7 @@ from .muzu import MuzuTVIE
 | 
			
		||||
from .myspass import MySpassIE
 | 
			
		||||
from .myvideo import MyVideoIE
 | 
			
		||||
from .nba import NBAIE
 | 
			
		||||
from .nbc import NBCNewsIE
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
from .pbs import PBSIE
 | 
			
		||||
from .photobucket import PhotobucketIE
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										76
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,76 @@
 | 
			
		||||
import ast
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddAnimeIE(InfoExtractor):
    """Information extractor for add-anime.net video pages."""

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        """Download the video page and return its info dictionary.

        If the first download fails with an HTTP error, the error body is
        treated as a challenge page: the arithmetic task embedded in it is
        solved, the answer is submitted, and the video page is requested
        again. Any other ExtractorError is re-raised unchanged.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            http_error = ee.cause
            if not isinstance(http_error, compat_HTTPError):
                raise

            # The body of the HTTP error carries the challenge form.
            challenge_page = http_error.read().decode('utf-8')
            form_action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                challenge_page, u'Redirect form')
            vc_token = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                challenge_page, u'redirect vc value')
            task = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                challenge_page)
            if task is None:
                raise ExtractorError(u'Cannot find redirect math task')
            first, second, third = (int(task.group(i)) for i in (1, 2, 3))

            # The expected answer is the task result plus the length of
            # the host part of the requested URL.
            url_parts = compat_urllib_parse_urlparse(url)
            answer = first + second * third + len(url_parts.netloc)
            query = compat_urllib_parse.urlencode({
                'jschl_vc': vc_token, 'jschl_answer': compat_str(answer)})
            confirm_url = (
                url_parts.scheme + u'://' + url_parts.netloc +
                form_action + '?' + query)
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r"var normal_video_file = '(.*?)';", webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        result = {
            '_type': 'video',
            'id':  video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
        return result
 | 
			
		||||
							
								
								
									
										167
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										167
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,167 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AppleTrailersIE(InfoExtractor):
    """Information extractor for movie pages on trailers.apple.com.

    A movie page is returned as a playlist with one video entry per clip
    listed in the page's playlist include (web.inc).
    """

    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        """Return a playlist dictionary with one entry per clip of the movie.

        Raises ExtractorError when a detail page does not have the expected
        shape (no 'movieLink' anchor, or a link that is not a .mov file) or
        when no format at all could be found for a clip.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Drop <script> sections and wrap the snippet in a single root
        # element before handing it to the XML parser.
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        # Parsed size pages, keyed by size id, so that each one is
        # downloaded only once for the whole movie.
        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            date_el = li.find('.//p')
            upload_date = None
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                # The page only gives a two-digit year.
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            # The runtime is the text that follows the <br> inside the <p>.
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                if formats_el.attrib['class'] != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    size_cache[size_id] = xml.etree.ElementTree.fromstring(sizepage_html)

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q

                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    # Explicit checks instead of assert statements: asserts
                    # are removed when Python runs with -O, and a page-layout
                    # change should surface as an ExtractorError.
                    if movie_link_el is None or movie_link_el.get('class') != 'movieLink':
                        raise ExtractorError(
                            u'Unable to find the movie link in detail %s %s' % (detail_id, size_id))
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    if ext != 'mov':
                        raise ExtractorError(
                            u'Unexpected file extension %r in detail %s %s' % (ext, detail_id, size_id))

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            if not formats:
                # Without this check the formats[-1] lookups below would
                # fail with a bare IndexError.
                raise ExtractorError(u'No formats found for %s' % video_id)

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
 | 
			
		||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CNNIE(InfoExtractor):
    """Information extractor for cnn.com video pages."""

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        """Fetch the XML description of the video and build its info dict.

        Among the listed files, the one whose (width, height, bitrate)
        tuple sorts last is used as the video URL; the thumbnail that
        sorts last by (height, width) is used as the main thumbnail.
        """
        url_match = re.match(self._VALID_URL, url)
        path = url_match.group('path')
        page_title = url_match.group('title')
        info_xml = self._download_webpage(
            u'http://cnn.com/video/data/3.0/%s/index.xml' % path, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # Collect (width, height, bitrate, path) tuples; files whose
        # 'bitrate' attribute does not start with WIDTHxHEIGHT are skipped.
        candidates = []
        for file_el in info.findall('files/file'):
            size = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', file_el.attrib['bitrate'])
            if size is None:
                continue
            width = int(size.group(1))
            height = int(size.group(2))
            bitrate = int(size.group(3) or 0)
            candidates.append((width, height, bitrate, file_el.text))
        candidates.sort()
        video_path = candidates[-1][3]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        thumbnails = []
        for image_el in info.findall('images/image'):
            resolution = (int(image_el.attrib['height']), int(image_el.attrib['width']))
            thumbnails.append((resolution, image_el.text))
        thumbnails.sort()
        thumbs_dict = []
        for (resolution, thumb_url) in thumbnails:
            thumbs_dict.append({'resolution': resolution, 'url': thumb_url})

        return {
            'id': info.attrib['id'],
            'title': info.find('headline').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'thumbnail': thumbnails[-1][1],
            'thumbnails': thumbs_dict,
            'description': info.find('description').text,
        }
 | 
			
		||||
@@ -129,7 +129,7 @@ class InfoExtractor(object):
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            if errnote is None:
 | 
			
		||||
                errnote = u'Unable to download webpage'
 | 
			
		||||
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
 | 
			
		||||
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 | 
			
		||||
 | 
			
		||||
    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
 | 
			
		||||
        """ Returns a tuple (page content as string, URL handle) """
 | 
			
		||||
 
 | 
			
		||||
@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
 | 
			
		||||
            webpage, 'title', default=u'NA')
 | 
			
		||||
 | 
			
		||||
        # Step 2, Simulate clicking the image box to launch video
 | 
			
		||||
        DOMAIN = 'https://plus.google.com'
 | 
			
		||||
        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
 | 
			
		||||
        DOMAIN = 'https://plus.google.com/'
 | 
			
		||||
        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
 | 
			
		||||
            webpage, u'video page URL')
 | 
			
		||||
        if not video_page.startswith(DOMAIN):
 | 
			
		||||
            video_page = DOMAIN + video_page
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,33 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import find_xpath_attr, compat_str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NBCNewsIE(InfoExtractor):
    """Information extractor for nbcnews.com video pages."""

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Download the XML description of the video and build its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        info_xml = self._download_webpage(info_url, video_id)
        root = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        info = root.find('video')

        # Looked up in the same order as the keys of the result below.
        title = info.find('headline').text
        video_url = find_xpath_attr(info, 'media', 'type', 'flashVideo').text
        description = compat_str(info.find('caption').text)
        thumbnail = find_xpath_attr(info, 'media', 'type', 'thumbnail').text

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }
 | 
			
		||||
@@ -419,7 +419,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        elif len(s) == 89:
 | 
			
		||||
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
 | 
			
		||||
        elif len(s) == 88:
 | 
			
		||||
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
 | 
			
		||||
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
 | 
			
		||||
        elif len(s) == 87:
 | 
			
		||||
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
 | 
			
		||||
        elif len(s) == 86:
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,11 @@ try:
 | 
			
		||||
except ImportError: # Python 2
 | 
			
		||||
    import httplib as compat_http_client
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from urllib.error import HTTPError as compat_HTTPError
 | 
			
		||||
except ImportError:  # Python 2
 | 
			
		||||
    from urllib2 import HTTPError as compat_HTTPError
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from subprocess import DEVNULL
 | 
			
		||||
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
 | 
			
		||||
@@ -476,7 +481,7 @@ def formatSeconds(secs):
 | 
			
		||||
def make_HTTPS_handler(opts):
 | 
			
		||||
    if sys.version_info < (3,2):
 | 
			
		||||
        # Python's 2.x handler is very simplistic
 | 
			
		||||
        return YoutubeDLHandlerHTTPS()
 | 
			
		||||
        return compat_urllib_request.HTTPSHandler()
 | 
			
		||||
    else:
 | 
			
		||||
        import ssl
 | 
			
		||||
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 | 
			
		||||
@@ -485,11 +490,11 @@ def make_HTTPS_handler(opts):
 | 
			
		||||
        context.verify_mode = (ssl.CERT_NONE
 | 
			
		||||
                               if opts.no_check_certificate
 | 
			
		||||
                               else ssl.CERT_REQUIRED)
 | 
			
		||||
        return YoutubeDLHandlerHTTPS(context=context)
 | 
			
		||||
        return compat_urllib_request.HTTPSHandler(context=context)
 | 
			
		||||
 | 
			
		||||
class ExtractorError(Exception):
 | 
			
		||||
    """Error during info extraction."""
 | 
			
		||||
    def __init__(self, msg, tb=None, expected=False):
 | 
			
		||||
    def __init__(self, msg, tb=None, expected=False, cause=None):
 | 
			
		||||
        """ tb, if given, is the original traceback (so that it can be printed out).
 | 
			
		||||
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 | 
			
		||||
        """
 | 
			
		||||
@@ -502,6 +507,7 @@ class ExtractorError(Exception):
 | 
			
		||||
 | 
			
		||||
        self.traceback = tb
 | 
			
		||||
        self.exc_info = sys.exc_info()  # preserve original exception
 | 
			
		||||
        self.cause = cause
 | 
			
		||||
 | 
			
		||||
    def format_traceback(self):
 | 
			
		||||
        if self.traceback is None:
 | 
			
		||||
@@ -569,8 +575,7 @@ class ContentTooShortError(Exception):
 | 
			
		||||
        self.downloaded = downloaded
 | 
			
		||||
        self.expected = expected
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
 | 
			
		||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
    """Handler for HTTP requests and responses.
 | 
			
		||||
 | 
			
		||||
    This class, when installed with an OpenerDirector, automatically adds
 | 
			
		||||
@@ -603,8 +608,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
 | 
			
		||||
        ret.code = code
 | 
			
		||||
        return ret
 | 
			
		||||
 | 
			
		||||
    def _http_request(self, req):
 | 
			
		||||
        for h, v in std_headers.items():
 | 
			
		||||
    def http_request(self, req):
 | 
			
		||||
        for h,v in std_headers.items():
 | 
			
		||||
            if h in req.headers:
 | 
			
		||||
                del req.headers[h]
 | 
			
		||||
            req.add_header(h, v)
 | 
			
		||||
@@ -619,7 +624,7 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
 | 
			
		||||
            del req.headers['Youtubedl-user-agent']
 | 
			
		||||
        return req
 | 
			
		||||
 | 
			
		||||
    def _http_response(self, req, resp):
 | 
			
		||||
    def http_response(self, req, resp):
 | 
			
		||||
        old_resp = resp
 | 
			
		||||
        # gzip
 | 
			
		||||
        if resp.headers.get('Content-encoding', '') == 'gzip':
 | 
			
		||||
@@ -633,16 +638,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
 | 
			
		||||
            resp.msg = old_resp.msg
 | 
			
		||||
        return resp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
 | 
			
		||||
    http_request = YoutubeDLHandler_Template._http_request
 | 
			
		||||
    http_response = YoutubeDLHandler_Template._http_response
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
 | 
			
		||||
    https_request = YoutubeDLHandler_Template._http_request
 | 
			
		||||
    https_response = YoutubeDLHandler_Template._http_response
 | 
			
		||||
 | 
			
		||||
    https_request = http_request
 | 
			
		||||
    https_response = http_response
 | 
			
		||||
 | 
			
		||||
def unified_strdate(date_str):
 | 
			
		||||
    """Return a string with the date in the format YYYYMMDD"""
 | 
			
		||||
 
 | 
			
		||||
@@ -1,2 +1,2 @@
 | 
			
		||||
 | 
			
		||||
__version__ = '2013.08.27'
 | 
			
		||||
__version__ = '2013.08.28'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user