release 2013.09.12

Check for both automatic captions and subtitles with options --write-sub and --write-auto-sub (fixes #1224 )
[youtube] Fix detection of videos with automatic captions
2013-09-12 11:26:44 +02:00 · 2013-09-12 11:15:25 +02:00 · 2013-09-11 19:24:56 +02:00 · 2013-09-11 19:17:30 +02:00 · 2013-09-11 19:08:43 +02:00 · 2013-09-11 17:58:51 +02:00
12 changed files with 301 additions and 184 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,10 @@ youtube-dl.tar.gz
 .coverage
 cover/
 updates_key.pem
-*.egg-info
+*.egg-info
+*.srt
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.part
--- a/README.md
+++ b/README.md
@@ -123,10 +123,8 @@ which means you can modify it, redistribute it or use it however you like.
                               only)

 ## Subtitle Options:
-    --write-sub                write subtitle file (currently youtube only)
-    --write-auto-sub           write automatic subtitle file (currently youtube
-                               only)
-    --only-sub                 [deprecated] alias of --skip-download
+    --write-sub                write subtitle file
+    --write-auto-sub           write automatic subtitle file (youtube only)
    --all-subs                 downloads all the available subtitles of the
                               video
    --list-subs                lists all available subtitles for the video
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -14,7 +14,7 @@ def main():
        template = tmplf.read()

    ie_htmls = []
-    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -38,7 +38,6 @@
    "writedescription": false, 
    "writeinfojson": true, 
    "writesubtitles": false,
-    "onlysubtitles": false,
    "allsubtitles": false,
    "listssubtitles": false
 }
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+import json
+import io
+import hashlib
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor import DailymotionIE
+from youtube_dl.utils import *
+from helper import FakeYDL
+
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+class TestDailymotionSubtitles(unittest.TestCase):
+    def setUp(self):
+        self.DL = FakeYDL()
+        self.url = 'http://www.dailymotion.com/video/xczg00'
+    def getInfoDict(self):
+        IE = DailymotionIE(self.DL)
+        info_dict = IE.extract(self.url)
+        return info_dict
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+    def test_allsubtitles(self):
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 5)
+    def test_list_subtitles(self):
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+    def test_automatic_captions(self):
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+    def test_nosubtitles(self):
+        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+if __name__ == '__main__':
+    unittest.main()
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -18,85 +18,63 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

 class TestYoutubeSubtitles(unittest.TestCase):
    def setUp(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = False
-        DL.params['writesubtitles'] = False
-        DL.params['subtitlesformat'] = 'srt'
-        DL.params['listsubtitles'] = False
-    def test_youtube_no_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = False
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL = FakeYDL()
+        self.url = 'QRS8MkLhQmM'
+    def getInfoDict(self):
+        IE = YoutubeIE(self.DL)
+        info_dict = IE.extract(self.url)
+        return info_dict
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']        
+    def test_youtube_no_writesubtitles(self):
+        self.DL.params['writesubtitles'] = False
+        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)
    def test_youtube_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
-    def test_youtube_subtitles_it(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
-    def test_youtube_onlysubtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['onlysubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+    def test_youtube_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
    def test_youtube_allsubtitles(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 13)
    def test_youtube_subtitles_sbv_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'sbv'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'sbv'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
    def test_youtube_subtitles_vtt_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'vtt'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'vtt'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
    def test_youtube_list_subtitles(self):
-        DL = FakeYDL()
-        DL.params['listsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)
    def test_youtube_automatic_captions(self):
-        DL = FakeYDL()
-        DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('8YoUxe5ncPo')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertTrue(sub is not None)
+        self.url = '8YoUxe5ncPo'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+    def test_youtube_nosubtitles(self):
+        self.url = 'sAjKT8FhjI8'
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
    def test_youtube_multiple_langs(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
+        self.url = 'QRS8MkLhQmM'
+        self.DL.params['writesubtitles'] = True
        langs = ['it', 'fr', 'de']
-        DL.params['subtitleslangs'] = langs
-        IE = YoutubeIE(DL)
-        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)

--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -29,6 +29,7 @@ __authors__  = (
    'Albert Kim',
    'Pierre Rudloff',
    'Huarong Huo',
+    'Ismael Mejía',
 )

 __license__ = 'Public Domain'
@@ -205,13 +206,10 @@ def parseOpts(overrideArguments=None):

    subtitles.add_option('--write-sub', '--write-srt',
            action='store_true', dest='writesubtitles',
-            help='write subtitle file (currently youtube only)', default=False)
+            help='write subtitle file', default=False)
    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
            action='store_true', dest='writeautomaticsub',
-            help='write automatic subtitle file (currently youtube only)', default=False)
-    subtitles.add_option('--only-sub',
-            action='store_true', dest='skip_download',
-            help='[deprecated] alias of --skip-download', default=False)
+            help='write automatic subtitle file (youtube only)', default=False)
    subtitles.add_option('--all-subs',
            action='store_true', dest='allsubtitles',
            help='downloads all the available subtitles of the video', default=False)
@@ -222,7 +220,7 @@ def parseOpts(overrideArguments=None):
            action='store', dest='subtitlesformat', metavar='FORMAT',
            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
-            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
            default=[], callback=_comma_separated_values_options_callback,
            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')

@@ -593,7 +591,7 @@ def _real_main(argv=None):
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
-        'subtitleslangs': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -3,15 +3,19 @@ import json
 import itertools

 from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
+
 from ..utils import (
    compat_urllib_request,
+    compat_str,
    get_element_by_attribute,
    get_element_by_id,

    ExtractorError,
 )

-class DailymotionIE(InfoExtractor):
+
+class DailymotionIE(SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@@ -73,6 +77,12 @@ class DailymotionIE(InfoExtractor):
            raise ExtractorError(u'Unable to extract video URL')
        video_url = info[max_quality]

+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id)
+            return
+
        return [{
            'id':       video_id,
            'url':      video_url,
@@ -80,9 +90,25 @@ class DailymotionIE(InfoExtractor):
            'upload_date':  video_upload_date,
            'title':    self._og_search_title(webpage),
            'ext':      video_extension,
+            'subtitles':    video_subtitles,
            'thumbnail': info['thumbnail_url']
        }]

+    def _get_available_subtitles(self, video_id):
+        try:
+            sub_list = self._download_webpage(
+                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
+                video_id, note=False)
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            return {}
+        info = json.loads(sub_list)
+        if (info['total'] > 0):
+            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+            return sub_lang_list
+        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        return {}
+

 class DailymotionPlaylistIE(InfoExtractor):
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
-        webpage_url = 'http://www.howcast.com/videos/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
+        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -0,0 +1,92 @@
+from .common import InfoExtractor
+
+from ..utils import (
+    compat_str,
+    ExtractorError,
+)
+
+
+class SubtitlesInfoExtractor(InfoExtractor):
+    @property
+    def _have_to_download_any_subtitles(self):
+        return any([self._downloader.params.get('writesubtitles', False),
+                    self._downloader.params.get('writeautomaticsub'),
+                    self._downloader.params.get('allsubtitles', False)])
+
+    def _list_available_subtitles(self, video_id, webpage=None):
+        """ outputs the available subtitles for the video """
+        sub_lang_list = self._get_available_subtitles(video_id)
+        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
+        sub_lang = ",".join(list(sub_lang_list.keys()))
+        self.to_screen(u'%s: Available subtitles for video: %s' %
+                       (video_id, sub_lang))
+        auto_lang = ",".join(auto_captions_list.keys())
+        self.to_screen(u'%s: Available automatic captions for video: %s' %
+                       (video_id, auto_lang))
+
+    def extract_subtitles(self, video_id, video_webpage=None):
+        """
+        returns {sub_lang: sub} ,{} if subtitles not found or None if the
+        subtitles aren't requested.
+        """
+        if not self._have_to_download_any_subtitles:
+            return None
+        available_subs_list = {}
+        if self._downloader.params.get('writeautomaticsub', False):
+            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
+        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+            available_subs_list.update(self._get_available_subtitles(video_id))
+
+        if not available_subs_list:  # error, it didn't get the available subtitles
+            return {}
+        if self._downloader.params.get('allsubtitles', False):
+            sub_lang_list = available_subs_list
+        else:
+            if self._downloader.params.get('subtitleslangs', False):
+                requested_langs = self._downloader.params.get('subtitleslangs')
+            elif 'en' in available_subs_list:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs_list.keys())[0]]
+
+            sub_lang_list = {}
+            for sub_lang in requested_langs:
+                if not sub_lang in available_subs_list:
+                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                    continue
+                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
+
+        subtitles = {}
+        for sub_lang, url in sub_lang_list.items():
+            subtitle = self._request_subtitle_url(sub_lang, url)
+            if subtitle:
+                subtitles[sub_lang] = subtitle
+        return subtitles
+
+    def _request_subtitle_url(self, sub_lang, url):
+        """ makes the http request for the subtitle """
+        try:
+            sub = self._download_webpage(url, None, note=False)
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+            return
+        if not sub:
+            self._downloader.report_warning(u'Did not fetch video subtitles')
+            return
+        return sub
+
+    def _get_available_subtitles(self, video_id):
+        """
+        returns {sub_lang: url} or {} if not available
+        Must be redefined by the subclasses
+        """
+        pass
+
+    def _get_available_automatic_caption(self, video_id, webpage):
+        """
+        returns {sub_lang: url} or {} if not available
+        Must be redefined by the subclasses that support automatic captions,
+        otherwise it will return {}
+        """
+        self._downloader.report_warning(u'Automatic Captions not supported by this server')
+        return {}
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -5,8 +5,10 @@ import netrc
 import re
 import socket
 import itertools
+import xml.etree.ElementTree

 from .common import InfoExtractor, SearchInfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_http_client,
    compat_parse_qs,
@@ -130,7 +132,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            return
        self._confirm_age()

-class YoutubeIE(YoutubeBaseInfoExtractor):
+
+class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
@@ -397,19 +400,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

-    def report_video_subtitles_download(self, video_id):
-        """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Checking available subtitles' % video_id)
-
-    def report_video_subtitles_request(self, video_id, sub_lang, format):
-        """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
-
-    def report_video_subtitles_available(self, video_id, sub_lang_list):
-        """Report available subtitles."""
-        sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
-
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)
@@ -464,56 +454,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            # Fallback to the other algortihms
            return self._decrypt_signature(s)

-
    def _get_available_subtitles(self, video_id):
-        self.report_video_subtitles_download(video_id)
-        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        try:
-            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            sub_list = self._download_webpage(
+                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+                video_id, note=False)
+        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
-        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
-        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
+        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+
+        sub_lang_list = {}
+        for l in lang_list:
+            lang = l[1]
+            params = compat_urllib_parse.urlencode({
+                'lang': lang,
+                'v': video_id,
+                'fmt': self._downloader.params.get('subtitlesformat'),
+            })
+            url = u'http://www.youtube.com/api/timedtext?' + params
+            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
        return sub_lang_list

-    def _list_available_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        self.report_video_subtitles_available(video_id, sub_lang_list)
-
-    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
-        """
-        Return the subtitle as a string or None if they are not found
-        """
-        self.report_video_subtitles_request(video_id, sub_lang, format)
-        params = compat_urllib_parse.urlencode({
-            'lang': sub_lang,
-            'name': sub_name,
-            'v': video_id,
-            'fmt': format,
-        })
-        url = 'http://www.youtube.com/api/timedtext?' + params
-        try:
-            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
-            return
-        if not sub:
-            self._downloader.report_warning(u'Did not fetch video subtitles')
-            return
-        return sub
-
-    def _request_automatic_caption(self, video_id, webpage):
+    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
-        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
@@ -522,53 +494,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
-            params = compat_urllib_parse.urlencode({
-                'lang': 'en',
-                'tlang': sub_lang,
-                'fmt': sub_format,
-                'ts': timestamp,
-                'kind': 'asr',
+            # We get the available subtitles
+            list_params = compat_urllib_parse.urlencode({
+                'type': 'list',
+                'tlangs': 1,
+                'asrs': 1,
            })
-            subtitles_url = caption_url + '&' + params
-            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return {sub_lang: sub}
+            list_url = caption_url + '&' + list_params
+            list_page = self._download_webpage(list_url, video_id)
+            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            original_lang_node = caption_list.find('track')
+            if original_lang_node.attrib.get('kind') != 'asr' :
+                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+                return {}
+            original_lang = original_lang_node.attrib['lang_code']
+
+            sub_lang_list = {}
+            for lang_node in caption_list.findall('target'):
+                sub_lang = lang_node.attrib['lang_code']
+                params = compat_urllib_parse.urlencode({
+                    'lang': original_lang,
+                    'tlang': sub_lang,
+                    'fmt': sub_format,
+                    'ts': timestamp,
+                    'kind': 'asr',
+                })
+                sub_lang_list[sub_lang] = caption_url + '&' + params
+            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
-    
-    def _extract_subtitles(self, video_id):
-        """
-        Return a dictionary: {language: subtitles} or {} if the subtitles
-        couldn't be found
-        """
-        available_subs_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if  not available_subs_list: #There was some error, it didn't get the available subtitles
-            return {}
-        if self._downloader.params.get('allsubtitles', False):
-            sub_lang_list = available_subs_list
-        else:
-            if self._downloader.params.get('subtitleslangs', False):
-                reqested_langs = self._downloader.params.get('subtitleslangs')
-            elif 'en' in available_subs_list:
-                reqested_langs = ['en']
-            else:
-                reqested_langs = [list(available_subs_list.keys())[0]]
-
-            sub_lang_list = {}
-            for sub_lang in reqested_langs:
-                if not sub_lang in available_subs_list:
-                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
-                    continue
-                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-        subtitles = {}
-        for sub_lang in sub_lang_list:
-            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-            if subtitle:
-                subtitles[sub_lang] = subtitle
-        return subtitles

    def _print_formats(self, formats):
        print('Available formats:')
@@ -768,15 +725,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                video_description = u''

        # subtitles
-        video_subtitles = None
-
-        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
-            video_subtitles = self._extract_subtitles(video_id)
-        elif self._downloader.params.get('writeautomaticsub', False):
-            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id)
+            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.11.09'
+__version__ = '2013.09.12'
Author	SHA1	Message	Date
Philipp Hagemeister	07ac9e2cc2	release 2013.09.12	2013-09-12 11:26:44 +02:00
Jaime Marquínez Ferrándiz	6bc520c207	Check for both automatic captions and subtitles with options `--write-sub` and `--write-auto-sub` (fixes #1224 )	2013-09-12 11:15:25 +02:00
Jaime Marquínez Ferrándiz	e3dc22ca3a	[youtube] Fix detection of videos with automatic captions	2013-09-11 19:24:56 +02:00
Jaime Marquínez Ferrándiz	d665f8d3cb	[subtitles] Also list the available automatic captions languages with '--list-sub'	2013-09-11 19:17:30 +02:00
Jaime Marquínez Ferrándiz	055e6f3657	[youtube] Support automatic captions with original language different from English (fixes #1225 ) and download in multiple languages.	2013-09-11 19:08:43 +02:00
Jaime Marquínez Ferrándiz	ac4f319ba1	Credit @iemejia	2013-09-11 17:58:51 +02:00
Jaime Marquínez Ferrándiz	542cca0e8c	Merge branch 'subtitles_rework' (closes PR #1326 )	2013-09-11 17:41:24 +02:00
Jaime Marquínez Ferrándiz	6a2449df3b	[howcast] Do not download from http://www.howcast.com/videos/{video_id} It takes too much to follow the redirection.	2013-09-11 17:36:23 +02:00
Jaime Marquínez Ferrándiz	7fad1c6328	[subtitles] Use self._download_webpage for extracting the subtitles It raises ExtractorError for the same exceptions we have to catch.	2013-09-11 16:24:47 +02:00
Jaime Marquínez Ferrándiz	d82134c339	[subtitles] Simplify the extraction of subtitles in subclasses and remove NoAutoSubtitlesInfoExtractor Subclasses just need to call the method extract_subtitles, which will call _extract_subtitles and _request_automatic_caption Now the default implementation of _request_automatic_caption returns {}.	2013-09-11 16:05:49 +02:00
Jaime Marquínez Ferrándiz	54d39d8b2f	[subtitles] rename SubitlesIE to SubtitlesInfoExtractor Otherwise it can be automatically detected as a IE ready for use.	2013-09-11 15:51:04 +02:00
Jaime Marquínez Ferrándiz	de7f3446e0	[youtube] move subtitles methods from the base extractor to YoutubeIE	2013-09-11 15:48:23 +02:00
Ismael Mejia	f8e52269c1	[subtitles] made inheritance hierarchy flat as requested	2013-09-11 15:21:09 +02:00
Ismael Mejia	cf1dd0c59e	Merge branch 'master' into subtitles_rework	2013-09-11 14:26:48 +02:00
Jaime Marquínez Ferrándiz	22c8b52545	In the supported sites page, sort the extractors in case insensitive	2013-09-11 12:04:27 +02:00
Ismael Mejia	72836fcee4	Merge branch 'master' into subtitles_rework	2013-09-06 23:24:41 +02:00
Ismael Mejia	d6e203b3dc	[subtitles] fixed multiple subtitles language separated by comma after merge As mentioned in the pull request, I forgot to include this changes. `aa6a10c44a`	2013-09-06 16:30:13 +02:00
Ismael Mejia	06a401c845	Merge branch 'master' into subtitles_rework	2013-08-28 00:33:12 +02:00
Ismael Mejia	bd2dee6c67	Merge branch 'master' into subtitles_rework	2013-08-23 01:47:10 +02:00
Ismael Mejia	18b4e04f1c	Merge branch 'master' into subtitles_rework	2013-08-22 23:29:36 +02:00
Ismael Mejia	d80a064eff	[subtitles] Added tests to check correct behavior when no subtitles are available	2013-08-08 22:22:33 +02:00
Ismael Mejia	d55de6eec2	[subtitles] Skips now the subtitles that has already been downloaded. Just a validation for file exists, I also removed a method that wasn't been used because it was a copy paste from FileDownloader.	2013-08-08 18:30:04 +02:00
Ismael Mejia	69df680b97	[subtitles] Improved docs + new class for servers who don't support auto-caption	2013-08-08 11:20:56 +02:00
Ismael Mejia	447591e1ae	[test] Cleaned subtitles tests	2013-08-08 11:03:52 +02:00
Ismael Mejia	33eb0ce4c4	[subtitles] removed only-sub option (--skip-download achieves the same functionality)	2013-08-08 10:06:24 +02:00
Ismael Mejia	505c28aac9	Separated subtitle options in their own group	2013-08-08 09:53:25 +02:00
Ismael Mejia	8377574c9c	[internal] Improved subtitle architecture + (update in youtube/dailymotion) The structure of subtitles was refined, you only need to implement one method that returns a dictionnary of the available subtitles (lang, url) to support all the subtitle options in a website. I updated the subtitle downloaders for youtube/dailymotion to show how it works.	2013-08-08 08:54:10 +02:00
Ismael Mejia	372297e713	Undo the previous commit (it was a mistake)	2013-08-07 21:24:42 +02:00
Ismael Mejia	953e32b2c1	[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten.	2013-08-07 18:59:11 +02:00
Ismael Mejia	5898e28272	Fixed small type issue	2013-08-07 18:48:24 +02:00
Ismael Mejia	67dfbc0cb9	Added exceptions for the subtitle and video types in .gitignore	2013-08-07 18:42:40 +02:00