[rtl2] Fix extraction for test_RTL2_1

This commit is contained in:
Yen Chi Hsuan 2015-08-21 13:20:32 +08:00
parent 9eb4ab6ad9
commit 5e1a5ac8de

View File

@@ -1,6 +1,7 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -28,6 +29,10 @@ class RTL2IE(InfoExtractor):
'title': 'Anna erwischt Alex!', 'title': 'Anna erwischt Alex!',
'description': 'Anna ist Alex\' Tochter bei Köln 50667.' 'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
}, },
'params': {
# rtmp download
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -38,6 +43,13 @@ class RTL2IE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mobj = re.search(
r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
webpage)
if mobj:
vico_id = mobj.group('vico_id')
vivi_id = mobj.group('vivi_id')
else:
vico_id = self._html_search_regex( vico_id = self._html_search_regex(
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex( vivi_id = self._html_search_regex(