[ina] Simplify
Download the MRSS feed with ‘_download_xml’ so that the video URL and title are read from the parsed XML instead of being extracted with regular expressions.
This commit is contained in:
		| @@ -1,39 +1,36 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
| class InaIE(InfoExtractor): | class InaIE(InfoExtractor): | ||||||
|     """Information Extractor for Ina.fr""" |     _VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*' | ||||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*' |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', |         'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | ||||||
|         u'file': u'I12055569.mp4', |         'md5': 'a667021bf2b41f8dc6049479d9bb38a3', | ||||||
|         u'md5': u'a667021bf2b41f8dc6049479d9bb38a3', |         'info_dict': { | ||||||
|         u'info_dict': { |             'id': 'I12055569', | ||||||
|             u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\"" |             'ext': 'mp4', | ||||||
|  |             'title': 'François Hollande "Je crois que c\'est clair"', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self,url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |  | ||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id |         mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id | ||||||
|         video_extension = 'mp4' |         info_doc = self._download_xml(mrss_url, video_id) | ||||||
|         webpage = self._download_webpage(mrss_url, video_id) |  | ||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|  |  | ||||||
|         video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', |         video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url'] | ||||||
|             webpage, u'video URL') |  | ||||||
|  |  | ||||||
|         video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', |         return { | ||||||
|             webpage, u'title') |             'id': video_id, | ||||||
|  |             'url': video_url, | ||||||
|         return [{ |             'title': info_doc.find('.//title').text, | ||||||
|             'id':       video_id, |         } | ||||||
|             'url':      video_url, |  | ||||||
|             'ext':      video_extension, |  | ||||||
|             'title':    video_title, |  | ||||||
|         }] |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz