[mtvservices:embedded] Use another endpoint to get feed URL
Closes #10363. In the original mtvservices:embedded test case, config.xml is still used to get the feed URL. Some other examples, including test_Generic_40 (http://www.vulture.com/2016/06/new-key-peele-sketches-released.html) and the video mentioned in #10363, use another endpoint to get the feed URL. The 'index.html' approach also works for the original test case, so I didn't keep the old approach.
This commit is contained in:
		| @@ -1,3 +1,9 @@ | |||||||
|  | version <unreleased> | ||||||
|  |  | ||||||
|  | Extractors | ||||||
|  | * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) | ||||||
|  |  | ||||||
|  |  | ||||||
| version 2016.08.24.1 | version 2016.08.24.1 | ||||||
|  |  | ||||||
| Extractors | Extractors | ||||||
|   | |||||||
| @@ -2,7 +2,6 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| from .mtv import MTVServicesInfoExtractor | from .mtv import MTVServicesInfoExtractor | ||||||
| from ..utils import unified_strdate | from ..utils import unified_strdate | ||||||
| from ..compat import compat_urllib_parse_urlencode |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class BetIE(MTVServicesInfoExtractor): | class BetIE(MTVServicesInfoExtractor): | ||||||
| @@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor): | |||||||
|     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" |     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" | ||||||
|  |  | ||||||
|     def _get_feed_query(self, uri): |     def _get_feed_query(self, uri): | ||||||
|         return compat_urllib_parse_urlencode({ |         return { | ||||||
|             'uuid': uri, |             'uuid': uri, | ||||||
|         }) |         } | ||||||
|  |  | ||||||
|     def _extract_mgid(self, webpage): |     def _extract_mgid(self, webpage): | ||||||
|         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') |         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') | ||||||
|   | |||||||
| @@ -4,7 +4,6 @@ import re | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_urllib_parse_urlencode, |  | ||||||
|     compat_str, |     compat_str, | ||||||
|     compat_xpath, |     compat_xpath, | ||||||
| ) | ) | ||||||
| @@ -14,12 +13,13 @@ from ..utils import ( | |||||||
|     fix_xml_ampersands, |     fix_xml_ampersands, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     HEADRequest, |     HEADRequest, | ||||||
|  |     RegexNotFoundError, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|     timeconvert, |     timeconvert, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|  |     update_url_query, | ||||||
|     url_basename, |     url_basename, | ||||||
|     RegexNotFoundError, |  | ||||||
|     xpath_text, |     xpath_text, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|     def _id_from_uri(uri): |     def _id_from_uri(uri): | ||||||
|         return uri.split(':')[-1] |         return uri.split(':')[-1] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _remove_template_parameter(url): | ||||||
|  |         # Remove the templates, like &device={device} | ||||||
|  |         return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) | ||||||
|  |  | ||||||
|     # This was originally implemented for ComedyCentral, but it also works here |     # This was originally implemented for ComedyCentral, but it also works here | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _transform_rtmp_url(cls, rtmp_video_url): |     def _transform_rtmp_url(cls, rtmp_video_url): | ||||||
| @@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|         video_id = self._id_from_uri(uri) |         video_id = self._id_from_uri(uri) | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|         content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))) |         content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))) | ||||||
|         mediagen_url = content_el.attrib['url'] |         mediagen_url = self._remove_template_parameter(content_el.attrib['url']) | ||||||
|         # Remove the templates, like &device={device} |  | ||||||
|         mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) |  | ||||||
|         if 'acceptMethods' not in mediagen_url: |         if 'acceptMethods' not in mediagen_url: | ||||||
|             mediagen_url += '&' if '?' in mediagen_url else '?' |             mediagen_url += '&' if '?' in mediagen_url else '?' | ||||||
|             mediagen_url += 'acceptMethods=fms' |             mediagen_url += 'acceptMethods=fms' | ||||||
| @@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|         data = {'uri': uri} |         data = {'uri': uri} | ||||||
|         if self._LANG: |         if self._LANG: | ||||||
|             data['lang'] = self._LANG |             data['lang'] = self._LANG | ||||||
|         return compat_urllib_parse_urlencode(data) |         return data | ||||||
|  |  | ||||||
|     def _get_videos_info(self, uri): |     def _get_videos_info(self, uri): | ||||||
|         video_id = self._id_from_uri(uri) |         video_id = self._id_from_uri(uri) | ||||||
|         feed_url = self._get_feed_url(uri) |         feed_url = self._get_feed_url(uri) | ||||||
|         info_url = feed_url + '?' + self._get_feed_query(uri) |         info_url = update_url_query(feed_url, self._get_feed_query(uri)) | ||||||
|         return self._get_videos_info_from_url(info_url, video_id) |         return self._get_videos_info_from_url(info_url, video_id) | ||||||
|  |  | ||||||
|     def _get_videos_info_from_url(self, url, video_id): |     def _get_videos_info_from_url(self, url, video_id): | ||||||
| @@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): | |||||||
|  |  | ||||||
|     def _get_feed_url(self, uri): |     def _get_feed_url(self, uri): | ||||||
|         video_id = self._id_from_uri(uri) |         video_id = self._id_from_uri(uri) | ||||||
|         site_id = uri.replace(video_id, '') |         config = self._download_json( | ||||||
|         config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' |             'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id) | ||||||
|                       'context52/config.xml'.format(site_id)) |         return self._remove_template_parameter(config['feedWithQueryParams']) | ||||||
|         config_doc = self._download_xml(config_url, video_id) |  | ||||||
|         feed_node = config_doc.find('.//feed') |  | ||||||
|         feed_url = feed_node.text.strip().split('?')[0] |  | ||||||
|         return feed_url |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|   | |||||||
| @@ -2,7 +2,6 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .mtv import MTVServicesInfoExtractor | from .mtv import MTVServicesInfoExtractor | ||||||
| from ..compat import compat_urllib_parse_urlencode |  | ||||||
| from ..utils import update_url_query | from ..utils import update_url_query | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor): | |||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _get_feed_query(self, uri): |     def _get_feed_query(self, uri): | ||||||
|         return compat_urllib_parse_urlencode({ |         return { | ||||||
|             'feed': 'nick_arc_player_prime', |             'feed': 'nick_arc_player_prime', | ||||||
|             'mgid': uri, |             'mgid': uri, | ||||||
|         }) |         } | ||||||
|  |  | ||||||
|     def _extract_mgid(self, webpage): |     def _extract_mgid(self, webpage): | ||||||
|         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') |         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan