[youtube] Separate feed extractor
This commit is contained in:
		| @@ -49,6 +49,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||||||
|             # YouTube sets the expire time to about two months |             # YouTube sets the expire time to about two months | ||||||
|             expire_time=time.time() + 2 * 30 * 24 * 3600) |             expire_time=time.time() + 2 * 30 * 24 * 3600) | ||||||
|  |  | ||||||
|  |     def _ids_to_results(self, ids): | ||||||
|  |         return [ | ||||||
|  |             self.url_result(vid_id, 'Youtube', video_id=vid_id) | ||||||
|  |             for vid_id in ids] | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
|         """ |         """ | ||||||
|         Attempt to log in to YouTube. |         Attempt to log in to YouTube. | ||||||
| @@ -1261,11 +1266,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|     def _real_initialize(self): |     def _real_initialize(self): | ||||||
|         self._login() |         self._login() | ||||||
|  |  | ||||||
|     def _ids_to_results(self, ids): |  | ||||||
|         return [ |  | ||||||
|             self.url_result(vid_id, 'Youtube', video_id=vid_id) |  | ||||||
|             for vid_id in ids] |  | ||||||
|  |  | ||||||
|     def _extract_mix(self, playlist_id): |     def _extract_mix(self, playlist_id): | ||||||
|         # The mixes are generated from a single video |         # The mixes are generated from a single video | ||||||
|         # the id of the playlist is just 'RD' + video_id |         # the id of the playlist is just 'RD' + video_id | ||||||
| @@ -1601,20 +1601,10 @@ class YoutubeShowIE(InfoExtractor): | |||||||
|  |  | ||||||
| class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||||
|     """ |     """ | ||||||
|     Base class for extractors that fetch info from |     Base class for feed extractors | ||||||
|     http://www.youtube.com/feed_ajax |  | ||||||
|     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. |     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. | ||||||
|     """ |     """ | ||||||
|     _LOGIN_REQUIRED = True |     _LOGIN_REQUIRED = True | ||||||
|     # use action_load_personal_feed instead of action_load_system_feed |  | ||||||
|     _PERSONAL_FEED = False |  | ||||||
|  |  | ||||||
|     @property |  | ||||||
|     def _FEED_TEMPLATE(self): |  | ||||||
|         action = 'action_load_system_feed' |  | ||||||
|         if self._PERSONAL_FEED: |  | ||||||
|             action = 'action_load_personal_feed' |  | ||||||
|         return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) |  | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def IE_NAME(self): |     def IE_NAME(self): | ||||||
| @@ -1624,58 +1614,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | |||||||
|         self._login() |         self._login() | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         feed_entries = [] |         page = self._download_webpage( | ||||||
|         paging = 0 |             'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE) | ||||||
|         for i in itertools.count(1): |  | ||||||
|             info = self._download_json( |  | ||||||
|                 self._FEED_TEMPLATE % paging, |  | ||||||
|                 '%s feed' % self._FEED_NAME, |  | ||||||
|                 'Downloading page %s' % i, |  | ||||||
|                 transform_source=uppercase_escape) |  | ||||||
|             feed_html = info.get('feed_html') or info.get('content_html') |  | ||||||
|             load_more_widget_html = info.get('load_more_widget_html') or feed_html |  | ||||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) |  | ||||||
|             ids = orderedSet(m.group(1) for m in m_ids) |  | ||||||
|             feed_entries.extend( |  | ||||||
|                 self.url_result(video_id, 'Youtube', video_id=video_id) |  | ||||||
|                 for video_id in ids) |  | ||||||
|             mobj = re.search( |  | ||||||
|                 r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)', |  | ||||||
|                 load_more_widget_html) |  | ||||||
|             if mobj is None: |  | ||||||
|                 break |  | ||||||
|             paging = mobj.group('paging') |  | ||||||
|         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): |  | ||||||
|     IE_NAME = 'youtube:recommended' |  | ||||||
|     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' |  | ||||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' |  | ||||||
|     _FEED_NAME = 'recommended' |  | ||||||
|     _PLAYLIST_TITLE = 'Youtube Recommended videos' |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeWatchLaterIE(YoutubePlaylistIE): |  | ||||||
|     IE_NAME = 'youtube:watchlater' |  | ||||||
|     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' |  | ||||||
|     _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' |  | ||||||
|  |  | ||||||
|     _TESTS = []  # override PlaylistIE tests |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         return self._extract_playlist('WL') |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeHistoryIE(YoutubePlaylistIE): |  | ||||||
|     IE_NAME = 'youtube:history' |  | ||||||
|     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' |  | ||||||
|     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' |  | ||||||
|     _TESTS = [] |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         title = 'Youtube History' |  | ||||||
|         page = self._download_webpage('https://www.youtube.com/feed/history', title) |  | ||||||
|  |  | ||||||
|         # The extraction process is the same as for playlists, but the regex |         # The extraction process is the same as for playlists, but the regex | ||||||
|         # for the video ids doesn't contain an index |         # for the video ids doesn't contain an index | ||||||
| @@ -1692,17 +1632,25 @@ class YoutubeHistoryIE(YoutubePlaylistIE): | |||||||
|                 break |                 break | ||||||
|  |  | ||||||
|             more = self._download_json( |             more = self._download_json( | ||||||
|                 'https://youtube.com/%s' % mobj.group('more'), title, |                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE, | ||||||
|                 'Downloading page #%s' % page_num, |                 'Downloading page #%s' % page_num, | ||||||
|                 transform_source=uppercase_escape) |                 transform_source=uppercase_escape) | ||||||
|             content_html = more['content_html'] |             content_html = more['content_html'] | ||||||
|             more_widget_html = more['load_more_widget_html'] |             more_widget_html = more['load_more_widget_html'] | ||||||
|  |  | ||||||
|         return { |         return self.playlist_result( | ||||||
|             '_type': 'playlist', |             self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE) | ||||||
|             'title': title, |  | ||||||
|             'entries': self._ids_to_results(ids), |  | ||||||
|         } | class YoutubeWatchLaterIE(YoutubePlaylistIE): | ||||||
|  |     IE_NAME = 'youtube:watchlater' | ||||||
|  |     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' | ||||||
|  |     _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' | ||||||
|  |  | ||||||
|  |     _TESTS = []  # override PlaylistIE tests | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         return self._extract_playlist('WL') | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | ||||||
| @@ -1717,42 +1665,25 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | |||||||
|         return self.url_result(playlist_id, 'YoutubePlaylist') |         return self.url_result(playlist_id, 'YoutubePlaylist') | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeSubscriptionsIE(YoutubePlaylistIE): | class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): | ||||||
|     IE_NAME = 'youtube:subscriptions' |     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' | ||||||
|  |     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' | ||||||
|  |     _FEED_NAME = 'recommended' | ||||||
|  |     _PLAYLIST_TITLE = 'Youtube Recommended videos' | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): | ||||||
|     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' |     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' | ||||||
|     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' |     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' | ||||||
|     _TESTS = [] |     _FEED_NAME = 'subscriptions' | ||||||
|  |     _PLAYLIST_TITLE = 'Youtube Subscriptions' | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         title = 'Youtube Subscriptions' |  | ||||||
|         page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title) |  | ||||||
|  |  | ||||||
|         # The extraction process is the same as for playlists, but the regex | class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): | ||||||
|         # for the video ids doesn't contain an index |     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' | ||||||
|         ids = [] |     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' | ||||||
|         more_widget_html = content_html = page |     _FEED_NAME = 'history' | ||||||
|  |     _PLAYLIST_TITLE = 'Youtube History' | ||||||
|         for page_num in itertools.count(1): |  | ||||||
|             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html) |  | ||||||
|             new_ids = orderedSet(matches) |  | ||||||
|             ids.extend(new_ids) |  | ||||||
|  |  | ||||||
|             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) |  | ||||||
|             if not mobj: |  | ||||||
|                 break |  | ||||||
|  |  | ||||||
|             more = self._download_json( |  | ||||||
|                 'https://youtube.com/%s' % mobj.group('more'), title, |  | ||||||
|                 'Downloading page #%s' % page_num, |  | ||||||
|                 transform_source=uppercase_escape) |  | ||||||
|             content_html = more['content_html'] |  | ||||||
|             more_widget_html = more['load_more_widget_html'] |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             '_type': 'playlist', |  | ||||||
|             'title': title, |  | ||||||
|             'entries': self._ids_to_results(ids), |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeTruncatedURLIE(InfoExtractor): | class YoutubeTruncatedURLIE(InfoExtractor): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․