[ytsearch] Fix extraction (closes #26920)
This commit is contained in:
		| @@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor): | |||||||
|     _MAX_RESULTS = float('inf') |     _MAX_RESULTS = float('inf') | ||||||
|     IE_NAME = 'youtube:search' |     IE_NAME = 'youtube:search' | ||||||
|     _SEARCH_KEY = 'ytsearch' |     _SEARCH_KEY = 'ytsearch' | ||||||
|     _EXTRA_QUERY_ARGS = {} |     _SEARCH_PARAMS = None | ||||||
|     _TESTS = [] |     _TESTS = [] | ||||||
|  |  | ||||||
|  |     def _entries(self, query, n): | ||||||
|  |         data = { | ||||||
|  |             'context': { | ||||||
|  |                 'client': { | ||||||
|  |                     'clientName': 'WEB', | ||||||
|  |                     'clientVersion': '2.20201021.03.00', | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             'query': query, | ||||||
|  |         } | ||||||
|  |         if self._SEARCH_PARAMS: | ||||||
|  |             data['params'] = self._SEARCH_PARAMS | ||||||
|  |         total = 0 | ||||||
|  |         for page_num in itertools.count(1): | ||||||
|  |             search = self._download_json( | ||||||
|  |                 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', | ||||||
|  |                 video_id='query "%s"' % query, | ||||||
|  |                 note='Downloading page %s' % page_num, | ||||||
|  |                 errnote='Unable to download API page', fatal=False, | ||||||
|  |                 data=json.dumps(data).encode('utf8'), | ||||||
|  |                 headers={'content-type': 'application/json'}) | ||||||
|  |             if not search: | ||||||
|  |                 break | ||||||
|  |             slr_contents = try_get( | ||||||
|  |                 search, | ||||||
|  |                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], | ||||||
|  |                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), | ||||||
|  |                 list) | ||||||
|  |             if not slr_contents: | ||||||
|  |                 break | ||||||
|  |             isr_contents = try_get( | ||||||
|  |                 slr_contents, | ||||||
|  |                 lambda x: x[0]['itemSectionRenderer']['contents'], | ||||||
|  |                 list) | ||||||
|  |             if not isr_contents: | ||||||
|  |                 break | ||||||
|  |             for content in isr_contents: | ||||||
|  |                 if not isinstance(content, dict): | ||||||
|  |                     continue | ||||||
|  |                 video = content.get('videoRenderer') | ||||||
|  |                 if not isinstance(video, dict): | ||||||
|  |                     continue | ||||||
|  |                 video_id = video.get('videoId') | ||||||
|  |                 if not video_id: | ||||||
|  |                     continue | ||||||
|  |                 title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) | ||||||
|  |                 description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) | ||||||
|  |                 duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) | ||||||
|  |                 view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' | ||||||
|  |                 view_count = int_or_none(self._search_regex( | ||||||
|  |                     r'^(\d+)', re.sub(r'\s', '', view_count_text), | ||||||
|  |                     'view count', default=None)) | ||||||
|  |                 uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) | ||||||
|  |                 total += 1 | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url_transparent', | ||||||
|  |                     'ie_key': YoutubeIE.ie_key(), | ||||||
|  |                     'id': video_id, | ||||||
|  |                     'url': video_id, | ||||||
|  |                     'title': title, | ||||||
|  |                     'description': description, | ||||||
|  |                     'duration': duration, | ||||||
|  |                     'view_count': view_count, | ||||||
|  |                     'uploader': uploader, | ||||||
|  |                 } | ||||||
|  |                 if total == n: | ||||||
|  |                     return | ||||||
|  |             token = try_get( | ||||||
|  |                 slr_contents, | ||||||
|  |                 lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], | ||||||
|  |                 compat_str) | ||||||
|  |             if not token: | ||||||
|  |                 break | ||||||
|  |             data['continuation'] = token | ||||||
|  |  | ||||||
|     def _get_n_results(self, query, n): |     def _get_n_results(self, query, n): | ||||||
|         """Get a specified number of results for a query""" |         """Get a specified number of results for a query""" | ||||||
|  |         return self.playlist_result(self._entries(query, n), query) | ||||||
|         videos = [] |  | ||||||
|         limit = n |  | ||||||
|  |  | ||||||
|         url_query = { |  | ||||||
|             'search_query': query.encode('utf-8'), |  | ||||||
|         } |  | ||||||
|         url_query.update(self._EXTRA_QUERY_ARGS) |  | ||||||
|         result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) |  | ||||||
|  |  | ||||||
|         for pagenum in itertools.count(1): |  | ||||||
|             data = self._download_json( |  | ||||||
|                 result_url, video_id='query "%s"' % query, |  | ||||||
|                 note='Downloading page %s' % pagenum, |  | ||||||
|                 errnote='Unable to download API page', |  | ||||||
|                 query={'spf': 'navigate'}) |  | ||||||
|             html_content = data[1]['body']['content'] |  | ||||||
|  |  | ||||||
|             if 'class="search-message' in html_content: |  | ||||||
|                 raise ExtractorError( |  | ||||||
|                     '[youtube] No video results', expected=True) |  | ||||||
|  |  | ||||||
|             new_videos = list(self._process_page(html_content)) |  | ||||||
|             videos += new_videos |  | ||||||
|             if not new_videos or len(videos) > limit: |  | ||||||
|                 break |  | ||||||
|             next_link = self._html_search_regex( |  | ||||||
|                 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', |  | ||||||
|                 html_content, 'next link', default=None) |  | ||||||
|             if next_link is None: |  | ||||||
|                 break |  | ||||||
|             result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) |  | ||||||
|  |  | ||||||
|         if len(videos) > n: |  | ||||||
|             videos = videos[:n] |  | ||||||
|         return self.playlist_result(videos, query) |  | ||||||
|  |  | ||||||
|  |  | ||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE that only overrides class-level settings;
    # all search logic is inherited from the parent class.
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request when non-empty.
    # NOTE(review): the value looks like a percent-encoded 'CAI=' and
    # presumably selects the date ordering described in IE_DESC - confirm
    # that the encoded form is what the JSON API expects.
    _SEARCH_PARAMS = 'CAI%3D'
|  |  | ||||||
|  |  | ||||||
| class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): | class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․