Match --download-archive during playlist processing (Fixes #1745)
This commit is contained in:
		| @@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeChannelIE(dl) | ||||
|         #test paginated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] | ||||
|         result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') | ||||
|         self.assertTrue(len(result['entries']) > 90) | ||||
|         #test autogenerated channel | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] | ||||
|         result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|         self.assertTrue(len(result['entries']) >= 18) | ||||
|  | ||||
|     def test_youtube_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubeUserIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] | ||||
|         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') | ||||
|         self.assertTrue(len(result['entries']) >= 320) | ||||
|  | ||||
|     def test_youtube_safe_search(self): | ||||
|   | ||||
| @@ -355,15 +355,17 @@ class YoutubeDL(object): | ||||
|     def _match_entry(self, info_dict): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         title = info_dict['title'] | ||||
|         matchtitle = self.params.get('matchtitle', False) | ||||
|         if matchtitle: | ||||
|             if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|         rejecttitle = self.params.get('rejecttitle', False) | ||||
|         if rejecttitle: | ||||
|             if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         if 'title' in info_dict: | ||||
|             # The title can be absent when we're just evaluating the playlist | ||||
|             title = info_dict['title'] | ||||
|             matchtitle = self.params.get('matchtitle', False) | ||||
|             if matchtitle: | ||||
|                 if not re.search(matchtitle, title, re.IGNORECASE): | ||||
|                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' | ||||
|             rejecttitle = self.params.get('rejecttitle', False) | ||||
|             if rejecttitle: | ||||
|                 if re.search(rejecttitle, title, re.IGNORECASE): | ||||
|                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' | ||||
|         date = info_dict.get('upload_date', None) | ||||
|         if date is not None: | ||||
|             dateRange = self.params.get('daterange', DateRange()) | ||||
| @@ -374,8 +376,8 @@ class YoutubeDL(object): | ||||
|             if age_limit < info_dict.get('age_limit', 0): | ||||
|                 return u'Skipping "' + title + '" because it is age restricted' | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return (u'%(title)s has already been recorded in archive' | ||||
|                     % info_dict) | ||||
|             return (u'%s has already been recorded in archive' | ||||
|                     % info_dict.get('title', info_dict.get('id', u'video'))) | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -454,7 +456,7 @@ class YoutubeDL(object): | ||||
|                                      ie_key=ie_result.get('ie_key'), | ||||
|                                      extra_info=extra_info) | ||||
|         elif result_type == 'playlist': | ||||
|             self.add_extra_info(ie_result, extra_info) | ||||
|  | ||||
|             # We process each entry in the playlist | ||||
|             playlist = ie_result.get('title', None) or ie_result.get('id', None) | ||||
|             self.to_screen(u'[download] Downloading playlist: %s' % playlist) | ||||
| @@ -484,6 +486,12 @@ class YoutubeDL(object): | ||||
|                     'webpage_url': ie_result['webpage_url'], | ||||
|                     'extractor_key': ie_result['extractor_key'], | ||||
|                 } | ||||
|  | ||||
|                 reason = self._match_entry(entry) | ||||
|                 if reason is not None: | ||||
|                     self.to_screen(u'[download] ' + reason) | ||||
|                     continue | ||||
|  | ||||
|                 entry_result = self.process_ie_result(entry, | ||||
|                                                       download=download, | ||||
|                                                       extra_info=extra) | ||||
| @@ -810,7 +818,16 @@ class YoutubeDL(object): | ||||
|         fn = self.params.get('download_archive') | ||||
|         if fn is None: | ||||
|             return False | ||||
|         vid_id = info_dict['extractor'] + u' ' + info_dict['id'] | ||||
|         extractor = info_dict.get('extractor_id') | ||||
|         if extractor is None: | ||||
|             if 'id' in info_dict: | ||||
|                 extractor = info_dict.get('ie_key')  # key in a playlist | ||||
|         if extractor is None: | ||||
|             return False  # Incomplete video information | ||||
|         # Future-proof against any change in case | ||||
|         # and backwards compatibility with prior versions | ||||
|         extractor = extractor.lower() | ||||
|         vid_id = extractor + u' ' + info_dict['id'] | ||||
|         try: | ||||
|             with locked_file(fn, 'r', encoding='utf-8') as archive_file: | ||||
|                 for line in archive_file: | ||||
|   | ||||
| @@ -229,12 +229,14 @@ class InfoExtractor(object): | ||||
|         self.to_screen(u'Logging in') | ||||
|  | ||||
|     #Methods for following #608 | ||||
|     def url_result(self, url, ie=None): | ||||
|     def url_result(self, url, ie=None, video_id=None): | ||||
|         """Returns a url that points to a page that should be processed""" | ||||
|         #TODO: ie should be the class used for getting the info | ||||
|         video_info = {'_type': 'url', | ||||
|                       'url': url, | ||||
|                       'ie_key': ie} | ||||
|         if video_id is not None: | ||||
|             video_info['id'] = video_id | ||||
|         return video_info | ||||
|     def playlist_result(self, entries, playlist_id=None, playlist_title=None): | ||||
|         """Returns a playlist""" | ||||
|   | ||||
| @@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             video_id = query_dict['v'][0] | ||||
|             if self._downloader.params.get('noplaylist'): | ||||
|                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) | ||||
|                 return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') | ||||
|                 return self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             else: | ||||
|                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
| @@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
|         playlist_title = self._og_search_title(page) | ||||
|  | ||||
|         url_results = [self.url_result(vid, 'Youtube') for vid in ids] | ||||
|         url_results = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in ids] | ||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||
|  | ||||
|  | ||||
| @@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor): | ||||
|  | ||||
|         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] | ||||
|         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] | ||||
|         return [self.playlist_result(url_entries, channel_id)] | ||||
|         url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                        for video_id in video_ids] | ||||
|         return self.playlist_result(url_entries, channel_id) | ||||
|  | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
| @@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor): | ||||
|             if len(ids_in_page) < self._GDATA_PAGE_SIZE: | ||||
|                 break | ||||
|  | ||||
|         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] | ||||
|         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] | ||||
|         return [self.playlist_result(url_results, playlist_title = username)] | ||||
|         url_results = [ | ||||
|             self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|             for video_id in video_ids] | ||||
|         return self.playlist_result(url_results, playlist_title=username) | ||||
|  | ||||
|  | ||||
| class YoutubeSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = u'YouTube.com searches' | ||||
| @@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|  | ||||
|         if len(video_ids) > n: | ||||
|             video_ids = video_ids[:n] | ||||
|         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] | ||||
|         videos = [self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                   for video_id in video_ids] | ||||
|         return self.playlist_result(videos, query) | ||||
|  | ||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | ||||
| @@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|             feed_html = info['feed_html'] | ||||
|             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) | ||||
|             ids = orderedSet(m.group(1) for m in m_ids) | ||||
|             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) | ||||
|             feed_entries.extend( | ||||
|                 self.url_result(video_id, 'Youtube', video_id=video_id) | ||||
|                 for video_id in ids) | ||||
|             if info['paging'] is None: | ||||
|                 break | ||||
|         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister