[camdemy] Simplify and make more robust (#4938)
Do not raise errors when extraction of the view count or upload date fails. Drop re.MULTILINE, which had no effect because the patterns contain no ^ or $ anchors. Follow PEP 8 naming conventions.
This commit is contained in:
		| @@ -1,11 +1,18 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import datetime | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import (compat_urllib_parse, compat_urlparse) | from ..compat import ( | ||||||
| from ..utils import parse_iso8601 |     compat_urllib_parse, | ||||||
|  |     compat_urlparse, | ||||||
|  | ) | ||||||
|  | from ..utils import ( | ||||||
|  |     parse_iso8601, | ||||||
|  |     str_to_int, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class CamdemyIE(InfoExtractor): | class CamdemyIE(InfoExtractor): | ||||||
| @@ -23,6 +30,7 @@ class CamdemyIE(InfoExtractor): | |||||||
|             'creator': 'ss11spring', |             'creator': 'ss11spring', | ||||||
|             'upload_date': '20130114', |             'upload_date': '20130114', | ||||||
|             'timestamp': 1358154556, |             'timestamp': 1358154556, | ||||||
|  |             'view_count': int, | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         # With non-empty description |         # With non-empty description | ||||||
| @@ -55,46 +63,43 @@ class CamdemyIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|         page = self._download_webpage(url, video_id) |         page = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         srcFrom = self._html_search_regex( |         src_from = self._html_search_regex( | ||||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, |             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||||
|             'external source', default=None) |             'external source', default=None) | ||||||
|  |         if src_from: | ||||||
|         if srcFrom: |             return self.url_result(src_from) | ||||||
|             return self.url_result(srcFrom) |  | ||||||
|  |  | ||||||
|         oembed_obj = self._download_json( |         oembed_obj = self._download_json( | ||||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) |             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||||
|  |  | ||||||
|         thumb_url = oembed_obj['thumbnail_url'] |         thumb_url = oembed_obj['thumbnail_url'] | ||||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') |         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||||
|         fileListXML = self._download_xml( |         file_list_doc = self._download_xml( | ||||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), |             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||||
|             video_id, 'Filelist XML') |             video_id, 'Filelist XML') | ||||||
|         fileName = fileListXML.find('./video/item/fileName').text |         file_name = file_list_doc.find('./video/item/fileName').text | ||||||
|  |         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||||
|  |  | ||||||
|         creation_time = self._html_search_regex( |         timestamp = parse_iso8601(self._html_search_regex( | ||||||
|             r"<div class='title'>Posted :</div>[\r\n ]*<div class='value'>([^<>]+)<", |             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||||
|             page, 'creation time', flags=re.MULTILINE) + '+08:00' |             page, 'creation time', fatal=False), | ||||||
|         creation_timestamp = parse_iso8601(creation_time, delimiter=' ') |             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||||
|  |         view_count = str_to_int(self._html_search_regex( | ||||||
|         view_count_str = self._html_search_regex( |             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||||
|             r"<div class='title'>Views :</div>[\r\n ]*<div class='value'>([^<>]+)<", |             page, 'view count', fatal=False)) | ||||||
|             page, 'view count', flags=re.MULTILINE) |  | ||||||
|         views = int(view_count_str.replace(',', '')) |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': compat_urlparse.urljoin(video_folder, fileName), |             'url': video_url, | ||||||
|             'title': oembed_obj['title'], |             'title': oembed_obj['title'], | ||||||
|             'thumbnail': thumb_url, |             'thumbnail': thumb_url, | ||||||
|             'description': self._html_search_meta('description', page), |             'description': self._html_search_meta('description', page), | ||||||
|             'creator': oembed_obj['author_name'], |             'creator': oembed_obj['author_name'], | ||||||
|             'duration': oembed_obj['duration'], |             'duration': oembed_obj['duration'], | ||||||
|             'timestamp': creation_timestamp, |             'timestamp': timestamp, | ||||||
|             'view_count': views, |             'view_count': view_count, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -666,12 +666,13 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): | |||||||
|             req, **kwargs) |             req, **kwargs) | ||||||
|  |  | ||||||
|  |  | ||||||
| def parse_iso8601(date_str, delimiter='T'): | def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||||
|     """ Return a UNIX timestamp from the given date """ |     """ Return a UNIX timestamp from the given date """ | ||||||
|  |  | ||||||
|     if date_str is None: |     if date_str is None: | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
|  |     if timezone is None: | ||||||
|         m = re.search( |         m = re.search( | ||||||
|             r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', |             r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||||
|             date_str) |             date_str) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister