From 9834872bf63b4e03b66c5e3b8f306556e735d8c5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 21 Jan 2014 18:10:14 +0100 Subject: [PATCH] [facebook] Add support for embeds Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html --- youtube_dl/extractor/facebook.py | 9 +++++++-- youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4556079c8..8f9154c0e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -17,7 +17,12 @@ from ..utils import ( class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P\d+)(?:.*)' + _VALID_URL = r'''(?x) + (?:https?://)?(?:\w+\.)?facebook\.com/ + (?:[^#?]*\#!/)? + (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) + (?:v|video_id)=(?P[0-9]+) + (?:.*)''' _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' @@ -90,7 +95,7 @@ class FacebookIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) - video_id = mobj.group('ID') + video_id = mobj.group('id') url = 'https://www.facebook.com/video/video.php?v=%s' % video_id webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3e96cb15f..91536075d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -319,6 +319,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'Novamov') + # Look for embedded Facebook player + mobj = re.search( + r']+?src=(["\'])(?Phttps://www.facebook.com/video/embed.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Facebook') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: