Compare commits

..

34 Commits

Author SHA1 Message Date
Philipp Hagemeister
fc492de31d release 2013.07.23.1 2013-07-23 18:37:52 +02:00
Philipp Hagemeister
a9c0f9bc63 Merge branch 'master' of github.com:rg3/youtube-dl 2013-07-23 18:37:09 +02:00
Philipp Hagemeister
b7cc9f5026 [soundcloud] Support URLs with a slash at the end (Fixes #1104) 2013-07-23 18:35:52 +02:00
Jaime Marquínez Ferrándiz
252580c561 YoutubeChannelE: switch ajax query from channel_ajax to c4_browse_ajax
It wasn't detecting when there aren't more videos
2013-07-23 14:58:01 +02:00
Jaime Marquínez Ferrándiz
acc47c1a3f Mark WatIE and TF1IE as broken (related #1103) 2013-07-23 14:29:30 +02:00
Jaime Marquínez Ferrándiz
70fa830e4d CollegeHumorIE: support Youtube videos and embed urls (fixes #1094) 2013-07-23 14:29:29 +02:00
Philipp Hagemeister
a7af0ebaf5 release 2013.07.23 2013-07-23 14:20:52 +02:00
Jaime Marquínez Ferrándiz
67ae7b4760 Fix BreakIE
Also detect videos that come from Youtube
2013-07-23 11:41:05 +02:00
Jaime Marquínez Ferrándiz
de48addae2 Fix CollegHumorIE
Now it downloads the video over http in one file, it doesn't downloads in fragments
Added a test and use the methods in InfoExtractor for downloading webpages
2013-07-23 11:14:11 +02:00
Jaime Marquínez Ferrándiz
ddbfd0f0c5 ComedyCentralIE: support the extended interviews urls (fixes #1079) 2013-07-21 11:04:56 +02:00
Jaime Marquínez Ferrándiz
d7ae0639b4 [youtube] Add an extractor for Youtube recommended videos (":ytrec" keyword) (closes #476)
The new extractor and YoutubeSubscriptionsIE are subclasses of YoutubeFeedsInfoExtractor, which allows to fetch videos from http://www.youtube.com/feed_ajax
2013-07-20 19:33:40 +02:00
Philipp Hagemeister
0382435990 [exfm] Add IE_* descriptions 2013-07-20 11:26:36 +02:00
Philipp Hagemeister
b390d85d95 Merge remote-tracking branch 'yasoob/master' 2013-07-20 11:23:56 +02:00
Philipp Hagemeister
be925dc64c release 2013.07.19 2013-07-19 23:42:29 +02:00
Jaime Marquínez Ferrándiz
de7a91bfe3 WeiboIE: extract the player urls from a json webpage
Also extract a Sina url that doesn't require to follow a redirection.
2013-07-19 20:43:44 +02:00
Jaime Marquínez Ferrándiz
a4358cbabd YoutubeIE: new algo for length 85 (closes #1080), thanks to @patrickslin 2013-07-19 17:12:40 +02:00
Jaime Marquínez Ferrándiz
177ed935a9 TEDIE: fix the title extraction 2013-07-19 16:13:31 +02:00
Jaime Marquínez Ferrándiz
c364f15ff1 Add WeiboIE (closes #1039)
It just embed video from other sites.
Modified the _VALID_URL of Youku to catch embed urls.
2013-07-19 16:09:14 +02:00
Jaime Marquínez Ferrándiz
e1f6e61e6a Add an extractor for 56.com (related #1039) 2013-07-19 15:17:34 +02:00
Jaime Marquínez Ferrándiz
0932300e3a Add SinaIE (related #1039): extractor for video.sina.com.cn 2013-07-18 15:31:50 +02:00
Jaime Marquínez Ferrándiz
3f40217704 InstagramIE: fix the extraction of the uploader_id and the title
The page title is now 'Instagram', so we build it.
Also extract the description
2013-07-18 13:12:27 +02:00
Philipp Hagemeister
f631c3311a Hint that --update may need sudo 2013-07-18 12:53:24 +02:00
Philipp Hagemeister
ad433bb372 release 2013.07.18 2013-07-18 12:41:49 +02:00
Jaime Marquínez Ferrándiz
3e0b3a1428 Remove the test to signature of lengths 43,43
It's already covered by the test for length 87
2013-07-18 12:29:09 +02:00
Jaime Marquínez Ferrándiz
444b116597 YoutubeIE: add algo for length 90 (closes #1064)
Order the cases from higher to lower length.
2013-07-18 12:25:41 +02:00
Jaime Marquínez Ferrándiz
2aea08eda1 Merge pull request #1068 from MiLk/genalgo-youtube-92
[youtube] Add generator for signature 92
2013-07-18 09:54:56 +02:00
M.Yasoob Khalid
8e5e059d7d forgot to import json json 2013-07-18 12:40:56 +05:00
M.Yasoob Khalid
2b1b511f6b removed some unnecessary imports 2013-07-18 12:37:47 +05:00
M.Yasoob Khalid
233ad24ecf corrected a typo and added myself to travis notifications. 2013-07-18 12:37:02 +05:00
M.Yasoob Khalid
c4949c50f9 added test for ex.fm 2013-07-18 12:33:31 +05:00
M.Yasoob Khalid
b6ef402905 added an IE for ex.fm 2013-07-18 12:30:21 +05:00
Emilien Kenler
ccf365475a [youtube] Add generator for signature 92 2013-07-17 17:43:44 +02:00
Jaime Marquínez Ferrándiz
e1fb245690 Add CondeNastIE
It supports some of the websites of the Condé Nast group: WIRED, GQ, Vogue, Glamour, W Magazine and Vanity Fair.
2013-07-17 14:39:02 +02:00
Jaime Marquínez Ferrándiz
5a76c6517e YoutubeIE: some encrypted signatures have more than two parts, print the size of all the parts 2013-07-17 12:08:10 +02:00
22 changed files with 426 additions and 79 deletions

View File

@@ -9,6 +9,7 @@ notifications:
- filippo.valsorda@gmail.com
- phihag@phihag.de
- jaime.marquinez.ferrandiz+travis@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"

View File

@@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like.
# OPTIONS
-h, --help print this help text and exit
--version print program version and exit
-U, --update update this program to latest version
-U, --update update this program to latest version. Make sure
that you have sufficient permissions (run with
sudo if needed)
-i, --ignore-errors continue on download errors
--dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent

View File

@@ -5,6 +5,12 @@
import sys
tests = [
# 92 - vflQw-fB4 2013/07/17
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
# 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
# 88
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
@@ -14,9 +20,9 @@ tests = [
# 86 - vfl_ymO4Z 2013/06/27
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
# 85
# 85 - vflSAFCP9 2013/07/19
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
# 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),

View File

@@ -13,9 +13,14 @@ from helper import FakeYDL
sig = YoutubeIE(FakeYDL())._decrypt_signature
class TestYoutubeSig(unittest.TestCase):
def test_43_43(self):
wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
def test_92(self):
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
self.assertEqual(sig(wrong), right)
def test_90(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
self.assertEqual(sig(wrong), right)
def test_88(self):
@@ -35,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase):
def test_85(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
self.assertEqual(sig(wrong), right)
def test_84(self):
@@ -58,10 +63,5 @@ class TestYoutubeSig(unittest.TestCase):
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
self.assertEqual(sig(wrong), right)
def test_92(self):
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
self.assertEqual(sig(wrong), right)
if __name__ == '__main__':
unittest.main()

View File

@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-v', '--version',
action='version', help='print program version and exit')
general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version')
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
general.add_option('--dump-user-agent',

View File

@@ -9,6 +9,7 @@ from .brightcove import BrightcoveIE
from .canalplus import CanalplusIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE
@@ -18,6 +19,7 @@ from .dreisat import DreiSatIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
from .exfm import ExfmIE
from .facebook import FacebookIE
from .flickr import FlickrIE
from .freesound import FreesoundIE
@@ -50,6 +52,7 @@ from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
from .sina import SinaIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
@@ -69,7 +72,9 @@ from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
@@ -87,6 +92,7 @@ from .youtube import (
YoutubeChannelIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeRecommendedIE,
)
from .zdf import ZDFIE

View File

@@ -1,6 +1,8 @@
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class BreakIE(InfoExtractor):
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split("-")[-1]
webpage = self._download_webpage(url, video_id)
video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
key = re.search(r"icon: '(.+?)',",webpage).group(1)
final_url = str(video_url)+"?"+str(key)
thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
ext = video_url.split('.')[-1]
embed_url = 'http://www.break.com/embed/%s' % video_id
webpage = self._download_webpage(embed_url, video_id)
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
u'info json', flags=re.DOTALL)
info = json.loads(info_json)
video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube')
final_url = video_url + '?' + info['AuthToken']
return [{
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'ext': determine_ext(final_url),
'title': info['contentName'],
'thumbnail': info['thumbUri'],
}]

View File

@@ -0,0 +1,36 @@
# coding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class C56IE(InfoExtractor):
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
IE_NAME = u'56.com'
_TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
u'file': u'93440716.mp4',
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
u'info_dict': {
u'title': u'网事知多少 第32期车怒',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid')
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
text_id, u'Downloading video info')
info = json.loads(info_page)['info']
best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
video_url = best_format['url']
return {'id': info['vid'],
'title': info['Subject'],
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': info.get('bimg') or info.get('img'),
}

View File

@@ -1,26 +1,26 @@
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urllib_request,
ExtractorError,
)
class CollegeHumorIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
def report_manifest(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Downloading XML manifest' % video_id)
_TEST = {
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
u'file': u'6902724.mp4',
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
u'info_dict': {
u'title': u'Comic-Con Cosplay Catastrophe',
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -36,14 +36,16 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
try:
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
metaXml = self._download_webpage(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
if youtubeIdNode is not None:
return self.url_result(youtubeIdNode.text, 'Youtube')
info['description'] = videoNode.findall('./description')[0].text
info['title'] = videoNode.findall('./caption')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
@@ -52,11 +54,9 @@ class CollegeHumorIE(InfoExtractor):
raise ExtractorError(u'Invalid metadata XML file')
manifest_url += '?hdcore=2.10.3'
self.report_manifest(video_id)
try:
manifestXml = compat_urllib_request.urlopen(manifest_url).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
manifestXml = self._download_webpage(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
@@ -66,9 +66,8 @@ class CollegeHumorIE(InfoExtractor):
except IndexError as err:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(manifest_url)
url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url
info['ext'] = 'f4f'
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
info['ext'] = 'mp4'
return [info]

View File

@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
(full-episodes/(?P<episode>.*)|
(?P<clip>
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
(?P<interview>
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
$"""
_TEST = {
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
else:
epTitle = mobj.group('cntitle')
dlNewest = False
elif mobj.group('interview'):
epTitle = mobj.group('interview_title')
dlNewest = False
else:
dlNewest = not mobj.group('episode')
if dlNewest:

View File

@@ -0,0 +1,106 @@
# coding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
orderedSet,
compat_urllib_parse_urlparse,
compat_urlparse,
)
class CondeNastIE(InfoExtractor):
"""
Condé Nast is a media group, some of its sites use a custom HTML5 player
that works the same in all of them.
"""
# The keys are the supported sites and the values are the name to be shown
# to the user and in the extractor description.
_SITES = {'wired': u'WIRED',
'gq': u'GQ',
'vogue': u'Vogue',
'glamour': u'Glamour',
'wmagazine': u'W Magazine',
'vanityfair': u'Vanity Fair',
}
_VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
_TEST = {
u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
u'md5': u'1921f713ed48aabd715691f774c451f7',
u'info_dict': {
u'title': u'3D Printed Speakers Lit With LED',
u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
}
}
def _extract_series(self, url, webpage):
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
webpage, u'series title', flags=re.DOTALL)
url_object = compat_urllib_parse_urlparse(url)
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
webpage, flags=re.DOTALL)
paths = orderedSet(m.group(1) for m in m_paths)
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title)
def _extract_video(self, webpage):
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
r'<div class="video-post-content">(.+?)</div>',
],
webpage, u'description',
fatal=False, flags=re.DOTALL)
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
u'player params', flags=re.DOTALL)
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
data = compat_urllib_parse.urlencode({'videoId': video_id,
'playerId': player_id,
'target': target,
})
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
webpage, u'base info url',
default='http://player.cnevids.com/player/loader.js?')
info_url = base_info_url + data
info_page = self._download_webpage(info_url, video_id,
u'Downloading video info')
video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
video_info = json.loads(video_info)
def _formats_sort_key(f):
type_ord = 1 if f['type'] == 'video/mp4' else 0
quality_ord = 1 if f['quality'] == 'high' else 0
return (quality_ord, type_ord)
best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
return {'id': video_id,
'url': best_format['src'],
'ext': best_format['type'].split('/')[-1],
'title': video_info['title'],
'thumbnail': video_info['poster_frame'],
'description': description,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site = mobj.group('site')
url_type = mobj.group('type')
id = mobj.group('id')
self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, id)
if url_type == 'series':
return self._extract_series(url, webpage)
else:
return self._extract_video(webpage)

View File

@@ -0,0 +1,42 @@
import re
import json
from .common import InfoExtractor
class ExfmIE(InfoExtractor):
IE_NAME = u'exfm'
IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_TEST = {
u'url': u'http://ex.fm/song/1bgtzg',
u'file': u'1bgtzg.mp3',
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
u'info_dict': {
u"title": u"We Can't Stop",
u"uploader": u"Miley Cyrus",
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
song_id = mobj.group(1)
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
webpage = self._download_webpage(info_url, song_id)
info = json.loads(webpage)
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
if song_url is not None:
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
else:
song_url = info['song']['url']
return [{
'id': song_id,
'url': song_url,
'ext': 'mp3',
'title': info['song']['title'],
'thumbnail': info['song']['image']['large'],
'uploader': info['song']['artist'],
'view_count': info['song']['loved_count'],
}]

View File

@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
u'md5': u'0d2da106a9d2631273e192b372806516',
u'info_dict': {
u"uploader_id": u"naomipq",
u"title": u"Video by naomipq"
u"title": u"Video by naomipq",
u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
html_title = self._html_search_regex(
r'<title>(.+?)</title>',
webpage, u'title', flags=re.DOTALL)
title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
uploader_id = self._html_search_regex(
r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
webpage, u'uploader id', fatal=False, flags=re.DOTALL)
ext = 'mp4'
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, u'uploader id', fatal=False)
desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
fatal=False)
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
'ext': ext,
'title': title,
'ext': 'mp4',
'title': u'Video by %s' % uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id' : uploader_id
'uploader_id' : uploader_id,
'description': desc,
}]

View File

@@ -0,0 +1,67 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
compat_urllib_parse,
)
class SinaIE(InfoExtractor):
_VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
(
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
|
# This is used by external sites like Weibo
(api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
)
'''
_TEST = {
u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
u'file': u'110028898.flv',
u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
u'info_dict': {
u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
}
}
@classmethod
def suitable(cls, url):
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id,
'url': url_doc.find('./durl/url').text,
'ext': 'flv',
'title': url_doc.find('./vname').text,
'thumbnail': image_page.split('=')[1],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
video_id = mobj.group('id')
if mobj.group('token') is not None:
# The video id is in the redirected url
self.to_screen(u'Getting video id')
request = compat_urllib_request.Request(url)
request.get_method = lambda: 'HEAD'
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())
elif video_id is None:
pseudo_id = mobj.group('pseudo_id')
webpage = self._download_webpage(url, pseudo_id)
video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
return self._extract_video(video_id)

View File

@@ -19,7 +19,7 @@ class SoundcloudIE(InfoExtractor):
of the stream token and uid
"""
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
IE_NAME = u'soundcloud'
_TEST = {
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',

View File

@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
self.report_extraction(video_name)
# If the url includes the language we get the title translated
title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
webpage, 'title')
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
webpage, 'json data')

View File

@@ -10,6 +10,7 @@ class TF1IE(InfoExtractor):
TF1 uses the wat.tv player, currently it can only download videos with the
html5 player enabled, it cannot download HD videos.
"""
_WORKING = False
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
_TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',

View File

@@ -12,6 +12,7 @@ from ..utils import (
class WatIE(InfoExtractor):
_WORKING = False
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
IE_NAME = 'wat.tv'
_TEST = {

View File

@@ -0,0 +1,48 @@
# coding: utf-8
import re
import json
from .common import InfoExtractor
class WeiboIE(InfoExtractor):
"""
The videos in Weibo come from different sites, this IE just finds the link
to the external video and returns it.
"""
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = {
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'file': u'98322879.flv',
u'info_dict': {
u'title': u'魔声耳机最新广告“All Eyes On Us”',
},
u'note': u'Sina video',
u'params': {
u'skip_download': True,
},
}
# Additional example videos from different sites
# Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
# 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
video_id = mobj.group('id')
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
info_page = self._download_webpage(info_url, video_id)
info = json.loads(info_page)
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
#Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
player_url = videos_urls[-1]
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
if m_sina is not None:
self.to_screen('Sina video detected')
sina_id = m_sina.group(1)
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
return self.url_result(player_url)

View File

@@ -13,7 +13,7 @@ from ..utils import (
class YoukuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
_VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
_TEST = {
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
u"file": u"XNDgyMDQ2NTQw_part00.flv",

View File

@@ -179,14 +179,18 @@ class YoutubeIE(InfoExtractor):
def _decrypt_signature(self, s):
"""Turn the encrypted s field into a working signature"""
if len(s) == 88:
if len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 88:
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
elif len(s) == 87:
return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
elif len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
elif len(s) == 85:
return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
elif len(s) == 84:
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
elif len(s) == 83:
@@ -195,8 +199,6 @@ class YoutubeIE(InfoExtractor):
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
elif len(s) == 81:
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
elif len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83];
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
@@ -591,8 +593,9 @@ class YoutubeIE(InfoExtractor):
else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' %
(len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(s), parts_sizes, url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0])
url += '&signature=' + signature
if 'ratebypass' not in url:
@@ -728,7 +731,7 @@ class YoutubeChannelIE(InfoExtractor):
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'http://www.youtube.com/channel_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
_MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = u'youtube:channel'
def extract_videos_from_page(self, page):
@@ -895,12 +898,12 @@ class YoutubeShowIE(InfoExtractor):
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
class YoutubeSubscriptionsIE(YoutubeIE):
"""It's a subclass of YoutubeIE because we need to login"""
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
IE_NAME = u'youtube:subscriptions'
_FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
class YoutubeFeedsInfoExtractor(YoutubeIE):
"""
Base class for extractors that fetch info from
http://www.youtube.com/feed_ajax
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_PAGING_STEP = 30
# Overwrite YoutubeIE properties we don't want
@@ -909,18 +912,27 @@ class YoutubeSubscriptionsIE(YoutubeIE):
def suitable(cls, url):
return re.match(cls._VALID_URL, url) is not None
@property
def _FEED_TEMPLATE(self):
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
@property
def IE_NAME(self):
return u'youtube:%s' % self._FEED_NAME
def _real_initialize(self):
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
super(YoutubeSubscriptionsIE, self)._real_initialize()
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
def _real_extract(self, url):
feed_entries = []
# The step argument is available only in 2.7 or higher
for i in itertools.count(0):
paging = i*self._PAGING_STEP
info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
info = self._download_webpage(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i)
info = json.loads(info)
feed_html = info['feed_html']
@@ -929,4 +941,16 @@ class YoutubeSubscriptionsIE(YoutubeIE):
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
if info['paging'] is None:
break
return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = u'Youtube Recommended videos'

View File

@@ -1,2 +1,2 @@
__version__ = '2013.07.17.1'
__version__ = '2013.07.23.1'