Compare commits
28 Commits
2013.06.34
...
2013.06.34
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca1c9cfe11 | ||
|
|
adeb4d7469 | ||
|
|
50587ee8ec | ||
|
|
8244288dfe | ||
|
|
6ffe72835a | ||
|
|
8ba5e990a5 | ||
|
|
9afb1afcc6 | ||
|
|
0e21093a8f | ||
|
|
9c5cd0948f | ||
|
|
1083705fe8 | ||
|
|
f3d294617f | ||
|
|
de33a30858 | ||
|
|
705f6f35bc | ||
|
|
e648b22dbd | ||
|
|
257a2501fa | ||
|
|
99afb3ddd4 | ||
|
|
a3c776203f | ||
|
|
53f350c165 | ||
|
|
f46d31f948 | ||
|
|
bf64ff72db | ||
|
|
bc2884afc1 | ||
|
|
023fa8c440 | ||
|
|
427023a1e6 | ||
|
|
a924876fed | ||
|
|
3f223f7b2e | ||
|
|
fc2c063e1e | ||
|
|
20db33e299 | ||
|
|
1f0483b4b1 |
@@ -168,7 +168,7 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `playlist`: The name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
||||
|
||||
The current default template is `%(id)s.%(ext)s`, but that will be switched to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or & in the filename, for example when transferring the downloaded file to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
|
||||
@@ -5,18 +5,25 @@
|
||||
import sys
|
||||
|
||||
tests = [
|
||||
# 88
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||
# 87
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
|
||||
# 86 - vfl_ymO4Z 2013/06/27
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
|
||||
# 82
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
]
|
||||
|
||||
@@ -2,6 +2,7 @@ import io
|
||||
import json
|
||||
import os.path
|
||||
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl import YoutubeDL, YoutubeDLHandler
|
||||
from youtube_dl.utils import (
|
||||
compat_cookiejar,
|
||||
@@ -30,4 +31,14 @@ class FakeYDL(YoutubeDL):
|
||||
def trouble(self, s, tb=None):
|
||||
raise Exception(s)
|
||||
def download(self, x):
|
||||
self.result.append(x)
|
||||
self.result.append(x)
|
||||
|
||||
def get_testcases():
    """Yield every test definition declared on the known extractors.

    For each extractor returned by youtube_dl.extractor.gen_extractors(),
    the single `_TEST` dict (if truthy) is yielded first, followed by each
    entry of `_TESTS` (if present). Every yielded dict gets a 'name' key set
    to the extractor's class name with the trailing 'IE' removed.
    """
    for extractor in youtube_dl.extractor.gen_extractors():
        # Class name minus the 'IE' suffix, e.g. 'YoutubeIE' -> 'Youtube'
        short_name = type(extractor).__name__[:-len('IE')]

        single_case = getattr(extractor, '_TEST', None)
        cases = [single_case] if single_case else []
        cases.extend(getattr(extractor, '_TESTS', []))

        for case in cases:
            case['name'] = short_name
            yield case
|
||||
|
||||
@@ -7,7 +7,8 @@ import unittest
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE
|
||||
from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
|
||||
from helper import get_testcases
|
||||
|
||||
class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_youtube_playlist_matching(self):
|
||||
@@ -50,5 +51,16 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
    """Check that each test URL is claimed only by its own extractor.

    For every test case, the extractor named by the test case and GenericIE
    must report the URL as suitable; every other extractor must not.
    """
    extractors = gen_extractors()
    for testcase in get_testcases():
        url = testcase['url']
        # Class names that are expected to accept this URL
        expected_names = ['GenericIE', testcase['name'] + 'IE']
        for extractor in extractors:
            ie_class_name = type(extractor).__name__
            if ie_class_name not in expected_names:
                self.assertFalse(extractor.suitable(url), '%s should not match URL %r' % (ie_class_name, url))
            else:
                self.assertTrue(extractor.suitable(url), '%s should match URL %r' % (ie_class_name, url))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -14,10 +14,8 @@ import binascii
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl.utils import *
|
||||
|
||||
DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json')
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
|
||||
RETRIES = 3
|
||||
@@ -56,17 +54,8 @@ def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
with io.open(DEF_FILE, encoding='utf-8') as deff:
|
||||
defs = json.load(deff)
|
||||
for ie in youtube_dl.extractor.gen_extractors():
|
||||
t = getattr(ie, '_TEST', None)
|
||||
if t:
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
defs.append(t)
|
||||
for t in getattr(ie, '_TESTS', []):
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
defs.append(t)
|
||||
|
||||
from helper import get_testcases
|
||||
defs = get_testcases()
|
||||
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
@@ -83,22 +72,23 @@ def generator(test_case):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
if not ie._WORKING:
|
||||
print('Skipping: IE marked as not _WORKING')
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
if 'playlist' not in test_case and not test_case['file']:
|
||||
print('Skipping: No output file specified')
|
||||
print_skipping('No output file specified')
|
||||
return
|
||||
if 'skip' in test_case:
|
||||
print('Skipping: {0}'.format(test_case['skip']))
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
|
||||
params = self.parameters.copy()
|
||||
params.update(test_case.get('params', {}))
|
||||
|
||||
ydl = YoutubeDL(params)
|
||||
for ie in youtube_dl.extractor.gen_extractors():
|
||||
ydl.add_info_extractor(ie)
|
||||
ydl.add_default_info_extractors()
|
||||
finished_hook_called = set()
|
||||
def _hook(status):
|
||||
if status['status'] == 'finished':
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
[]
|
||||
@@ -13,7 +13,7 @@ import time
|
||||
import traceback
|
||||
|
||||
from .utils import *
|
||||
from .extractor import get_info_extractor
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
|
||||
|
||||
@@ -113,6 +113,13 @@ class YoutubeDL(object):
|
||||
self._ies.append(ie)
|
||||
ie.set_downloader(self)
|
||||
|
||||
def add_default_info_extractors(self):
    """Register every InfoExtractor produced by gen_extractors().

    Each instance is handed to add_info_extractor, in the order in which
    gen_extractors returns them.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
|
||||
|
||||
def add_post_processor(self, pp):
|
||||
"""Add a PostProcessor object to the end of the chain."""
|
||||
self._pps.append(pp)
|
||||
|
||||
@@ -573,8 +573,7 @@ def _real_main(argv=None):
|
||||
ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
|
||||
ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
|
||||
|
||||
for extractor in extractors:
|
||||
ydl.add_info_extractor(extractor)
|
||||
ydl.add_default_info_extractors()
|
||||
|
||||
# PostProcessors
|
||||
if opts.extractaudio:
|
||||
|
||||
@@ -15,6 +15,7 @@ from .escapist import EscapistIE
|
||||
from .facebook import FacebookIE
|
||||
from .flickr import FlickrIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -38,6 +39,7 @@ from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
@@ -45,13 +47,16 @@ from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vine import VineIE
|
||||
from .wat import WatIE
|
||||
from .wimp import WimpIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .xhamster import XHamsterIE
|
||||
@@ -65,83 +70,18 @@ from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserI
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
||||
_ALL_CLASSES = [
|
||||
klass
|
||||
for name, klass in globals().items()
|
||||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE)
|
||||
|
||||
def gen_extractors():
|
||||
""" Return a list of an instance of every supported extractor.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
return [
|
||||
YoutubePlaylistIE(),
|
||||
YoutubeChannelIE(),
|
||||
YoutubeUserIE(),
|
||||
YoutubeSearchIE(),
|
||||
YoutubeIE(),
|
||||
MetacafeIE(),
|
||||
DailymotionIE(),
|
||||
GoogleSearchIE(),
|
||||
PhotobucketIE(),
|
||||
YahooIE(),
|
||||
YahooSearchIE(),
|
||||
DepositFilesIE(),
|
||||
FacebookIE(),
|
||||
BlipTVIE(),
|
||||
BlipTVUserIE(),
|
||||
VimeoIE(),
|
||||
MyVideoIE(),
|
||||
ComedyCentralIE(),
|
||||
EscapistIE(),
|
||||
CollegeHumorIE(),
|
||||
XVideosIE(),
|
||||
SoundcloudSetIE(),
|
||||
SoundcloudIE(),
|
||||
InfoQIE(),
|
||||
MixcloudIE(),
|
||||
StanfordOpenClassroomIE(),
|
||||
MTVIE(),
|
||||
YoukuIE(),
|
||||
XNXXIE(),
|
||||
YouJizzIE(),
|
||||
PornotubeIE(),
|
||||
YouPornIE(),
|
||||
GooglePlusIE(),
|
||||
ArteTvIE(),
|
||||
NBAIE(),
|
||||
WorldStarHipHopIE(),
|
||||
JustinTVIE(),
|
||||
FunnyOrDieIE(),
|
||||
SteamIE(),
|
||||
UstreamIE(),
|
||||
RBMARadioIE(),
|
||||
EightTracksIE(),
|
||||
KeekIE(),
|
||||
TEDIE(),
|
||||
MySpassIE(),
|
||||
SpiegelIE(),
|
||||
LiveLeakIE(),
|
||||
ARDIE(),
|
||||
ZDFIE(),
|
||||
TumblrIE(),
|
||||
BandcampIE(),
|
||||
RedTubeIE(),
|
||||
InaIE(),
|
||||
HowcastIE(),
|
||||
VineIE(),
|
||||
FlickrIE(),
|
||||
TeamcocoIE(),
|
||||
XHamsterIE(),
|
||||
HypemIE(),
|
||||
Vbox7IE(),
|
||||
GametrailersIE(),
|
||||
StatigramIE(),
|
||||
BreakIE(),
|
||||
VevoIE(),
|
||||
JukeboxIE(),
|
||||
TudouIE(),
|
||||
CSpanIE(),
|
||||
WimpIE(),
|
||||
HotNewHipHopIE(),
|
||||
AUEngineIE(),
|
||||
GenericIE()
|
||||
]
|
||||
return [klass() for klass in _ALL_CLASSES]
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
|
||||
@@ -44,6 +44,7 @@ class InfoExtractor(object):
|
||||
location: Physical location of the video.
|
||||
player_url: SWF Player URL (used for rtmpdump).
|
||||
subtitles: The subtitle file contents.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
|
||||
|
||||
45
youtube_dl/extractor/gamespot.py
Normal file
45
youtube_dl/extractor/gamespot.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
class GameSpotIE(InfoExtractor):
    """Information extractor for gamespot.com video pages."""

    # NOTE(review): the last group uses the class [^/d], which excludes '/'
    # and the letter 'd'; possibly \d was intended -- confirm before changing.
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        The video id is taken from the URL, then the player XML for that id
        is downloaded and the first './playList/clip' element is read for the
        media URL, title, thumbnail, view count and upload date.
        """
        match = re.match(self._VALID_URL, url)
        # Keep only what follows the last hyphen of the third captured group
        video_id = match.group(3).rsplit("-", 1)[-1]

        xml_url = "http://www.gamespot.com/pages/video_player/xml.php?id=" + str(video_id)
        xml_text = self._download_webpage(xml_url, video_id)
        clip = xml.etree.ElementTree.fromstring(xml_text).find('./playList/clip')

        def clip_text(path):
            # Text content of the child element of the clip found at `path`
            return clip.find(path).text

        video_url = clip_text('./URI')
        title = clip_text('./title')
        # The extension is whatever follows the last dot of the media URL
        ext = video_url.rsplit('.', 1)[-1]
        thumbnail_url = clip_text('./screenGrabURI')
        view_count = int(clip_text('./views'))
        upload_date = unified_strdate(clip_text('./postDate'))

        info = {
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'upload_date': upload_date,
            'view_count': view_count,
        }
        return [info]
|
||||
37
youtube_dl/extractor/ringtv.py
Normal file
37
youtube_dl/extractor/ringtv.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RingTVIE(InfoExtractor):
    """Information extractor for ringtv.craveonline.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)'
    _TEST = {
        u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown",
        u"file": u"746619.mp4",
        u"md5": u"7c46b4057d22de32e0a539f017e64ad3",
        u"info_dict": {
            u"title": u"Canelo Alvarez talks about Mayweather showdown",
            u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        The id is the part of the URL slug before its first hyphen. Title and
        description are scraped from the page; the media and thumbnail URLs
        are built from the id.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group(1).partition('-')[0]

        webpage = self._download_webpage(url, video_id)

        raw_title = self._search_regex(
            r'<title>(.+?)</title>', webpage, 'video title')
        # Drop the site suffix from the page title
        title = raw_title.replace(' | RingTV', '')
        description = self._search_regex(
            r'<div class="blurb">(.+?)</div>', webpage, 'Description')

        final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % (str(video_id))
        thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % (str(video_id))
        # Extension is the text after the last dot of the media URL
        ext = final_url.rsplit('.', 1)[-1]

        info = {
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
        }
        return [info]
|
||||
|
||||
@@ -19,7 +19,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
|
||||
IE_NAME = u'soundcloud'
|
||||
_TEST = {
|
||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
@@ -86,7 +86,7 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
IE_NAME = u'soundcloud:set'
|
||||
_TEST = {
|
||||
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
|
||||
|
||||
@@ -17,7 +17,7 @@ class TEDIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
u'file': u'102.mp4',
|
||||
u'md5': u'8cd9dfa41ee000ce658fd48fb5d89a61',
|
||||
u'md5': u'2d76ee1576672e0bd8f187513267adf6',
|
||||
u'info_dict': {
|
||||
u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
|
||||
u"title": u"Dan Dennett: The illusion of consciousness"
|
||||
|
||||
35
youtube_dl/extractor/tf1.py
Normal file
35
youtube_dl/extractor/tf1.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
    """
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """

    # Literal dots are escaped so that '.' matches only an actual dot and not
    # any character (e.g. 'videosXtf1Xfr' or '...-123xhtml').
    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        u'file': u'10635995.mp4',
        u'md5': u'66789d3e91278d332f75e1feb7aea327',
        u'info_dict': {
            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        }
    }

    def _real_extract(self, url):
        """Resolve a TF1 page to its wat.tv URL and delegate to the 'Wat' extractor.

        Steps: download the TF1 page, find the wat.tv embed frame URL in it,
        download that frame to read the UVID value, query the wat.tv
        contentv3 interface with it and return a url_result for the 'url'
        field of its 'media' entry.
        """
        mobj = re.match(self._VALID_URL, url)
        # Named video_id rather than `id` to avoid shadowing the builtin
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_regex(
            r'"(https://www\.wat\.tv/embedframe/.*?)"', webpage, 'embed url')
        embed_page = self._download_webpage(
            embed_url, video_id, u'Downloading embed player page')

        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
        wat_info_json = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/%s' % wat_id,
            video_id, u'Downloading Wat info')
        wat_info = json.loads(wat_info_json)['media']

        return self.url_result(wat_info['url'], 'Wat')
|
||||
@@ -13,7 +13,7 @@ class TumblrIE(InfoExtractor):
|
||||
u'file': u'53364321212.mp4',
|
||||
u'md5': u'0716d3dd51baf68a28b40fdf1251494e',
|
||||
u'info_dict': {
|
||||
u"title": u"Rafael Lemos | Tumblr"
|
||||
u"title": u"Rafael Lemos"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ class TumblrIE(InfoExtractor):
|
||||
|
||||
# The only place where you can get a title, it's not complete,
|
||||
# but searching in other places doesn't work for all videos
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
|
||||
webpage, u'title', flags=re.DOTALL)
|
||||
|
||||
return [{'id': video_id,
|
||||
|
||||
41
youtube_dl/extractor/tutv.py
Normal file
41
youtube_dl/extractor/tutv.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
class TutvIE(InfoExtractor):
    """Information extractor for tu.tv video pages."""

    _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
    _TEST = {
        u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
        u'file': u'2742556.flv',
        u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
        u'info_dict': {
            u"title": u"Noah en pabellon cuahutemoc"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        The page is scraped for the og:title and the numeric 'codVideo' id;
        that id is then used to fetch a query-string response whose 'kpt'
        field holds the base64-encoded media URL. The numeric id is reported
        as the video's 'id'.
        """
        slug = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, slug)
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.*?)">', webpage, u'title')
        internal_id = self._search_regex(
            r'codVideo=([0-9]+)', webpage, u'internal video ID')

        data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
        raw_data = self._download_webpage(
            data_url, slug, note=u'Downloading video info')
        fields = compat_parse_qs(raw_data)

        encoded_url = fields['kpt'][0]
        video_url = base64.b64decode(encoded_url).decode('utf-8')
        # Extension: text after the last dot of the URL with its query removed
        url_without_query = video_url.partition(u'?')[0]
        ext = url_without_query.rpartition(u'.')[2]

        return [{
            'id': internal_id,
            'url': video_url,
            'ext': ext,
            'title': title,
        }]
|
||||
@@ -16,7 +16,7 @@ class VimeoIE(InfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||
IE_NAME = u'vimeo'
|
||||
_TEST = {
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
|
||||
84
youtube_dl/extractor/wat.py
Normal file
84
youtube_dl/extractor/wat.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
    """Information extractor for wat.tv video pages."""

    # Literal dots are escaped so that '.' matches only an actual dot and not
    # any character.
    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        u'file': u'10631273.mp4',
        u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
        u'info_dict': {
            u'title': u'World War Z - Philadelphia VOST',
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
        }
    }

    def download_video_info(self, real_id):
        """Download the contentv3 JSON for real_id and return its 'media' entry."""
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
        info = json.loads(info)
        return info['media']

    def _real_extract(self, url):
        """Extract a wat.tv video, or a playlist of its parts.

        If the id derived from the first chapter differs from the page's real
        id, the video is treated as multipart and a playlist result with one
        url_result per chapter is returned. Otherwise a single info dict is
        returned, with the media URL read from the html5 player page.
        """
        def real_id_for_chapter(chapter):
            # The chapter's id is the part of 'tc_start' before the first hyphen
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            # Yes, when each of these chapter URLs is later processed by
            # WatIE, it will download the info again
            chapter_urls = [
                self.download_video_info(real_id_for_chapter(chapter))['url']
                for chapter in chapters
            ]
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
        player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
                                                     'html5': '1'})
        player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
                                             real_id, u'Downloading player info')
        player = json.loads(player_info)['player']
        html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
                                               'html5 player')
        player_webpage = self._download_webpage(html5_player, real_id,
                                                u'Downloading player webpage')

        video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
                                       'video url')
        info = {'id': real_id,
                'url': video_url,
                'ext': 'mp4',
                'title': first_chapter['title'],
                'thumbnail': first_chapter['preview'],
                'description': first_chapter['description'],
                'view_count': video_info['views'],
                }
        # The upload date is only set when the chapter carries 'date_diffusion'
        if 'date_diffusion' in first_chapter:
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])

        return info
|
||||
@@ -168,7 +168,7 @@ class YoutubeIE(InfoExtractor):
|
||||
self.to_screen(u'RTMP download detected')
|
||||
|
||||
def _decrypt_signature(self, s):
|
||||
"""Decrypt the key"""
|
||||
"""Turn the encrypted s field into a working signature"""
|
||||
|
||||
if len(s) == 88:
|
||||
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
||||
|
||||
@@ -474,7 +474,7 @@ class ExtractorError(Exception):
|
||||
""" tb, if given, is the original traceback (so that it can be printed out). """
|
||||
|
||||
if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
msg = msg + u'; please report this issue on http://yt-dl.org/bug'
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
|
||||
super(ExtractorError, self).__init__(msg)
|
||||
|
||||
self.traceback = tb
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.06.34.2'
|
||||
__version__ = '2013.06.34.4'
|
||||
|
||||
Reference in New Issue
Block a user