Compare commits

...

20 Commits

Author SHA1 Message Date
Philipp Hagemeister
510243ba58 release 2014.03.24.2 2014-03-24 15:00:47 +01:00
Sergey M․
b540697a8a [veoh] Improve extraction, fix youtube extraction (Closes #2616) 2014-03-24 20:53:03 +07:00
Philipp Hagemeister
0d3641e589 [cinemassacre] Fix #2815 2014-03-24 13:43:13 +01:00
Philipp Hagemeister
72546c831e Merge pull request #2553 from anisse/master
Add an option to specify custom HTTP headers
2014-03-24 10:42:58 +01:00
Philipp Hagemeister
d26db9269d release 2014.03.24.1 2014-03-24 10:25:58 +01:00
Philipp Hagemeister
4c0941853a [devscripts/release] Check version number 2014-03-24 10:25:49 +01:00
Philipp Hagemeister
c11726364e release 2014.03.24 2014-03-24 10:17:35 +01:00
Philipp Hagemeister
c577d735c6 release 2013.03.24.2 2014-03-24 02:24:31 +01:00
Philipp Hagemeister
9f0375f61a release 2013.03.24.1 2014-03-24 02:22:12 +01:00
Philipp Hagemeister
5e114e4bfe [soundcloud] Always add streaming formats 2014-03-24 02:21:17 +01:00
Philipp Hagemeister
83622b6d2f [soundcloud] Simplify string literals 2014-03-24 02:15:31 +01:00
Philipp Hagemeister
3d87426c2d release 2013.03.24 2014-03-24 01:42:14 +01:00
Philipp Hagemeister
ce328530a9 Merge remote-tracking branch 'origin/master' 2014-03-24 01:42:11 +01:00
Philipp Hagemeister
f70daac108 [RTS] Add extractor (Fixes #2608) 2014-03-24 01:41:14 +01:00
Philipp Hagemeister
912b38b428 [instagram] Fix info_dict key name 2014-03-24 01:40:09 +01:00
Philipp Hagemeister
6e25c58ed7 Merge pull request #2567 from jaimeMF/sphinx-docs
Add initial sphinx docs
2014-03-24 00:50:32 +01:00
Philipp Hagemeister
51fb2e98d2 [radiofrance] Modernize 2014-03-23 17:43:33 +01:00
Philipp Hagemeister
38d63d846e [extractor/common] Clarify preference key in formats 2014-03-23 17:41:43 +01:00
Anisse Astier
410afb2003 Add an option to specify custom HTTP headers 2014-03-17 16:40:41 +01:00
Jaime Marquínez Ferrándiz
685052fc7b Add initial sphinx docs
With an initial guide for using youtube_dl from python programs.
2014-03-15 19:08:09 +01:00
21 changed files with 637 additions and 104 deletions

View File

@@ -3,3 +3,5 @@ include test/*.py
include test/*.json
include youtube-dl.bash-completion
include youtube-dl.1
recursive-include docs *
prune docs/_build

View File

@@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like.
--user-agent UA specify a custom user agent
--referer REF specify a custom referer, use if the video
access is restricted to one domain
--add-header FIELD:VALUE specify a custom HTTP header and its value,
separated by a colon ':'. You can use this
option multiple times
--list-extractors List all supported extractors and the URLs
they would handle
--extractor-descriptions Output descriptions of all supported

View File

@@ -22,6 +22,12 @@ fi
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
version="$1"
major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
echo "$version does not start with today's date!"
exit 1
fi
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
useless_files=$(find youtube_dl -type f -not -name '*.py')

1
docs/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
_build/

177
docs/Makefile Normal file
View File

@@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

71
docs/conf.py Normal file
View File

@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
#
# youtube-dl documentation build configuration file, created by
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# Allows to import youtube_dl
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# -- General configuration ------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'youtube-dl'
copyright = u'2014, Ricardo Garcia Gonzalez'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
import youtube_dl
version = youtube_dl.__version__
# The full version, including alpha/beta/rc tags.
release = version
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Output file base name for HTML help builder.
htmlhelp_basename = 'youtube-dldoc'

23
docs/index.rst Normal file
View File

@@ -0,0 +1,23 @@
Welcome to youtube-dl's documentation!
======================================
*youtube-dl* is a command-line program to download videos from YouTube.com and more sites.
It can also be used in Python code.
Developer guide
---------------
This section contains information for using *youtube-dl* from Python programs.
.. toctree::
:maxdepth: 2
module_guide
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

67
docs/module_guide.rst Normal file
View File

@@ -0,0 +1,67 @@
Using the ``youtube_dl`` module
===============================
When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
.. code-block:: python
>>> from youtube_dl import YoutubeDL
>>> ydl = YoutubeDL()
>>> ydl.add_default_info_extractors()
Extracting video information
----------------------------
You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary:
.. code-block:: python
>>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
[youtube] Setting language
[youtube] BaW_jenozKc: Downloading webpage
[youtube] BaW_jenozKc: Downloading video info webpage
[youtube] BaW_jenozKc: Extracting video information
>>> info['title']
'youtube-dl test video "\'/\\ä↭𝕐'
>>> info['height'], info['width']
(720, 1280)
If you want to download or play the video you can get its url:
.. code-block:: python
>>> info['url']
'https://...'
Extracting playlist information
-------------------------------
The playlist information is extracted in a similar way, but the dictionary is a bit different:
.. code-block:: python
>>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
[TED] open_source_open_world: Downloading playlist webpage
...
>>> playlist['title']
'Open-source, open world'
You can access the videos in the playlist with the ``entries`` field:
.. code-block:: python
>>> for video in playlist['entries']:
... print('Video #%d: %s' % (video['playlist_index'], video['title']))
Video #1: How Arduino is open-sourcing imagination
Video #2: The year open data went worldwide
Video #3: Massive-scale online collaboration
Video #4: The art of asking
Video #5: How cognitive surplus will change the world
Video #6: The birth of Wikipedia
Video #7: Coding a better government
Video #8: The era of open innovation
Video #9: The currency of the new economy is trust

View File

@@ -309,6 +309,8 @@ class TestPlaylists(unittest.TestCase):
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
}
expect_info_dict(self, EXPECTED, test_video)

View File

@@ -35,6 +35,7 @@ from youtube_dl.utils import (
url_basename,
urlencode_postdata,
xpath_with_ns,
parse_iso8601,
)
if sys.version_info < (3, 0):
@@ -266,5 +267,10 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
if __name__ == '__main__':
unittest.main()

View File

@@ -227,6 +227,9 @@ def parseOpts(overrideArguments=None):
general.add_option('--referer',
dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
metavar='REF', default=None)
general.add_option('--add-header',
dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
metavar='FIELD:VALUE')
general.add_option('--list-extractors',
action='store_true', dest='list_extractors',
help='List all supported extractors and the URLs they would handle', default=False)
@@ -556,6 +559,16 @@ def _real_main(argv=None):
if opts.referer is not None:
std_headers['Referer'] = opts.referer
# Custom HTTP headers
if opts.headers is not None:
for h in opts.headers:
if h.find(':', 1) < 0:
parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
key, value = h.split(':', 2)
if opts.verbose:
write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
std_headers[key] = value
# Dump user agent
if opts.dump_user_agent:
compat_print(std_headers['User-Agent'])

View File

@@ -195,6 +195,7 @@ from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rutube import (
RutubeIE,
RutubeChannelIE,

View File

@@ -9,12 +9,12 @@ from ..utils import (
class CinemassacreIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'file': '19911.mp4',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'md5': '782f8504ca95a0eba8fc9177c373eec7',
'info_dict': {
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
@@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor):
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'file': '521be8ef82b16.mp4',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
'info_dict': {
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
@@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, None) # Don't know video id yet
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj:
@@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor):
playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id')
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
webpage, 'title')
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
if len(video_description) == 0:
video_description = None
playerdata = self._download_webpage(playerdata_url, video_id)
sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
hd_url = self._html_search_regex(
r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
default=None)
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
formats = [
{
formats = [{
'url': sd_url,
'ext': 'mp4',
'format': 'sd',
'format_id': 'sd',
},
{
'quality': 1,
}]
if hd_url:
formats.append({
'url': hd_url,
'ext': 'mp4',
'format': 'hd',
'format_id': 'hd',
},
]
'quality': 2,
})
self._sort_formats(formats)
return {
'id': video_id,

View File

@@ -74,7 +74,7 @@ class InfoExtractor(object):
"http", "https", "rtsp", "rtmp", "m3u8" or so.
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field.
by this field, regardless of all other values.
-1 for default (order by other properties),
-2 or smaller for less than default.
* quality Order number of the video quality of this

View File

@@ -89,7 +89,7 @@ class InstagramUserIE(InfoExtractor):
'uploader': user.get('full_name'),
'uploader_id': user.get('username'),
'like_count': like_count,
'upload_timestamp': int_or_none(it.get('created_time')),
'timestamp': int_or_none(it.get('created_time')),
})
if not page['items']:

View File

@@ -1,4 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -6,16 +8,17 @@ from .common import InfoExtractor
class RadioFranceIE(InfoExtractor):
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = u'radiofrance'
IE_NAME = 'radiofrance'
_TEST = {
u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
u'file': u'one-one.ogg',
u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
u'info_dict': {
u"title": u"One to one",
u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
u"uploader": u"Thomas Hercouët",
'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
'info_dict': {
'id': 'one-one',
'ext': 'ogg',
"title": "One to one",
"description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
"uploader": "Thomas Hercouët",
},
}
@@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
video_id = m.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
webpage, u'description', fatal=False)
webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
webpage, u'uploader', fatal=False)
webpage, 'uploader', fatal=False)
formats_str = self._html_search_regex(
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
webpage, u'audio URLs')
webpage, 'audio URLs')
formats = [
{
'format_id': fm[0],
'url': fm[1],
'vcodec': 'none',
'preference': i,
}
for fm in
re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
for i, fm in
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
]
# No sorting, we don't know any more about these formats
self._sort_formats(formats)
return {
'id': video_id,

View File

@@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
)
class RTSIE(InfoExtractor):
IE_DESC = 'RTS.ch'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
_TEST = {
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': {
'id': '3449373',
'ext': 'mp4',
'duration': 1488,
'title': 'Les Enfants Terribles',
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
},
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
all_info = self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
info = all_info['video']['JSONinfo']
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
duration = parse_duration(info.get('duration'))
thumbnail = unescapeHTML(info.get('preview_image_url'))
formats = [{
'format_id': fid,
'url': furl,
'tbr': int_or_none(self._search_regex(
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
} for fid, furl in info['streams'].items()]
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'title': info['title'],
'description': info.get('intro'),
'duration': duration,
'uploader': info.get('programName'),
'timestamp': upload_timestamp,
}

View File

@@ -100,7 +100,7 @@ class SoundcloudIE(InfoExtractor):
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
self.to_screen('%s: Resolving id' % video_id)
@classmethod
def _resolv_url(cls, url):
@@ -124,18 +124,20 @@ class SoundcloudIE(InfoExtractor):
'description': info['description'],
'thumbnail': thumbnail,
}
formats = []
if info.get('downloadable', False):
# We can build a direct link to the song
format_url = (
'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
formats.append({
'format_id': 'download',
'ext': info.get('original_format', 'mp3'),
'url': format_url,
'vcodec': 'none',
}]
else:
'preference': 10,
})
# We have to retrieve the url
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
@@ -143,17 +145,16 @@ class SoundcloudIE(InfoExtractor):
streams_url,
track_id, 'Downloading track url')
formats = []
format_dict = json.loads(stream_json)
for key, stream_url in format_dict.items():
if key.startswith(u'http'):
if key.startswith('http'):
formats.append({
'format_id': key,
'ext': ext,
'url': stream_url,
'vcodec': 'none',
})
elif key.startswith(u'rtmp'):
elif key.startswith('rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
formats.append({
@@ -188,7 +189,7 @@ class SoundcloudIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id')
token = None
@@ -226,7 +227,7 @@ class SoundcloudSetIE(SoundcloudIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
raise ExtractorError('Invalid URL: %s' % url)
# extract uploader (which is in the url)
uploader = mobj.group(1)
@@ -243,7 +244,7 @@ class SoundcloudSetIE(SoundcloudIE):
info = json.loads(info_json)
if 'errors' in info:
for err in info['errors']:
self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
return
self.report_extraction(full_title)

View File

@@ -4,26 +4,99 @@ import re
import json
from .common import InfoExtractor
from ..utils import compat_urllib_request
from ..utils import (
compat_urllib_request,
int_or_none,
)
class VeohIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
_VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
_TEST = {
_TESTS = [
{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'file': '56314296.mp4',
'md5': '620e68e6a3cff80086df3348426c9ca3',
'info_dict': {
'id': '56314296',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'uploader': 'LUMOback',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
}
},
},
{
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': {
'id': '27701988',
'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news',
'duration': 123,
},
},
{
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'note': 'Embedded ooyala video',
'info_dict': {
'id': '69525809',
'ext': 'mp4',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
},
]
def _extract_formats(self, source):
formats = []
link = source.get('aowPermalink')
if link:
formats.append({
'url': link,
'ext': 'mp4',
'format_id': 'aow',
})
link = source.get('fullPreviewHashLowPath')
if link:
formats.append({
'url': link,
'format_id': 'low',
})
link = source.get('fullPreviewHashHighPath')
if link:
formats.append({
'url': link,
'format_id': 'high',
})
return formats
def _extract_video(self, source):
return {
'id': source.get('videoId'),
'title': source.get('title'),
'description': source.get('description'),
'thumbnail': source.get('highResImage') or source.get('medResImage'),
'uploader': source.get('username'),
'duration': int_or_none(source.get('length')),
'view_count': int_or_none(source.get('views')),
'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0,
'formats': self._extract_formats(source),
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if video_id.startswith('v'):
rsp = self._download_xml(
r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
if rsp.get('stat') == 'ok':
return self._extract_video(rsp.find('./videoList/video'))
webpage = self._download_webpage(url, video_id)
age_limit = 0
if 'class="adultwarning-container"' in webpage:
@@ -33,24 +106,16 @@ class VeohIE(InfoExtractor):
request.add_header('Cookie', 'confirmedAdult=true')
webpage = self._download_webpage(request, video_id)
m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
if m_youtube is not None:
youtube_id = m_youtube.group(1)
self.to_screen('%s: detected Youtube video.' % video_id)
return self.url_result(youtube_id, 'Youtube')
self.report_extraction(video_id)
info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
info = json.loads(info)
video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
info = json.loads(
self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))
return {
'id': info['videoId'],
'title': info['title'],
'url': video_url,
'uploader': info['username'],
'thumbnail': info.get('highResImage') or info.get('medResImage'),
'description': info['description'],
'view_count': info['views'],
'age_limit': age_limit,
}
video = self._extract_video(info)
video['age_limit'] = age_limit
return video

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import calendar
import contextlib
import ctypes
import datetime
@@ -501,13 +502,13 @@ def orderedSet(iterable):
res.append(el)
return res
def unescapeHTML(s):
"""
@param s a string
"""
assert type(s) == type(u'')
result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
def unescapeHTML(s):
if s is None:
return None
assert type(s) == compat_str
result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
return result
@@ -761,6 +762,31 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_response = http_response
def parse_iso8601(date_str):
""" Return a UNIX timestamp from the given date """
if date_str is None:
return None
m = re.search(
r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group(0))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
return calendar.timegm(dt.timetuple())
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.23'
__version__ = '2014.03.24.2'