diff -Nru youtube-dl-2012.09.27/bin/youtube-dl youtube-dl-2014.02.17/bin/youtube-dl
--- youtube-dl-2012.09.27/bin/youtube-dl	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/bin/youtube-dl	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import youtube_dl
+
+if __name__ == '__main__':
+    youtube_dl.main()
diff -Nru youtube-dl-2012.09.27/build_exe.py youtube-dl-2014.02.17/build_exe.py
--- youtube-dl-2012.09.27/build_exe.py	2012-09-27 09:25:46.000000000 +0000
+++ youtube-dl-2014.02.17/build_exe.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,48 +0,0 @@
-from distutils.core import setup
-import py2exe
-import sys, os
-
-"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
-
-# If run without args, build executables
-if len(sys.argv) == 1:
-    sys.argv.append("py2exe")
-
-# os.chdir(os.path.dirname(os.path.abspath(sys.argv[0]))) # conflict with wine-py2exe.sh
-sys.path.append('./youtube_dl')
-
-options = {
-    "bundle_files": 1,
-    "compressed": 1,
-    "optimize": 2,
-    "dist_dir": '.',
-    "dll_excludes": ['w9xpopen.exe']
-}
-
-console = [{
-    "script":"./youtube_dl/__main__.py",
-    "dest_base": "youtube-dl",
-}]
-
-init_file = open('./youtube_dl/__init__.py')
-for line in init_file.readlines():
-    if line.startswith('__version__'):
-        version = line[11:].strip(" ='\n")
-        break
-else:
-    version = ''
-
-setup(name='youtube-dl',
-      version=version,
-      description='Small command-line program to download videos from YouTube.com and other video sites',
-      url='https://github.com/rg3/youtube-dl',
-      packages=['youtube_dl'],
-
-      console = console,
-      options = {"py2exe": options},
-      zipfile = None,
-)
-
-import shutil
-shutil.rmtree("build")
-
diff -Nru youtube-dl-2012.09.27/CHANGELOG youtube-dl-2014.02.17/CHANGELOG
--- youtube-dl-2012.09.27/CHANGELOG	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/CHANGELOG	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,14 @@
+2013.01.02  Codename: GIULIA
+
+    * Add support for ComedyCentral clips
+    * Corrected Vimeo description fetching
+    * Added the --no-post-overwrites argument
+    * --verbose offers more environment info
+    * New info_dict field: uploader_id
+    * New updates system, with signature checking
+    * New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream
+    * Fixed IEs: BlipTv
+    * Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ
+    * Simplified IEs and test code
+    * Various (Python 3 and other) fixes
+    * Revamped and expanded tests
diff -Nru youtube-dl-2012.09.27/debian/changelog youtube-dl-2014.02.17/debian/changelog
--- youtube-dl-2012.09.27/debian/changelog	2012-10-10 21:23:23.000000000 +0000
+++ youtube-dl-2014.02.17/debian/changelog	2014-02-20 02:45:15.000000000 +0000
@@ -1,8 +1,333 @@
-youtube-dl (2012.09.27-1~ppa1) precise; urgency=low
+youtube-dl (2014.02.17-1~ppa1) precise; urgency=low
 
   * Backport from Debian unstable
 
- -- Francois Marier  Thu, 11 Oct 2012 10:22:56 +1300
+ -- Francois Marier  Thu, 20 Feb 2014 15:44:57 +1300
+
+youtube-dl (2014.02.17-1) unstable; urgency=medium
+
+  * The "Way too many changes release".
+  * Imported Upstream version 2013.12.23
+  * Imported Upstream version 2014.01.17.2
+  * Imported Upstream version 2014.02.17.
+    + There are too many goodies that this new release brings us,
+      including that it is possible to combine/merge/multiplex audio and
+      video formats that Youtube now offers separately (See the previous
+      notes about Youtube using DASH for video and audio).
+      .
+      Now, if you want a 480p video in H.264 format, High profile, with
+      128kbps AAC audio (this used to be Youtube's format 35), you can
+      specify format `-f 135+140` on the command-line, and so on. Only your
+      imagination is the limit.
+    + Fixes "using --list-subs triggers downloading video from vimeo".
+      Closes: #734647.
+  * debian/NEWS:
+    + Write news about new muxing feature of youtube-dl.
+  * debian/control:
+    + Massive update of supported sites in long description.
+    + Package complies with policy 3.9.5.
+
+ -- Rogério Brito  Mon, 17 Feb 2014 19:41:53 -0300
+
+youtube-dl (2013.12.04-1) unstable; urgency=low
+
+  * The "I still have a flu (but am getting better) release".
+  * Imported Upstream version 2013.12.04.
+    + Fixes "more youtube encrypted signature problems". Closes: #730556.
+    + Fixes "collegehumor.com - unicode encode error". Closes: #729730.
+    + Fixes listing 'youtube:search' twice in extractor list.
+      Thanks Jaime Marquínez Ferrándiz for the fix. Closes: #730730.
+  * debian/control: Update list of extractors.
+
+ -- Rogério Brito  Thu, 05 Dec 2013 02:24:18 -0200
+
+youtube-dl (2013.11.11-2) unstable; urgency=low
+
+  * debian/control: Add dependency on python-pkg-resources.
+    Closes: #729416. Thanks Josh Triplett.
+
+ -- Rogério Brito  Tue, 12 Nov 2013 16:34:16 -0200
+
+youtube-dl (2013.11.11-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.11.11. Highlights:
+    + Add support for many sites (see long description of the package).
+    + Fix support for many sites (Closes: #728424).
+    + Improve support for many sites (including CNN, Youtube).
+    + Improve support for download of subtitles.
+    + Allow downloading tracks marked as not 'streamable' via rtmpdump in
+      soundcloud.
+    + Avoid reencoding videos that are downloaded via HTTP Live Streaming
+      (Apple's version of DASH). These videos have to be downloaded with
+      ffmpeg (or avconv), as youtube-dl doesn't have a downloader for that
+      particular protocol.
+      .
+      See https://en.wikipedia.org/wiki/HTTP_Live_Streaming for more
+      information.
+  * debian/control: Update list of extractors.
+
+ -- Rogério Brito  Mon, 11 Nov 2013 23:06:01 -0200
+
+youtube-dl (2013.10.23-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.10.23.
+    + Fix time display for > 60 minutes. Closes: #724834.
+    + All adult-related sites should respect the age being set via the
+      `--age-limit` option (which can be put in a global configuration file
+      under the /etc directory), thus making youtube-dl safe for Debian Edu
+      and Debian Jr. Closes: #715407.
+    + Document that the output template accepts specification of
+      downloaded formats (template: %(format)s). In particular, this:
+      - Avoids resuming downloads with different formats. Closes: #689294.
+      - Allows downloading all available formats. Closes: #643831.
+  * debian/control:
+    + Update long description with list of supported sites.
+  * debian/NEWS:
+    + Update with notes about split audio/video downloads.
+
+ -- Rogério Brito  Fri, 25 Oct 2013 01:33:56 -0200
+
+youtube-dl (2013.10.04-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.10.04. Closes: #723592.
+
+ -- Rogério Brito  Fri, 04 Oct 2013 09:44:46 -0300
+
+youtube-dl (2013.10.01-1) unstable; urgency=low
+
+  * The "In a hurry release".
+  * Imported Upstream version 2013.10.01.
+    + Closes: #724321, #722343, #722298.
+
+ -- Rogério Brito  Thu, 03 Oct 2013 01:28:58 -0300
+
+youtube-dl (2013.08.29-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.29. Highlights:
+    + Fix subtitle downloading. (Closes: #721257)
+    + Add/improve support for cryptographic signatures of lengths 80, 82,
+      84, 86, 88 on youtube.
+    + Add/improve support for the following sites:
+      - Youtube: Add support for DASH videos. See:
+        https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+      - Slashdot: tv.slashdot.org
+      - News sites: CNN, NBC news, PBS.
+      - MIT sites: video.mit.edu, techtv.mit.edu
+      - Other sites: Youporn, hark.com, AddAnime, RTLnow, jeuxvideo.com,
+        VOXnow, collegehumor, the generic extractor, Vimeo, funnyordie,
+        statigram, utv.unistra.fr, canalc2.tv, XHamster, 220.ro,
+        trilulilu.ro, canalplus, appletrailers, addanime, orf.at,
+        kankan.com.
+    + Allow embedding of subtitles in mp4 videos.
+    + Allow the user to specify languages for subtitles.
+  * debian/changelog:
+    + Fix long line in the previous uploaded entry, to please lintian and
+      other tools.
+  * debian/control:
+    + Remove upper limit on the python version that we support.
+
+ -- Rogério Brito  Thu, 29 Aug 2013 21:36:16 -0300
+
+youtube-dl (2013.08.17-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.17. Highlights:
+    + Add/improve support for cryptographic signatures of lengths 83, 85,
+      86, 87, 89. (Closes: #719309).
+    + Fixing the signature of the lengths listed above makes it possible
+      to download many music videos (including those from VEVO). For
+      those (like me) that don't know what VEVO is/was, here is a snippet
+      from Wikipedia:
+
+      "Vevo, LLC (stylized vevo) is a joint venture music video website
+      operated by Sony Music Entertainment, Universal Music Group, Google
+      and Abu Dhabi Media with EMI licensing its content to the group
+      without taking an ownership stake."
+
+ -- Rogério Brito  Sun, 18 Aug 2013 08:03:58 -0300
+
+youtube-dl (2013.08.08-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.02
+  * Imported Upstream version 2013.08.08. Highlights:
+    + Improve GenericIE for compatibility with more sites.
+    + Youtube:
+      - Add/improve support for cryptographic signatures of lengths 79,
+        81, 83, 84, 85, 86, 87, 90, 92. (Closes: #718391, #717537, #716987)
+      - Support signatures with more than 2 parts.
+      - Add support for downloading recommended videos (via :ytrec)
+      - Support downloading videos from the user's "Watch Later" playlist.
+      - Add better support for "age protected videos".
+      - Add explicit support for 3D videos.
+    + Add support for:
+      - IGNE
+      - criterion.com
+      - Canalplus
+      - Livestream
+      - freesound.org
+      - thisav
+      - CondeNast
+      - ex.fm
+      - video.sina.com.cn
+      - 56.com
+      - Weibo
+      - roxwell.com
+      - kankan.com
+      - Ooyala
+      - videofy.me
+      - muzu.tv
+    + Improve support for:
+      - ComedyCentral.
+      - Instagram.
+      - MTV.
+      - Metacafe: support AnyClip videos.
+      - Soundcloud.
+      - traileraddict.
+      - keek.
+      - videos from VEVO (they just keep changing stuff to prevent people
+        downloading music videos).
+    + Fix support for:
+      - CollegeHumor.
+      - Break
+  * debian/control: Update long description with list of supported sites.
+
+ -- Rogério Brito  Wed, 07 Aug 2013 20:40:48 -0300
+
+youtube-dl (2013.07.10-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.07.10. (Closes: #716092)
+
+ -- Rogério Brito  Wed, 10 Jul 2013 18:53:05 -0300
+
+youtube-dl (2013.07.02-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.07.02. Highlights:
+    + Add support for hotnewhiphop.com.
+    + Add support for auengine.com.
+    + Add support for gamespot.com.
+    + Add support for RingTV.
+    + Add support for wat.tv.
+    + Add support for traileraddict.com.
+    + Add support for tu.tv.
+    + Add support for instagram.com.
+    + Improve support for encrypted signatures in Youtube.
+    + Improve support for videos from ArteTv.
+    + Minor improvements to the generic fallback information extractor.
+    + Change the default naming of videos to the template:
+      `%(title)s-%(id)s.%(ext)s`
+  * debian/control:
+    + Revise that package complies with standards version 3.9.4.
+      (No changes needed)
+
+ -- Rogério Brito  Thu, 04 Jul 2013 10:00:59 -0300
+
+youtube-dl (2013.06.34-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.06.26
+  * Imported Upstream version 2013.06.33. Highlights:
+    + VimeoIE: allow downloading password-protected videos.
+      (Closes: #523326)
+    + YoutubeIE: A lot of work for downloading VEVO videos.
+    + Add support for Statigr.am.
+    + Add support for break.com.
+    + Add support for tudou.com. (Closes: #657148)
+    + Add support for Jukebox.
+    + Add specific code for VEVO videos.
+    + Modularize the code so that each site has its code in a
+      respective file (usable as python modules, as, for instance,
+      `youtube_dl.extractor.youtube`).
+    + Modularize the code so that the downloading class is split in two:
+      - YoutubeDL is the class that coordinates everything.
+      - FileDownloader gets a filename and an info dict and downloads the
+        video.
+    + Fix downloading from Google+ videos (new URL format).
+  * Imported Upstream version 2013.06.34. Highlights:
+    + Add support for downloading automatic transcribed subtitles.
+    + Add support for downloading subtitles in WebVTT (vtt) format.
+    + Add support for Wimp.com.
+    + Add support for CSpan. (Closes: #659623)
+    + Improve support for Youtube's crypto signing fields.
+  * debian/control:
+    + Add mplayer{,2} as recommends, needed for mms:// or rtsp:// schemes.
+    + Make explicit version of Python that we require.
+  * debian/rules: Use python2 plugin with debhelper.
+  * debian/control:
+    + Make determination of python versions automatic.
+    + Fix syntax error in Depends: field.
+
+ -- Rogério Brito  Sun, 23 Jun 2013 20:10:54 -0300
+
+youtube-dl (2013.06.21-2) unstable; urgency=low
+
+  * debian/control:
+    + Add missing Build-Dependency on python-pkg-resources.
+      Thanks to Sven Joachim for the hint (Closes: #713835)
+  * debian/rules:
+    + Fix misplaced README.txt.gz.
+      Thanks to Sven Joachim for the patch (Closes: #713834)
+
+ -- Rogério Brito  Sun, 23 Jun 2013 18:59:25 -0300
+
+youtube-dl (2013.06.21-1) unstable; urgency=low
+
+  * New upstream version.
+    Closes: #711733, #697086, #697594, #645925, #683795, #659515.
+  * Imported Upstream version 2013.05.14
+  * debian/watch:
+    + Update to track github tags. Thanks to Bart Martens for the rewrite.
+  * debian/README.source:
+    + Update my own instructions.
+  * Imported Upstream version 2013.05.23
+  * debian/watch:
+    + Download version specific for distributors.
+  * debian/control:
+    + Bump build-dep on debhelper to >= 9.
+  * debian/compat:
+    + Use level 9 of debhelper.
+  * Imported Upstream version 2013.06.21
+  * debian/install:
+    + Remove, as we let setup.py install things. (Closes: #699043)
+  * debian/rules:
+    + Simplify rules file with use of buildsystem.
+  * debian/control:
+    + Remove B-D on pandoc.
+    + Remove B-D-I on perl.
+    + Remove obsolete DMUA flag.
+  * debian/rules:
+    + Move incorrectly (bash completion) installed file to its right place.
+    + Remove egg-info directory in the clean target.
+
+ -- Rogério Brito  Wed, 22 May 2013 21:01:15 -0300
+
+youtube-dl (2012.12.11-1) unstable; urgency=low
+
+  * debian/watch:
+    + Adapt to new upstream distribution scheme.
+  * Imported Upstream version 2012.12.11
+  * debian/rules:
+    + Disable new test suites. They depend on network access.
+    + Remove repackaging need: Upstream adopted a new strategy.
+    + Leave it as vanilla as possible.
+    + Simplify the build process by not calling setup.py.
+  * debian/{control,manpages,rules,youtube-dl.pod}:
+    + Use manpage from upstream.
+
+ -- Rogério Brito  Tue, 18 Dec 2012 09:45:19 -0200
+
+youtube-dl (2012.09.27+dfsg1-1) unstable; urgency=low
+
+  * Urgency set to high to fix an RC bug.
+  * Imported Upstream version 2012.09.27+dfsg1:
+    + Remove binaries without sources. Thanks to Ansgar Burchardt for
+      notifying me. (Closes: #689493)
+  * debian/copyright:
+    + Rewrite in Copyright Format 1.0 for readability.
+  * debian/rules:
+    + Add a `get-orig-source` target to remove binaries without sources.
+  * debian/clean:
+    + Remove `*.pyc` files due to new upstream changes.
+  * debian/watch:
+    + Adapt for mangled debian version.
+
+ -- Rogério Brito  Sat, 17 Nov 2012 15:01:05 -0200
 
 youtube-dl (2012.09.27-1) unstable; urgency=low
diff -Nru youtube-dl-2012.09.27/debian/clean youtube-dl-2014.02.17/debian/clean
--- youtube-dl-2012.09.27/debian/clean	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/clean	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1,2 @@
 debian/youtube-dl.1
+youtube_dl/*.pyc
diff -Nru youtube-dl-2012.09.27/debian/compat youtube-dl-2014.02.17/debian/compat
--- youtube-dl-2012.09.27/debian/compat	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/compat	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1 @@
-7
+9
diff -Nru youtube-dl-2012.09.27/debian/control youtube-dl-2014.02.17/debian/control
--- youtube-dl-2012.09.27/debian/control	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/control	2014-02-17 22:46:55.000000000 +0000
@@ -3,13 +3,12 @@
 Priority: extra
 Maintainer: Rogério Brito
 Build-Depends:
- debhelper (>= 7.0.50~)
+ debhelper (>= 9),
+ python-pkg-resources
 Build-Depends-Indep:
- perl,
- python,
- zip
-Standards-Version: 3.9.3
-DM-Upload-Allowed: yes
+ python
+X-Python-Version: >= 2.6
+Standards-Version: 3.9.5
 Homepage: http://rg3.github.com/youtube-dl/
 Vcs-Git: git://anonscm.debian.org/collab-maint/youtube-dl.git
 Vcs-Browser: http://anonscm.debian.org/gitweb/?p=collab-maint/youtube-dl.git
@@ -17,11 +16,13 @@
 Package: youtube-dl
 Architecture: all
 Depends:
- python (>= 2.6),
- ${misc:Depends}
+ python-pkg-resources,
+ ${misc:Depends},
+ ${python:Depends}
 Recommends:
  libav-tools | ffmpeg,
  libav-tools | ffmpeg (>= 4:0.6) | ffprobe,
+ mplayer2 | mplayer,
  rtmpdump
 Description: downloader of videos from YouTube and other sites
  youtube-dl is a small command-line program to download videos from
@@ -33,7 +34,62 @@
  determine the best (or worst) quality video to grab. It supports
  downloading entire playlists and all videos from a given user.
  .
- Currently supported sites are: CollegeHumor, Comedy Central, Dailymotion,
- Facebook, Metacafe, MyVideo, Photobucket, The Escapist, Vimeo, Yahoo!,
- YouTube, blip.tv, depositfiles.com, video.google.com, xvideos, Soundcloud,
- InfoQ, Mixcloud, OpenClassRoom.
+ Currently supported sites (or features of sites) are:
+ .
+ 1up.com, 220.ro, 3sat, 4tube, 56.com, 8tracks, 9gag,
+ AcademicEarth:Course, AddAnime, anitube.se, Aparat, AppleTrailers,
+ archive.org videos, ARD, arte.tv, arte.tv:+7, arte.tv:creative,
+ arte.tv:ddc, arte.tv:future, AUEngine, bambuser, bambuser:channel,
+ Bandcamp, Bandcamp:album, BBC iPlayer, blinkx, blip.tv:user, BlipTV,
+ Bloomberg, Break, Brightcove, canalc2.tv, canalplus.fr, CBS, Channel 9,
+ Chilloutzone, Cinemassacre, clipfish, cliphunter, Clipsyndicate,
+ cmt.com, CNN, CNNBlogs, CollegeHumor, ComedyCentral, The Daily Show /
+ Colbert Report, Condé Nast media group: GQ, Glamour, Vanity Fair,
+ Vogue, W Magazine, WIRED, Criterion, Crunchyroll, C-SPAN,
+ culturebox.francetvinfo.fr, d8.tv, dailymotion, dailymotion:playlist,
+ dailymotion:user, daum.net, defense.gouv.fr, DepositFiles, Discovery,
+ Dotsub, Dropbox, EbaumsWorld, eHow, eitb.tv, El País, Escapist,
+ EveryonesMixtape, ex.fm, ExtremeTube, facebook, faz.net,
+ fernsehkritik.tv, fernsehkritik.tv:postecke, Firstpost.com,
+ Видеоархив - Первый канал, Flickr, france2.fr:generation-quoi,
+ FranceInter, France 2, 3, 4, 5 and Ô, francetvinfo.fr, Freesound,
+ freespeech.org, FunnyOrDie, Gamekings, GameSpot, Gametrailers,
+ Generic downloader that works on some sites, Hark, helsinki.fi,
+ HotNewHipHop, Howcast, Huffington Post, Hypem, ign.com,
+ Internet Movie Database trailers, Internet Movie Database lists, Ina,
+ InfoQ, Instagram, InternetVideoArchive, IPrima, ivi.ru,
+ ivi.ru compilations, JadoreCettePub, JeuxVideo, jpopsuki.tv, Jukebox,
+ justin.tv, Kankan, keek, KeezMovies, KhanAcademy, KickStarter,
+ KontrTube.ru - Труба зовёт, la7.tv, LIFE | NEWS, LiveLeak, livestream,
+ livestream:original, lynda.com videos, lynda.com online courses, m6,
+ MacGameStore trailers, Malemotion, MDR, metacafe, Metacritic, mixcloud,
+ Mofosex, Mooshare.biz, MPORA, MTV, mtviggy.com, muzu.tv, MySpace,
+ MySpass, myvideo, Naver, NBA, NBCNews, NDR.de - Mediathek, NDTV,
+ Newgrounds, National Film Board of Canada, nhl.com, NHL videocenter
+ category, ニコニコ動画, Normalboots, Novamov, Nowness, NowVideo, Ooyala,
+ ORF, PBS, photobucket, Google Plus, pluzz.francetv.fr, podomatic,
+ PornHd, PornHub, Pornotube, Pyvideo, radiofrance, RBMARadio, RedTube,
+ RingTV, RottenTomatoes, Roxwel, RTLnow, Rutube videos, Rutube channels,
+ Rutube movies, Rutube person videos, Yahoo screen search (Example:
+ "yvsearchall:purple fish"), ServingSys, Sina, Slashdot, Slideshare,
+ Smotri.com, Smotri.com broadcasts, Smotri.com community videos,
+ Smotri.com user videos, Sohu, soundcloud, soundcloud:set,
+ soundcloud:user, southpark.de, southparkstudios.com, Space, Spankwire,
+ Spiegel, Spike, Stanford Open ClassRoom, Statigram, Steam,
+ streamcloud.eu, StreamCZ, Syfy, SztvHu, Teamcoco, TechTalks,
+ techtv.mit.edu, TED, TF1, ThePlatform, ThisAV, tinypic.com videos,
+ tou.tv, Trilulilu, Tube8, Tudou, Tumblr, Tutv, tvp.pl, Unistra,
+ ustream, ustream:channel, Vbox7, VeeHD, Veoh, Вести.Ru, Vevo, Vice,
+ Viddler, Google Video search (Example: "gvsearch5:falling cat"),
+ video.mit.edu, VideoDetective, videofy.me, VideoPremium, viki, vimeo,
+ vimeo:album, vimeo:channel, vimeo:group, Review pages on vimeo,
+ vimeo:user, Vine, vk.com, Vube.com, wat.tv, Weibo, Wimp, Wistia,
+ WorldStarHipHop, XHamster, XNXX, XTube, XVideos, Yahoo screen,
+ Yahoo screen, YouJizz, Youku, YouPorn, YouTube.com, YouTube.com
+ channels, YouTube.com favourite videos, "ytfav" keyword (requires
+ authentication), Youtube watch history, "ythistory" keyword (requires
+ authentication), YouTube.com playlists,
+ YouTube.com recommended videos, "ytrec" keyword (requires
+ authentication), YouTube.com searches (Example: "ytsearch10:running
+ tortoise"), YouTube.com searches, newest videos first (Example:
+ "ytsearchdateall:falling cat"), YouTube.com (multi-season) shows,
+ YouTube.com subscriptions feed, "ytsubs" keyword (requires
+ authentication), YouTube.com top lists, "yttoplist:{channel}:{list
+ title}" (Example: "yttoplist:music:Top Tracks"), YouTube.com user
+ videos (URL or "ytuser" keyword), Youtube watch later list,
+ "ytwatchlater" keyword (requires authentication), ZDF.
diff -Nru youtube-dl-2012.09.27/debian/copyright youtube-dl-2014.02.17/debian/copyright
--- youtube-dl-2012.09.27/debian/copyright	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/copyright	2014-02-17 21:15:54.000000000 +0000
@@ -1,30 +1,33 @@
-This package was debianized by Robert S. Edmonds on
-Sun, 3 Sep 2006 19:43:27 -0400.
-
-The current maintainer is Rogério Brito.
-
-It was downloaded from:
-
-http://bitbucket.org/rg3/youtube-dl/raw/2010.04.04/youtube-dl
-
-Authors:
-
- * Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor,
-   metacafe.com InfoExtractor and YouTube playlist InfoExtractor.
- * Danny Colligan: YouTube search InfoExtractor, ideas and patches.
- * Many other people contributing patches, code, ideas and kind messages.
-   Too many to be listed here. You know who you are. Thank you very much.
-
-Copyright © 2006-2010 Ricardo Garcia Gonzalez
-
-License:
-
- The program is in the Public Domain.
-
- The packaging is licensed under the GNU GPL License:
-
- Copyright © 2006, Robert S. Edmonds.
- Copyright © 2009-2012, Rogério Brito.
-
-For the text of the GPL License in a Debian system, please see
-`/usr/share/common-licenses/GPL-2'.
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: youtube-dl
+Upstream-Contact: https://github.com/rg3/youtube-dl
+Source: git://github.com/rg3/youtube-dl.git
+
+Files: *
+Copyright: Not applicable.
+License: Public-domain
+
+Files: debian/*
+Copyright: © 2006, Robert S. Edmonds.
+           © 2009-2012, Rogério Brito.
+License: GPL-2+
+
+License: Public-domain
+ youtube-dl is released into the public domain by the copyright holders.
+
+License: GPL-2+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
diff -Nru youtube-dl-2012.09.27/debian/install youtube-dl-2014.02.17/debian/install
--- youtube-dl-2012.09.27/debian/install	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/install	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-youtube-dl usr/bin
diff -Nru youtube-dl-2012.09.27/debian/manpages youtube-dl-2014.02.17/debian/manpages
--- youtube-dl-2012.09.27/debian/manpages	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/manpages	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1 @@
-debian/youtube-dl.1
+youtube-dl.1
diff -Nru youtube-dl-2012.09.27/debian/NEWS youtube-dl-2014.02.17/debian/NEWS
--- youtube-dl-2012.09.27/debian/NEWS	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/NEWS	2014-02-17 22:44:52.000000000 +0000
@@ -1,3 +1,79 @@
+youtube-dl (2014.02.17-1) unstable; urgency=medium
+
+  There are too many goodies that this new release brings us,
+  including that it is possible to combine/merge/multiplex audio and
+  video formats that Youtube now offers separately (See the previous
+  notes about Youtube using DASH for video and audio).
+
+  Now, if you want a 480p video in H.264 format, High profile, with
+  128kbps AAC audio (this used to be Youtube's format 35), you can
+  specify format `-f 135+140` on the command-line, and so on. Only your
+  imagination is the limit.
+
+  See http://cynic.cc/blog/posts/2014-02-17-youtube-dl_news/ for more
+  verbose news.
+
+ -- Rogério Brito  Mon, 17 Feb 2014 18:24:20 -0300
+
+youtube-dl (2013.10.23-1) unstable; urgency=low
+
+  From http://cynic.cc/blog/posts/2013-10-23-assorted_news/:
+
+  [13]Some people may have noticed, others may not, but when downloading
+  videos from Youtube, they apparently are getting more aggressive with
+  the use of [14]Dynamic Adaptive Streaming over HTTP (also called DASH)
+  and, as a result, some (perhaps going to be all in the near future?) of
+  the videos may not be available in the resolution/formats that you used
+  to like (like me, with format number 35).
+
+  By the way, one thing that is interesting with youtube videos provided
+  via DASH is that they are available in different streams: one for the
+  video and another for the audio.
+
+  What does this mean in practical terms for users of youtube-dl? Well,
+  if you wanted to download videos in resolutions like the 480p (format
+  35) that I mentioned, then you will probably have to change your way of
+  doing things, until a more automated solution is in place.
+
+  You will have to download both the audio and the video and, then,
+  "combine" them (that is, multiplex them) to create one "normal" video
+  file with both the audio and the video. I usually do this via:
+
+      ffmpeg -i audio.m4a -i video.mp4 -vcodec copy -acodec copy combined.mp4
+
+  If you prefer having a Matroska container instead of an mp4 container
+  (which, BTW, results in smaller muxing overhead), then you can use the
+  command line:
+
+      mkvmerge -o combined.mkv audio.m4a video.mp4
+
+  Oh, those m4a and mp4 extensions are a new addition that [15]I just
+  sent upstream (in the past, both would have been named with an
+  extension of mp4).
+
+  As an aside, I like formats 135 for video and 140 for audio, for the
+  reasons that I mentioned in a comment on issue 1612:
+
+      Otherwise, to download 480p videos (which I do for lectures and so
+      on with other projects of mine, like edx-dl) I have to call
+      youtube-dl twice: once for format 135 and another for format 140,
+      since the old (?) format 35 files are much smaller than the lower
+      resolution 360p files (due to the former being encoded in High
+      profile vs. the latter being encoded in Constrained Baseline
+      profile).
+
+  While this is unfortunate for some, this is a good thing for others: I
+  once had a blind user of youtube-dl asking me if he could avoid
+  downloading the whole video just to extract the audio, so that he saves
+  on bandwidth. Well, now this is possible.
+
+
+  13. https://github.com/rg3/youtube-dl/issues/1612
+  14. https://en.wikipedia.org/wiki/Dynamic%20Adaptive%20Streaming%20over%20HTTP
+  15. https://github.com/rg3/youtube-dl/pull/1622
+
+ -- Rogério Brito  Fri, 25 Oct 2013 01:25:41 -0200
+
 youtube-dl (2010.07.22-1) unstable; urgency=medium
 
   The upstream author has removed support for the -b ("best format") and
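With the merge support introduced in 2014.02.17 (described in the changelog
and NEWS entries above), the two-step download-then-mux procedure collapses
into a single invocation. A hypothetical session, where the watch URL is
only a placeholder, would be:

    youtube-dl -f 135+140 'https://www.youtube.com/watch?v=PLACEHOLDER'

youtube-dl fetches the 480p H.264 video stream (format 135) and the 128kbps
AAC audio stream (format 140) and multiplexes them with ffmpeg (or avconv).
The audio-only use case mentioned in the 2013.10.23 entry needs no muxing
at all:

    youtube-dl -f 140 'https://www.youtube.com/watch?v=PLACEHOLDER'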
diff -Nru youtube-dl-2012.09.27/debian/README.source youtube-dl-2014.02.17/debian/README.source
--- youtube-dl-2012.09.27/debian/README.source	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/README.source	2014-02-17 21:15:54.000000000 +0000
@@ -5,7 +5,7 @@
 to, assuming that one already has the packages `devscripts`,
 `git-buildpackage`, and `pristine-tar` installed:
 
-    gbp-clone ssh://git.debian.org/git/collab-maint/youtube-dl.git
+    gbp-clone --all ssh://git.debian.org/git/collab-maint/youtube-dl.git
     cd youtube-dl
     git-import-orig --pristine-tar --uscan
     [ Make here some adaptations, like updating debian/patches, if needed ]
@@ -15,4 +15,4 @@
 deviations, say, in the release process of upstream releases (e.g., if
 upstream changes the location where the tarballs reside).
 
- -- Rogério Brito, Sat, 17 Mar 2012 12:45:03 -0300
+ -- Rogério Brito, Wed, 22 May 2013 20:58:07 -0300
diff -Nru youtube-dl-2012.09.27/debian/rules youtube-dl-2014.02.17/debian/rules
--- youtube-dl-2012.09.27/debian/rules	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/rules	2014-02-17 21:15:54.000000000 +0000
@@ -1,11 +1,16 @@
 #!/usr/bin/make -f
 
 %:
-	dh $@
+	dh $@ --with python2 --buildsystem=python_distutils
 
-override_dh_installman:
-	pod2man -r "youtube-dl" -c "User commands" debian/youtube-dl.pod > debian/youtube-dl.1
-	dh_installman
+override_dh_install:
+	mv debian/youtube-dl/usr/etc debian/youtube-dl/
+	dh_install
 
-override_dh_python2:
+override_dh_installdocs:
+	dh_installdocs
+	mv debian/youtube-dl/usr/share/doc/youtube_dl/* debian/youtube-dl/usr/share/doc/youtube-dl
+	rmdir debian/youtube-dl/usr/share/doc/youtube_dl
 
-override_dh_pysupport:
+override_dh_clean:
+	dh_clean
+	rm -rf youtube_dl.egg-info
diff -Nru youtube-dl-2012.09.27/debian/watch youtube-dl-2014.02.17/debian/watch
--- youtube-dl-2012.09.27/debian/watch	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/watch	2014-02-17 21:15:54.000000000 +0000
@@ -1,2 +1,3 @@
 version=3
-http://githubredir.debian.net/github/rg3/youtube-dl (?:.*/)?(\d+\.\d+\.\d+)\.tar\.gz
+opts=dversionmangle=s/\+dfsg\d*$// \
+http://youtube-dl.org/downloads/(\d.*)/youtube-dl-(:?.*)\.tar\.gz
diff -Nru youtube-dl-2012.09.27/debian/youtube-dl.pod youtube-dl-2014.02.17/debian/youtube-dl.pod
--- youtube-dl-2012.09.27/debian/youtube-dl.pod	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/youtube-dl.pod	1970-01-01 00:00:00.000000000 +0000
@@ -1,297 +0,0 @@
-# -*- pod -*-
-=head1 NAME
-
-youtube-dl - download videos from youtube.com or other video platforms
-
-=head1 SYNOPSIS
-
- youtube-dl [options] url ...
-
-=head1 DESCRIPTION
-
-youtube-dl is a script to download videos from youtube.com or any other of
-the supported video platforms.
-
-Currently supported sites are: CollegeHumor, Comedy Central, Dailymotion,
-Facebook, Metacafe, MyVideo, Photobucket, The Escapist, Vimeo, Yahoo!,
-YouTube, blip.tv, depositfiles.com, video.google.com, xvideos, Soundcloud,
-InfoQ, Mixcloud, OpenClassRoom.
-
-Once it is installed in your system, you should be able to call it from
-the command line. Usage instructions are easy. Use youtube-dl followed
-by a video URL or identifier. As an example, consider:
-
- youtube-dl "http://www.youtube.com/watch?v=foobar"
-
-The video will be saved to the file C<foobar.flv> in that example. Many
-YouTube.com videos are in Flash Video format and their extension would
-be C<flv>. Other videos are encoded in H.264 and these usually have the
-extension C<mp4>. In Linux and other unices, video players using a
-recent version of ffmpeg can play them. That includes MPlayer, VLC,
-xine, among others.
-
-=head1 OPTIONS
-
-B<youtube-dl> accepts options in the following categories
-
-=over
-
-=item -h, --help
-
-Print help text and exit.
-
-=item -v, --version
-
-Print program version and exit.
-
-=item -U, --update
-
-Update this program to the latest stable version.
-
-=item -i, --ignore-errors
-
-Ignore errors during download and continue processing.
-
-=item -r B<LIMIT>, --rate-limit=B<LIMIT>
-
-Limit the download speed to the specified maximum B<LIMIT> (e.g., 50k or 44.6m).
-
-=item -R B<RETRIES>, --retries=B<RETRIES>
-
-Number B<RETRIES> of retries for a given download (default is 10).
-
-=item --playlist-start=B<NUMBER>
-
-The number B<NUMBER> of the video in a playlist where we should start downloading
-(default is 1).
-
-=item --playlist-end=B<NUMBER>
-
-The number B<NUMBER> of the video in a playlist where we should stop downloading
-(default is -1, which stands for the last video in the playlist).
-
-=item --dump-user-agent
-
-Display how youtube-dl will identify itself (the User-Agent string) to the
-remote server.
-
-=item -u B<USERNAME>, --username=B<USERNAME>
-
-Specify the youtube account username B<USERNAME>. Some videos require an
-account to be downloaded, mostly because they're flagged as mature
-content.
-
-=item -p B<PASSWORD>, --password=B<PASSWORD>
-
-Like the username, specifies the account password to be B<PASSWORD>.
-
-=item -n, --netrc
-
-Get authentication data from the standard unix .netrc file in the user's
-home directory. The machine name for this usage is youtube.
-
-=item -f B<FORMAT>, --format=B<FORMAT>
-
-Specify the video format (quality) in which to download the video.
-
-For youtube.com, in particular, the meaning of the format codes is given as:
-
-=over
-
-=item WebM video at 480p: 43
-
-=item WebM video at 720p: 45
-
-=item H264 video in MP4 container at 480p: 18
-
-=item H264 video in MP4 container at 720p: 22
-
-=item H264 video in MP4 container at 1080p: 37
-
-=item H264 video in FLV container at 360p: 34
-
-=item H264 video in FLV container at 480p: 35
-
-=item H263 video at 240p: 5
-
-=item 3GP video: 17
-
-=back
-
-Note that not all videos are available in all formats and that other
-sites supported by B<youtube-dl> may have different conventions for
-their video formats.
-
-By default, youtube-dl will download the best available format; if you want
-to download the worst format to save bandwidth and time, use C<-f> I<worst>.
-
-=item --all-formats
-
-Downloads all formats for which a video may be available.
-
-=item --max-quality=B<QUALITY>
-
-Limit the maximum quality of the videos to download to B<QUALITY>.
-
-=item -q, --quiet
-
-Activates quiet mode, avoiding many messages being written to the
-terminal.
-
-=item -s, --simulate
-
-Simulate the operation, but do not download the video. Useful for
-testing.
-
-=item -g, --get-url
-
-Simulate the operation, like quiet mode, but show the URL that would be
-used to download the video. Can be used with other download tools like
-wget or aria2c.
-
-=item -e, --get-title
-
-Simulate the operation, like quiet mode, but show the title of the video
-that would be downloaded.
-
-=item --get-thumbnail
-
-Simulate the operation, like quiet mode, but print the URL of the video's
-thumbnail.
-
-=item --get-description
-
-Simulate the operation, like quiet mode, but print the description of the
-video.
-
-=item --get-filename
-
-Simulate the operation, like quiet mode, but print the output filename.
-
-=item --no-progress
-
-Do not print the progress bar during downloads.
-
-=item --console-title
-
-If possible, set the title of the console window with the progress of the
-download.
-
-=item -t, --title
-
-Use the title of the video in the file name used to download the video.
-
-=item -l, --literal
-
-Use the literal title of the video in file name used to download the
-video. Can contain "weird" characters that are not filtered like with
-the -t option.
-
-=item -A, --auto-number
-
-When downloading multiple videos from a playlist, automatically number them,
-in sequence, starting from 00000.
-
-=item -o B<TEMPLATE>, --output=B<TEMPLATE>
-
-Specify a template B<TEMPLATE> for the names of the files to be created when
-they are downloaded. The default filename is video_id.flv. But you can
-also use the video title in the filename with the C<-t> or C<--title>
-option, or preserve the literal title in the filename with the C<-l> or
-C<--literal> option.
-
-=item -a B<FILE>, --batch-file=B<FILE>
-
-Specify the name of a file containing URLs of videos to download from
-youtube in batch mode. The file must contain one URL per line.
-
-=item -w, --no-overwrites
-
-Do not overwrite already existing files.
-
-=item -c, --continue
-
-Resume partially downloaded files.
-
-=item --cookies=B<FILE>
-
-Store the received cookies to file B<FILE> (the "cookie jar").
-
-=item --no-part
-
-Do not append the I<.part> suffix to files that have not yet been completed.
-
-=item --no-mtime
-
-Do not use the I<Last-modified> header to set the file modification time.
-
-=item --extract-audio
-
-Create an audio-only file extracted from the video downloaded. Requires that
-ffmpeg and ffprobe be installed.
-
-=item --audio-format=B<FORMAT>
-
-Set the audio format to be used for the extraction. Possible values are
-I<best>, I<aac>, I<mp3>, with I<best> being the default.
-
-=back
-
-=head1 OUTPUT TEMPLATE
-
-The -o option allows users to indicate a template for the output file
-names. The basic usage is not to set any template arguments when
-downloading a single file, like in youtube-dl -o funny_video.flv
-"http://some/video". However, it may contain special sequences that will
-be replaced when downloading each video.
-
-The special sequences have the format C<%(NAME)s>. To clarify, that's a
-percent symbol followed by a name in parentheses, followed by a
-lowercase S. Allowed names are:
-
-=over
-
-=item id
-
-The sequence will be replaced by the video identifier.
-
-=item url
-
-The sequence will be replaced by the video URL.
-
-=item uploader
-
-The sequence will be replaced by the nickname of the person who uploaded
-the video.
-
-=item title
-
-The sequence will be replaced by the literal video title.
-
-=item stitle
-
-The sequence will be replaced by a simplified video title, restricted to
-alphanumeric characters and dashes.
-
-=item ext
-
-The sequence will be replaced by the appropriate extension (like C<flv>
-or C<mp4>).
-
-=item epoch
-
-The sequence will be replaced by the Unix epoch when creating the file.
-
-=back
-
-As you may have guessed, the default template is C<%(id)s.%(ext)s>. When
-some command line options are used, it's replaced by other templates
-like C<%(title)s-%(id)s.%(ext)s>. You can specify your own.
-
-=head1 AUTHOR
-
-youtube-dl was written by Ricardo Garcia Gonzalez and many contributors
-from all around the internet. This manpage was written by Rogerio Brito.
-
-=cut
diff -Nru youtube-dl-2012.09.27/devscripts/bash-completion.in youtube-dl-2014.02.17/devscripts/bash-completion.in
--- youtube-dl-2012.09.27/devscripts/bash-completion.in	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/bash-completion.in	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,29 @@
+__youtube_dl()
+{
+    local cur prev opts fileopts diropts keywords
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
+    opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
+    diropts="--cache-dir"
+
+    if [[ ${prev} =~ ${fileopts} ]]; then
+        COMPREPLY=( $(compgen -f -- ${cur}) )
+        return 0
+    elif [[ ${prev} =~ ${diropts} ]]; then
+        COMPREPLY=( $(compgen -d -- ${cur}) )
+        return 0
+    fi
+
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
+        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+        return 0
+    fi
+}
+
+complete -F __youtube_dl youtube-dl
diff -Nru youtube-dl-2012.09.27/devscripts/bash-completion.py youtube-dl-2014.02.17/devscripts/bash-completion.py
--- youtube-dl-2012.09.27/devscripts/bash-completion.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/bash-completion.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
+import youtube_dl
+
+BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
+BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
+
+def build_completion(opt_parser):
+    opts_flag = []
+    for group in opt_parser.option_groups:
+        for option in group.option_list:
+            # for every long flag
+            opts_flag.append(option.get_opt_string())
+    with open(BASH_COMPLETION_TEMPLATE) as f:
+        template = f.read()
+    with open(BASH_COMPLETION_FILE, "w") as f:
+        # just using the special char
+        filled_template = template.replace("{{flags}}", " ".join(opts_flag))
+        f.write(filled_template)
+
+parser = youtube_dl.parseOpts()[0]
+build_completion(parser)
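The only templating step in bash-completion.py above is the literal
{{flags}} substitution into bash-completion.in. A reduced, self-contained
sketch of that mechanism follows; the option strings are stand-ins for the
real parseOpts() output, not part of the package:

    #!/usr/bin/env python
    # Illustrative reduction of devscripts/bash-completion.py: collect the
    # long option strings and splice them into the {{flags}} placeholder.
    template = 'opts="{{flags}}"'
    opts_flag = ['--help', '--version', '--update']  # stand-ins only
    print(template.replace('{{flags}}', ' '.join(opts_flag)))
    # prints: opts="--help --version --update"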
diff -Nru youtube-dl-2012.09.27/devscripts/buildserver.py youtube-dl-2014.02.17/devscripts/buildserver.py
--- youtube-dl-2012.09.27/devscripts/buildserver.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/buildserver.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import functools
+import sys
+import threading
+import traceback
+import os.path
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+    allow_reuse_address = True
+
+
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+    _fields_ = [
+        ('lpServiceName', LPTSTR),
+        ('lpServiceProc', START_CALLBACK)
+    ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+    ctypes.c_int,     # return
+    ctypes.c_int,     # dwControl
+    ctypes.c_int,     # dwEventType
+    ctypes.c_void_p,  # lpEventData,
+    ctypes.c_void_p,  # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+    ar = (c_type * len(py_array))()
+    ar[:] = py_array
+    return ar
+
+
+def win_OpenSCManager():
+    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+    if not res:
+        raise Exception('Opening service manager failed - '
+                        'are you running this as administrator?')
+    return res
+
+
+def win_install_service(service_name, cmdline):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.CreateServiceW(
+            manager, service_name, None,
+            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+            cmdline, None, None, None, None, None)
+        if not h:
+            raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+        advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.OpenServiceW(manager, service_name, DELETE)
+        if not h:
+            raise OSError('Could not find service %s: %s' % (
+                service_name, ctypes.FormatError()))
+
+        try:
+            if not advapi32.DeleteService(h):
+                raise OSError('Deletion failed: %s' % ctypes.FormatError())
+        finally:
+            advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+        f.write(msg + '\n')
+
+    event_log = advapi32.RegisterEventSourceW(None, service_name)
+    if not event_log:
+        raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+    try:
+        type_id = 0x0001 if is_error else 0x0004
+        event_id = 0xc0000000 if is_error else 0x40000000
+        lines = _ctypes_array(LPTSTR, [msg])
+
+        if not advapi32.ReportEventW(
+                event_log, type_id, 0, event_id, None, len(lines), 0,
+                lines, None):
+            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+    finally:
+        advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+    try:
+        raise ValueError('Handler called with args ' + repr(args))
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_set_status(handle, status_code):
+    svcStatus = SERVICE_STATUS()
+    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+    svcStatus.dwCurrentState = status_code
+    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+    svcStatus.dwServiceSpecificExitCode = 0
+
+    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+    try:
+        # args = [argv_raw[i].value for i in range(argc)]
+        stop_event = threading.Event()
+        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
+        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+        if not h:
+            raise OSError('Handler registration failed: %s' %
+                          ctypes.FormatError())
+
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_start(service_name, real_main):
+    try:
+        cb = START_CALLBACK(
+            functools.partial(win_service_main, service_name, real_main))
+        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+            SERVICE_TABLE_ENTRY(
+                service_name,
+                cb
+            ),
+            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+        ])
+
+        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--install',
+                        action='store_const', dest='action', const='install',
+                        help='Launch at Windows startup')
+    parser.add_argument('-u', '--uninstall',
+                        action='store_const', dest='action', const='uninstall',
+                        help='Remove Windows service')
+    parser.add_argument('-s', '--service',
+                        action='store_const', dest='action', const='service',
+                        help='Run as a Windows service')
+    parser.add_argument('-b', '--bind', metavar='',
+                        action='store', default='localhost:8142',
+                        help='Bind to host:port (default %default)')
+    options = parser.parse_args(args=args)
+
+    if options.action == 'install':
+        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+        win_install_service(SVCNAME, cmdline)
+        return
+
+    if options.action == 'uninstall':
+        win_uninstall_service(SVCNAME)
+        return
+
+    if options.action == 'service':
+        win_service_start(SVCNAME, main)
+        return
+
+    host, port_str = options.bind.split(':')
+    port = int(port_str)
+
+    print('Listening on %s:%d' % (host, port))
+    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+    thr = threading.Thread(target=srv.serve_forever)
+    thr.start()
+    input('Press ENTER to shut down')
+    srv.shutdown()
+    thr.join()
+
+
+def rmtree(path):
+    for name in os.listdir(path):
+        fname = os.path.join(path, name)
+        if os.path.isdir(fname):
+            rmtree(fname)
+        else:
+            os.chmod(fname, 0o666)
+            os.remove(fname)
+    os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+    def __init__(self, output, code=500):
+        self.output = output
+        self.code = code
+
+    def __str__(self):
+        return self.output
+
+
+class HTTPError(BuildError):
+    pass
+
+
+class PythonBuilder(object):
+    def __init__(self, **kwargs):
+        pythonVersion = kwargs.pop('python', '2.7')
+        try:
+            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+            try:
+                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+            finally:
+                _winreg.CloseKey(key)
+        except Exception:
+            raise BuildError('No such Python version: %s' % pythonVersion)
+
+        super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+    def __init__(self, **kwargs):
+        try:
+            self.user, self.repoName = kwargs['path'][:2]
+            self.rev = kwargs.pop('rev')
+        except ValueError:
+            raise BuildError('Invalid path')
+        except KeyError as e:
+            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.basePath = tempfile.mkdtemp(dir=path)
+        self.buildPath = os.path.join(self.basePath, 'build')
+
+        super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+    def build(self):
+        try:
+            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+    def __init__(self, **kwargs):
+        if self.repoName != 'youtube-dl':
+            raise BuildError('Invalid repository "%s"' % self.repoName)
+        if self.user not in self.authorizedUsers:
+            raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+        super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        try:
+            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+                                    cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+    def __init__(self, **kwargs):
+        self.handler = kwargs.pop('handler')
+        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+        if not self.srcPath.startswith(self.buildPath):
+            raise HTTPError(self.srcPath, 401)
+
+        super(DownloadBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        if not os.path.exists(self.srcPath):
+            raise HTTPError('No such file', 404)
+        if os.path.isdir(self.srcPath):
+            raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+        self.handler.send_response(200)
+        self.handler.send_header('Content-Type', 'application/octet-stream')
+        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+        self.handler.end_headers()
+
+        with open(self.srcPath, 'rb') as src:
+            shutil.copyfileobj(src, self.handler.wfile)
+
+        super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+    def build(self):
+        try:
+            rmtree(self.basePath)
+        except Exception as e:
+            print('WARNING deleting "%s": %s' % (self.basePath, e))
+
+        super(CleanupTempDir, self).build()
+
+
+class Null(object):
+    def __init__(self, **kwargs):
+        pass
+
+    def start(self):
+        pass
+
+    def close(self):
+        pass
+
+    def build(self):
+        pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+    pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+    actionDict = { 'build': Builder, 'download': Builder }  # They're the same, no more caching.
+
+    def do_GET(self):
+        path = urlparse.urlparse(self.path)
+        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+        action, _, path = path.path.strip('/').partition('/')
+        if path:
+            path = path.split('/')
+            if action in self.actionDict:
+                try:
+                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
+                    builder.start()
+                    try:
+                        builder.build()
+                    finally:
+                        builder.close()
+                except BuildError as e:
+                    self.send_response(e.code)
+                    msg = unicode(e).encode('UTF-8')
+                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+                    self.send_header('Content-Length', len(msg))
+                    self.end_headers()
+                    self.wfile.write(msg)
+                except HTTPError as e:
+                    self.send_response(e.code, str(e))
+            else:
+                self.send_response(500, 'Unknown build method "%s"' % action)
+        else:
+            self.send_response(500, 'Malformed URL')
+
+#==============================================================================
+
+if __name__ == '__main__':
+    main()
diff -Nru youtube-dl-2012.09.27/devscripts/check-porn.py youtube-dl-2014.02.17/devscripts/check-porn.py
--- youtube-dl-2012.09.27/devscripts/check-porn.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/check-porn.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+
+A second approach implemented relies on a list of porn domains, to activate it
+pass the list filename as the only argument
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_parse_urlparse
+from youtube_dl.utils import compat_urllib_request
+
+if len(sys.argv) > 1:
+    METHOD = 'LIST'
+    LIST = open(sys.argv[1]).read().decode('utf8').strip()
+else:
+    METHOD = 'EURISTIC'
+
+for test in get_testcases():
+    if METHOD == 'EURISTIC':
+        try:
+            webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+        except:
+            print('\nFail: {0}'.format(test['name']))
+            continue
+
+        webpage = webpage.decode('utf8', 'replace')
+
+        RESULT = 'porn' in webpage.lower()
+
+    elif METHOD == 'LIST':
+        domain = compat_urllib_parse_urlparse(test['url']).netloc
+        if not domain:
+            print('\nFail: {0}'.format(test['name']))
+            continue
+        domain = '.'.join(domain.split('.')[-2:])
+
+        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
+
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+                   or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+                         and test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+print()
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/add-version.py youtube-dl-2014.02.17/devscripts/gh-pages/add-version.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/add-version.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/add-version.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+import json
+import sys
+import hashlib
+import os.path
+
+
+if len(sys.argv) <= 1:
+    print('Specify the version number as parameter')
+    sys.exit()
+version = sys.argv[1]
+
+with open('update/LATEST_VERSION', 'w') as f:
+    f.write(version)
+
+versions_info = json.load(open('update/versions.json'))
+if 'signature' in versions_info:
+    del versions_info['signature']
+
+new_version = {}
+
+filenames = {
+    'bin': 'youtube-dl',
+    'exe': 'youtube-dl.exe',
+    'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
+for key, filename in filenames.items():
+    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
+    sha256sum = hashlib.sha256(data).hexdigest()
+    new_version[key] = (url, sha256sum)
+
+versions_info['versions'][version] = new_version
+versions_info['latest'] = version
+
+with open('update/versions.json', 'w') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import hashlib
+import shutil
+import subprocess
+import tempfile
+import urllib.request
+import json
+
+versions_info = json.load(open('update/versions.json'))
+version = versions_info['latest']
+URL = versions_info['versions'][version]['bin'][0]
+
+data = urllib.request.urlopen(URL).read()
+
+# Read template page
+with open('download.html.in', 'r', encoding='utf-8') as tmplf:
+    template = tmplf.read()
+
+md5sum = hashlib.md5(data).hexdigest()
+sha1sum = hashlib.sha1(data).hexdigest()
+sha256sum = hashlib.sha256(data).hexdigest()
+template = template.replace('@PROGRAM_VERSION@', version)
+template = template.replace('@PROGRAM_URL@', URL)
+template = template.replace('@PROGRAM_MD5SUM@', md5sum)
+template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
+template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
+template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
+template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import hashlib
+import shutil
+import subprocess
+import tempfile
+import urllib.request
+import json
+
+versions_info = json.load(open('update/versions.json'))
+version = versions_info['latest']
+URL = versions_info['versions'][version]['bin'][0]
+
+data = urllib.request.urlopen(URL).read()
+
+# Read template page
+with open('download.html.in', 'r', encoding='utf-8') as tmplf:
+    template = tmplf.read()
+
+md5sum = hashlib.md5(data).hexdigest()
+sha1sum = hashlib.sha1(data).hexdigest()
+sha256sum = hashlib.sha256(data).hexdigest()
+template = template.replace('@PROGRAM_VERSION@', version)
+template = template.replace('@PROGRAM_URL@', URL)
+template = template.replace('@PROGRAM_MD5SUM@', md5sum)
+template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
+template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
+template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
+template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
+with open('download.html', 'w', encoding='utf-8') as dlf:
+    dlf.write(template)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/sign-versions.py youtube-dl-2014.02.17/devscripts/gh-pages/sign-versions.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/sign-versions.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/sign-versions.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import rsa
+import json
+from binascii import hexlify
+
+try:
+    input = raw_input
+except NameError:
+    pass
+
+versions_info = json.load(open('update/versions.json'))
+if 'signature' in versions_info:
+    del versions_info['signature']
+
+print('Enter the PKCS1 private key, followed by a blank line:')
+privkey = b''
+while True:
+    try:
+        line = input()
+    except EOFError:
+        break
+    if line == '':
+        break
+    privkey += line.encode('ascii') + b'\n'
+privkey = rsa.PrivateKey.load_pkcs1(privkey)
+
+signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
+print('signature: ' + signature)
+
+versions_info['signature'] = signature
+json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
\ No newline at end of file
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-copyright.py youtube-dl-2014.02.17/devscripts/gh-pages/update-copyright.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-copyright.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-copyright.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import with_statement
+
+import datetime
+import glob
+import io  # For Python 2 compatibility
+import os
+import re
+
+year = str(datetime.datetime.now().year)
+for fn in glob.glob('*.html*'):
+    with io.open(fn, encoding='utf-8') as f:
+        content = f.read()
+    newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
+    if content != newc:
+        tmpFn = fn + '.part'
+        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
+            outf.write(newc)
+        os.rename(tmpFn, fn)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-feed.py youtube-dl-2014.02.17/devscripts/gh-pages/update-feed.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-feed.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-feed.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import datetime
+import io
+import json
+import textwrap
+
+
+atom_template = textwrap.dedent("""\
+    <?xml version="1.0" encoding="utf-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+        <link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
+        <title>youtube-dl releases</title>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
+        <updated>@TIMESTAMP@</updated>
+        @ENTRIES@
+    </feed>""")
+
+entry_template = textwrap.dedent("""
+    <entry>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
+        <title>New version @VERSION@</title>
+        <link href="http://rg3.github.io/youtube-dl" />
+        <content type="xhtml">
+            <div xmlns="http://www.w3.org/1999/xhtml">
+                Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
+            </div>
+        </content>
+        <author>
+            <name>The youtube-dl maintainers</name>
+        </author>
+        <updated>@TIMESTAMP@</updated>
+    </entry>
+    """)
+
+now = datetime.datetime.now()
+now_iso = now.isoformat() + 'Z'
+
+atom_template = atom_template.replace('@TIMESTAMP@', now_iso)
+
+versions_info = json.load(open('update/versions.json'))
+versions = list(versions_info['versions'].keys())
+versions.sort()
+
+entries = []
+for v in versions:
+    fields = v.split('.')
+    year, month, day = map(int, fields[:3])
+    faked = 0
+    patchlevel = 0
+    while True:
+        try:
+            datetime.date(year, month, day)
+        except ValueError:
+            day -= 1
+            faked += 1
+            assert day > 0
+            continue
+        break
+    if len(fields) >= 4:
+        try:
+            patchlevel = int(fields[3])
+        except ValueError:
+            patchlevel = 1
+    timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel)
+
+    entry = entry_template.replace('@TIMESTAMP@', timestamp)
+    entry = entry.replace('@VERSION@', v)
+    entries.append(entry)
+
+entries_str = textwrap.indent(''.join(entries), '\t')
+atom_template = atom_template.replace('@ENTRIES@', entries_str)
+
+with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
+    atom_file.write(atom_template)
+
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-sites.py youtube-dl-2014.02.17/devscripts/gh-pages/update-sites.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-sites.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-sites.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
+        ie_desc = getattr(ie, 'IE_DESC', None)
+        if ie_desc is False:
+            continue
+        elif ie_desc is not None:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('<li>{}</li>'.format(ie_html))
+
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+    main()
diff -Nru youtube-dl-2012.09.27/devscripts/make_readme.py youtube-dl-2014.02.17/devscripts/make_readme.py
--- youtube-dl-2012.09.27/devscripts/make_readme.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/make_readme.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,24 @@
+import io
+import sys
+import re
+
+README_FILE = 'README.md'
+helptext = sys.stdin.read()
+
+if isinstance(helptext, bytes):
+    helptext = helptext.decode('utf-8')
+
+with io.open(README_FILE, encoding='utf-8') as f:
+    oldreadme = f.read()
+
+header = oldreadme[:oldreadme.index('# OPTIONS')]
+footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+
+options = helptext[helptext.index(' General Options:') + 19:]
+options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M)
+options = '# OPTIONS\n' + options + '\n'
+
+with io.open(README_FILE, 'w', encoding='utf-8') as f:
+    f.write(header)
+    f.write(options)
+    f.write(footer)
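+
+# This script expects youtube-dl's --help text on stdin; the Makefile
+# invokes it as:
+#   COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py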
(y/n) " -n 1 +wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +mkdir -p "build/$version" +mv youtube-dl youtube-dl.exe "build/$version" +mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" +RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" +(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS) +(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS) +(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) +(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) +git checkout HEAD -- youtube-dl youtube-dl.exe + +/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done +scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ +ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" +ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" + +/bin/echo -e "\n### Now switching to gh-pages..." +git clone --branch gh-pages --single-branch . build/gh-pages +ROOT=$(pwd) +( + set -e + ORIGIN_URL=$(git config --get remote.origin.url) + cd build/gh-pages + "$ROOT/devscripts/gh-pages/add-version.py" $version + "$ROOT/devscripts/gh-pages/update-feed.py" + "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" + "$ROOT/devscripts/gh-pages/generate-download.py" + "$ROOT/devscripts/gh-pages/update-copyright.py" + "$ROOT/devscripts/gh-pages/update-sites.py" + git add *.html *.html.in update + git commit -m "release $version" + git push "$ROOT" gh-pages + git push "$ORIGIN_URL" gh-pages +) +rm -rf build + +make pypi-files +echo "Uploading to PyPi ..." +python setup.py sdist upload +make clean + +/bin/echo -e "\n### DONE!" diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper_exe/setup.py youtube-dl-2014.02.17/devscripts/transition_helper_exe/setup.py --- youtube-dl-2012.09.27/devscripts/transition_helper_exe/setup.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper_exe/setup.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,12 @@ +from distutils.core import setup +import py2exe + +py2exe_options = { + "bundle_files": 1, + "compressed": 1, + "optimize": 2, + "dist_dir": '.', + "dll_excludes": ['w9xpopen.exe'] +} + +setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None) \ No newline at end of file diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper_exe/youtube-dl.py youtube-dl-2014.02.17/devscripts/transition_helper_exe/youtube-dl.py --- youtube-dl-2012.09.27/devscripts/transition_helper_exe/youtube-dl.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper_exe/youtube-dl.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +import sys, os +import urllib2 +import json, hashlib + +def rsa_verify(message, signature, key): + from struct import pack + from hashlib import sha256 + from sys import version_info + def b(x): + if version_info[0] == 2: return x + else: return x.encode('latin1') + assert(type(message) == type(b(''))) + block_size = 0 + n = key[0] + while n: + block_size += 1 + n >>= 8 + signature = pow(int(signature, 16), key[1], key[0]) + raw_bytes = [] + while signature: + raw_bytes.insert(0, pack("B", signature & 0xFF)) + signature >>= 8 + signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes) + if signature[0:2] != b('\x00\x01'): return False + signature = signature[2:] + if not b('\x00') in signature: return False + signature = 
signature[signature.index(b('\x00'))+1:] + if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False + signature = signature[19:] + if signature != sha256(message).digest(): return False + return True + +sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n') +sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') +sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n') + +raw_input() + +filename = sys.argv[0] + +UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" +VERSION_URL = UPDATE_URL + 'LATEST_VERSION' +JSON_URL = UPDATE_URL + 'versions.json' +UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + +if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + +exe = os.path.abspath(filename) +directory = os.path.dirname(exe) +if not os.access(directory, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % directory) + +try: + versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8') + versions_info = json.loads(versions_info) +except: + sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.') +if not 'signature' in versions_info: + sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.') +signature = versions_info['signature'] +del versions_info['signature'] +if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY): + sys.exit(u'ERROR: the versions file signature is invalid. Aborting.') + +version = versions_info['versions'][versions_info['latest']] + +try: + urlh = urllib2.urlopen(version['exe'][0]) + newcontent = urlh.read() + urlh.close() +except (IOError, OSError) as err: + sys.exit('ERROR: unable to download latest version') + +newcontent_hash = hashlib.sha256(newcontent).hexdigest() +if newcontent_hash != version['exe'][1]: + sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.') + +try: + with open(exe + '.new', 'wb') as outf: + outf.write(newcontent) +except (IOError, OSError) as err: + sys.exit(u'ERROR: unable to write the new version') + +try: + bat = os.path.join(directory, 'youtube-dl-updater.bat') + b = open(bat, 'w') + b.write(""" +echo Updating youtube-dl... +ping 127.0.0.1 -n 5 -w 1000 > NUL +move /Y "%s.new" "%s" +del "%s" + \n""" %(exe, exe, bat)) + b.close() + + os.startfile(bat) +except (IOError, OSError) as err: + sys.exit('ERROR: unable to overwrite current version') + +sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper.py youtube-dl-2014.02.17/devscripts/transition_helper.py --- youtube-dl-2012.09.27/devscripts/transition_helper.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import sys, os + +try: + import urllib.request as compat_urllib_request +except ImportError: # Python 2 + import urllib2 as compat_urllib_request + +sys.stderr.write(u'Hi! 
We changed distribution method and now youtube-dl needs to update itself one more time.\n') +sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') +sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n') + +try: + raw_input() +except NameError: # Python 3 + input() + +filename = sys.argv[0] + +API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads" +BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl" + +if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + +try: + urlh = compat_urllib_request.urlopen(BIN_URL) + newcontent = urlh.read() + urlh.close() +except (IOError, OSError) as err: + sys.exit('ERROR: unable to download latest version') + +try: + with open(filename, 'wb') as outf: + outf.write(newcontent) +except (IOError, OSError) as err: + sys.exit('ERROR: unable to overwrite current version') + +sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff -Nru youtube-dl-2012.09.27/.gitignore youtube-dl-2014.02.17/.gitignore --- youtube-dl-2012.09.27/.gitignore 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/.gitignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -*.pyc -*.pyo -*~ -wine-py2exe/ -py2exe.log diff -Nru youtube-dl-2012.09.27/LATEST_VERSION youtube-dl-2014.02.17/LATEST_VERSION --- youtube-dl-2012.09.27/LATEST_VERSION 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/LATEST_VERSION 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -2012.09.27 diff -Nru youtube-dl-2012.09.27/LICENSE youtube-dl-2014.02.17/LICENSE --- youtube-dl-2012.09.27/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/LICENSE 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+
+For more information, please refer to <http://unlicense.org/>
diff -Nru youtube-dl-2012.09.27/Makefile youtube-dl-2014.02.17/Makefile
--- youtube-dl-2012.09.27/Makefile 2012-09-27 09:25:46.000000000 +0000
+++ youtube-dl-2014.02.17/Makefile 2014-01-27 02:06:44.000000000 +0000
@@ -1,26 +1,79 @@
-default: update
+all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
-update: compile update-readme update-latest
+clean:
+	rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
-update-latest:
-	./youtube-dl.dev --version > LATEST_VERSION
+cleanall: clean
+	rm -f youtube-dl youtube-dl.exe
-update-readme:
-	@options=$$(COLUMNS=80 ./youtube-dl.dev --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \
-		header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \
-		footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \
-		echo "$${header}" > README.md && \
-		echo >> README.md && \
-		echo '## OPTIONS' >> README.md && \
-		echo "$${options}" >> README.md&& \
-		echo >> README.md && \
-		echo '## FAQ' >> README.md && \
-		echo "$${footer}" >> README.md
-
-compile:
-	zip --quiet --junk-paths youtube-dl youtube_dl/*.py
-	echo '#!/usr/bin/env python' > youtube-dl
+PREFIX=/usr/local
+BINDIR=$(PREFIX)/bin
+MANDIR=$(PREFIX)/man
+PYTHON=/usr/bin/env python
+
+# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
+ifeq ($(PREFIX),/usr)
+	SYSCONFDIR=/etc
+else
+	ifeq ($(PREFIX),/usr/local)
+		SYSCONFDIR=/etc
+	else
+		SYSCONFDIR=$(PREFIX)/etc
+	endif
+endif
+
+install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
+	install -d $(DESTDIR)$(BINDIR)
+	install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
+	install -d $(DESTDIR)$(MANDIR)/man1
+	install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
+	install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
+	install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+
+test:
+	#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
+	nosetests --verbose test
+
+tar: youtube-dl.tar.gz
+
+.PHONY: all clean install test tar bash-completion pypi-files
+
+pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
+
+youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
+	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
+	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip
+	chmod a+x youtube-dl
+
+README.md: youtube_dl/*.py youtube_dl/*/*.py
+	COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+
+README.txt: README.md
+	pandoc -f markdown -t plain README.md -o README.txt
+
+youtube-dl.1: README.md
+	pandoc -s -f markdown -t man README.md -o youtube-dl.1
+
+youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
+	python devscripts/bash-completion.py
+
+bash-completion: youtube-dl.bash-completion
-.PHONY: default compile update update-latest update-readme
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
+		--exclude '*.DS_Store' \
+		--exclude '*.kate-swp' \
+		--exclude '*.pyc' \
+		--exclude '*.pyo' \
+		--exclude '*~' \
+		--exclude '__pycache' \
+		--exclude '.git' \
+		--exclude 'testdata' \
+		-- \
+		bin devscripts test youtube_dl \
+		CHANGELOG LICENSE README.md README.txt \
+		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
+		
youtube-dl diff -Nru youtube-dl-2012.09.27/MANIFEST.in youtube-dl-2014.02.17/MANIFEST.in --- youtube-dl-2012.09.27/MANIFEST.in 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/MANIFEST.in 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,5 @@ +include README.md +include test/*.py +include test/*.json +include youtube-dl.bash-completion +include youtube-dl.1 diff -Nru youtube-dl-2012.09.27/README.md youtube-dl-2014.02.17/README.md --- youtube-dl-2012.09.27/README.md 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/README.md 2014-02-17 10:33:13.000000000 +0000 @@ -1,109 +1,302 @@ -# youtube-dl +% YOUTUBE-DL(1) -## USAGE -youtube-dl [options] url [url...] +# NAME +youtube-dl - download videos from youtube.com or other video platforms -## DESCRIPTION +# SYNOPSIS +**youtube-dl** [OPTIONS] URL [URL...] + +# DESCRIPTION **youtube-dl** is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version -2.x (x being at least 6), and it is not platform specific. It should work in -your Unix box, in Windows or in Mac OS X. It is released to the public domain, +2.6, 2.7, or 3.3+, and it is not platform specific. It should work on +your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. -## OPTIONS - -h, --help print this help text and exit - --version print program version and exit - -U, --update update this program to latest version - -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --dump-user-agent display the current browser identification - --list-extractors List all supported extractors and the URLs they - would handle - -### Video Selection: - --playlist-start NUMBER playlist video to start at (default is 1) - --playlist-end NUMBER playlist video to end at (default is last) - --match-title REGEX download only matching titles (regex or caseless - sub-string) - --reject-title REGEX skip download for matching titles (regex or - caseless sub-string) - --max-downloads NUMBER Abort after downloading NUMBER files - -### Filesystem Options: - -t, --title use title in file name - -l, --literal use literal title in file name - -A, --auto-number number downloaded files starting from 00000 - -o, --output TEMPLATE output filename template. Use %(stitle)s to get the - title, %(uploader)s for the uploader name, - %(autonumber)s to get an automatically incremented - number, %(ext)s for the filename extension, - %(upload_date)s for the upload date (YYYYMMDD), and - %% for a literal percent. Use - to output to - stdout. 
- -a, --batch-file FILE file containing URLs to download ('-' for stdin) - -w, --no-overwrites do not overwrite files - -c, --continue resume partially downloaded files - --no-continue do not resume partially downloaded files (restart - from beginning) - --cookies FILE file to read cookies from and dump cookie jar in - --no-part do not use .part files - --no-mtime do not use the Last-modified header to set the file - modification time - --write-description write video description to a .description file - --write-info-json write video metadata to a .info.json file - -### Verbosity / Simulation Options: - -q, --quiet activates quiet mode - -s, --simulate do not download the video and do not write anything - to disk - --skip-download do not download the video - -g, --get-url simulate, quiet but print URL - -e, --get-title simulate, quiet but print title - --get-thumbnail simulate, quiet but print thumbnail URL - --get-description simulate, quiet but print video description - --get-filename simulate, quiet but print output filename - --get-format simulate, quiet but print output format - --no-progress do not print progress bar - --console-title display progress in console titlebar - -v, --verbose print various debugging information - -### Video Format Options: - -f, --format FORMAT video format code - --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific one is - requested - --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats (currently youtube only) - --write-srt write video closed captions to a .srt file - (currently youtube only) - --srt-lang LANG language of the closed captions to download - (optional) use IETF language tags like 'en' - -### Authentication Options: - -u, --username USERNAME account username - -p, --password PASSWORD account password - -n, --netrc use .netrc authentication data - -### Post-processing Options: - --extract-audio convert video files to audio-only files (requires - ffmpeg or avconv and ffprobe or avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; - best by default - --audio-quality QUALITY ffmpeg/avconv audio bitrate specification, 128k by - default - -k, --keep-video keeps the video file on disk after the post- - processing; the video is erased by default +# OPTIONS + -h, --help print this help text and exit + --version print program version and exit + -U, --update update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to + to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error + occurs + --dump-user-agent display the current browser identification + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --list-extractors List all supported extractors and the URLs + they would handle + --extractor-descriptions Output descriptions of all supported + extractors + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in + an empty string (--proxy "") for direct + connection + --no-check-certificate Suppress HTTPS certificate validation. + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . 
At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --socket-timeout None Time to wait before giving up, in seconds + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for youtube-dl "large + apple". By default (with value "auto") + youtube-dl guesses. + --ignore-config Do not read configuration files. When given + in the global configuration file /etc + /youtube-dl.conf: do not read the user + configuration in ~/.config/youtube-dl.conf + (%APPDATA%/youtube-dl/config.txt on + Windows) + +## Video Selection: + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --match-title REGEX download only matching titles (regex or + caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or + caseless sub-string) + --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE download only videos uploaded in this date + --datebefore DATE download only videos uploaded on or before + this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after + this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than + COUNT views + --max-views COUNT Do not download any videos with more than + COUNT views + --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it. + --include-ads Download advertisements as well + (experimental) + --youtube-include-dash-manifest Try to download the DASH manifest on + YouTube videos (experimental) + +## Download Options: + -r, --rate-limit LIMIT maximum download rate in bytes per second + (e.g. 50K or 4.2M) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer + size. By default, the buffer size is + automatically resized from an initial value + of SIZE. + +## Filesystem Options: + -t, --title use title in file name (default) + --id use only video ID in file name + -l, --literal [deprecated] alias of --title + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template. Use %(title)s to + get the title, %(uploader)s for the + uploader name, %(uploader_id)s for the + uploader nickname if different, + %(autonumber)s to get an automatically + incremented number, %(ext)s for the + filename extension, %(format)s for the + format description (like "22 - 1280x720" or + "HD"), %(format_id)s for the unique id of + the format (like Youtube's itags: "137"), + %(upload_date)s for the upload date + (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the + video id, %(playlist)s for the playlist the + video is in, %(playlist_index)s for the + position in the playlist and %% for a + literal percent. 
Use - to output to stdout. + Can also be used to download to a different + directory, for example with -o '/my/downloa + ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + --autonumber-size NUMBER Specifies the number of digits in + %(autonumber)s when it is present in output + filename template or --auto-number option + is given + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) + --load-info FILE json file containing the video information + (created with the "--write-json" option) + -w, --no-overwrites do not overwrite files + -c, --continue force resume of partially downloaded files. + By default, youtube-dl will resume + downloads if possible. + --no-continue do not resume partially downloaded files + (restart from beginning) + --cookies FILE file to read cookies from and dump cookie + jar in + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set + the file modification time + --write-description write video description to a .description + file + --write-info-json write video metadata to a .info.json file + --write-annotations write video annotations to a .annotation + file + --write-thumbnail write thumbnail image to disk + +## Verbosity / Simulation Options: + -q, --quiet activates quiet mode + -s, --simulate do not download the video and do not write + anything to disk + --skip-download do not download the video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-id simulate, quiet but print id + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-duration simulate, quiet but print video length + --get-filename simulate, quiet but print output filename + --get-format simulate, quiet but print output format + -j, --dump-json simulate, quiet but print JSON information + --newline output progress bar as new lines + --no-progress do not print progress bar + --console-title display progress in console titlebar + -v, --verbose print various debugging information + --dump-intermediate-pages print downloaded pages to debug problems + (very verbose) + --write-pages Write downloaded intermediary pages to + files in the current directory to debug + problems + --print-traffic Display sent and read HTTP traffic + +## Video Format Options: + -f, --format FORMAT video format code, specify the order of + preference using slashes: "-f 22/17/18". + "-f mp4" and "-f flv" are also supported. + You can also use the special names "best", + "bestaudio", "worst", and "worstaudio". By + default, youtube-dl will pick the best + quality. 
+ --all-formats download all available video formats + --prefer-free-formats prefer free video formats unless a specific + one is requested + --max-quality FORMAT highest quality format to download + -F, --list-formats list all available formats + +## Subtitle Options: + --write-sub write subtitle file + --write-auto-sub write automatic subtitle file (youtube + only) + --all-subs downloads all the available subtitles of + the video + --list-subs lists all available subtitles for the video + --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] + youtube only) + --sub-lang LANGS languages of the subtitles to download + (optional) separated by commas, use IETF + language tags like 'en,pt' + +## Authentication Options: + -u, --username USERNAME account username + -p, --password PASSWORD account password + -n, --netrc use .netrc authentication data + --video-password PASSWORD video password (vimeo, smotri) + +## Post-processing Options: + -x, --extract-audio convert video files to audio-only files + (requires ffmpeg or avconv and ffprobe or + avprobe) + --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", + "opus", or "wav"; best by default + --audio-quality QUALITY ffmpeg/avconv audio quality specification, + insert a value between 0 (better) and 9 + (worse) for VBR or a specific bitrate like + 128K (default 5) + --recode-video FORMAT Encode the video to another format if + necessary (currently supported: + mp4|flv|ogg|webm) + -k, --keep-video keeps the video file on disk after the + post-processing; the video is erased by + default + --no-post-overwrites do not overwrite post-processed files; the + post-processed files are overwritten by + default + --embed-subs embed subtitles in the video (only for mp4 + videos) + --add-metadata write metadata to the video file + --xattrs write metadata to the video file's xattrs + (using dublin core and xdg standards) + --prefer-avconv Prefer avconv over ffmpeg for running the + postprocessors (default) + --prefer-ffmpeg Prefer ffmpeg over avconv for running the + postprocessors + +# CONFIGURATION + +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. + +# OUTPUT TEMPLATE + +The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are: + + - `id`: The sequence will be replaced by the video identifier. + - `url`: The sequence will be replaced by the video URL. + - `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video. + - `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format. + - `title`: The sequence will be replaced by the video title. + - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). + - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. 
+ - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. + - `playlist`: The name or the id of the playlist that contains the video. + - `playlist_index`: The index of the video in the playlist, a five-digit number. + +The current default template is `%(title)s-%(id)s.%(ext)s`. + +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: + + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc + youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames + youtube-dl_test_video_.mp4 # A simple file name + +# VIDEO SELECTION + +Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats: + + - Absolute dates: Dates in the format `YYYYMMDD`. + - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` + +Examples: + + $ # Download only the videos uploaded in the last 6 months + $ youtube-dl --dateafter now-6months + $ # Download only the videos uploaded on January 1, 1970 + $ youtube-dl --date 19700101 + $ # will only download the videos uploaded in the 200x decade + $ youtube-dl --dateafter 20000101 --datebefore 20091231 -## FAQ +# FAQ ### Can you please put the -b option back? -Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? @@ -117,19 +310,113 @@ youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. 
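+
+For example, a quick check that the update took effect (assuming a system-wide install; omit `sudo` for a per-user copy):
+
+    $ sudo youtube-dl --update
+    $ youtube-dl --version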
-## COPYRIGHT +### ERROR: unable to download video ### -youtube-dl is released into the public domain by the copyright holders. +youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. -This README file was originally written by Daniel Bolton () and is likewise released into the public domain. +### SyntaxError: Non-ASCII character ### + +The error + + File "youtube-dl", line 2 + SyntaxError: Non-ASCII character '\x93' ... + +means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. + +### What is this binary file? Where has the code gone? + +Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. + +### The exe throws a *Runtime error from Visual C++* + +To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). + +# DEVELOPER INSTRUCTIONS + +Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. + +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make +* pandoc +* zip +* nosetests + +### Adding support for a new site + +If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). + +# BUGS + +Bugs and suggestions should be reported at: . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. + +Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. + +For discussions, join us in the irc channel #youtube-dl on freenode. + +When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist): -## BUGS +### Is the description of the issue itself sufficient? 
-Bugs and suggestions should be reported at: +We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. -Please include: +So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious -* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. -* The output of `youtube-dl --version` -* The output of `python --version` -* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). +- What the problem is +- How it could be fixed +- How your proposed solution would look like + +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. + +For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. + +Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. + +### Are you using the latest version? + +Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. + +### Is the issue already documented? + +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. + +### Why are existing options not enough? + +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. + +### Is there enough context in your bug report? + +People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). 
However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+
+# COPYRIGHT
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain.
diff -Nru youtube-dl-2012.09.27/README.txt youtube-dl-2014.02.17/README.txt
--- youtube-dl-2012.09.27/README.txt 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/README.txt 2014-02-17 10:33:26.000000000 +0000
@@ -0,0 +1,582 @@
+NAME
+====
+
+youtube-dl - download videos from youtube.com or other video platforms
+
+SYNOPSIS
+========
+
+youtube-dl OPTIONS URL [URL...]
+
+DESCRIPTION
+===========
+
+youtube-dl is a small command-line program to download videos from
+YouTube.com and a few more sites. It requires the Python interpreter,
+version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+work on your Unix box, on Windows or on Mac OS X. It is released to the
+public domain, which means you can modify it, redistribute it or use it
+however you like.
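+
+For example, the following downloads a single video to the current
+directory under the default file naming scheme (BaW_jenozKc is
+youtube-dl's designated test video, also used as the example URL
+elsewhere in this document):
+
+    youtube-dl "http://www.youtube.com/watch?v=BaW_jenozKc"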
+ +OPTIONS +======= + + -h, --help print this help text and exit + --version print program version and exit + -U, --update update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to + to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error + occurs + --dump-user-agent display the current browser identification + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --list-extractors List all supported extractors and the URLs + they would handle + --extractor-descriptions Output descriptions of all supported + extractors + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in + an empty string (--proxy "") for direct + connection + --no-check-certificate Suppress HTTPS certificate validation. + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --socket-timeout None Time to wait before giving up, in seconds + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for youtube-dl "large + apple". By default (with value "auto") + youtube-dl guesses. + --ignore-config Do not read configuration files. When given + in the global configuration file /etc + /youtube-dl.conf: do not read the user + configuration in ~/.config/youtube-dl.conf + (%APPDATA%/youtube-dl/config.txt on + Windows) + +Video Selection: +---------------- + + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --match-title REGEX download only matching titles (regex or + caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or + caseless sub-string) + --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE download only videos uploaded in this date + --datebefore DATE download only videos uploaded on or before + this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after + this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than + COUNT views + --max-views COUNT Do not download any videos with more than + COUNT views + --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it. + --include-ads Download advertisements as well + (experimental) + --youtube-include-dash-manifest Try to download the DASH manifest on + YouTube videos (experimental) + +Download Options: +----------------- + + -r, --rate-limit LIMIT maximum download rate in bytes per second + (e.g. 
50K or 4.2M) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer + size. By default, the buffer size is + automatically resized from an initial value + of SIZE. + +Filesystem Options: +------------------- + + -t, --title use title in file name (default) + --id use only video ID in file name + -l, --literal [deprecated] alias of --title + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template. Use %(title)s to + get the title, %(uploader)s for the + uploader name, %(uploader_id)s for the + uploader nickname if different, + %(autonumber)s to get an automatically + incremented number, %(ext)s for the + filename extension, %(format)s for the + format description (like "22 - 1280x720" or + "HD"), %(format_id)s for the unique id of + the format (like Youtube's itags: "137"), + %(upload_date)s for the upload date + (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the + video id, %(playlist)s for the playlist the + video is in, %(playlist_index)s for the + position in the playlist and %% for a + literal percent. Use - to output to stdout. + Can also be used to download to a different + directory, for example with -o '/my/downloa + ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + --autonumber-size NUMBER Specifies the number of digits in + %(autonumber)s when it is present in output + filename template or --auto-number option + is given + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) + --load-info FILE json file containing the video information + (created with the "--write-json" option) + -w, --no-overwrites do not overwrite files + -c, --continue force resume of partially downloaded files. + By default, youtube-dl will resume + downloads if possible. 
+    --no-continue                    do not resume partially downloaded files
+                                     (restart from beginning)
+    --cookies FILE                   file to read cookies from and dump
+                                     cookie jar in
+    --no-part                        do not use .part files
+    --no-mtime                       do not use the Last-modified header to
+                                     set the file modification time
+    --write-description              write video description to a
+                                     .description file
+    --write-info-json                write video metadata to a .info.json
+                                     file
+    --write-annotations              write video annotations to a
+                                     .annotation file
+    --write-thumbnail                write thumbnail image to disk
+
+Verbosity / Simulation Options:
+-------------------------------
+
+    -q, --quiet                      activate quiet mode
+    -s, --simulate                   do not download the video and do not
+                                     write anything to disk
+    --skip-download                  do not download the video
+    -g, --get-url                    simulate, quiet but print URL
+    -e, --get-title                  simulate, quiet but print title
+    --get-id                         simulate, quiet but print id
+    --get-thumbnail                  simulate, quiet but print thumbnail URL
+    --get-description                simulate, quiet but print video
+                                     description
+    --get-duration                   simulate, quiet but print video length
+    --get-filename                   simulate, quiet but print output
+                                     filename
+    --get-format                     simulate, quiet but print output format
+    -j, --dump-json                  simulate, quiet but print JSON
+                                     information
+    --newline                        output progress bar as new lines
+    --no-progress                    do not print progress bar
+    --console-title                  display progress in console titlebar
+    -v, --verbose                    print various debugging information
+    --dump-intermediate-pages        print downloaded pages to debug problems
+                                     (very verbose)
+    --write-pages                    Write downloaded intermediary pages to
+                                     files in the current directory to debug
+                                     problems
+    --print-traffic                  Display sent and read HTTP traffic
+
+Video Format Options:
+---------------------
+
+    -f, --format FORMAT              video format code, specify the order of
+                                     preference using slashes: "-f 22/17/18".
+                                     "-f mp4" and "-f flv" are also
+                                     supported. You can also use the special
+                                     names "best", "bestaudio", "worst", and
+                                     "worstaudio". By default, youtube-dl
+                                     will pick the best quality.
+    --all-formats                    download all available video formats
+    --prefer-free-formats            prefer free video formats unless a
+                                     specific one is requested
+    --max-quality FORMAT             highest quality format to download
+    -F, --list-formats               list all available formats
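+
+For instance, you can first list a video's formats and then request one
+by its code, with fallbacks (a sketch: the numeric format codes differ
+from video to video, and BaW_jenozKc is the youtube-dl test video):
+
+    youtube-dl -F "http://www.youtube.com/watch?v=BaW_jenozKc"
+    youtube-dl -f 22/18/best "http://www.youtube.com/watch?v=BaW_jenozKc"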
+
+Subtitle Options:
+-----------------
+
+    --write-sub                      write subtitle file
+    --write-auto-sub                 write automatic subtitle file (YouTube
+                                     only)
+    --all-subs                       download all the available subtitles of
+                                     the video
+    --list-subs                      list all available subtitles for the
+                                     video
+    --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt]
+                                     YouTube only)
+    --sub-lang LANGS                 languages of the subtitles to download
+                                     (optional), separated by commas, use
+                                     IETF language tags like 'en,pt'
+
+Authentication Options:
+-----------------------
+
+    -u, --username USERNAME          account username
+    -p, --password PASSWORD          account password
+    -n, --netrc                      use .netrc authentication data
+    --video-password PASSWORD        video password (vimeo, smotri)
+
+Post-processing Options:
+------------------------
+
+    -x, --extract-audio              convert video files to audio-only files
+                                     (requires ffmpeg or avconv and ffprobe
+                                     or avprobe)
+    --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a",
+                                     "opus", or "wav"; "best" by default
+    --audio-quality QUALITY          ffmpeg/avconv audio quality
+                                     specification, insert a value between 0
+                                     (better) and 9 (worse) for VBR or a
+                                     specific bitrate like 128K (default 5)
+    --recode-video FORMAT            Encode the video to another format if
+                                     necessary (currently supported:
+                                     mp4|flv|ogg|webm)
+    -k, --keep-video                 keep the video file on disk after the
+                                     post-processing; the video is erased by
+                                     default
+    --no-post-overwrites             do not overwrite post-processed files;
+                                     the post-processed files are overwritten
+                                     by default
+    --embed-subs                     embed subtitles in the video (only for
+                                     mp4 videos)
+    --add-metadata                   write metadata to the video file
+    --xattrs                         write metadata to the video file's
+                                     xattrs (using Dublin Core and XDG
+                                     standards)
+    --prefer-avconv                  Prefer avconv over ffmpeg for running
+                                     the postprocessors (default)
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running
+                                     the postprocessors
+
+CONFIGURATION
+=============
+
+You can configure youtube-dl by placing default arguments (such as
+--extract-audio --no-mtime to always extract the audio and not copy the
+mtime) into /etc/youtube-dl.conf and/or ~/.config/youtube-dl.conf. On
+Windows, the configuration file locations are
+%APPDATA%\youtube-dl\config.txt and C:\Users\<user name>\youtube-dl.conf.
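+
+As a sketch, a ~/.config/youtube-dl.conf that always extracts audio,
+leaves the file modification time alone, and restricts filenames to
+ASCII could contain (these are the same flags documented above):
+
+    --extract-audio
+    --no-mtime
+    --restrict-filenames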
+
+OUTPUT TEMPLATE
+===============
+
+The -o option allows users to indicate a template for the output file
+names. The basic usage is not to set any template arguments when
+downloading a single file, like in
+youtube-dl -o funny_video.flv "http://some/video". However, it may
+contain special sequences that will be replaced when downloading each
+video. The special sequences have the format %(NAME)s. To clarify, that
+is a percent symbol followed by a name in parentheses, followed by a
+lowercase S. Allowed names are:
+
+- id: The sequence will be replaced by the video identifier.
+- url: The sequence will be replaced by the video URL.
+- uploader: The sequence will be replaced by the nickname of the
+  person who uploaded the video.
+- upload_date: The sequence will be replaced by the upload date in
+  YYYYMMDD format.
+- title: The sequence will be replaced by the video title.
+- ext: The sequence will be replaced by the appropriate extension
+  (like flv or mp4).
+- epoch: The sequence will be replaced by the Unix epoch when creating
+  the file.
+- autonumber: The sequence will be replaced by a five-digit number
+  that will be increased with each download, starting at zero.
+- playlist: The name or the id of the playlist that contains the
+  video.
+- playlist_index: The index of the video in the playlist, a five-digit
+  number.
+
+The current default template is %(title)s-%(id)s.%(ext)s.
+
+In some cases, you don't want special characters such as 中, spaces, or
+&, such as when transferring the downloaded filename to a Windows system
+or the filename through an 8bit-unsafe channel. In these cases, add the
+--restrict-filenames flag to get a shorter title:
+
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+    youtube-dl_test_video_.mp4          # A simple file name
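+
+For example, to sort the videos of a playlist into a directory named
+after the playlist, prefixing each file name with its position in the
+playlist (the URL below is a placeholder):
+
+    youtube-dl -o '%(playlist)s/%(playlist_index)s-%(title)s.%(ext)s' "http://www.youtube.com/playlist?list=..."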
+
+VIDEO SELECTION
+===============
+
+Videos can be filtered by their upload date using the options --date,
+--datebefore or --dateafter. They accept dates in two formats:
+
+- Absolute dates: Dates in the format YYYYMMDD.
+- Relative dates: Dates in the format
+  (now|today)[+-][0-9](day|week|month|year)(s)?
+
+Examples:
+
+    $ # Download only the videos uploaded in the last 6 months
+    $ youtube-dl --dateafter now-6months
+
+    $ # Download only the videos uploaded on January 1, 1970
+    $ youtube-dl --date 19700101
+
+    $ # Download only the videos uploaded in the 200x decade
+    $ youtube-dl --dateafter 20000101 --datebefore 20091231
+
+FAQ
+===
+
+Can you please put the -b option back?
+
+Most people asking this question are not aware that youtube-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the -b option. For some specific videos, maybe YouTube does not
+report them to be available in a specific high quality format you're
+interested in. In that case, simply request it with the -f option and
+youtube-dl will try to download it.
+
+I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much. We are considering providing a way to let you solve the
+CAPTCHA, but at the moment, your best course of action is to point a
+web browser at the YouTube URL, solve the CAPTCHA, and then restart
+youtube-dl.
+
+I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as vlc or
+mplayer.
+
+The links provided by youtube-dl -g are not working anymore
+
+The URLs youtube-dl outputs require the downloader to have the correct
+cookies. Use the --cookies option to write the required cookies into a
+file, and advise your downloader to read cookies from that file. Some
+sites also require a common user agent to be used; use
+--dump-user-agent to see the one in use by youtube-dl.
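+
+For example (a sketch: cookies.txt is an arbitrary file name, and wget
+stands in for whatever downloader you use):
+
+    youtube-dl --cookies cookies.txt -g "http://www.youtube.com/watch?v=BaW_jenozKc"
+    wget --load-cookies cookies.txt "<URL printed by the previous command>"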
+
+ERROR: no fmt_url_map or conn information found in video info
+
+YouTube switched to a new video info format in July 2011, which is not
+supported by old versions of youtube-dl. You can update youtube-dl with
+sudo youtube-dl --update.
+
+ERROR: unable to download video
+
+Since September 2012, YouTube requires an additional signature, which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+SyntaxError: Non-ASCII character
+
+The error
+
+    File "youtube-dl", line 2
+    SyntaxError: Non-ASCII character '\x93' ...
+
+means you're using an outdated version of Python. Please update to
+Python 2.6 or 2.7.
+
+What is this binary file? Where has the code gone?
+
+Since June 2012 (#342), youtube-dl is packed as an executable zipfile.
+Simply unzip it (you might need to rename it to youtube-dl.zip first on
+some systems) or clone the git repository, as laid out above. If you
+modify the code, you can run it by executing the __main__.py file. To
+recompile the executable, run make youtube-dl.
+
+The exe throws a Runtime error from Visual C++
+
+To run the exe you first need to install the Microsoft Visual C++ 2008
+Redistributable Package.
+
+DEVELOPER INSTRUCTIONS
+======================
+
+Most users do not need to build youtube-dl and can download the builds
+or get them from their distribution.
+
+To run youtube-dl as a developer, you don't need to build anything
+either. Simply execute
+
+    python -m youtube_dl
+
+To run the tests, simply invoke your favorite test runner, or execute a
+test file directly; any of the following work:
+
+    python -m unittest discover
+    python test/test_download.py
+    nosetests
+
+If you want to create a build of youtube-dl yourself, you'll need
+
+- python
+- make
+- pandoc
+- zip
+- nosetests
+
+Adding support for a new site
+
+If you want to add support for a new site, copy any recently modified
+file in youtube_dl/extractor and add an import in
+youtube_dl/extractor/__init__.py. Have a look at
+youtube_dl/extractor/common.py for possible helper methods and a
+detailed description of what your extractor should return; a minimal
+skeleton is sketched below. Don't forget to run the tests with
+python test/test_download.py TestDownload.test_YourExtractor! For a
+detailed tutorial, refer to this blog post.
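+
+The following hypothetical extractor shows the rough shape of the code
+(a sketch only: the site, URL pattern, and page regex are invented for
+illustration; the helper methods used are those that InfoExtractor in
+youtube_dl/extractor/common.py provides):
+
+    # somesite.py - a hypothetical extractor skeleton, not a real site
+    import re
+
+    from .common import InfoExtractor
+
+
+    class SomeSiteIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?somesite\.example/watch/(?P<id>[0-9]+)'
+        _TEST = {
+            'url': 'http://somesite.example/watch/42',
+            'file': '42.mp4',
+            'info_dict': {
+                'title': 'Some video title',
+            },
+        }
+
+        def _real_extract(self, url):
+            video_id = re.match(self._VALID_URL, url).group('id')
+            webpage = self._download_webpage(url, video_id)
+            # The regex below is invented; inspect the real page instead
+            video_url = self._html_search_regex(
+                r'<source src="([^"]+)"', webpage, 'video URL')
+            return {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'mp4',
+                'title': self._og_search_title(webpage),
+            }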
+
+BUGS
+====
+
+Bugs and suggestions should be reported at:
+https://github.com/rg3/youtube-dl/issues . Unless you were prompted to
+do so or there is another pertinent reason (e.g. GitHub fails to accept
+the bug report), please do not send bug reports via personal email.
+
+Please include the full output of the command when run with --verbose.
+The output (including the first lines) contains important debugging
+information. Issues without the full output are often not reproducible
+and therefore do not get solved in short order, if ever.
+
+For discussions, join us in the IRC channel #youtube-dl on freenode.
+
+When you submit a request, please re-read it once to avoid a couple of
+mistakes (you can and should use this as a checklist):
+
+Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most
+cases we eventually get the required information after asking back
+multiple times, this poses an unnecessary drain on our resources. Many
+contributors, including myself, are also not native speakers, so we may
+misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you
+want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing
+some of these, which makes it hard for us to respond to it. We're often
+too polite to close the issue outright, but the missing info makes
+misinterpretation likely. As a committer myself, I often get frustrated
+by these issues, since the only possible way for me to move forward on
+them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the complete
+output of youtube-dl when called with the -v flag. The error message you
+get for (most) bugs even says so, but you would not believe how many of
+our bug reports do not contain this information.
+
+Site support requests must contain an example URL. An example URL is a
+URL you might want to download, like
+http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious
+video present. Except under very special circumstances, the main page of
+a video service (e.g. http://www.youtube.com/ ) is not an example URL.
+
+Are you using the latest version?
+
+Before reporting any issue, type youtube-dl -U. This should report that
+you're up-to-date. About 20% of the reports we receive concern issues
+that have already been fixed, but people are using outdated versions.
+This goes for feature requests as well.
+
+Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to
+open. Search at the top of the window or at
+https://github.com/rg3/youtube-dl/search?type=Issues . If there is an
+issue, feel free to write something along the lines of "This affects me
+as well, with version 2015.01.01. Here is some more information on the
+issue: ...". While some issues may be old, a new post into them often
+spurs rapid activity.
+
+Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at the list of
+supported options. Many feature requests are for features that actually
+exist already! Please, absolutely do show off your work in the issue
+report and detail how the existing similar options do not solve your
+problem.
+
+Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by
+breaking down their larger problems (e.g. wanting to skip already
+downloaded files) to a specific request (e.g. requesting us to look
+whether the file exists before downloading the info page). However, what
+often happens is that they break down the problem into two steps: one
+simple, and one impossible (or extremely complicated) one.
+
+We are then presented with a very complicated request when the original
+problem could be solved far more easily, e.g. by recording the
+downloaded video IDs in a separate file. To avoid this, you must include
+the greater context where it is non-obvious. In particular, every
+feature request that does not consist of adding support for a new site
+should contain a use case scenario that explains in what situation the
+missing feature would be useful.
+
+Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or
+should open. There is no limit of issues they can or should open. While
+it may seem appealing to be able to dump all your issues into one
+ticket, that means that someone who solves one of your issues cannot
+mark the issue as closed. Typically, reporting a bunch of issues leads
+to the ticket lingering since nobody wants to attack that behemoth,
+until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to
+services at one site (generally under a common domain, but always using
+the same backend technology). Do not request support for vimeo user
+videos, Whitehouse podcasts, and Google Plus pages in the same issue.
+Also, make sure that you don't post bug reports alongside feature
+requests. As a rule of thumb, a feature request does not include outputs
+of youtube-dl that are not immediately related to the feature at hand.
+Do not post reports of a network error alongside the request for a new
+video service.
+
+Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can
+personally talk to) require. Do not post features because they seem like
+a good idea. If they are really useful, they will be requested by
+someone who requires them.
+
+Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely
+unrelated to youtube-dl and relate to a different or even the reporter's
+own application. Please make sure that you are actually using
+youtube-dl. If you are using a UI for youtube-dl, report the bug to the
+maintainer of the actual application providing the UI. On the other
+hand, if your UI for youtube-dl fails in some way you believe is related
+to youtube-dl, by all means, go ahead and report the bug.
+
+COPYRIGHT
+=========
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton
+(https://github.com/dbbolton) and is likewise released into the public
+domain.
diff -Nru youtube-dl-2012.09.27/setup.py youtube-dl-2014.02.17/setup.py
--- youtube-dl-2012.09.27/setup.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/setup.py 2014-01-27 05:21:14.000000000 +0000
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function
+
+import os.path
+import pkg_resources
+import warnings
+import sys
+
+try:
+    from setuptools import setup
+    setuptools_available = True
+except ImportError:
+    from distutils.core import setup
+    setuptools_available = False
+
+try:
+    # This will create an exe that needs Microsoft Visual C++ 2008
+    # Redistributable Package
+    import py2exe
+except ImportError:
+    if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+        print("Cannot import py2exe", file=sys.stderr)
+        exit(1)
+
+py2exe_options = {
+    "bundle_files": 1,
+    "compressed": 1,
+    "optimize": 2,
+    "dist_dir": '.',
+    "dll_excludes": ['w9xpopen.exe'],
+}
+
+py2exe_console = [{
+    "script": "./youtube_dl/__main__.py",
+    "dest_base": "youtube-dl",
+}]
+
+py2exe_params = {
+    'console': py2exe_console,
+    'options': {"py2exe": py2exe_options},
+    'zipfile': None
+}
+
+if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+    params = py2exe_params
+else:
+    files_spec = [
+        ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('share/doc/youtube_dl', ['README.txt']),
+        ('share/man/man1', ['youtube-dl.1'])
+    ]
+    root = os.path.dirname(os.path.abspath(__file__))
+    data_files = []
+    for dirname, files in files_spec:
+        resfiles = []
+        for fn in files:
+            if not os.path.exists(fn):
+                warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.'
% fn) + else: + resfiles.append(fn) + data_files.append((dirname, resfiles)) + + params = { + 'data_files': data_files, + } + if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} + else: + params['scripts'] = ['bin/youtube-dl'] + +# Get the version from youtube_dl/version.py without importing the package +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + +setup( + name='youtube_dl', + version=__version__, + description='YouTube video downloader', + long_description='Small command-line program to download videos from' + ' YouTube.com and other video sites.', + url='https://github.com/rg3/youtube-dl', + author='Ricardo Garcia', + author_email='ytdl@yt-dl.org', + maintainer='Philipp Hagemeister', + maintainer_email='phihag@phihag.de', + packages=[ + 'youtube_dl', + 'youtube_dl.extractor', 'youtube_dl.downloader', + 'youtube_dl.postprocessor'], + + # Provokes warning on most systems (why?!) + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], + + classifiers=[ + "Topic :: Multimedia :: Video", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "License :: Public Domain", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.3" + ], + + **params +) diff -Nru youtube-dl-2012.09.27/test/helper.py youtube-dl-2014.02.17/test/helper.py --- youtube-dl-2012.09.27/test/helper.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/helper.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,85 @@ +import errno +import io +import hashlib +import json +import os.path +import re +import types +import sys + +import youtube_dl.extractor +from youtube_dl import YoutubeDL +from youtube_dl.utils import preferredencoding + + +def get_params(override=None): + PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "parameters.json") + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + if override: + parameters.update(override) + return parameters + + +def try_rm(filename): + """ Remove a file if it exists """ + try: + os.remove(filename) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + +def report_warning(message): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if sys.stderr.isatty() and os.name != 'nt': + _msg_header = u'\033[0;33mWARNING:\033[0m' + else: + _msg_header = u'WARNING:' + output = u'%s %s\n' % (_msg_header, message) + if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + output = output.encode(preferredencoding()) + sys.stderr.write(output) + + +class FakeYDL(YoutubeDL): + def __init__(self, override=None): + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. 
+ params = get_params(override=override) + super(FakeYDL, self).__init__(params) + self.result = [] + + def to_screen(self, s, skip_eol=None): + print(s) + + def trouble(self, s, tb=None): + raise Exception(s) + + def download(self, x): + self.result.append(x) + + def expect_warning(self, regex): + # Silence an expected warning matching a regex + old_report_warning = self.report_warning + def report_warning(self, message): + if re.match(regex, message): return + old_report_warning(message) + self.report_warning = types.MethodType(report_warning, self) + +def get_testcases(): + for ie in youtube_dl.extractor.gen_extractors(): + t = getattr(ie, '_TEST', None) + if t: + t['name'] = type(ie).__name__[:-len('IE')] + yield t + for t in getattr(ie, '_TESTS', []): + t['name'] = type(ie).__name__[:-len('IE')] + yield t + + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff -Nru youtube-dl-2012.09.27/test/parameters.json youtube-dl-2014.02.17/test/parameters.json --- youtube-dl-2012.09.27/test/parameters.json 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/parameters.json 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +{ + "consoletitle": false, + "continuedl": true, + "forcedescription": false, + "forcefilename": false, + "forceformat": false, + "forcethumbnail": false, + "forcetitle": false, + "forceurl": false, + "format": null, + "format_limit": null, + "ignoreerrors": false, + "listformats": null, + "logtostderr": false, + "matchtitle": null, + "max_downloads": null, + "nooverwrites": false, + "nopart": false, + "noprogress": false, + "outtmpl": "%(id)s.%(ext)s", + "password": null, + "playlistend": -1, + "playliststart": 1, + "prefer_free_formats": false, + "quiet": false, + "ratelimit": null, + "rejecttitle": null, + "retries": 10, + "simulate": false, + "skip_download": false, + "subtitleslang": null, + "subtitlesformat": "srt", + "test": true, + "updatetime": true, + "usenetrc": false, + "username": null, + "verbose": true, + "writedescription": false, + "writeinfojson": true, + "writesubtitles": false, + "allsubtitles": false, + "listssubtitles": false, + "socket_timeout": 20 +} diff -Nru youtube-dl-2012.09.27/test/test_age_restriction.py youtube-dl-2014.02.17/test/test_age_restriction.py --- youtube-dl-2012.09.27/test/test_age_restriction.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_age_restriction.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import try_rm + + +from youtube_dl import YoutubeDL + + +def _download_restricted(url, filename, age): + """ Returns true iff the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + "outtmpl": "%(id)s.%(ext)s", + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = os.path.splitext(filename)[0] + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + 
self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + def test_pornotube(self): + self._assert_restricted( + 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + '1689755.flv', 13) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_all_urls.py youtube-dl-2014.02.17/test/test_all_urls.py --- youtube-dl-2012.09.27/test/test_all_urls.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_all_urls.py 2014-02-08 18:03:07.000000000 +0000 @@ -0,0 +1,137 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import get_testcases + +from youtube_dl.extractor import ( + FacebookIE, + gen_extractors, + JustinTVIE, + PBSIE, + YoutubeIE, +) + + +class TestAllURLsMatching(unittest.TestCase): + def setUp(self): + self.ies = gen_extractors() + + def matching_ies(self, url): + return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] + + def assertMatch(self, url, ie_list): + self.assertEqual(self.matching_ies(url), ie_list) + + def test_youtube_playlist_matching(self): + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585 + assertPlaylist('PL63F0C78739B09958') + assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 + self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) + # Top tracks + assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') + + def test_youtube_matching(self): + self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 + self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) + + def test_youtube_channel_matching(self): + assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + + def test_youtube_user_matching(self): + self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + + def test_youtube_feeds(self): + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) + self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) + self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) + self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) + + def test_youtube_show_matching(self): + self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) + + def test_justin_tv_channelid_matching(self): + 
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) + + def test_justintv_videoid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) + + def test_justin_tv_chapterid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) + + def test_youtube_extract(self): + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + + def test_facebook_matching(self): + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + + def test_no_duplicates(self): + ies = gen_extractors() + for tc in get_testcases(): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + + def test_keywords(self): + self.assertMatch(':ytsubs', ['youtube:subscriptions']) + self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) + self.assertMatch(':ythistory', ['youtube:history']) + self.assertMatch(':thedailyshow', ['ComedyCentralShows']) + self.assertMatch(':tds', ['ComedyCentralShows']) + self.assertMatch(':colbertreport', ['ComedyCentralShows']) + self.assertMatch(':cr', ['ComedyCentralShows']) + + def test_vimeo_matching(self): + self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) + + # https://github.com/rg3/youtube-dl/issues/1930 + def test_soundcloud_not_matching_sets(self): + self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) + + def test_tumblr(self): + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) + + def test_pbs(self): + # https://github.com/rg3/youtube-dl/issues/2350 + self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_div.py youtube-dl-2014.02.17/test/test_div.py --- youtube-dl-2012.09.27/test/test_div.py 2012-09-27 09:25:46.000000000 +0000 +++ 
youtube-dl-2014.02.17/test/test_div.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,29 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Various small unit tests
-
-import os,sys
-sys.path.append(os.path.dirname(os.path.dirname(__file__)))
-
-import youtube_dl
-
-def test_simplify_title():
-    assert youtube_dl._simplify_title(u'abc') == u'abc'
-    assert youtube_dl._simplify_title(u'abc_d-e') == u'abc_d-e'
-
-    assert youtube_dl._simplify_title(u'123') == u'123'
-
-    assert u'/' not in youtube_dl._simplify_title(u'abc/de')
-    assert u'abc' in youtube_dl._simplify_title(u'abc/de')
-    assert u'de' in youtube_dl._simplify_title(u'abc/de')
-    assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
-
-    assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
-    assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
-    assert u'de' in youtube_dl._simplify_title(u'abc\\de')
-
-    assert youtube_dl._simplify_title(u'ä') == u'ä'
-    assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
-
-    # Strip underlines
-    assert youtube_dl._simplify_title(u'\'a_') == u'a'
diff -Nru youtube-dl-2012.09.27/test/test_download.py youtube-dl-2014.02.17/test/test_download.py
--- youtube-dl-2012.09.27/test/test_download.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/test/test_download.py 2014-02-06 03:18:11.000000000 +0000
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+    get_params,
+    get_testcases,
+    try_rm,
+    md5,
+    report_warning
+)
+
+
+import hashlib
+import io
+import json
+import socket
+
+import youtube_dl.YoutubeDL
+from youtube_dl.utils import (
+    compat_http_client,
+    compat_str,
+    compat_urllib_error,
+    compat_HTTPError,
+    DownloadError,
+    ExtractorError,
+    UnavailableVideoError,
+)
+from youtube_dl.extractor import get_info_extractor
+
+RETRIES = 3
+
+class YoutubeDL(youtube_dl.YoutubeDL):
+    def __init__(self, *args, **kwargs):
+        self.to_stderr = self.to_screen
+        self.processed_info_dicts = []
+        super(YoutubeDL, self).__init__(*args, **kwargs)
+    def report_warning(self, message):
+        # Don't accept warnings during tests
+        raise ExtractorError(message)
+    def process_info(self, info_dict):
+        self.processed_info_dicts.append(info_dict)
+        return super(YoutubeDL, self).process_info(info_dict)
+
+def _file_md5(fn):
+    with open(fn, 'rb') as f:
+        return hashlib.md5(f.read()).hexdigest()
+
+defs = get_testcases()
+
+
+class TestDownload(unittest.TestCase):
+    maxDiff = None
+    def setUp(self):
+        self.defs = defs
+
+### Dynamically generate tests
+def generator(test_case):
+
+    def test_template(self):
+        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+        def print_skipping(reason):
+            print('Skipping %s: %s' % (test_case['name'], reason))
+        if not ie.working():
+            print_skipping('IE marked as not _WORKING')
+            return
+        if 'playlist' not in test_case:
+            info_dict = test_case.get('info_dict', {})
+            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+                print_skipping('The output file cannot be known, the "file" '
+                    'key is missing or the info_dict is incomplete')
+                return
+        if 'skip' in test_case:
+            print_skipping(test_case['skip'])
+            return
+        for other_ie in other_ies:
+            if not other_ie.working():
+                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                return
+
+        params = 
get_params(test_case.get('params', {})) + + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + finished_hook_called = set() + def _hook(status): + if status['status'] == 'finished': + finished_hook_called.add(status['filename']) + ydl.add_progress_hook(_hook) + + def get_tc_filename(tc): + return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) + + test_cases = test_case.get('playlist', [test_case]) + def try_rm_tcs_files(): + for tc in test_cases: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(os.path.splitext(tc_filename)[0] + '.info.json') + try_rm_tcs_files() + try: + try_num = 1 + while True: + try: + ydl.download([test_case['url']]) + except (DownloadError, ExtractorError) as err: + # Check if the exception is not a network related one + if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): + raise + + if try_num == RETRIES: + report_warning(u'Failed due to network errors, skipping...') + return + + print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + + try_num += 1 + else: + break + + for tc in test_cases: + tc_filename = get_tc_filename(tc) + if not test_case.get('params', {}).get('skip_download', False): + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' + self.assertTrue(os.path.exists(info_json_fn)) + if 'md5' in tc: + md5_for_file = _file_md5(tc_filename) + self.assertEqual(md5_for_file, tc['md5']) + with io.open(info_json_fn, encoding='utf-8') as infof: + info_dict = json.load(infof) + for (info_field, expected) in tc.get('info_dict', {}).items(): + if isinstance(expected, compat_str) and expected.startswith('md5:'): + got = 'md5:' + md5(info_dict.get(info_field)) + else: + got = info_dict.get(info_field) + self.assertEqual(expected, got, + u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) + + # If checkable fields are missing from the test case, print the info_dict + test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) + for key, value in info_dict.items() + if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) + if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): + sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') + + # Check for the presence of mandatory fields + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(key in info_dict.keys() and info_dict[key]) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) + finally: + try_rm_tcs_files() + + return test_template + +### And add them to TestDownload +for n, test_case in enumerate(defs): + test_method = generator(test_case) + tname = 'test_' + str(test_case['name']) + i = 1 + while hasattr(TestDownload, tname): + tname = 'test_' + str(test_case['name']) + '_' + str(i) + i += 1 + test_method.__name__ = tname + setattr(TestDownload, test_method.__name__, test_method) + del test_method + + +if __name__ == '__main__': + unittest.main() diff -Nru 
youtube-dl-2012.09.27/test/test_execution.py youtube-dl-2014.02.17/test/test_execution.py --- youtube-dl-2012.09.27/test/test_execution.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_execution.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,26 @@ +import unittest + +import sys +import os +import subprocess + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +try: + _DEV_NULL = subprocess.DEVNULL +except AttributeError: + _DEV_NULL = open(os.devnull, 'wb') + +class TestExecution(unittest.TestCase): + def test_import(self): + subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) + + def test_module_exec(self): + if sys.version_info >= (2,7): # Python 2.6 doesn't support package execution + subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + + def test_main_exec(self): + subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_playlists.py youtube-dl-2014.02.17/test/test_playlists.py --- youtube-dl-2012.09.27/test/test_playlists.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_playlists.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL + + +from youtube_dl.extractor import ( + AcademicEarthCourseIE, + DailymotionPlaylistIE, + DailymotionUserIE, + VimeoChannelIE, + VimeoUserIE, + VimeoAlbumIE, + VimeoGroupsIE, + UstreamChannelIE, + SoundcloudSetIE, + SoundcloudUserIE, + LivestreamIE, + NHLVideocenterIE, + BambuserChannelIE, + BandcampAlbumIE, + SmotriCommunityIE, + SmotriUserIE, + IviCompilationIE, + ImdbListIE, + KhanAcademyIE, + EveryonesMixtapeIE, + RutubeChannelIE, + GoogleSearchIE, + GenericIE, +) + + +class TestPlaylists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_dailymotion_playlist(self): + dl = FakeYDL() + ie = DailymotionPlaylistIE(dl) + result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'SPORT') + self.assertTrue(len(result['entries']) > 20) + + def test_dailymotion_user(self): + dl = FakeYDL() + ie = DailymotionUserIE(dl) + result = ie.extract('https://www.dailymotion.com/user/nqtv') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Rémi Gaillard') + self.assertTrue(len(result['entries']) >= 100) + + def test_vimeo_channel(self): + dl = FakeYDL() + ie = VimeoChannelIE(dl) + result = ie.extract('http://vimeo.com/channels/tributes') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Vimeo Tributes') + self.assertTrue(len(result['entries']) > 24) + + def test_vimeo_user(self): + dl = FakeYDL() + ie = VimeoUserIE(dl) + result = ie.extract('http://vimeo.com/nkistudio/videos') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Nki') + self.assertTrue(len(result['entries']) > 65) + + def test_vimeo_album(self): + dl = FakeYDL() + ie = VimeoAlbumIE(dl) + result = ie.extract('http://vimeo.com/album/2632481') + 
self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Staff Favorites: November 2013') + self.assertTrue(len(result['entries']) > 12) + + def test_vimeo_groups(self): + dl = FakeYDL() + ie = VimeoGroupsIE(dl) + result = ie.extract('http://vimeo.com/groups/rolexawards') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Rolex Awards for Enterprise') + self.assertTrue(len(result['entries']) > 72) + + def test_ustream_channel(self): + dl = FakeYDL() + ie = UstreamChannelIE(dl) + result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '5124905') + self.assertTrue(len(result['entries']) >= 11) + + def test_soundcloud_set(self): + dl = FakeYDL() + ie = SoundcloudSetIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'The Royal Concept EP') + self.assertTrue(len(result['entries']) >= 6) + + def test_soundcloud_user(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 12) + + def test_livestream_event(self): + dl = FakeYDL() + ie = LivestreamIE(dl) + result = ie.extract('http://new.livestream.com/tedx/cityenglish') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'TEDCity2.0 (English)') + self.assertTrue(len(result['entries']) >= 4) + + def test_nhl_videocenter(self): + dl = FakeYDL() + ie = NHLVideocenterIE(dl) + result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '999') + self.assertEqual(result['title'], 'Highlights') + self.assertEqual(len(result['entries']), 12) + + def test_bambuser_channel(self): + dl = FakeYDL() + ie = BambuserChannelIE(dl) + result = ie.extract('http://bambuser.com/channel/pixelversity') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'pixelversity') + self.assertTrue(len(result['entries']) >= 60) + + def test_bandcamp_album(self): + dl = FakeYDL() + ie = BandcampAlbumIE(dl) + result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Nightmare Night EP') + self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_community(self): + dl = FakeYDL() + ie = SmotriCommunityIE(dl) + result = ie.extract('http://smotri.com/community/video/kommuna') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'kommuna') + self.assertEqual(result['title'], 'КПРФ') + self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_user(self): + dl = FakeYDL() + ie = SmotriUserIE(dl) + result = ie.extract('http://smotri.com/user/inspector') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'inspector') + self.assertEqual(result['title'], 'Inspector') + self.assertTrue(len(result['entries']) >= 9) + + def test_AcademicEarthCourse(self): + dl = FakeYDL() + ie = AcademicEarthCourseIE(dl) + result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'building-dynamic-websites') + self.assertEqual(result['title'], 'Building Dynamic Websites') + self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. 
Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") + self.assertEqual(len(result['entries']), 10) + + def test_ivi_compilation(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'dezhurnyi_angel') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') + self.assertTrue(len(result['entries']) >= 36) + + def test_ivi_compilation_season(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'dezhurnyi_angel/season2') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') + self.assertTrue(len(result['entries']) >= 20) + + def test_imdb_list(self): + dl = FakeYDL() + ie = ImdbListIE(dl) + result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'JFs9NWw6XI0') + self.assertEqual(result['title'], 'March 23, 2012 Releases') + self.assertEqual(len(result['entries']), 7) + + def test_khanacademy_topic(self): + dl = FakeYDL() + ie = KhanAcademyIE(dl) + result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'cryptography') + self.assertEqual(result['title'], 'Journey into cryptography') + self.assertEqual(result['description'], 'How have humans protected their secret messages through history? 
What has changed today?') + self.assertTrue(len(result['entries']) >= 3) + + def test_EveryonesMixtape(self): + dl = FakeYDL() + ie = EveryonesMixtapeIE(dl) + result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'm7m0jJAbMQi') + self.assertEqual(result['title'], 'Driving') + self.assertEqual(len(result['entries']), 24) + + def test_rutube_channel(self): + dl = FakeYDL() + ie = RutubeChannelIE(dl) + result = ie.extract('http://rutube.ru/tags/video/1409') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '1409') + self.assertTrue(len(result['entries']) >= 34) + + def test_multiple_brightcove_videos(self): + # https://github.com/rg3/youtube-dl/issues/2283 + dl = FakeYDL() + ie = GenericIE(dl) + result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') + self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') + self.assertEqual(len(result['entries']), 3) + + def test_GoogleSearch(self): + dl = FakeYDL() + ie = GoogleSearchIE(dl) + result = ie.extract('gvsearch15:python language') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'python language') + self.assertEqual(result['title'], 'python language') + self.assertTrue(len(result['entries']) == 15) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_subtitles.py youtube-dl-2014.02.17/test/test_subtitles.py --- youtube-dl-2012.09.27/test/test_subtitles.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_subtitles.py 2014-02-03 14:18:07.000000000 +0000 @@ -0,0 +1,283 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, md5 + + +from youtube_dl.extractor import ( + BlipTVIE, + YoutubeIE, + DailymotionIE, + TEDIE, + VimeoIE, +) + + +class BaseTestSubtitles(unittest.TestCase): + url = None + IE = None + def setUp(self): + self.DL = FakeYDL() + self.ie = self.IE(self.DL) + + def getInfoDict(self): + info_dict = self.ie.extract(self.url) + return info_dict + + def getSubtitles(self): + info_dict = self.getInfoDict() + return info_dict['subtitles'] + + +class TestYoutubeSubtitles(BaseTestSubtitles): + url = 'QRS8MkLhQmM' + IE = YoutubeIE + + def test_youtube_no_writesubtitles(self): + self.DL.params['writesubtitles'] = False + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_youtube_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') + + def test_youtube_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['it'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') + + def test_youtube_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 13) + + def test_youtube_subtitles_sbv_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'sbv' + subtitles = self.getSubtitles() + 
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') + + def test_youtube_subtitles_vtt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'vtt' + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') + + def test_youtube_list_subtitles(self): + self.DL.expect_warning(u'Video doesn\'t have automatic captions') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_youtube_automatic_captions(self): + self.url = '8YoUxe5ncPo' + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslangs'] = ['it'] + subtitles = self.getSubtitles() + self.assertTrue(subtitles['it'] is not None) + + def test_youtube_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'sAjKT8FhjI8' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_youtube_multiple_langs(self): + self.url = 'QRS8MkLhQmM' + self.DL.params['writesubtitles'] = True + langs = ['it', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestDailymotionSubtitles(BaseTestSubtitles): + url = 'http://www.dailymotion.com/video/xczg00' + IE = DailymotionIE + + def test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 5) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + + def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestTedSubtitles(BaseTestSubtitles): + url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' + IE = TEDIE + + def 
test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 28) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestBlipTVSubtitles(BaseTestSubtitles): + url = 'http://blip.tv/a/a-6603250' + IE = BlipTVIE + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_allsubtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4') + + +class TestVimeoSubtitles(BaseTestSubtitles): + url = 'http://vimeo.com/76979871' + IE = VimeoIE + + def test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + 
+ def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'http://vimeo.com/56015672' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_unicode_literals.py youtube-dl-2014.02.17/test/test_unicode_literals.py --- youtube-dl-2012.09.27/test/test_unicode_literals.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_unicode_literals.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,47 @@ +from __future__ import unicode_literals + +import io +import os +import re +import unittest + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +IGNORED_FILES = [ + 'setup.py', # http://bugs.python.org/issue13943 +] + + +class TestUnicodeLiterals(unittest.TestCase): + def test_all_files(self): + print('Skipping this test (not yet fully implemented)') + return + + for dirpath, _, filenames in os.walk(rootDir): + for basename in filenames: + if not basename.endswith('.py'): + continue + if basename in IGNORED_FILES: + continue + + fn = os.path.join(dirpath, basename) + with io.open(fn, encoding='utf-8') as inf: + code = inf.read() + + if "'" not in code and '"' not in code: + continue + imps = 'from __future__ import unicode_literals' + self.assertTrue( + imps in code, + ' %s missing in %s' % (imps, fn)) + + m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code) + if m is not None: + self.assertTrue( + m is None, + 'u present in %s, around %s' % ( + fn, code[m.start() - 10:m.end() + 10])) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_utils.py youtube-dl-2014.02.17/test/test_utils.py --- youtube-dl-2012.09.27/test/test_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_utils.py 2014-02-17 10:22:30.000000000 +0000 @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# Various small unit tests +import xml.etree.ElementTree + +#from youtube_dl.utils import htmlentity_transform +from youtube_dl.utils import ( + DateRange, + encodeFilename, + find_xpath_attr, + fix_xml_ampersands, + get_meta_content, + orderedSet, + PagedList, + parse_duration, + sanitize_filename, + shell_quote, + smuggle_url, + str_to_int, + struct_unpack, + timeconvert, + unescapeHTML, + unified_strdate, + unsmuggle_url, + url_basename, + xpath_with_ns, +) + +if sys.version_info < (3, 0): + _compat_str = lambda b: b.decode('unicode-escape') +else: + _compat_str = lambda s: s + + +class TestUtil(unittest.TestCase): + def test_timeconvert(self): + self.assertTrue(timeconvert('') is None) + self.assertTrue(timeconvert('bougrg') is None) + + def test_sanitize_filename(self): + self.assertEqual(sanitize_filename('abc'), 'abc') + self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') + + self.assertEqual(sanitize_filename('123'), '123') + + self.assertEqual('abc_de', sanitize_filename('abc/de')) + self.assertFalse('/' in sanitize_filename('abc/de///')) 
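+ # Editorial aside, not in the diff: the assertions around this point pin
+ # down default-mode sanitize_filename(). Path separators map to '_', runs
+ # of '_' are merged, and leading/trailing underscores are stripped, so
+ # e.g. 'abc/de///' is expected to come out as 'abc_de'.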
+ + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de')) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|')) + self.assertEqual('yes no', sanitize_filename('yes? no')) + self.assertEqual('this - that', sanitize_filename('this: that')) + + self.assertEqual(sanitize_filename('AT&T'), 'AT&T') + aumlaut = _compat_str('\xe4') + self.assertEqual(sanitize_filename(aumlaut), aumlaut) + tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430') + self.assertEqual(sanitize_filename(tests), tests) + + forbidden = '"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename('abc', restricted=True), 'abc') + self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e') + + self.assertEqual(sanitize_filename('123', restricted=True), '123') + + self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True)) + self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True)) + + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True)) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True)) + self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) + self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) + + tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c') + self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') + self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename + + forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) + + # Handle a common case more neatly + self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song') + self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech') + # .. 
but make sure the file name is never empty + self.assertTrue(sanitize_filename('-', restricted=True) != '') + self.assertTrue(sanitize_filename(':', restricted=True) != '') + + def test_sanitize_ids(self): + self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') + self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') + self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + + def test_ordered_set(self): + self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) + self.assertEqual(orderedSet([]), []) + self.assertEqual(orderedSet([1]), [1]) + #keep the list ordered + self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) + + def test_unescape_html(self): + self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) + + def test_daterange(self): + _20century = DateRange("19000101","20000101") + self.assertFalse("17890714" in _20century) + _ac = DateRange("00010101") + self.assertTrue("19690721" in _ac) + _firstmilenium = DateRange(end="10000101") + self.assertTrue("07110427" in _firstmilenium) + + def test_unified_dates(self): + self.assertEqual(unified_strdate('December 21, 2010'), '20101221') + self.assertEqual(unified_strdate('8/7/2009'), '20090708') + self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') + self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') + self.assertEqual(unified_strdate('1968-12-10'), '19681210') + + def test_find_xpath_attr(self): + testxml = u''' + + + + + ''' + doc = xml.etree.ElementTree.fromstring(testxml) + + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + + def test_meta_parser(self): + testhtml = u''' + + + + + ''' + get_meta = lambda name: get_meta_content(name, testhtml) + self.assertEqual(get_meta('description'), u'foo & bar') + self.assertEqual(get_meta('author'), 'Plato') + + def test_xpath_with_ns(self): + testxml = u''' + + The Author + http://server.com/download.mp3 + + ''' + doc = xml.etree.ElementTree.fromstring(testxml) + find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) + self.assertTrue(find('media:song') is not None) + self.assertEqual(find('media:song/media:author').text, u'The Author') + self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + def test_shell_quote(self): + args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] + self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") + + def test_str_to_int(self): + self.assertEqual(str_to_int('123,456'), 123456) + self.assertEqual(str_to_int('123.456'), 123456) + + def test_url_basename(self): + self.assertEqual(url_basename(u'http://foo.de/'), u'') + self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz') + self.assertEqual( + 
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), + u'trailer.mp4') + + def test_parse_duration(self): + self.assertEqual(parse_duration(None), None) + self.assertEqual(parse_duration('1'), 1) + self.assertEqual(parse_duration('1337:12'), 80232) + self.assertEqual(parse_duration('9:12:43'), 33163) + self.assertEqual(parse_duration('12:00'), 720) + self.assertEqual(parse_duration('00:01:01'), 61) + self.assertEqual(parse_duration('x:y'), None) + self.assertEqual(parse_duration('3h11m53s'), 11513) + self.assertEqual(parse_duration('62m45s'), 3765) + self.assertEqual(parse_duration('6m59s'), 419) + self.assertEqual(parse_duration('49s'), 49) + self.assertEqual(parse_duration('0h0m0s'), 0) + self.assertEqual(parse_duration('0m0s'), 0) + self.assertEqual(parse_duration('0s'), 0) + + def test_fix_xml_ampersands(self): + self.assertEqual( + fix_xml_ampersands('"&x=y&z=a'), '"&x=y&z=a') + self.assertEqual( + fix_xml_ampersands('"&x=y&wrong;&z=a'), + '"&x=y&wrong;&z=a') + self.assertEqual( + fix_xml_ampersands('&'><"'), + '&'><"') + self.assertEqual( + fix_xml_ampersands('Ӓ᪼'), 'Ӓ᪼') + self.assertEqual(fix_xml_ampersands('&#&#'), '&#&#') + + def test_paged_list(self): + def testPL(size, pagesize, sliceargs, expected): + def get_page(pagenum): + firstid = pagenum * pagesize + upto = min(size, pagenum * pagesize + pagesize) + for i in range(firstid, upto): + yield i + + pl = PagedList(get_page, pagesize) + got = pl.getslice(*sliceargs) + self.assertEqual(got, expected) + + testPL(5, 2, (), [0, 1, 2, 3, 4]) + testPL(5, 2, (1,), [1, 2, 3, 4]) + testPL(5, 2, (2,), [2, 3, 4]) + testPL(5, 2, (4,), [4]) + testPL(5, 2, (0, 3), [0, 1, 2]) + testPL(5, 2, (1, 4), [1, 2, 3]) + testPL(5, 2, (2, 99), [2, 3, 4]) + testPL(5, 2, (20, 99), []) + + def test_struct_unpack(self): + self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_write_annotations.py youtube-dl-2014.02.17/test/test_write_annotations.py --- youtube-dl-2012.09.27/test/test_write_annotations.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_write_annotations.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params, try_rm + + +import io + +import xml.etree.ElementTree + +import youtube_dl.YoutubeDL +import youtube_dl.extractor + + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + +class TestAnnotations(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + + def test_info_json(self): + expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text. 
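+ # Note (editorial, not in the diff): the test drives the pipeline by hand
+ # instead of going through main(): it instantiates the YouTube extractor,
+ # registers it on the YoutubeDL subclass above, and downloads by bare
+ # video id, which YoutubeIE accepts alongside full URLs.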
+ ie = youtube_dl.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) + annoxml = None + with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: + annoxml = xml.etree.ElementTree.parse(annof) + self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') + root = annoxml.getroot() + self.assertEqual(root.tag, 'document') + annotationsTag = root.find('annotations') + self.assertEqual(annotationsTag.tag, 'annotations') + annotations = annotationsTag.findall('annotation') + + #Not all the annotations have TEXT children and the annotations are returned unsorted. + for a in annotations: + self.assertEqual(a.tag, 'annotation') + if a.get('type') == 'text': + textTag = a.find('TEXT') + text = textTag.text + self.assertTrue(text in expected) #assertIn only added in Python 2.7 + #remove the first occurrence; there could be more than one annotation with the same text + expected.remove(text) + #We should have seen (and removed) all the expected annotation texts. + self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') + + + def tearDown(self): + try_rm(ANNOTATIONS_FILE) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_write_info_json.py youtube-dl-2014.02.17/test/test_write_info_json.py --- youtube-dl-2012.09.27/test/test_write_info_json.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_write_info_json.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params + + +import io +import json + +import youtube_dl.YoutubeDL +import youtube_dl.extractor + + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + +params = get_params({ + 'writeinfojson': True, + 'skip_download': True, + 'writedescription': True, +}) + + +TEST_ID = 'BaW_jenozKc' +INFO_JSON_FILE = TEST_ID + '.info.json' +DESCRIPTION_FILE = TEST_ID + '.mp4.description' +EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 +test URL: https://github.com/rg3/youtube-dl/issues/1892 + +This is a test video for youtube-dl.
+ +For more information, contact phihag@phihag.de .''' + + +class TestInfoJSON(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + def test_info_json(self): + ie = youtube_dl.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(INFO_JSON_FILE)) + with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf: + jd = json.load(jsonf) + self.assertEqual(jd['upload_date'], u'20121002') + self.assertEqual(jd['description'], EXPECTED_DESCRIPTION) + self.assertEqual(jd['id'], TEST_ID) + self.assertEqual(jd['extractor'], 'youtube') + self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''') + self.assertEqual(jd['uploader'], 'Philipp Hagemeister') + + self.assertTrue(os.path.exists(DESCRIPTION_FILE)) + with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf: + descr = descf.read() + self.assertEqual(descr, EXPECTED_DESCRIPTION) + + def tearDown(self): + if os.path.exists(INFO_JSON_FILE): + os.remove(INFO_JSON_FILE) + if os.path.exists(DESCRIPTION_FILE): + os.remove(DESCRIPTION_FILE) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_YoutubeDL.py youtube-dl-2014.02.17/test/test_YoutubeDL.py --- youtube-dl-2012.09.27/test/test_YoutubeDL.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_YoutubeDL.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,249 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL +from youtube_dl import YoutubeDL +from youtube_dl.extractor import YoutubeIE + + +class YDL(FakeYDL): + def __init__(self, *args, **kwargs): + super(YDL, self).__init__(*args, **kwargs) + self.downloaded_info_dicts = [] + self.msgs = [] + + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + + def to_screen(self, msg): + self.msgs.append(msg) + + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 460}, + {'ext': 'mp4', 'height': 460}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 720}, + {'ext': 'mp4', 'height': 1080}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + # No prefer_free_formats => prefer mp4 and flv for greater compatibility + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'webm', 'height': 720}, + {'ext': 'mp4', 'height': 720}, + {'ext': 'flv', 'height': 720}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + ydl = YDL() + ydl.params['prefer_free_formats'] = False +
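# Fourth case (editorial annotation, not in the diff): with the
+ # free-format preference off, the flv container is expected to outrank
+ # webm at equal height, mirroring the mp4 case just above.
+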
formats = [ + {'ext': 'flv', 'height': 720}, + {'ext': 'webm', 'height': 720}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'flv') + + def test_format_limit(self): + formats = [ + {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1}, + {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2}, + {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, + {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, + ] + info_dict = { + 'formats': formats, 'extractor': 'test', 'id': 'testvid'} + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'excellent') + + ydl = YDL({'format_limit': 'good'}) + assert ydl.params['format_limit'] == 'good' + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'good') + + ydl = YDL({'format_limit': 'great', 'format': 'all'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh') + self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good') + self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great') + self.assertTrue('3' in ydl.msgs[0]) + + ydl = YDL() + ydl.params['format_limit'] = 'excellent' + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'excellent') + + def test_format_selection(self): + formats = [ + {'format_id': '35', 'ext': 'mp4', 'preference': 1}, + {'format_id': '45', 'ext': 'webm', 'preference': 2}, + {'format_id': '47', 'ext': 'webm', 'preference': 3}, + {'format_id': '2', 'ext': 'flv', 'preference': 4}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': '20/47'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '20/71/worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + ydl = YDL() + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '2') + + ydl = YDL({'format': 'webm/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '3gp/40/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + def test_format_selection_audio(self): + formats = [ + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': 'bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-high') + + ydl = YDL({'format': 'worstaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = 
ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-low') + + formats = [ + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': 'bestaudio/worstaudio/best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-high') + + def test_youtube_format_selection(self): + order = [ + '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', + # Apple HTTP Live Streaming + '96', '95', '94', '93', '92', '132', '151', + # 3D + '85', '84', '102', '83', '101', '82', '100', + # Dash video + '138', '137', '248', '136', '247', '135', '246', + '245', '244', '134', '243', '133', '242', '160', + # Dash audio + '141', '172', '140', '139', '171', + ] + + for f1id, f2id in zip(order, order[1:]): + f1 = YoutubeIE._formats[f1id].copy() + f1['format_id'] = f1id + f2 = YoutubeIE._formats[f2id].copy() + f2['format_id'] = f2id + + info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1id) + + info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1id) + + def test_add_extra_info(self): + test_dict = { + 'extractor': 'Foo', + } + extra_info = { + 'extractor': 'Bar', + 'playlist': 'funny videos', + } + YDL.add_extra_info(test_dict, extra_info) + self.assertEqual(test_dict['extractor'], 'Foo') + self.assertEqual(test_dict['playlist'], 'funny videos') + + def test_prepare_filename(self): + info = { + 'id': '1234', + 'ext': 'mp4', + 'width': None, + } + def fname(templ): + ydl = YoutubeDL({'outtmpl': templ}) + return ydl.prepare_filename(info) + self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4') + self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') + # Replace missing fields with 'NA' + self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_youtube_lists.py youtube-dl-2014.02.17/test/test_youtube_lists.py --- youtube-dl-2012.09.27/test/test_youtube_lists.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_youtube_lists.py 2014-02-08 18:00:32.000000000 +0000 @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL + + +from youtube_dl.extractor import ( + YoutubeUserIE, + YoutubePlaylistIE, + YoutubeIE, + YoutubeChannelIE, + YoutubeShowIE, + YoutubeTopListIE, +) + + +class TestYoutubeLists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_youtube_playlist(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'ytdl test PL') + 
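# Editorial note, not in the diff: each playlist entry is a url-type
+ # info dict, so extract_id() below maps every entry URL back to its
+ # 11-character video id before membership and order are asserted.
+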
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] + self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) + + def test_youtube_playlist_noplaylist(self): + dl = FakeYDL() + dl.params['noplaylist'] = True + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + self.assertEqual(result['_type'], 'url') + self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') + + def test_issue_673(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('PLBB231211A4F62143') + self.assertTrue(len(result['entries']) > 25) + + def test_youtube_playlist_long(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + self.assertIsPlaylist(result) + self.assertTrue(len(result['entries']) >= 799) + + def test_youtube_playlist_with_deleted(self): + #651 + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] + self.assertFalse('pElCt5oNDuI' in ytie_results) + self.assertFalse('KdPEApIVdWM' in ytie_results) + + def test_youtube_playlist_empty(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') + self.assertIsPlaylist(result) + self.assertEqual(len(result['entries']), 0) + + def test_youtube_course(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + # TODO find a > 100 (paginating?) videos course + result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + entries = result['entries'] + self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(len(entries), 25) + self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') + + def test_youtube_channel(self): + dl = FakeYDL() + ie = YoutubeChannelIE(dl) + #test paginated channel + result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') + self.assertTrue(len(result['entries']) > 90) + #test autogenerated channel + result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + self.assertTrue(len(result['entries']) >= 18) + + def test_youtube_user(self): + dl = FakeYDL() + ie = YoutubeUserIE(dl) + result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') + self.assertTrue(len(result['entries']) >= 320) + + def test_youtube_safe_search(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') + self.assertEqual(len(result['entries']), 2) + + def test_youtube_show(self): + dl = FakeYDL() + ie = YoutubeShowIE(dl) + result = ie.extract('http://www.youtube.com/show/airdisasters') + self.assertTrue(len(result) >= 3) + + def test_youtube_mix(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y') + entries = result['entries'] + self.assertTrue(len(entries) >= 20) + original_video = entries[0] + self.assertEqual(original_video['id'], 'rjFaenf1T-Y') + + def test_youtube_toptracks(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=MCUS') + entries = result['entries'] + self.assertEqual(len(entries), 100) + + def 
test_youtube_toplist(self): + dl = FakeYDL() + ie = YoutubeTopListIE(dl) + result = ie.extract('yttoplist:music:Trending') + entries = result['entries'] + self.assertTrue(len(entries) >= 5) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_youtube_signature.py youtube-dl-2014.02.17/test/test_youtube_signature.py --- youtube-dl-2012.09.27/test/test_youtube_signature.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_youtube_signature.py 2014-02-04 21:37:49.000000000 +0000 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import io +import re +import string + +from youtube_dl.extractor import YoutubeIE +from youtube_dl.utils import compat_str, compat_urlretrieve + +_TESTS = [ + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + u'js', + 86, + u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + u'js', + 85, + u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', + u'js', + 90, + u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', + ), +] + + +class TestSignature(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + if not os.path.exists(self.TESTDATA_DIR): + os.mkdir(self.TESTDATA_DIR) + + +def make_tfunc(url, stype, sig_length, expected_sig): + basename = url.rpartition('/')[2] + m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) + assert m, '%r should follow URL format' % basename + test_id = m.group(1) + + def test_func(self): + fn = os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + compat_urlretrieve(url, fn) + + ie = YoutubeIE() + if stype == 'js': + with io.open(fn, encoding='utf-8') as testf: + jscode = testf.read() + func = ie._parse_sig_js(jscode) + else: + assert stype == 'swf' + with open(fn, 'rb') as testf: + swfcode = testf.read() + func = ie._parse_sig_swf(swfcode) + src_sig = compat_str(string.printable[:sig_length]) + got_sig = func(src_sig) + self.assertEqual(got_sig, expected_sig) + + test_func.__name__ = str('test_signature_' + stype + '_' + test_id) + setattr(TestSignature, test_func.__name__, test_func) + +for test_spec in _TESTS: + make_tfunc(*test_spec) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/youtube_dl/aes.py youtube-dl-2014.02.17/youtube_dl/aes.py --- youtube-dl-2012.09.27/youtube_dl/aes.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/aes.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,310 @@ +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] + +import base64 +from math import ceil + +from .utils import bytes_to_intlist, intlist_to_bytes + +BLOCK_SIZE_BYTES = 16 + +def aes_ctr_decrypt(data, key, counter): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block) + returns the next counter block + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = 
int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + for i in range(block_count): + counter_block = counter.next_value() + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + decrypted_data += xor(block, cipher_counter_block) + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def aes_cbc_decrypt(data, key, iv): + """ + Decrypt with aes in CBC mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + previous_cipher_block = iv + for i in range(block_count): + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + decrypted_block = aes_decrypt(block, expanded_key) + decrypted_data += xor(decrypted_block, previous_cipher_block) + previous_cipher_block = block + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds+1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = mix_columns(data) + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt(data, expanded_key): + """ + Decrypt one block with aes + + @param {int[]} data 16-Byte cipher + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte state + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + for i in range(rounds, 0, -1): + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + if i != rounds: + data = mix_columns_inv(data) + data = shift_rows_inv(data) + data = sub_bytes_inv(data) + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 
0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode('utf-8')) + + key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + class Counter: + __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + + decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) + plaintext = intlist_to_bytes(decrypted_data) + + return plaintext + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 
0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) +MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1), + (0x1,0x2,0x3,0x1), + (0x1,0x1,0x2,0x3), + (0x3,0x1,0x1,0x2)) +MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9), + (0x9,0xE,0xB,0xD), + (0xD,0x9,0xE,0xB), + (0xB,0xD,0x9,0xE)) +RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, + 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, + 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, + 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, + 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, + 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, + 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, + 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, + 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, + 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, + 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, + 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, + 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, + 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, + 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, + 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) +RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, + 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, + 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, + 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, + 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, + 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, + 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, + 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, + 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, + 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, + 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 
0xb7, + 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, + 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, + 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, + 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, + 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) + +def sub_bytes(data): + return [SBOX[x] for x in data] + +def sub_bytes_inv(data): + return [SBOX_INV[x] for x in data] + +def rotate(data): + return data[1:] + [data[0]] + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + +def xor(data1, data2): + return [x^y for x, y in zip(data1, data2)] + +def rijndael_mul(a, b): + if(a==0 or b==0): + return 0 + return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] + +def mix_column(data, matrix): + data_mixed = [] + for row in range(4): + mixed = 0 + for column in range(4): + # xor is (+) and (-) + mixed ^= rijndael_mul(data[column], matrix[row][column]) + data_mixed.append(mixed) + return data_mixed + +def mix_columns(data, matrix=MIX_COLUMN_MATRIX): + data_mixed = [] + for i in range(4): + column = data[i*4 : (i+1)*4] + data_mixed += mix_column(column, matrix) + return data_mixed + +def mix_columns_inv(data): + return mix_columns(data, MIX_COLUMN_MATRIX_INV) + +def shift_rows(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column + row) & 0b11) * 4 + row] ) + return data_shifted + +def shift_rows_inv(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column - row) & 0b11) * 4 + row] ) + return data_shifted + +def inc(data): + data = data[:] # copy + for i in range(len(data)-1,-1,-1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = data[i] + 1 + break + return data diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/common.py youtube-dl-2014.02.17/youtube_dl/downloader/common.py --- youtube-dl-2012.09.27/youtube_dl/downloader/common.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/common.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,316 @@ +import os +import re +import sys +import time + +from ..utils import ( + encodeFilename, + timeconvert, + format_bytes, +) + + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. + + Available options: + + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. 
+ test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + + Subclasses of this one must re-define the real_download method. + """ + + params = None + + def __init__(self, ydl, params): + """Create a FileDownloader object with the given options.""" + self.ydl = ydl + self._progress_hooks = [] + self.params = params + + @staticmethod + def format_seconds(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + if hours > 99: + return '--:--:--' + if hours == 0: + return '%02d:%02d' % (mins, secs) + else: + return '%02d:%02d:%02d' % (hours, mins, secs) + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % percent) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return None + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return None + rate = float(current) / dif + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: + return '--:--' + return FileDownloader.format_seconds(eta) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % format_bytes(speed)) + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into an integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return int(round(number * multiplier)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, **kargs) + + def to_stderr(self, message): + self.ydl.to_screen(message) + + def to_console_title(self, message): + self.ydl.to_console_title(message) + + def trouble(self, *args, **kargs): + self.ydl.trouble(*args, **kargs) + + def report_warning(self, *args, **kargs): + self.ydl.report_warning(*args, **kargs) + + def report_error(self, *args, **kargs): + self.ydl.report_error(*args, **kargs) + + def slow_down(self, start_time, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit', None) + if rate_limit is None or byte_counter == 0: + return + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == u'-' or \ + (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + return filename + 
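# Editorial note, not in the diff: everything else is written to
+ # '<filename>.part' first and renamed into place by try_rename() once
+ # the download completes, so an interrupted transfer never leaves a
+ # truncated file under the final name.
+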
return filename + u'.part' + + def undo_temp_name(self, filename): + if filename.endswith(u'.part'): + return filename[:-len(u'.part')] + return filename + + def try_rename(self, old_filename, new_filename): + try: + if old_filename == new_filename: + return + os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) + except (IOError, OSError) as err: + self.report_error(u'unable to rename file: %s' % str(err)) + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(encodeFilename(filename)): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return filetime + # Ignore obviously invalid dates + if filetime == 0: + return + try: + os.utime(filename, (time.time(), filetime)) + except: + pass + return filetime + + def report_destination(self, filename): + """Report destination filename.""" + self.to_screen(u'[download] Destination: ' + filename) + + def _report_progress_status(self, msg, is_last_line=False): + fullmsg = u'[download] ' + msg + if self.params.get('progress_with_newline', False): + self.to_screen(fullmsg) + else: + if os.name == 'nt': + prev_len = getattr(self, '_report_progress_prev_line_length', + 0) + if prev_len > len(fullmsg): + fullmsg += u' ' * (prev_len - len(fullmsg)) + self._report_progress_prev_line_length = len(fullmsg) + clear_line = u'\r' + else: + clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r') + self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) + self.to_console_title(u'youtube-dl ' + msg) + + def report_progress(self, percent, data_len_str, speed, eta): + """Report download progress.""" + if self.params.get('noprogress', False): + return + if eta is not None: + eta_str = self.format_eta(eta) + else: + eta_str = 'Unknown ETA' + if percent is not None: + percent_str = self.format_percent(percent) + else: + percent_str = 'Unknown %' + speed_str = self.format_speed(speed) + + msg = (u'%s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + self._report_progress_status(msg) + + def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): + if self.params.get('noprogress', False): + return + downloaded_str = format_bytes(downloaded_data_len) + speed_str = self.format_speed(speed) + elapsed_str = FileDownloader.format_seconds(elapsed) + msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) + self._report_progress_status(msg) + + def report_finish(self, data_len_str, tot_time): + """Report download finished.""" + if self.params.get('noprogress', False): + self.to_screen(u'[download] Download completed') + else: + self._report_progress_status( + (u'100%% of %s in %s' % + (data_len_str, self.format_seconds(tot_time))), + is_last_line=True) + + def report_resuming_byte(self, resume_len): + """Report attempt to resume at given byte.""" + self.to_screen(u'[download] Resuming download at byte %s' % resume_len) + + def report_retry(self, count, retries): + """Report retry in case of HTTP error 5xx""" + self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' 
% (count, retries)) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + try: + self.to_screen(u'[download] %s has already been downloaded' % file_name) + except UnicodeEncodeError: + self.to_screen(u'[download] The file has already been downloaded') + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_screen(u'[download] Unable to resume') + + def download(self, filename, info_dict): + """Download to a filename using the info from info_dict + Return True on success and False otherwise + """ + # Check file already present + if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) + return True + + return self.real_download(filename, info_dict) + + def real_download(self, filename, info_dict): + """Real download process. Redefine in subclasses.""" + raise NotImplementedError(u'This method must be implemented by subclasses') + + def _hook_progress(self, status): + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + """ ph gets called on download progress, with a dictionary with the entries + * filename: The final filename + * status: One of "downloading" and "finished" + + It can also have some of the following entries: + + * downloaded_bytes: Bytes on disk + * total_bytes: Total bytes, None if unknown + * tmpfilename: The filename we're currently writing to + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if unknown + + Hooks are guaranteed to be called at least once (with status "finished") + if the download is successful.
+ """ + self._progress_hooks.append(ph) diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/f4m.py youtube-dl-2014.02.17/youtube_dl/downloader/f4m.py --- youtube-dl-2012.09.27/youtube_dl/downloader/f4m.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/f4m.py 2014-02-15 19:45:12.000000000 +0000 @@ -0,0 +1,315 @@ +from __future__ import unicode_literals + +import base64 +import io +import itertools +import os +import time +import xml.etree.ElementTree as etree + +from .common import FileDownloader +from .http import HttpFD +from ..utils import ( + struct_pack, + struct_unpack, + compat_urllib_request, + compat_urlparse, + format_bytes, + encodeFilename, + sanitize_open, +) + + +class FlvReader(io.BytesIO): + """ + Reader for Flv files + The file format is documented in https://www.adobe.com/devnet/f4v.html + """ + + # Utility functions for reading numbers and strings + def read_unsigned_long_long(self): + return struct_unpack('!Q', self.read(8))[0] + + def read_unsigned_int(self): + return struct_unpack('!I', self.read(4))[0] + + def read_unsigned_char(self): + return struct_unpack('!B', self.read(1))[0] + + def read_string(self): + res = b'' + while True: + char = self.read(1) + if char == b'\x00': + break + res += char + return res + + def read_box_info(self): + """ + Read a box and return the info as a tuple: (box_size, box_type, box_data) + """ + real_size = size = self.read_unsigned_int() + box_type = self.read(4) + header_end = 8 + if size == 1: + real_size = self.read_unsigned_long_long() + header_end = 16 + return real_size, box_type, self.read(real_size-header_end) + + def read_asrt(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + quality_entry_count = self.read_unsigned_char() + # QualityEntryCount + for i in range(quality_entry_count): + self.read_string() + + segment_run_count = self.read_unsigned_int() + segments = [] + for i in range(segment_run_count): + first_segment = self.read_unsigned_int() + fragments_per_segment = self.read_unsigned_int() + segments.append((first_segment, fragments_per_segment)) + + return { + 'segment_run': segments, + } + + def read_afrt(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + # time scale + self.read_unsigned_int() + + quality_entry_count = self.read_unsigned_char() + # QualitySegmentUrlModifiers + for i in range(quality_entry_count): + self.read_string() + + fragments_count = self.read_unsigned_int() + fragments = [] + for i in range(fragments_count): + first = self.read_unsigned_int() + first_ts = self.read_unsigned_long_long() + duration = self.read_unsigned_int() + if duration == 0: + discontinuity_indicator = self.read_unsigned_char() + else: + discontinuity_indicator = None + fragments.append({ + 'first': first, + 'ts': first_ts, + 'duration': duration, + 'discontinuity_indicator': discontinuity_indicator, + }) + + return { + 'fragments': fragments, + } + + def read_abst(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + # BootstrapinfoVersion + bootstrap_info_version = self.read_unsigned_int() + # Profile,Live,Update,Reserved + self.read(1) + # time scale + self.read_unsigned_int() + # CurrentMediaTime + self.read_unsigned_long_long() + # SmpteTimeCodeOffset + self.read_unsigned_long_long() + # MovieIdentifier + movie_identifier = self.read_string() + server_count = self.read_unsigned_char() + # ServerEntryTable + for i in range(server_count): + self.read_string() + quality_count = self.read_unsigned_char() + # 
QualityEntryTable + for i in range(quality_count): + self.read_string() + # DrmData + self.read_string() + # MetaData + self.read_string() + + segments_count = self.read_unsigned_char() + segments = [] + for i in range(segments_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'asrt' + segment = FlvReader(box_data).read_asrt() + segments.append(segment) + fragments_run_count = self.read_unsigned_char() + fragments = [] + for i in range(fragments_run_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'afrt' + fragments.append(FlvReader(box_data).read_afrt()) + + return { + 'segments': segments, + 'fragments': fragments, + } + + def read_bootstrap_info(self): + total_size, box_type, box_data = self.read_box_info() + assert box_type == b'abst' + return FlvReader(box_data).read_abst() + + +def read_bootstrap_info(bootstrap_bytes): + return FlvReader(bootstrap_bytes).read_bootstrap_info() + + +def build_fragments_list(boot_info): + """ Return a list of (segment, fragment) for each fragment in the video """ + res = [] + segment_run_table = boot_info['segments'][0] + # I've only found videos with one segment + segment_run_entry = segment_run_table['segment_run'][0] + n_frags = segment_run_entry[1] + fragment_run_entry_table = boot_info['fragments'][0]['fragments'] + first_frag_number = fragment_run_entry_table[0]['first'] + for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)): + res.append((1, frag_number)) + return res + + +def write_flv_header(stream, metadata): + """Writes the FLV header and the metadata to stream""" + # FLV header + stream.write(b'FLV\x01') + stream.write(b'\x05') + stream.write(b'\x00\x00\x00\x09') + # FLV File body + stream.write(b'\x00\x00\x00\x00') + # FLVTAG + # Script data + stream.write(b'\x12') + # Size of the metadata with 3 bytes + stream.write(struct_pack('!L', len(metadata))[1:]) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + # Magic numbers extracted from the output files produced by AdobeHDS.php + # (https://github.com/K-S-V/Scripts) + stream.write(b'\x00\x00\x01\x73') + + +def _add_ns(prop): + return '{http://ns.adobe.com/f4m/1.0}%s' % prop + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + +class F4mFD(FileDownloader): + """ + A downloader for f4m manifests or AdobeHDS. 
+ """ + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + self.to_screen('[download] Downloading f4m manifest') + manifest = self.ydl.urlopen(man_url).read() + self.report_destination(filename) + http_dl = HttpQuietDownloader(self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'test': self.params.get('test', False), + }) + + doc = etree.fromstring(manifest) + formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) + metadata = base64.b64decode(media.find(_add_ns('metadata')).text) + boot_info = read_bootstrap_info(bootstrap) + fragments_list = build_fragments_list(boot_info) + if self.params.get('test', False): + # We only download the first fragment + fragments_list = fragments_list[:1] + total_frags = len(fragments_list) + + tmpfilename = self.temp_name(filename) + (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream, metadata) + + # This dict stores the download progress, it's updated by the progress + # hook + state = { + 'downloaded_bytes': 0, + 'frag_counter': 0, + } + start = time.time() + + def frag_progress_hook(status): + frag_total_bytes = status.get('total_bytes', 0) + estimated_size = (state['downloaded_bytes'] + + (total_frags - state['frag_counter']) * frag_total_bytes) + if status['status'] == 'finished': + state['downloaded_bytes'] += frag_total_bytes + state['frag_counter'] += 1 + progress = self.calc_percent(state['frag_counter'], total_frags) + byte_counter = state['downloaded_bytes'] + else: + frag_downloaded_bytes = status['downloaded_bytes'] + byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes + frag_progress = self.calc_percent(frag_downloaded_bytes, + frag_total_bytes) + progress = self.calc_percent(state['frag_counter'], total_frags) + progress += frag_progress / float(total_frags) + + eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) + self.report_progress(progress, format_bytes(estimated_size), + status.get('speed'), eta) + http_dl.add_progress_hook(frag_progress_hook) + + frags_filenames = [] + for (seg_i, frag_i) in fragments_list: + name = 'Seg%d-Frag%d' % (seg_i, frag_i) + url = base_url + name + frag_filename = '%s-%s' % (tmpfilename, name) + success = http_dl.download(frag_filename, {'url': url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + down_data = down.read() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break + frags_filenames.append(frag_filename) + + self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + + self.try_rename(tmpfilename, filename) + for frag_file in frags_filenames: + os.remove(frag_file) + + fsize = os.path.getsize(encodeFilename(filename)) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + + return True diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/hls.py youtube-dl-2014.02.17/youtube_dl/downloader/hls.py --- youtube-dl-2012.09.27/youtube_dl/downloader/hls.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/hls.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +import os 
+import subprocess + +from .common import FileDownloader +from ..utils import ( + encodeFilename, +) + + +class HlsFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', tmpfilename] + + for program in ['avconv', 'ffmpeg']: + try: + subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + break + except (OSError, IOError): + pass + else: + self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') + return False + cmd = [program] + args + + retval = subprocess.call(cmd) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'ffmpeg exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/http.py youtube-dl-2014.02.17/youtube_dl/downloader/http.py --- youtube-dl-2012.09.27/youtube_dl/downloader/http.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/http.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,186 @@ +import os +import time + +from .common import FileDownloader +from ..utils import ( + compat_urllib_request, + compat_urllib_error, + ContentTooShortError, + + encodeFilename, + sanitize_open, + format_bytes, +) + + +class HttpFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + tmpfilename = self.temp_name(filename) + stream = None + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + if 'user_agent' in info_dict: + headers['Youtubedl-user-agent'] = info_dict['user_agent'] + basic_request = compat_urllib_request.Request(url, None, headers) + request = compat_urllib_request.Request(url, None, headers) + + if self.params.get('test', False): + request.add_header('Range', 'bytes=0-10240') + + # Establish possible resume length + if os.path.isfile(encodeFilename(tmpfilename)): + resume_len = os.path.getsize(encodeFilename(tmpfilename)) + else: + resume_len = 0 + + open_mode = 'wb' + if resume_len != 0: + if self.params.get('continuedl', False): + self.report_resuming_byte(resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) + open_mode = 'ab' + else: + resume_len = 0 + + count = 0 + retries = self.params.get('retries', 0) + while count <= retries: + # Establish connection + try: + data = compat_urllib_request.urlopen(request) + break + except (compat_urllib_error.HTTPError, ) as err: + if (err.code < 500 or err.code >= 600) and err.code != 416: + # Unexpected HTTP error + raise + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = compat_urllib_request.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (compat_urllib_error.HTTPError, ) as err: + if err.code < 500 or err.code >= 600: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < int(content_length) < resume_len + 100)): + # The file had already been fully downloaded. 
+ # Explanation of the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # a suggested change was implemented: consider the file completely + # downloaded if its size differs by less than 100 bytes from the size of + # the file already on the hard drive. + self.report_file_already_downloaded(filename) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + }) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.report_error(u'giving up after %s retries' % retries) + return False + + data_len = data.info().get('Content-length', None) + if data_len is not None: + data_len = int(data_len) + resume_len + min_data_len = self.params.get("min_filesize", None) + max_data_len = self.params.get("max_filesize", None) + if min_data_len is not None and data_len < min_data_len: + self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + return False + + data_len_str = format_bytes(data_len) + byte_counter = 0 + resume_len + block_size = self.params.get('buffersize', 1024) + start = time.time() + while True: + # Download and write + before = time.time() + data_block = data.read(block_size) + after = time.time() + if len(data_block) == 0: + break + byte_counter += len(data_block) + + # Open file just in time + if stream is None: + try: + (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + assert stream is not None + filename = self.undo_temp_name(tmpfilename) + self.report_destination(filename) + except (OSError, IOError) as err: + self.report_error(u'unable to open for writing: %s' % str(err)) + return False + try: + stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr(u"\n") + self.report_error(u'unable to write data: %s' % str(err)) + return False + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + # Progress message + speed = self.calc_speed(start, time.time(), byte_counter - resume_len) + if data_len is None: + eta = percent = None + else: + percent = self.calc_percent(byte_counter, data_len) + eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent, data_len_str, speed, eta) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) + + # Apply rate limit + self.slow_down(start, byte_counter - resume_len) + + if stream is None: + self.to_stderr(u"\n") + self.report_error(u'Did not get any data blocks') + return False + stream.close() + self.report_finish(data_len_str, (time.time() - start)) + if data_len is not None and byte_counter != data_len: + raise ContentTooShortError(byte_counter, int(data_len)) + self.try_rename(tmpfilename, filename) + + # Update file modification time + if self.params.get('updatetime', True): 
+ info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/__init__.py youtube-dl-2014.02.17/youtube_dl/downloader/__init__.py --- youtube-dl-2012.09.27/youtube_dl/downloader/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/__init__.py 2014-02-15 14:31:20.000000000 +0000 @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import FileDownloader +from .hls import HlsFD +from .http import HttpFD +from .mplayer import MplayerFD +from .rtmp import RtmpFD +from .f4m import F4mFD + +from ..utils import ( + determine_ext, +) + + +def get_suitable_downloader(info_dict): + """Get the downloader class that can handle the info dict.""" + url = info_dict['url'] + protocol = info_dict.get('protocol') + + if url.startswith('rtmp'): + return RtmpFD + if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): + return HlsFD + if url.startswith('mms') or url.startswith('rtsp'): + return MplayerFD + if determine_ext(url) == 'f4m': + return F4mFD + else: + return HttpFD diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/mplayer.py youtube-dl-2014.02.17/youtube_dl/downloader/mplayer.py --- youtube-dl-2012.09.27/youtube_dl/downloader/mplayer.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/mplayer.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +import os +import subprocess + +from .common import FileDownloader +from ..utils import ( + encodeFilename, +) + + +class MplayerFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] + # Check for mplayer first + try: + subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0]) + return False + + # Download using mplayer. 
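# The call below is equivalent to the shell invocation (URL and output
# name hypothetical):
#
#     mplayer -really-quiet -vo null -vc dummy -dumpstream \
#         -dumpfile video.mp4.part "mms://example.com/stream"
#
# -dumpstream/-dumpfile copy the raw stream bytes to disk, while the null
# video output and dummy codec keep mplayer from actually rendering anything.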
+ retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'mplayer exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/rtmp.py youtube-dl-2014.02.17/youtube_dl/downloader/rtmp.py --- youtube-dl-2012.09.27/youtube_dl/downloader/rtmp.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/rtmp.py 2014-02-08 15:23:28.000000000 +0000 @@ -0,0 +1,184 @@ +import os +import re +import subprocess +import sys +import time + +from .common import FileDownloader +from ..utils import ( + encodeFilename, + format_bytes, +) + + +class RtmpFD(FileDownloader): + def real_download(self, filename, info_dict): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = subprocess.Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + while not proc_stderr_closed: + # read line from stderr + line = u'' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True + continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent) + speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + data_len_str = u'~' + format_bytes(data_len) + self.report_progress(percent, data_len_str, speed, eta) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'speed': speed, + }) + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen(u'') + cursor_in_new_line = True + self.to_screen(u'[rtmpdump] '+line) + proc.wait() + if not cursor_in_new_line: + self.to_screen(u'') + return proc.returncode + + url = info_dict['url'] + player_url = info_dict.get('player_url', None) + page_url = info_dict.get('page_url', None) + app = info_dict.get('app', None) + play_path = info_dict.get('play_path', None) + tc_url = info_dict.get('tc_url', None) + flash_version = info_dict.get('flash_version', None) 
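# A note on run_rtmpdump() above: rtmpdump reports progress on stderr with
# lines of the form (sample shape, not captured output):
#
#     3452.118 kB / 24.38 sec (4.1%)
#
# The first regex turns such a line into downloaded_data_len = 3534968 bytes
# and percent = 4.1; live streams print no percentage, which the second,
# percent-less regex handles.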
+ live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn', None) + + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) + + # Check for rtmpdump first + try: + subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'RTMP download detected but "rtmpdump" could not be run') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrupted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if app is not None: + basic_args += ['--app', app] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', tc_url] + if test: + basic_args += ['--stop', '1'] + if flash_version is not None: + basic_args += ['--flashVer', flash_version] + if live: + basic_args += ['--live'] + if conn: + basic_args += ['--conn', conn] + args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] + + if sys.platform == 'win32' and sys.version_info < (3, 0): + # Windows subprocess module does not actually support Unicode + # on Python 2.x + # See http://stackoverflow.com/a/9951851/35070 + subprocess_encoding = sys.getfilesystemencoding() + args = [a.encode(subprocess_encoding, 'ignore') for a in args] + else: + subprocess_encoding = None + + if self.params.get('verbose', False): + if subprocess_encoding: + str_args = [ + a.decode(subprocess_encoding) if isinstance(a, bytes) else a + for a in args] + else: + str_args = args + try: + import pipes + shell_quote = lambda args: ' '.join(map(pipes.quote, args)) + except ImportError: + shell_quote = repr + self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) + + retval = run_rtmpdump(args) + + while (retval == 2 or retval == 1) and not test: + prevsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'[rtmpdump] %s bytes' % prevsize) + time.sleep(5.0) # This seems to be needed + retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) + cursize = os.path.getsize(encodeFilename(tmpfilename)) + if prevsize == cursize and retval == 1: + break + # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == 2 and cursize > 1024: + self.to_screen(u'[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') + retval = 0 + break + if retval == 0 or (test and retval == 2): + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'[rtmpdump] %s bytes' % fsize) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'rtmpdump exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/academicearth.py youtube-dl-2014.02.17/youtube_dl/extractor/academicearth.py --- youtube-dl-2012.09.27/youtube_dl/extractor/academicearth.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/academicearth.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,32 @@ +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + + +class AcademicEarthCourseIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' + IE_NAME = 'AcademicEarth:Course' + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + playlist_id = m.group('id') + + webpage = self._download_webpage(url, playlist_id) + title = self._html_search_regex( + r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title') + description = self._html_search_regex( + r'<p class="excerpt"[^>]*?>(.*?)</p>', + webpage, u'description', fatal=False) + urls = re.findall( + r'<li class="lecture-preview">\s*<a target="_blank" href="([^"]+)">', + webpage) + entries = [self.url_result(u) for u in urls] + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': title, + 'description': description, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/addanime.py youtube-dl-2014.02.17/youtube_dl/extractor/addanime.py --- youtube-dl-2012.09.27/youtube_dl/extractor/addanime.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/addanime.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,86 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_HTTPError, + compat_str, + compat_urllib_parse, + compat_urllib_parse_urlparse, + + ExtractorError, +) + + +class AddAnimeIE(InfoExtractor): + + _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)' + IE_NAME = u'AddAnime' + _TEST = { + u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', + u'file': u'24MR3YO5SAS9.mp4', + u'md5': u'72954ea10bc979ab5e2eb288b21425a0', + u'info_dict': { + u"description": u"One Piece 606", + u"title": u"One Piece 606" + } + } + + def _real_extract(self, url): + try: + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + webpage = self._download_webpage(url, video_id) + except ExtractorError as ee: + if not isinstance(ee.cause, compat_HTTPError) or \ + ee.cause.code != 503: + raise + + redir_webpage = ee.cause.read().decode('utf-8') + action = self._search_regex( + r'<form id="skip-btn" action="(.*?)"', redir_webpage, u'Redirect form') + vc = self._search_regex( + r'<input type="hidden" name="jschl_vc" value="([^"]*)"
', + redir_webpage, u'redirect vc value') + av = re.search( + r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', + redir_webpage) + if av is None: + raise ExtractorError(u'Cannot find redirect math task') + av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3)) + + parsed_url = compat_urllib_parse_urlparse(url) + av_val = av_res + len(parsed_url.netloc) + confirm_url = ( + parsed_url.scheme + u'://' + parsed_url.netloc + + action + '?' + + compat_urllib_parse.urlencode({ + 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) + self._download_webpage( + confirm_url, video_id, + note=u'Confirming after redirect') + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id in ('normal', 'hq'): + rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) + video_url = self._search_regex(rex, webpage, u'video file URL', + fatal=False) + if not video_url: + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + }) + if not formats: + raise ExtractorError(u'Cannot find any video format!') + video_title = self._og_search_title(webpage) + video_description = self._og_search_description(webpage) + + return { + '_type': 'video', + 'id': video_id, + 'formats': formats, + 'title': video_title, + 'description': video_description + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/anitube.py youtube-dl-2014.02.17/youtube_dl/extractor/anitube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/anitube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/anitube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +import re + +from .common import InfoExtractor + + +class AnitubeIE(InfoExtractor): + IE_NAME = u'anitube.se' + _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.anitube.se/video/36621', + u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', + u'file': u'36621.mp4', + u'info_dict': { + u'id': u'36621', + u'ext': u'mp4', + u'title': u'Recorder to Randoseru 01', + }, + u'skip': u'Blocked in the US', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', + webpage, u'key') + + config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, + key) + + video_title = config_xml.find('title').text + + formats = [] + video_url = config_xml.find('file') + if video_url is not None: + formats.append({ + 'format_id': 'sd', + 'url': video_url.text, + }) + video_url = config_xml.find('filehd') + if video_url is not None: + formats.append({ + 'format_id': 'hd', + 'url': video_url.text, + }) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/aparat.py youtube-dl-2014.02.17/youtube_dl/extractor/aparat.py --- youtube-dl-2012.09.27/youtube_dl/extractor/aparat.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/aparat.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,56 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + HEADRequest, +) + + +class AparatIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' + + _TEST = { + u'url': u'http://www.aparat.com/v/wP8On', + u'file': u'wP8On.mp4', + u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', + 
u'info_dict': { + u"title": u"تیم گلکسی 11 - زومیت", + }, + #u'skip': u'Extremely unreliable', + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + + video_id + u'/vt/frame') + webpage = self._download_webpage(embed_url, video_id) + + video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) + for i, video_url in enumerate(video_urls): + req = HEADRequest(video_url) + res = self._request_webpage( + req, video_id, note=u'Testing video URL %d' % i, errnote=False) + if res: + break + else: + raise ExtractorError(u'No working video URLs found') + + title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title') + thumbnail = self._search_regex( + r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'ext': 'mp4', + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/appletrailers.py youtube-dl-2014.02.17/youtube_dl/extractor/appletrailers.py --- youtube-dl-2012.09.27/youtube_dl/extractor/appletrailers.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/appletrailers.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,135 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + determine_ext, +) + + +class AppleTrailersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' + _TEST = { + "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + "playlist": [ + { + "file": "manofsteel-trailer4.mov", + "md5": "d97a8e575432dbcb81b7c3acb741f8a8", + "info_dict": { + "duration": 111, + "title": "Trailer 4", + "upload_date": "20130523", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-trailer3.mov", + "md5": "b8017b7131b721fb4e8d6f49e1df908c", + "info_dict": { + "duration": 182, + "title": "Trailer 3", + "upload_date": "20130417", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-trailer.mov", + "md5": "d0f1e1150989b9924679b441f3404d48", + "info_dict": { + "duration": 148, + "title": "Trailer", + "upload_date": "20121212", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-teaser.mov", + "md5": "5fe08795b943eb2e757fa95cb6def1cb", + "info_dict": { + "duration": 93, + "title": "Teaser", + "upload_date": "20120721", + "uploader_id": "wb", + }, + } + ] + } + + _JSON_RE = r'iTunes.playURL\((.*?)\);' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie = mobj.group('movie') + uploader_id = mobj.group('company') + + playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') + def fix_html(s): + s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s) + s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) + # The ' in the onClick attributes are not escaped, so pages + # like http://trailers.apple.com/trailers/wb/gravity/ could not be parsed + def _clean_json(m): + return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') + s = re.sub(self._JSON_RE, _clean_json, s) + s = u'<html>' + s + u'</html>' + return s + doc = self._download_xml(playlist_url, movie, transform_source=fix_html) + + playlist = [] + for li in doc.findall('./div/ul/li'): + on_click = li.find('.//a').attrib['onClick'] + trailer_info_json = 
self._search_regex(self._JSON_RE, + on_click, u'trailer info') + trailer_info = json.loads(trailer_info_json) + title = trailer_info['title'] + video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() + thumbnail = li.find('.//img').attrib['src'] + upload_date = trailer_info['posted'].replace('-', '') + + runtime = trailer_info['runtime'] + m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime) + duration = None + if m: + duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) + + first_url = trailer_info['url'] + trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() + settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') + settings = json.loads(settings_json) + + formats = [] + for format in settings['metadata']['sizes']: + # The src is a file pointing to the real video file + format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format': format['type'], + 'width': format['width'], + 'height': int(format['height']), + }) + + self._sort_formats(formats) + + playlist.append({ + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'user_agent': 'QuickTime compatible (youtube-dl)', + }) + + return { + '_type': 'playlist', + 'id': movie, + 'entries': playlist, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/archiveorg.py youtube-dl-2014.02.17/youtube_dl/extractor/archiveorg.py --- youtube-dl-2012.09.27/youtube_dl/extractor/archiveorg.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/archiveorg.py 2014-02-09 17:07:22.000000000 +0000 @@ -0,0 +1,61 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class ArchiveOrgIE(InfoExtractor): + IE_NAME = 'archive.org' + IE_DESC = 'archive.org videos' + _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' + _TEST = { + "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", + 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', + 'md5': '8af1d4cf447933ed3c7f4871162602db', + 'info_dict': { + "title": "1968 Demo - FJCC Conference Presentation Reel #1", + "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also Doug's 1968 Demo page for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | Reel 2 | Reel 3", + "upload_date": "19681210", + "uploader": "SRI International" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + json_url = url + ('?' if '?' 
in url else '&') + 'output=json' + json_data = self._download_webpage(json_url, video_id) + data = json.loads(json_data) + + title = data['metadata']['title'][0] + description = data['metadata']['description'][0] + uploader = data['metadata']['creator'][0] + upload_date = unified_strdate(data['metadata']['date'][0]) + + formats = [ + { + 'format': fdata['format'], + 'url': 'http://' + data['server'] + data['dir'] + fn, + 'file_size': int(fdata['size']), + } + for fn, fdata in data['files'].items() + if 'Video' in fdata['format']] + + self._sort_formats(formats) + + return { + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'thumbnail': data.get('misc', {}).get('image'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ard.py youtube-dl-2014.02.17/youtube_dl/extractor/ard.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ard.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ard.py 2014-01-27 17:39:41.000000000 +0000 @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, +) + + +class ARDIE(InfoExtractor): + _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' + + _TEST = { + 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786', + 'file': '19288786.mp4', + 'md5': '515bf47ce209fb3f5a61b7aad364634c', + 'info_dict': { + 'title': 'Edward Snowden im Interview - Held oder Verräter?', + 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.', + 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037', + }, + 'skip': 'Blocked outside of Germany', + } + + def _real_extract(self, url): + # determine video id from url + m = re.match(self._VALID_URL, url) + + numid = re.search(r'documentId=([0-9]+)', url) + if numid: + video_id = numid.group(1) + else: + video_id = m.group('video_id') + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title') + description = self._html_search_meta( + 'dcterms.abstract', webpage, 'description') + thumbnail = self._og_search_thumbnail(webpage) + + streams = [ + mo.groupdict() + for mo in re.finditer( + r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)] + if not streams: + if '"fsk"' in webpage: + raise ExtractorError('This video is only available after 20:00') + + formats = [] + for s in streams: + format = { + 'quality': int(s['quality']), + } + if s.get('rtmp_url'): + format['protocol'] = 'rtmp' + format['url'] = s['rtmp_url'] + format['playpath'] = s['video_url'] + else: + format['url'] = s['video_url'] + + quality_name = self._search_regex( + r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'], + 'quality name', default='NA') + format['format_id'] = '%s-%s-%s-%s' % ( + determine_ext(format['url']), quality_name, s['media_type'], + s['quality']) + + formats.append(format) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/arte.py youtube-dl-2014.02.17/youtube_dl/extractor/arte.py --- youtube-dl-2012.09.27/youtube_dl/extractor/arte.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/arte.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,282 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + find_xpath_attr, + unified_strdate, + determine_ext, + get_element_by_id, + compat_str, + get_element_by_attribute, +) + +# There are different sources of video in arte.tv, the extraction process +# is different for each one. The videos usually expire in 7 days, so we can't +# add tests. + +class ArteTvIE(InfoExtractor): + _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html' + _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<cat>.+?)/(?P<name>.+)' + _LIVE_URL = r'index-[0-9]+\.html$' + + IE_NAME = 'arte.tv' + + @classmethod + def suitable(cls, url): + return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL)) + + # TODO implement Live Stream + # from ..utils import compat_urllib_parse + # def extractLiveStream(self, url): + # video_lang = url.split('/')[-4] + # info = self.grep_webpage( + # url, + # r'src="(.*?/videothek_js.*?\.js)', + # 0, + # [ + # (1, 'url', 'Invalid URL: %s' % url) + # ] + # ) + # http_host = url.split('/')[2] + # next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) + # info = self.grep_webpage( + # next_url, + # r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + + # '(http://.*?\.swf).*?' 
+ + '(rtmp://.*?)\'', + re.DOTALL, + [ + (1, 'path', 'could not extract video path: %s' % url), + (2, 'player', 'could not extract video player: %s' % url), + (3, 'url', 'could not extract video url: %s' % url) + ] + ) + # video_url = '%s/%s' % (info.get('url'), info.get('path')) + + def _real_extract(self, url): + mobj = re.match(self._VIDEOS_URL, url) + if mobj is not None: + id = mobj.group('id') + lang = mobj.group('lang') + return self._extract_video(url, id, lang) + + mobj = re.match(self._LIVEWEB_URL, url) + if mobj is not None: + name = mobj.group('name') + lang = mobj.group('lang') + return self._extract_liveweb(url, name, lang) + + if re.search(self._LIVE_URL, url) is not None: + raise ExtractorError(u'Arte live streams are not yet supported, sorry') + # self.extractLiveStream(url) + # return + + def _extract_video(self, url, video_id, lang): + """Extract from videos.arte.tv""" + ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') + ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') + ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') + config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) + config_xml_url = config_node.attrib['ref'] + config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') + + video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) + def _key(m): + quality = m.group('quality') + if quality == 'hd': + return 2 + else: + return 1 + # We pick the best quality + video_urls = sorted(video_urls, key=_key) + video_url = list(video_urls)[-1].group('url') + + title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title') + thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>', + config_xml, 'thumbnail') + return {'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'url': video_url, + 'ext': 'flv', + } + + def _extract_liveweb(self, url, name, lang): + """Extract from http://liveweb.arte.tv/""" + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') + config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, + video_id, 'Downloading information') + event_doc = config_doc.find('event') + url_node = event_doc.find('video').find('urlHd') + if url_node is None: + url_node = event_doc.find('urlSd') + + return {'id': video_id, + 'title': event_doc.find('name%s' % lang.capitalize()).text, + 'url': url_node.text.replace('MP4', 'mp4'), + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + } + + +class ArteTVPlus7IE(InfoExtractor): + IE_NAME = 'arte.tv:+7' + _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' 
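# With the named groups restored in _VALID_URL above, a URL like the AJT
# example cited below parses as (sketch):
#
#     m = re.match(ArteTVPlus7IE._VALID_URL,
#                  'http://www.arte.tv/guide/fr/emissions/AJT/arte-journal')
#     m.group('lang'), m.group('id')  # -> ('fr', 'AJT')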
+ + @classmethod + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) + lang = mobj.group('lang') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') + return video_id, lang + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, video_id) + return self._extract_from_webpage(webpage, video_id, lang) + + def _extract_from_webpage(self, webpage, video_id, lang): + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) + + def _extract_from_json_url(self, json_url, video_id, lang): + json_info = self._download_webpage(json_url, video_id, 'Downloading info json') + self.report_extraction(video_id) + info = json.loads(json_info) + player_info = info['videoJsonPlayer'] + + info_dict = { + 'id': player_info['VID'], + 'title': player_info['VTI'], + 'description': player_info.get('VDE'), + 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]), + 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), + } + + all_formats = player_info['VSR'].values() + # Some formats use the m3u8 protocol + all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) + def _match_lang(f): + if f.get('versionCode') is None: + return True + # Return true if that format is in the language of the url + if lang == 'fr': + l = 'F' + elif lang == 'de': + l = 'A' + regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] + return any(re.match(r, f['versionCode']) for r in regexes) + # Some formats may not be in the same language as the url + formats = filter(_match_lang, all_formats) + formats = list(formats) # in python3 filter returns an iterator + if not formats: + # Some videos are only available in the 'Originalversion' + # they aren't tagged as being in French or German + if all(f['versionCode'] == 'VO' for f in all_formats): + formats = all_formats + else: + raise ExtractorError(u'The formats list is empty') + + if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: + def sort_key(f): + return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) + else: + def sort_key(f): + return ( + # Sort first by quality + int(f.get('height',-1)), + int(f.get('bitrate',-1)), + # The original version with subtitles has lower relevance + re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, + # The version with sourds/mal subtitles has also lower relevance + re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, + ) + formats = sorted(formats, key=sort_key) + def _format(format_info): + quality = '' + height = format_info.get('height') + if height is not None: + quality = compat_str(height) + bitrate = format_info.get('bitrate') + if bitrate is not None: + quality += '-%d' % bitrate + if format_info.get('versionCode') is not None: + format_id = '%s-%s' % (quality, format_info['versionCode']) + else: + format_id = quality + info = { + 'format_id': format_id, + 'format_note': format_info.get('versionLibelle'), + 'width': format_info.get('width'), + 'height': height, + } + if format_info['mediaType'] == 'rtmp': + info['url'] = format_info['streamer'] + info['play_path'] = 'mp4:' + format_info['url'] + info['ext'] = 'flv' + else: + info['url'] = format_info['url'] + info['ext'] = determine_ext(info['url']) + return info + info_dict['formats'] = [_format(f) for f in formats] + + return info_dict + + 
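# A note on the format ordering in _extract_from_json_url() above: when the
# 'quality' values are not letter grades, sort_key orders formats ascending
# by height, then bitrate, and ranks subtitled versions below plain ones, so
# formats[-1] is the preferred variant. A toy illustration (field values
# hypothetical):
#
#     fmts = [{'height': 406, 'bitrate': 800, 'versionCode': 'VF'},
#             {'height': 720, 'bitrate': 2200, 'versionCode': 'VO-STF'},
#             {'height': 720, 'bitrate': 2200, 'versionCode': 'VF'}]
#     sorted(fmts, key=sort_key)[-1]  # -> the 720p 'VF' entry
#
# Because False sorts before True, the 'VO-STF' variant (whose "is None"
# checks yield False) loses the tie against the plain 'VF' one.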
+# It also uses the arte_vp_url url from the webpage to extract the information +class ArteTVCreativeIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:creative' + _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' + + _TEST = { + 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'file': '050489-002.mp4', + 'info_dict': { + 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', + }, + } + + +class ArteTVFutureIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:future' + _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' + + _TEST = { + 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', + 'file': '050940-003.mp4', + 'info_dict': { + 'title': 'Les champignons au secours de la planète', + }, + } + + def _real_extract(self, url): + anchor_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, anchor_id) + row = get_element_by_id(anchor_id, webpage) + return self._extract_from_webpage(row, anchor_id, lang) + + +class ArteTVDDCIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:ddc' + _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + if lang == 'folge': + lang = 'de' + elif lang == 'emission': + lang = 'fr' + webpage = self._download_webpage(url, video_id) + scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) + script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') + javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') + json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/auengine.py youtube-dl-2014.02.17/youtube_dl/extractor/auengine.py --- youtube-dl-2012.09.27/youtube_dl/extractor/auengine.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/auengine.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, + ExtractorError, +) + + +class AUEngineIE(InfoExtractor): + _TEST = { + 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + 'file': 'lfvlytY6.mp4', + 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', + 'info_dict': { + 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' + } + } + _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', + webpage, 'title') + title = title.strip() + links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) + links = map(compat_urllib_parse.unquote, links) + + thumbnail = None + video_url = None + for link in links: + if link.endswith('.png'): + thumbnail = link + elif '/videos/' in link: + video_url = link + if not video_url: + raise ExtractorError(u'Could not find video URL') + ext = '.' 
+ determine_ext(video_url) + if ext == title[-len(ext):]: + title = title[:-len(ext)] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bambuser.py youtube-dl-2014.02.17/youtube_dl/extractor/bambuser.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bambuser.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bambuser.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,88 @@ +from __future__ import unicode_literals + +import re +import json +import itertools + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, +) + + +class BambuserIE(InfoExtractor): + IE_NAME = 'bambuser' + _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' + _API_KEY = '005f64509e19a868399060af746a00aa' + + _TEST = { + 'url': 'http://bambuser.com/v/4050584', + # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 + #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + 'info_dict': { + 'id': '4050584', + 'ext': 'flv', + 'title': 'Education engineering days - lightning talks', + 'duration': 3741, + 'uploader': 'pixelversity', + 'uploader_id': '344706', + }, + 'params': { + # It doesn't respect the 'Range' header, it would download the whole video, + # which caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = ('http://player-c.api.bambuser.com/getVideo.json?' + '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json)['result'] + + return { + 'id': video_id, + 'title': info['title'], + 'url': info['url'], + 'thumbnail': info.get('preview'), + 'duration': int(info['length']), + 'view_count': int(info['views_total']), + 'uploader': info['username'], + 'uploader_id': info['uid'], + } + + +class BambuserChannelIE(InfoExtractor): + IE_NAME = 'bambuser:channel' + _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)' + # The maximum number we can get with each request + _STEP = 50 + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + urls = [] + last_id = '' + for i in itertools.count(1): + req_url = ('http://bambuser.com/xhr-api/index.php?username={user}' + '&sort=created&access_mode=0%2C1%2C2&limit={count}' + '&method=broadcast&format=json&vid_older_than={last}' + ).format(user=user, count=self._STEP, last=last_id) + req = compat_urllib_request.Request(req_url) + # Without setting this header, we wouldn't get any result + req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) + info_json = self._download_webpage(req, user, + 'Downloading page %d' % i) + results = json.loads(info_json)['result'] + if len(results) == 0: + break + last_id = results[-1]['vid'] + urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) + + return { + '_type': 'playlist', + 'title': user, + 'entries': urls, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bandcamp.py youtube-dl-2014.02.17/youtube_dl/extractor/bandcamp.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bandcamp.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bandcamp.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import 
InfoExtractor +from ..utils import ( + compat_str, + compat_urlparse, + ExtractorError, +) + + +class BandcampIE(InfoExtractor): + _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' + _TESTS = [{ + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + 'file': '1812978515.mp3', + 'md5': 'c557841d5e50261777a6585648adf439', + 'info_dict': { + "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + "duration": 10, + }, + '_skip': 'There is a limit of 200 free downloads / month for the test song' + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + # We get the link to the free download page + m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) + if m_download is None: + m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) + if m_trackinfo: + json_code = m_trackinfo.group(1) + data = json.loads(json_code) + d = data[0] + + duration = int(round(d['duration'])) + formats = [] + for format_id, format_url in d['file'].items(): + ext, _, abr_str = format_id.partition('-') + + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + 'abr': int(abr_str), + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(d['id']), + 'title': d['title'], + 'formats': formats, + 'duration': duration, + } + else: + raise ExtractorError('No free songs found') + + download_link = m_download.group(1) + video_id = re.search( + r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', + webpage, re.MULTILINE | re.DOTALL).group('id') + + download_webpage = self._download_webpage(download_link, video_id, + 'Downloading free downloads page') + # We get the dictionary of the track from some javascript code + info = re.search(r'items: (.*?),$', + download_webpage, re.MULTILINE).group(1) + info = json.loads(info)[0] + # We pick mp3-320 for now, until format selection can be easily implemented. 
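# The lines below swap the expired link's path from /download/track to
# /statdownload/track, replaying its query string; schematically
# (hypothetical values):
#
#     http://artist.bandcamp.com/download/track?enc=mp3-320&fsig=F&id=1&ts=T
#       -> http://artist.bandcamp.com/statdownload/track
#              ?enc=mp3-320&fsig=F&id=1&ts=T&.rand=665028774616&.vrs=1
#
# and the final track URL is then read out of the response's "retry_url" field.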
+ mp3_info = info['downloads']['mp3-320'] + # If we try to use this url it says the link has expired + initial_url = mp3_info['url'] + re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' + m_url = re.match(re_url, initial_url) + # We build the url we will use to get the final track url + # This url is built in Bandcamp in the script download_bunde_*.js + request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) + final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') + # If we could correctly generate the .rand field the url would be + # in the "download_url" key + final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) + + return { + 'id': video_id, + 'title': info['title'], + 'ext': 'mp3', + 'vcodec': 'none', + 'url': final_url, + 'thumbnail': info.get('thumb_url'), + 'uploader': info.get('artist'), + } + + +class BandcampAlbumIE(InfoExtractor): + IE_NAME = 'Bandcamp:album' + _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' + + _TEST = { + 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', + 'playlist': [ + { + 'file': '1353101989.mp3', + 'md5': '39bc1eded3476e927c724321ddf116cf', + 'info_dict': { + 'title': 'Intro', + } + }, + { + 'file': '38097443.mp3', + 'md5': '1a2c32e2691474643e912cc6cd4bffaa', + 'info_dict': { + 'title': 'Kero One - Keep It Alive (Blazo remix)', + } + }, + ], + 'params': { + 'playlistend': 2 + }, + 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) + if not tracks_paths: + raise ExtractorError('The page doesn\'t contain any tracks') + entries = [ + self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) + for t_path in tracks_paths] + title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') + return { + '_type': 'playlist', + 'title': title, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bbccouk.py youtube-dl-2014.02.17/youtube_dl/extractor/bbccouk.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bbccouk.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bbccouk.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,217 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from ..utils import ExtractorError + + +class BBCCoUkIE(SubtitlesInfoExtractor): + IE_NAME = 'bbc.co.uk' + IE_DESC = 'BBC iPlayer' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})' + + _TESTS = [ + { + 'url': 'http://www.bbc.co.uk/programmes/p01q7wz1', + 'info_dict': { + 'id': 'p01q7wz4', + 'ext': 'flv', + 'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix', + 'description': 'Blu Mar Ten deliver a Guest Mix for Friction.', + 'duration': 1936, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Man in Black: Series 3: The Printed Name', + 
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.", + 'duration': 1800, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Voice UK: Series 3: Blind Auditions 5', + 'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.", + 'duration': 5100, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + } + ] + + def _extract_asx_playlist(self, connection, programme_id): + asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') + return [ref.get('href') for ref in asx.findall('./Entry/ref')] + + def _extract_connection(self, connection, programme_id): + formats = [] + protocol = connection.get('protocol') + supplier = connection.get('supplier') + if protocol == 'http': + href = connection.get('href') + # ASX playlist + if supplier == 'asx': + for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): + formats.append({ + 'url': ref, + 'format_id': 'ref%s_%s' % (i, supplier), + }) + # Direct link + else: + formats.append({ + 'url': href, + 'format_id': supplier, + }) + elif protocol == 'rtmp': + application = connection.get('application', 'ondemand') + auth_string = connection.get('authString') + identifier = connection.get('identifier') + server = connection.get('server') + formats.append({ + 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'play_path': identifier, + 'app': '%s?%s' % (application, auth_string), + 'page_url': 'http://www.bbc.co.uk', + 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', + 'rtmp_live': False, + 'ext': 'flv', + 'format_id': supplier, + }) + return formats + + def _extract_items(self, playlist): + return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item') + + def _extract_medias(self, media_selection): + return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media') + + def _extract_connections(self, media): + return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection') + + def _extract_video(self, media, programme_id): + formats = [] + vbr = int(media.get('bitrate')) + vcodec = media.get('encoding') + service = media.get('service') + width = int(media.get('width')) + height = int(media.get('height')) + file_size = int(media.get('media_file_size')) + for connection in self._extract_connections(media): + conn_formats = self._extract_connection(connection, programme_id) + for format in conn_formats: + format.update({ + 'format_id': '%s_%s' % (service, format['format_id']), + 'width': width, + 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) + formats.extend(conn_formats) + return formats + + def _extract_audio(self, media, programme_id): + formats = [] + abr = int(media.get('bitrate')) + acodec = media.get('encoding') + service = media.get('service') + for connection in self._extract_connections(media): + conn_formats = self._extract_connection(connection, programme_id) + for format in conn_formats: + format.update({ + 'format_id': '%s_%s' % (service, 
format['format_id']), + 'abr': abr, + 'acodec': acodec, + }) + formats.extend(conn_formats) + return formats + + def _extract_captions(self, media, programme_id): + subtitles = {} + for connection in self._extract_connections(media): + captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') + ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) + srt = '' + for pos, p in enumerate(ps): + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), + p.text.strip() if p.text is not None else '') + subtitles[lang] = srt + return subtitles + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + group_id = mobj.group('id') + + playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, + 'Downloading playlist XML') + + no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') + if no_items is not None: + reason = no_items.get('reason') + if reason == 'preAvailability': + msg = 'Episode %s is not yet available' % group_id + elif reason == 'postAvailability': + msg = 'Episode %s is no longer available' % group_id + else: + msg = 'Episode %s is not available: %s' % (group_id, reason) + raise ExtractorError(msg, expected=True) + + formats = [] + subtitles = None + + for item in self._extract_items(playlist): + kind = item.get('kind') + if kind != 'programme' and kind != 'radioProgramme': + continue + title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text + description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text + + programme_id = item.get('identifier') + duration = int(item.get('duration')) + + media_selection = self._download_xml( + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, + programme_id, 'Downloading media selection XML') + + for media in self._extract_medias(media_selection): + kind = media.get('kind') + if kind == 'audio': + formats.extend(self._extract_audio(media, programme_id)) + elif kind == 'video': + formats.extend(self._extract_video(media, programme_id)) + elif kind == 'captions': + subtitles = self._extract_captions(media, programme_id) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(programme_id, subtitles) + return + + self._sort_formats(formats) + + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/blinkx.py youtube-dl-2014.02.17/youtube_dl/extractor/blinkx.py --- youtube-dl-2012.09.27/youtube_dl/extractor/blinkx.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/blinkx.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import datetime +import json +import re + +from .common import InfoExtractor +from ..utils import ( + remove_start, +) + + +class BlinkxIE(InfoExtractor): + _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + IE_NAME = 'blinkx' + + _TEST = { + 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', + 'file': '8aQUy7GV.mp4', + 'md5': '2e9a07364af40163a908edbf10bb2492', + 'info_dict': { + "title": "Police Car Rolls Away", + "uploader": 
"stupidvideos.com", + "upload_date": "20131215", + "description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!", + "duration": 14.886, + "thumbnails": [{ + "width": 100, + "height": 76, + "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg", + }], + }, + } + + def _real_extract(self, rl): + m = re.match(self._VALID_URL, rl) + video_id = m.group('id') + display_id = video_id[:8] + + api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) + data_json = self._download_webpage(api_url, display_id) + data = json.loads(data_json)['api']['results'][0] + dt = datetime.datetime.fromtimestamp(data['pubdate_epoch']) + pload_date = dt.strftime('%Y%m%d') + + duration = None + thumbnails = [] + formats = [] + for m in data['media']: + if m['type'] == 'jpg': + thumbnails.append({ + 'url': m['link'], + 'width': int(m['w']), + 'height': int(m['h']), + }) + elif m['type'] == 'original': + duration = m['d'] + elif m['type'] == 'youtube': + yt_id = m['link'] + self.to_screen(u'Youtube video detected: %s' % yt_id) + return self.url_result(yt_id, 'Youtube', video_id=yt_id) + elif m['type'] in ('flv', 'mp4'): + vcodec = remove_start(m['vcodec'], 'ff') + acodec = remove_start(m['acodec'], 'ff') + tbr = (int(m['vbr']) + int(m['abr'])) // 1000 + format_id = (u'%s-%sk-%s' % + (vcodec, + tbr, + m['w'])) + formats.append({ + 'format_id': format_id, + 'url': m['link'], + 'vcodec': vcodec, + 'acodec': acodec, + 'abr': int(m['abr']) // 1000, + 'vbr': int(m['vbr']) // 1000, + 'tbr': tbr, + 'width': int(m['w']), + 'height': int(m['h']), + }) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'fullid': video_id, + 'title': data['title'], + 'formats': formats, + 'uploader': data['channel_name'], + 'upload_date': pload_date, + 'description': data.get('description'), + 'thumbnails': thumbnails, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bliptv.py youtube-dl-2014.02.17/youtube_dl/extractor/bliptv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bliptv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bliptv.py 2014-02-04 09:25:00.000000000 +0000 @@ -0,0 +1,178 @@ +from __future__ import unicode_literals + +import datetime +import re + +from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( + compat_str, + compat_urllib_request, + + unescapeHTML, +) + + +class BlipTVIE(SubtitlesInfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$' + + _TESTS = [{ + 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + 'md5': 'c6934ad0b6acf2bd920720ec888eb812', + 'info_dict': { + 'id': '5779306', + 'ext': 'mov', + 'upload_date': '20111205', + 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', + 'uploader': 'Comic Book Resources - CBR TV', + 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', + } + }, { + # https://github.com/rg3/youtube-dl/pull/2274 + 'note': 'Video with subtitles', + 'url': 'http://blip.tv/play/h6Uag5OEVgI.html', + 'md5': '309f9d25b820b086ca163ffac8031806', + 'info_dict': { + 'id': '6586561', + 'ext': 'mp4', + 'uploader': 'Red vs. Blue', + 'description': 'One-Zero-One', + 'upload_date': '20130614', + 'title': 'Red vs. 
Blue Season 11 Episode 1', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + presumptive_id = mobj.group('presumptive_id') + + # See https://github.com/rg3/youtube-dl/issues/857 + embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) + if embed_mobj: + info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) + info_page = self._download_webpage(info_url, embed_mobj.group(1)) + video_id = self._search_regex( + r'data-episode-id="([0-9]+)', info_page, 'video_id') + return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV') + + cchar = '&' if '?' in url else '?' + json_url = url + cchar + 'skin=json&version=2&no_wrap=1' + request = compat_urllib_request.Request(json_url) + request.add_header('User-Agent', 'iTunes/10.6.1') + + json_data = self._download_json(request, video_id=presumptive_id) + + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data + + video_id = compat_str(data['item_id']) + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + subtitles = {} + formats = [] + if 'additionalMedia' in data: + for f in data['additionalMedia']: + if f.get('file_type_srt') == 1: + LANGS = { + 'english': 'en', + } + lang = f['role'].rpartition('-')[-1].strip().lower() + langcode = LANGS.get(lang, lang) + subtitles[langcode] = f['url'] + continue + if not int(f['media_width']): # filter m3u8 + continue + formats.append({ + 'url': f['url'], + 'format_id': f['role'], + 'width': int(f['media_width']), + 'height': int(f['media_height']), + }) + else: + formats.append({ + 'url': data['media']['url'], + 'width': int(data['media']['width']), + 'height': int(data['media']['height']), + }) + self._sort_formats(formats) + + # subtitles + video_subtitles = self.extract_subtitles(video_id, subtitles) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + return { + 'id': video_id, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'user_agent': 'iTunes/10.6.1', + 'formats': formats, + 'subtitles': video_subtitles, + } + + def _download_subtitle_url(self, sub_lang, url): + # For some weird reason, blip.tv serves a video instead of subtitles + # when we request with a common UA + req = compat_urllib_request.Request(url) + req.add_header('Youtubedl-user-agent', 'youtube-dl') + return self._download_webpage(req, None, note=False) + + +class BlipTVUserIE(InfoExtractor): + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' + _PAGE_SIZE = 12 + IE_NAME = 'blip.tv:user' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + username = mobj.group(1) + + page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' + + page = self._download_webpage(url, username, 'Downloading user page') + mobj = re.search(r'data-users-id="([^"]+)"', page) + page_base = page_base % mobj.group(1) + + # Download video ids using BlipTV Ajax calls. Result size per + # query is limited (currently to 12 videos) so we need to query + # page by page until there are no video ids - it means we got + # all of them. 
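+        # Sketch of the resulting request sequence (users_id assumed 123):
+        #     .../show_get_full_episode_list?users_id=123&lite=0&esi=1&page=1
+        #     .../show_get_full_episode_list?users_id=123&lite=0&esi=1&page=2
+        # and so on, stopping once a page returns fewer than _PAGE_SIZE ids.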
+ + video_ids = [] + pagenum = 1 + + while True: + url = page_base + "&page=" + str(pagenum) + page = self._download_webpage( + url, username, 'Downloading video ids from page %d' % pagenum) + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(r'href="/([^"]+)"', page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(unescapeHTML(mobj.group(1))) + + video_ids.extend(ids_in_page) + + # A little optimization - if current page is not + # "full", ie. does not contain PAGE_SIZE video ids then + # we can assume that this page is the last one - there + # are no more ids on further pages - no need to query + # again. + + if len(ids_in_page) < self._PAGE_SIZE: + break + + pagenum += 1 + + urls = ['http://blip.tv/%s' % video_id for video_id in video_ids] + url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] + return [self.playlist_result(url_entries, playlist_title=username)] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bloomberg.py youtube-dl-2014.02.17/youtube_dl/extractor/bloomberg.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bloomberg.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bloomberg.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,30 @@ +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' + + _TEST = { + u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', + u'info_dict': { + u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', + u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + embed_code = self._search_regex( + r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage, + 'embed code') + return OoyalaIE._build_url_result(embed_code) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/breakcom.py youtube-dl-2014.02.17/youtube_dl/extractor/breakcom.py --- youtube-dl-2012.09.27/youtube_dl/extractor/breakcom.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/breakcom.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class BreakIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)' + _TEST = { + 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', + 'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b', + 'info_dict': { + 'id': '2468056', + 'ext': 'mp4', + 'title': 'When Girls Act Like D-Bags', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1).split("-")[-1] + embed_url = 'http://www.break.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) + info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, + 'info json', flags=re.DOTALL) + info = json.loads(info_json) + video_url = info['videoUri'] + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) + if m_youtube is not None: + return self.url_result(m_youtube.group(1), 'Youtube') + final_url = video_url + '?' 
+ info['AuthToken'] + return { + 'id': video_id, + 'url': final_url, + 'title': info['contentName'], + 'thumbnail': info['thumbUri'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/brightcove.py youtube-dl-2014.02.17/youtube_dl/extractor/brightcove.py --- youtube-dl-2012.09.27/youtube_dl/extractor/brightcove.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/brightcove.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,247 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + find_xpath_attr, + fix_xml_ampersands, + compat_urlparse, + compat_str, + compat_urllib_request, + compat_parse_qs, + + ExtractorError, + unsmuggle_url, + unescapeHTML, +) + + +class BrightcoveIE(InfoExtractor): + _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' + _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' + + _TESTS = [ + { + # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', + 'file': '2371591881001.mp4', + 'md5': '5423e113865d26e40624dce2e4b45d95', + 'note': 'Test Brightcove downloads and detection in GenericIE', + 'info_dict': { + 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + 'uploader': '8TV', + 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', + } + }, + { + # From http://medianetwork.oracle.com/video/player/1785452137001 + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', + 'file': '1785452137001.flv', + 'info_dict': { + 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', + 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', + 'uploader': 'Oracle', + }, + }, + { + # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ + 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', + 'info_dict': { + 'id': '2750934548001', + 'ext': 'mp4', + 'title': 'This Bracelet Acts as a Personal Thermostat', + 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', + 'uploader': 'Mashable', + }, + }, + { + # test that the default referer works + # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ + 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', + 'info_dict': { + 'id': '2878862109001', + 'ext': 'mp4', + 'title': 'Lost in Motion II', + 'description': 'md5:363109c02998fee92ec02211bd8000df', + 'uploader': 'National Ballet of Canada', + }, + } + ] + + @classmethod + def _build_brighcove_url(cls, object_str): + """ + Build a Brightcove url from a xml string containing + <object class="BrightcoveExperience">{params}</object> + """ + + # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', + lambda m: m.group(1) + '/>', object_str) + # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + object_str = object_str.replace('<--', '<!--') + 
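+        # Taken together with fix_xml_ampersands below, a hypothetical input
+        #     <param name="a" value="b"><-- note --> x & y
+        # should end up as well-formed XML:
+        #     <param name="a" value="b"/><!-- note --> x &amp; y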
object_str = fix_xml_ampersands(object_str) + + object_doc = xml.etree.ElementTree.fromstring(object_str) + + fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') + if fv_el is not None: + flashvars = dict( + (k, v[0]) + for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + else: + flashvars = {} + + def find_param(name): + if name in flashvars: + return flashvars[name] + node = find_xpath_attr(object_doc, './param', 'name', name) + if node is not None: + return node.attrib['value'] + return None + + params = {} + + playerID = find_param('playerID') + if playerID is None: + raise ExtractorError('Cannot find player ID') + params['playerID'] = playerID + + playerKey = find_param('playerKey') + # Not all pages define this value + if playerKey is not None: + params['playerKey'] = playerKey + # The three fields hold the id of the video + videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') + if videoPlayer is not None: + params['@videoPlayer'] = videoPlayer + linkBase = find_param('linkBaseURL') + if linkBase is not None: + params['linkBaseURL'] = linkBase + data = compat_urllib_parse.urlencode(params) + return cls._FEDERATED_URL_TEMPLATE % data + + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the brightcove url from the webpage, returns None + if it can't be found + """ + urls = cls._extract_brightcove_urls(webpage) + return urls[0] if urls else None + + @classmethod + def _extract_brightcove_urls(cls, webpage): + """Return a list of all Brightcove URLs from the webpage """ + + url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) + if url_m: + return [unescapeHTML(url_m.group(1))] + + matches = re.findall( + r'''(?sx)<object + (?: + [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | + [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ + ).+?</object>''', + webpage) + return [cls._build_brighcove_url(m) for m in matches] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + # Change the 'videoId' and others field to '@videoPlayer' + url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + # Change bckey (used by bcove.me urls) to playerKey + url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) + mobj = re.match(self._VALID_URL, url) + query_str = mobj.group('query') + query = compat_urlparse.parse_qs(query_str) + + videoPlayer = query.get('@videoPlayer') + if videoPlayer: + # We set the original url as the default 'Referer' header + referer = smuggled_data.get('Referer', url) + return self._get_video_info( + videoPlayer[0], query_str, query, referer=referer) + else: + player_key = query['playerKey'] + return self._get_playlist_info(player_key[0]) + + def _get_video_info(self, video_id, query_str, query, referer=None): + request_url = self._FEDERATED_URL_TEMPLATE % query_str + req = compat_urllib_request.Request(request_url) + linkBase = query.get('linkBaseURL') + if linkBase is not None: + referer = linkBase[0] + if referer is not None: + req.add_header('Referer', referer) + webpage = self._download_webpage(req, video_id) + + self.report_extraction(video_id) + info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') + info = json.loads(info)['data'] + video_info = info['programmedContent']['videoPlayer']['mediaDTO'] + video_info['_youtubedl_adServerURL'] = info.get('adServerURL') + + return self._extract_video_info(video_info) + + def _get_playlist_info(self, 
player_key): + info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key + playlist_info = self._download_webpage( + info_url, player_key, 'Downloading playlist information') + + json_data = json.loads(playlist_info) + if 'videoList' not in json_data: + raise ExtractorError('Empty playlist') + playlist_info = json_data['videoList'] + videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] + + return self.playlist_result(videos, playlist_id=playlist_info['id'], + playlist_title=playlist_info['mediaCollectionDTO']['displayName']) + + def _extract_video_info(self, video_info): + info = { + 'id': compat_str(video_info['id']), + 'title': video_info['displayName'].strip(), + 'description': video_info.get('shortDescription'), + 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), + 'uploader': video_info.get('publisherName'), + } + + renditions = video_info.get('renditions') + if renditions: + renditions = sorted(renditions, key=lambda r: r['size']) + info['formats'] = [{ + 'url': rend['defaultURL'], + 'height': rend.get('frameHeight'), + 'width': rend.get('frameWidth'), + } for rend in renditions] + elif video_info.get('FLVFullLengthURL') is not None: + info.update({ + 'url': video_info['FLVFullLengthURL'], + }) + + if self._downloader.params.get('include_ads', False): + adServerURL = video_info.get('_youtubedl_adServerURL') + if adServerURL: + ad_info = { + '_type': 'url', + 'url': adServerURL, + } + if 'url' in info: + return { + '_type': 'playlist', + 'title': info['title'], + 'entries': [ad_info, info], + } + else: + return ad_info + + if 'url' not in info and not info.get('formats'): + raise ExtractorError('Unable to extract video url for %s' % info['id']) + return info diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/c56.py youtube-dl-2014.02.17/youtube_dl/extractor/c56.py --- youtube-dl-2012.09.27/youtube_dl/extractor/c56.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/c56.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class C56IE(InfoExtractor): + _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' + IE_NAME = '56.com' + _TEST = { + 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', + 'file': '93440716.flv', + 'md5': 'e59995ac63d0457783ea05f93f12a866', + 'info_dict': { + 'title': '网事知多少 第32期:车怒', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + text_id = mobj.group('textid') + info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, + text_id, 'Downloading video info') + info = json.loads(info_page)['info'] + formats = [{ + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + } for f in info['rfiles']] + self._sort_formats(formats) + + return { + 'id': info['vid'], + 'title': info['Subject'], + 'formats': formats, + 'thumbnail': info.get('bimg') or info.get('img'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/canalc2.py youtube-dl-2014.02.17/youtube_dl/extractor/canalc2.py --- youtube-dl-2012.09.27/youtube_dl/extractor/canalc2.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/canalc2.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +# coding: utf-8 
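+# canalc2.tv pages expose the media as a flashvars-style 'file' value; the
+# extractor below joins it onto a fixed host (vod-flash.u-strasbg.fr) to
+# build the final video url.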
+import re + +from .common import InfoExtractor + + +class Canalc2IE(InfoExtractor): + IE_NAME = 'canalc2.tv' + _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + u'file': u'12163.mp4', + u'md5': u'060158428b650f896c542dfbb3d6487f', + u'info_dict': { + u'title': u'Terrasses du Numérique' + } + } + + def _real_extract(self, url): + video_id = re.match(self._VALID_URL, url).group('id') + # We need to set the voir field for getting the file name + url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id + webpage = self._download_webpage(url, video_id) + file_name = self._search_regex( + r"so\.addVariable\('file','(.*?)'\);", + webpage, 'file name') + video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name + + title = self._html_search_regex( + r'class="evenement8">(.*?)</a>', webpage, u'title') + + return {'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/canalplus.py youtube-dl-2014.02.17/youtube_dl/extractor/canalplus.py --- youtube-dl-2012.09.27/youtube_dl/extractor/canalplus.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/canalplus.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class CanalplusIE(InfoExtractor): + _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' + _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' + IE_NAME = u'canalplus.fr' + + _TEST = { + u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', + u'file': u'922470.flv', + u'info_dict': { + u'title': u'Zapping - 26/08/13', + u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', + u'upload_date': u'20130826', + }, + u'params': { + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.groupdict().get('id') + if video_id is None: + webpage = self._download_webpage(url, mobj.group('path')) + video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') + info_url = self._VIDEO_INFO_TEMPLATE % video_id + doc = self._download_xml(info_url,video_id, + u'Downloading video info') + + self.report_extraction(video_id) + video_info = [video for video in doc if video.find('ID').text == video_id][0] + infos = video_info.find('INFOS') + media = video_info.find('MEDIA') + formats = [media.find('VIDEOS/%s' % format) + for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']] + video_url = [format.text for format in formats if format is not None][-1] + + return {'id': video_id, + 'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text, + infos.find('TITRAGE/SOUS_TITRE').text), + 'url': video_url, + 'ext': 'flv', + 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), + 'thumbnail': media.find('IMAGES/GRAND').text, + 'description': infos.find('DESCRIPTION').text, + 'view_count': int(infos.find('NB_VUES').text), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cbs.py youtube-dl-2014.02.17/youtube_dl/extractor/cbs.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cbs.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cbs.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,30 @@ +import re + 
+from .common import InfoExtractor
+
+
+class CBSIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
+
+    _TEST = {
+        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+        u'file': u'4JUVEwq3wUT7.flv',
+        u'info_dict': {
+            u'title': u'Connect Chat feat. Garth Brooks',
+            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
+            u'duration': 1495,
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        real_id = self._search_regex(
+            r"video\.settings\.pid\s*=\s*'([^']+)';",
+            webpage, u'real video ID')
+        return self.url_result(u'theplatform:%s' % real_id)
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/channel9.py youtube-dl-2014.02.17/youtube_dl/extractor/channel9.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/channel9.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/channel9.py 2014-02-07 14:35:52.000000000 +0000
@@ -0,0 +1,273 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+class Channel9IE(InfoExtractor):
+    '''
+    Common extractor for channel9.msdn.com.
+
+    The type of a given URL (video or playlist) is determined from the
+    Search.PageType meta tag in the page HTML rather than from the URL
+    itself, since the URL alone does not always tell the two apart.
+    '''
+    IE_DESC = 'Channel 9'
+    IE_NAME = 'channel9'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
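+    # Matches entry and list pages alike, e.g. (taken from the tests below)
+    #     http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002
+    # where <contentpath> captures 'Events/TechEd/Australia/2013/KOS002'.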
+ + _TESTS = [ + { + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ], + }, + }, + { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'ext': 'mp4', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', + 'authors': [ 'Mike Wilmot' ], + }, + } + ] + + _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + + # Sorted by quality + _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4'] + + def _restore_bytes(self, formatted_size): + if not formatted_size: + return 0 + m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size) + if not m: + return 0 + units = m.group('units') + try: + exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper()) + except ValueError: + return 0 + size = float(m.group('size')) + return int(size * (1024 ** exponent)) + + def _formats_from_html(self, html): + FORMAT_REGEX = r''' + (?x) + <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s* + <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s* + (?:<div\s+class="popup\s+rounded">\s* + <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s* + </div>)? 
# File size part may be missing + ''' + # Extract known formats + formats = [{ + 'url': x.group('url'), + 'format_id': x.group('quality'), + 'format_note': x.group('note'), + 'format': '%s (%s)' % (x.group('quality'), x.group('note')), + 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate + 'preference': self._known_formats.index(x.group('quality')), + 'vcodec': 'none' if x.group('note') == 'Audio only' else None, + } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] + + self._sort_formats(formats) + + return formats + + def _extract_title(self, html): + title = self._html_search_meta('title', html, 'title') + if title is None: + title = self._og_search_title(html) + TITLE_SUFFIX = ' (Channel 9)' + if title is not None and title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)] + return title + + def _extract_description(self, html): + DESCRIPTION_REGEX = r'''(?sx) + <div\s+class="entry-content">\s* + <div\s+id="entry-body">\s* + (?P<description>.+?)\s* + </div>\s* + </div> + ''' + m = re.search(DESCRIPTION_REGEX, html) + if m is not None: + return m.group('description') + return self._html_search_meta('description', html, 'description') + + def _extract_duration(self, html): + m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) + return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None + + def _extract_slides(self, html): + m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html) + return m.group('slidesurl') if m is not None else None + + def _extract_zip(self, html): + m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html) + return m.group('zipurl') if m is not None else None + + def _extract_avg_rating(self, html): + m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html) + return float(m.group('avgrating')) if m is not None else 0 + + def _extract_rating_count(self, html): + m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html) + return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0 + + def _extract_view_count(self, html): + m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html) + return int(self._fix_count(m.group('viewcount'))) if m is not None else 0 + + def _extract_comment_count(self, html): + m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html) + return int(self._fix_count(m.group('commentcount'))) if m is not None else 0 + + def _fix_count(self, count): + return int(str(count).replace(',', '')) if count is not None else None + + def _extract_authors(self, html): + m = re.search(r'(?s)<li class="author">(.*?)</li>', html) + if m is None: + return None + return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1)) + + def _extract_session_code(self, html): + m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html) + return m.group('code') if m is not None else None + + def _extract_session_day(self, html): + m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) + return m.group('day') if m is not None else None + + def _extract_session_room(self, html): + m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html) + return m.group('room') if m is not None 
else None + + def _extract_session_speakers(self, html): + return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html) + + def _extract_content(self, html, content_path): + # Look for downloadable content + formats = self._formats_from_html(html) + slides = self._extract_slides(html) + zip_ = self._extract_zip(html) + + # Nothing to download + if len(formats) == 0 and slides is None and zip_ is None: + self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) + return + + # Extract meta + title = self._extract_title(html) + description = self._extract_description(html) + thumbnail = self._og_search_thumbnail(html) + duration = self._extract_duration(html) + avg_rating = self._extract_avg_rating(html) + rating_count = self._extract_rating_count(html) + view_count = self._extract_view_count(html) + comment_count = self._extract_comment_count(html) + + common = {'_type': 'video', + 'id': content_path, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'avg_rating': avg_rating, + 'rating_count': rating_count, + 'view_count': view_count, + 'comment_count': comment_count, + } + + result = [] + + if slides is not None: + d = common.copy() + d.update({ 'title': title + '-Slides', 'url': slides }) + result.append(d) + + if zip_ is not None: + d = common.copy() + d.update({ 'title': title + '-Zip', 'url': zip_ }) + result.append(d) + + if len(formats) > 0: + d = common.copy() + d.update({ 'title': title, 'formats': formats }) + result.append(d) + + return result + + def _extract_entry_item(self, html, content_path): + contents = self._extract_content(html, content_path) + if contents is None: + return contents + + authors = self._extract_authors(html) + + for content in contents: + content['authors'] = authors + + return contents + + def _extract_session(self, html, content_path): + contents = self._extract_content(html, content_path) + if contents is None: + return contents + + session_meta = {'session_code': self._extract_session_code(html), + 'session_day': self._extract_session_day(html), + 'session_room': self._extract_session_room(html), + 'session_speakers': self._extract_session_speakers(html), + } + + for content in contents: + content.update(session_meta) + + return contents + + def _extract_list(self, content_path): + rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') + entries = [self.url_result(session_url.text, 'Channel9') + for session_url in rss.findall('./channel/item/link')] + title_text = rss.find('./channel/title').text + return self.playlist_result(entries, content_path, title_text) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + content_path = mobj.group('contentpath') + + webpage = self._download_webpage(url, content_path, 'Downloading web page') + + page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage) + if page_type_m is None: + raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True) + + page_type = page_type_m.group('pagetype') + if page_type == 'List': # List page, may contain list of 'item'-like objects + return self._extract_list(content_path) + elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content + return self._extract_entry_item(webpage, content_path) + elif page_type == 'Session': # Event session page, may contain downloadable content + return self._extract_session(webpage, content_path) + 
else: + raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True) \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/chilloutzone.py youtube-dl-2014.02.17/youtube_dl/extractor/chilloutzone.py --- youtube-dl-2012.09.27/youtube_dl/extractor/chilloutzone.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/chilloutzone.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,97 @@ +from __future__ import unicode_literals + +import re +import base64 +import json + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError +) + + +class ChilloutzoneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html' + _TESTS = [{ + 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'md5': 'a76f3457e813ea0037e5244f509e66d1', + 'info_dict': { + 'id': 'enemene-meck-alle-katzen-weg', + 'ext': 'mp4', + 'title': 'Enemene Meck - Alle Katzen weg', + 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + }, + }, { + 'note': 'Video hosted at YouTube', + 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'info_dict': { + 'id': '1YVQaAgHyRU', + 'ext': 'mp4', + 'title': '16 Photos Taken 1 Second Before Disaster', + 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', + 'uploader': 'BuzzFeedVideo', + 'uploader_id': 'BuzzFeedVideo', + 'upload_date': '20131105', + }, + }, { + 'note': 'Video hosted at Vimeo', + 'url': 'http://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'info_dict': { + 'id': '85523671', + 'ext': 'mp4', + 'title': 'The Sunday Times - Icons', + 'description': 'md5:3e1c0dc6047498d6728dcdaad0891762', + 'uploader': 'Us', + 'uploader_id': 'usfilms', + 'upload_date': '20140131' + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + base64_video_info = self._html_search_regex( + r'var cozVidData = "(.+?)";', webpage, 'video data') + decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8") + video_info_dict = json.loads(decoded_video_info) + + # get video information from dict + video_url = video_info_dict['mediaUrl'] + description = clean_html(video_info_dict.get('description')) + title = video_info_dict['title'] + native_platform = video_info_dict['nativePlatform'] + native_video_id = video_info_dict['nativeVideoId'] + source_priority = video_info_dict['sourcePriority'] + + # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) + if native_platform is None: + youtube_url = self._html_search_regex( + r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', + webpage, 'fallback video URL', default=None) + if youtube_url is not None: + return self.url_result(youtube_url, ie='Youtube') + + # Non Fallback: Decide to use native source (e.g. 
youtube or vimeo) or + # the own CDN + if source_priority == 'native': + if native_platform == 'youtube': + return self.url_result(native_video_id, ie='Youtube') + if native_platform == 'vimeo': + return self.url_result( + 'http://vimeo.com/' + native_video_id, ie='Vimeo') + + if not video_url: + raise ExtractorError('No video found') + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cinemassacre.py youtube-dl-2014.02.17/youtube_dl/extractor/cinemassacre.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cinemassacre.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cinemassacre.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,90 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class CinemassacreIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' + _TESTS = [{ + u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', + u'file': u'19911.flv', + u'info_dict': { + u'upload_date': u'20121110', + u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', + u'description': u'md5:fb87405fcb42a331742a0dce2708560b', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }, + { + u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', + u'file': u'521be8ef82b16.flv', + u'info_dict': { + u'upload_date': u'20131002', + u'title': u'The Mummy’s Hand (1940)', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = u'http://' + mobj.group('url') + webpage = self._download_webpage(webpage_url, None) # Don't know video id yet + video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') + mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) + if not mobj: + raise ExtractorError(u'Can\'t extract embed url and video id') + playerdata_url = mobj.group(u'embed_url') + video_id = mobj.group(u'video_id') + + video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', + webpage, u'title') + video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', + webpage, u'description', flags=re.DOTALL, fatal=False) + if len(video_description) == 0: + video_description = None + + playerdata = self._download_webpage(playerdata_url, video_id) + url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') + + sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') + hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') + video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) + + formats = [ + { + 'url': url, + 'play_path': 'mp4:' + sd_file, + 'rtmp_live': True, # workaround + 'ext': 'flv', + 'format': 'sd', + 'format_id': 'sd', + }, + { + 'url': url, + 'play_path': 'mp4:' + hd_file, + 'rtmp_live': True, # workaround + 'ext': 'flv', + 'format': 'hd', + 'format_id': 'hd', + }, + ] + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'description': video_description, + 'upload_date': 
video_date, + 'thumbnail': video_thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/clipfish.py youtube-dl-2014.02.17/youtube_dl/extractor/clipfish.py --- youtube-dl-2012.09.27/youtube_dl/extractor/clipfish.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/clipfish.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,58 @@ +import re +import time +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class ClipfishIE(InfoExtractor): + IE_NAME = u'clipfish' + + _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/' + _TEST = { + u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', + u'file': u'3966754.mp4', + u'md5': u'2521cd644e862936cf2e698206e47385', + u'info_dict': { + u'title': u'FIFA 14 - E3 2013 Trailer', + u'duration': 82, + }, + u'skip': 'Blocked in the US' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % + (video_id, int(time.time()))) + doc = self._download_xml( + info_url, video_id, note=u'Downloading info page') + title = doc.find('title').text + video_url = doc.find('filename').text + if video_url is None: + xml_bytes = xml.etree.ElementTree.tostring(doc) + raise ExtractorError(u'Cannot find video URL in document %r' % + xml_bytes) + thumbnail = doc.find('imageurl').text + duration_str = doc.find('duration').text + m = re.match( + r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$', + duration_str) + if m: + duration = ( + (int(m.group('hours')) * 60 * 60) + + (int(m.group('minutes')) * 60) + + (int(m.group('seconds'))) + ) + else: + duration = None + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumbnail, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cliphunter.py youtube-dl-2014.02.17/youtube_dl/extractor/cliphunter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cliphunter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cliphunter.py 2014-02-02 10:56:50.000000000 +0000 @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +translation_table = { + 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', + 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', + 'y': 'l', 'z': 'i', + '$': ':', '&': '.', '(': '=', '^': '&', '=': '/', +} + + +class CliphunterIE(InfoExtractor): + IE_NAME = 'cliphunter' + + _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ + (?P<id>[0-9]+)/ + (?P<seo>.+?)(?:$|[#\?]) + ''' + _TEST = { + 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', + 'file': '1012420.flv', + 'md5': '15e7740f30428abf70f4223478dc1225', + 'info_dict': { + 'title': 'Fun Jynx Maze solo', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + pl_fiji = self._search_regex( + r'pl_fiji = \'([^\']+)\'', webpage, 'video data') + pl_c_qual = self._search_regex( + r'pl_c_qual = "(.)"', webpage, 'video quality') + video_title = self._search_regex( + r'mediaTitle = "([^"]+)"', webpage, 'title') + + video_url = ''.join(translation_table.get(c, c) for c in pl_fiji) + + formats = [{ + 'url': video_url, + 'format_id': pl_c_qual, + }] + + return { + 'id': 
video_id,
+            'title': video_title,
+            'formats': formats,
+        }
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/clipsyndicate.py youtube-dl-2014.02.17/youtube_dl/extractor/clipsyndicate.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/clipsyndicate.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/clipsyndicate.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,50 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    fix_xml_ampersands
+)
+
+
+class ClipsyndicateIE(InfoExtractor):
+    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
+        u'info_dict': {
+            u'id': u'4629301',
+            u'ext': u'mp4',
+            u'title': u'Brick Briscoe',
+            u'duration': 612,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        js_player = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
+            video_id, u'Downloading player')
+        # it includes a required token
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+
+        pdoc = self._download_xml(
+            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
+            video_id, u'Downloading video info',
+            transform_source=fix_xml_ampersands)
+
+        track_doc = pdoc.find('trackList/track')
+
+        def find_param(name):
+            node = find_xpath_attr(track_doc, './/param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+
+        return {
+            'id': video_id,
+            'title': find_param('title'),
+            'url': track_doc.find('location').text,
+            'thumbnail': find_param('thumbnail'),
+            'duration': int(find_param('duration')),
+        }
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cmt.py youtube-dl-2014.02.17/youtube_dl/extractor/cmt.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/cmt.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/cmt.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,19 @@
+from .mtv import MTVIE
+
+class CMTIE(MTVIE):
+    IE_NAME = u'cmt.com'
+    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
+    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
+            u'info_dict': {
+                u'id': u'989124',
+                u'ext': u'mp4',
+                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+                u'description': u'Blame It All On My Roots',
+            },
+        },
+    ]
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cnn.py youtube-dl-2014.02.17/youtube_dl/extractor/cnn.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/cnn.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/cnn.py 2014-02-13 15:26:26.000000000 +0000
@@ -0,0 +1,126 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    url_basename,
+)
+
+
+class CNNIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+    _TESTS = [{
+        'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+        'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+        'md5': '3e6121ea48df7e2259fe73a0628605c4',
+        'info_dict': {
'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', + }, + }, + { + "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", + "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", + "md5": "b5cc60c60a3477d185af8f19a2a26f4e", + "info_dict": { + "title": "Student's epic speech stuns new freshmen", + "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + "upload_date": "20130821", + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + path = mobj.group('path') + page_title = mobj.group('title') + info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path + info = self._download_xml(info_url, page_title) + + formats = [] + rex = re.compile(r'''(?x) + (?P<width>[0-9]+)x(?P<height>[0-9]+) + (?:_(?P<bitrate>[0-9]+)k)? + ''') + for f in info.findall('files/file'): + video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) + fdct = { + 'format_id': f.attrib['bitrate'], + 'url': video_url, + } + + mf = rex.match(f.attrib['bitrate']) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mf = rex.search(f.text) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) + if mi: + if mi.group(1) == 'audio': + fdct['vcodec'] = 'none' + fdct['ext'] = 'm4a' + else: + fdct['tbr'] = int(mi.group(1)) + + formats.append(fdct) + + self._sort_formats(formats) + + thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) + thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] + + metas_el = info.find('metas') + upload_date = ( + metas_el.attrib.get('version') if metas_el is not None else None) + + duration_el = info.find('length') + duration = parse_duration(duration_el.text) + + return { + 'id': info.attrib['id'], + 'title': info.find('headline').text, + 'formats': formats, + 'thumbnail': thumbnails[-1][1], + 'thumbnails': thumbs_dict, + 'description': info.find('description').text, + 'duration': duration, + 'upload_date': upload_date, + } + + +class CNNBlogsIE(InfoExtractor): + _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' + _TEST = { + 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', + 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', + 'info_dict': { + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'ext': 'mp4', + 'title': 'Criminalizing journalism?', + 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', + 'upload_date': '20140209', + }, + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') + return { + '_type': 'url', + 'url': cnn_url, + 'ie_key': CNNIE.ie_key(), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/collegehumor.py 
youtube-dl-2014.02.17/youtube_dl/extractor/collegehumor.py --- youtube-dl-2012.09.27/youtube_dl/extractor/collegehumor.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/collegehumor.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,99 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class CollegeHumorIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' + + _TESTS = [{ + 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', + 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', + 'info_dict': { + 'id': '6902724', + 'ext': 'mp4', + 'title': 'Comic-Con Cosplay Catastrophe', + 'description': 'Fans get creative this year', + 'age_limit': 13, + }, + }, + { + 'url': 'http://www.collegehumor.com/video/3505939/font-conference', + 'md5': '72fa701d8ef38664a4dbb9e2ab721816', + 'info_dict': { + 'id': '3505939', + 'ext': 'mp4', + 'title': 'Font Conference', + 'description': 'This video wasn\'t long enough,', + 'age_limit': 10, + 'duration': 179, + }, + }, + # embedded youtube video + { + 'url': 'http://www.collegehumor.com/embed/6950457', + 'info_dict': { + 'id': 'W5gMp3ZjYg4', + 'ext': 'mp4', + 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', + 'uploader': 'Funnyplox TV', + 'uploader_id': 'funnyploxtv', + 'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', + 'upload_date': '20140128', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Youtube'], + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + + jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' + data = json.loads(self._download_webpage( + jsonUrl, video_id, 'Downloading info JSON')) + vdata = data['video'] + if vdata.get('youtubeId') is not None: + return { + '_type': 'url', + 'url': vdata['youtubeId'], + 'ie_key': 'Youtube', + } + + AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} + rating = vdata.get('rating') + if rating: + age_limit = AGE_LIMITS.get(rating.lower()) + else: + age_limit = None # None = No idea + + PREFS = {'high_quality': 2, 'low_quality': 0} + formats = [] + for format_key in ('mp4', 'webm'): + for qname, qurl in vdata.get(format_key, {}).items(): + formats.append({ + 'format_id': format_key + '_' + qname, + 'url': qurl, + 'format': format_key, + 'preference': PREFS.get(qname), + }) + self._sort_formats(formats) + + duration = int_or_none(vdata.get('duration'), 1000) + + return { + 'id': video_id, + 'title': vdata['title'], + 'description': vdata.get('description'), + 'thumbnail': vdata.get('thumbnail'), + 'formats': formats, + 'age_limit': age_limit, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/comedycentral.py youtube-dl-2014.02.17/youtube_dl/extractor/comedycentral.py --- youtube-dl-2012.09.27/youtube_dl/extractor/comedycentral.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/comedycentral.py 2014-01-30 03:51:52.000000000 +0000 @@ -0,0 +1,200 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .mtv import MTVServicesInfoExtractor +from ..utils import ( + compat_str, + compat_urllib_parse, + + ExtractorError, + unified_strdate, +) + + +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = 
r'''(?x)https?://(?:www\.)?comedycentral\.com/ + (video-clips|episodes|cc-studios|video-collections) + /(?P<title>.*)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TEST = { + 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', + 'md5': '4167875aae411f903b751a21f357f1ee', + 'info_dict': { + 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', + 'ext': 'mp4', + 'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother', + 'description': 'After a certain point, breastfeeding becomes c**kblocking.', + }, + } + + +class ComedyCentralShowsIE(InfoExtractor): + IE_DESC = 'The Daily Show / Colbert Report' + # urls can be abbreviations like :thedailyshow or :colbert + # urls for episodes like: + # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day + # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news + # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 + _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) + |(https?://)?(www\.)? + (?P<showname>thedailyshow|colbertnation)\.com/ + (full-episodes/(?P<episode>.*)| + (?P<clip> + (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) + |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))| + (?P<interview> + extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?))) + $""" + _TEST = { + 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', + 'file': '422212.mp4', + 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', + 'info_dict': { + "upload_date": "20121214", + "description": "Kristen Stewart", + "uploader": "thedailyshow", + "title": "thedailyshow-kristen-stewart part 1" + } + } + + _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] + + _video_extensions = { + '3500': 'mp4', + '2200': 'mp4', + '1700': 'mp4', + '1200': 'mp4', + '750': 'mp4', + '400': 'mp4', + } + _video_dimensions = { + '3500': (1280, 720), + '2200': (960, 540), + '1700': (768, 432), + '1200': (640, 360), + '750': (512, 288), + '400': (384, 216), + } + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + + @staticmethod + def _transform_rtmp_url(rtmp_video_url): + m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url) + if not m: + raise ExtractorError('Cannot transform RTMP url') + base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' + return base + m.group('finalid') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + if mobj is None: + raise ExtractorError('Invalid URL: %s' % url) + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + assert mobj is not None + + if mobj.group('clip'): + if mobj.group('showname') == 'thedailyshow': + epTitle = mobj.group('tdstitle') + else: + epTitle = mobj.group('cntitle') + dlNewest = False + elif mobj.group('interview'): + epTitle = mobj.group('interview_title') + dlNewest = False + else: + dlNewest = not mobj.group('episode') 
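+ # no specific episode was requested; the site redirects to the newest + # full episode, so the final URL is re-matched after the download below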
+ if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') + + self.report_extraction(epTitle) + webpage, htmlHandle = self._download_webpage_handle(url, epTitle) + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + if mobj is None: + raise ExtractorError('Invalid redirected URL: ' + url) + if mobj.group('episode') == '': + raise ExtractorError('Redirected URL is still not specific: ' + url) + epTitle = mobj.group('episode') + + mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) + + if len(mMovieParams) == 0: + # The Colbert Report embeds the information without + # a URL prefix; so extract the alternate reference + # and then add the URL prefix manually. + + altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) + if len(altMovieParams) == 0: + raise ExtractorError('unable to find Flash URL in webpage ' + url) + else: + mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] + + uri = mMovieParams[0][1] + indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) + idoc = self._download_xml(indexUrl, epTitle, + 'Downloading show index', + 'unable to download episode index') + + results = [] + + itemEls = idoc.findall('.//item') + for partNum, itemEl in enumerate(itemEls): + mediaId = itemEl.findall('./guid')[0].text + shortMediaId = mediaId.split(':')[-1] + showId = mediaId.split(':')[-2].replace('.com', '') + officialTitle = itemEl.findall('./title')[0].text + officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) + + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?'
+ + compat_urllib_parse.urlencode({'uri': mediaId})) + cdoc = self._download_xml(configUrl, epTitle, + 'Downloading configuration for %s' % shortMediaId) + + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + if len(turls) == 0: + self._downloader.report_error('unable to download ' + mediaId + ': No videos found') + continue + + formats = [] + for format, rtmp_video_url in turls: + w, h = self._video_dimensions.get(format, (None, None)) + formats.append({ + 'url': self._transform_rtmp_url(rtmp_video_url), + 'ext': self._video_extensions.get(format, 'mp4'), + 'format_id': format, + 'height': h, + 'width': w, + }) + + effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1) + results.append({ + 'id': shortMediaId, + 'formats': formats, + 'uploader': showId, + 'upload_date': officialDate, + 'title': effTitle, + 'thumbnail': None, + 'description': compat_str(officialTitle), + }) + + return results diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/common.py youtube-dl-2014.02.17/youtube_dl/extractor/common.py --- youtube-dl-2012.09.27/youtube_dl/extractor/common.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/common.py 2014-02-09 16:42:58.000000000 +0000 @@ -0,0 +1,573 @@ +import base64 +import hashlib +import json +import os +import re +import socket +import sys +import netrc +import xml.etree.ElementTree + +from ..utils import ( + compat_http_client, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_str, + + clean_html, + compiled_regex_type, + ExtractorError, + RegexNotFoundError, + sanitize_filename, + unescapeHTML, +) +_NO_DEFAULT = object() + + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the FileDownloader. The FileDownloader processes this + information, possibly downloading the video to the file system, among + other possible outcomes. + + The dictionaries must include the following fields: + + id: Video identifier. + title: Video title, unescaped. + + Additionally, it must contain either a formats entry or a url one: + + formats: A list of dictionaries for each format available, ordered + from worst to best quality. + + Potential fields: + * url Mandatory. The URL of the video file + * ext Will be calculated from url if missing + * format A human-readable description of the format + ("mp4 container with h264/opus"). + Calculated from the format_id, width, height, + and format_note fields if missing. + * format_id A short description of the format + ("mp4_h264_opus" or "19"). + Technically optional, but strongly recommended.
+ * format_note Additional info about the format + ("3D" or "DASH video") + * width Width of the video, if known + * height Height of the video, if known + * resolution Textual description of width and height + * tbr Average bitrate of audio and video in KBit/s + * abr Average audio bitrate in KBit/s + * acodec Name of the audio codec in use + * asr Audio sampling rate in Hertz + * vbr Average video bitrate in KBit/s + * vcodec Name of the video codec in use + * container Name of the container format + * filesize The number of bytes, if known in advance + * player_url SWF Player URL (used for rtmpdump). + * protocol The protocol that will be used for the actual + download, lower-case. + "http", "https", "rtsp", "rtmp", "m3u8" or so. + * preference Order number of this format. If this field is + present and not None, the formats get sorted + by this field. + -1 for default (order by other properties), + -2 or smaller for less than default. + * quality Order number of the video quality of this + format, irrespective of the file format. + -1 for default (order by other properties), + -2 or smaller for less than default. + url: Final video URL. + ext: Video filename extension. + format: The video format, defaults to ext (used for --get-format) + player_url: SWF Player URL (used for rtmpdump). + + The following fields are optional: + + thumbnails: A list of dictionaries (with the entries "resolution" and + "url") for the varying thumbnails + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + uploader: Full name of the video uploader. + upload_date: Video upload date (YYYYMMDD). + uploader_id: Nickname or id of the video uploader. + location: Physical location of the video. + subtitles: The subtitle file contents as a dictionary in the format + {language: subtitles}. + duration: Length of the video in seconds, as an integer. + view_count: How many users have watched the video on the platform. + like_count: Number of positive ratings of the video + dislike_count: Number of negative ratings of the video + comment_count: Number of comments on the video + age_limit: Age restriction for the video, as an integer (years) + webpage_url: The url to the video webpage, if given to youtube-dl it + should allow to get the same result again. (It will be set + by YoutubeDL if it's missing) + + Unless mentioned otherwise, the fields should be Unicode strings. + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods and define a _VALID_URL regexp. + Probably, they should also be added to the list of extractors. + + _real_extract() must return a *list* of information dictionaries as + described above. + + Finally, the _WORKING attribute should be set to False for broken IEs + in order to warn the users and skip the tests. + """ + + _ready = False + _downloader = None + _WORKING = True + + def __init__(self, downloader=None): + """Constructor. 
Receives an optional downloader.""" + self._ready = False + self.set_downloader(downloader) + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + + # This does not use has/getattr intentionally - we want to know whether + # we have cached the regexp for *this* class, whereas getattr would also + # match the superclass + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + return cls._VALID_URL_RE.match(url) is not None + + @classmethod + def working(cls): + """Getter method for _WORKING.""" + return cls._WORKING + + def initialize(self): + """Initializes an instance (authentication, etc).""" + if not self._ready: + self._real_initialize() + self._ready = True + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + self.initialize() + return self._real_extract(url) + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return cls.__name__[:-2] + + @property + def IE_NAME(self): + return type(self).__name__[:-2] + + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns the response handle """ + if note is None: + self.report_download_webpage(video_id) + elif note is not False: + if video_id is None: + self.to_screen(u'%s' % (note,)) + else: + self.to_screen(u'%s: %s' % (video_id, note)) + try: + return self._downloader.urlopen(url_or_request) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + if errnote is False: + return False + if errnote is None: + errnote = u'Unable to download webpage' + errmsg = u'%s: %s' % (errnote, compat_str(err)) + if fatal: + raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + else: + self._downloader.report_warning(errmsg) + return False + + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns a tuple (page content as string, URL handle) """ + + # Strip hashes from the URL (#1038) + if isinstance(url_or_request, (compat_str, str)): + url_or_request = url_or_request.partition('#')[0] + + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) + if urlh is False: + assert not fatal + return False + content_type = urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + elif webpage_bytes.startswith(b'\xff\xfe'): + encoding = 'utf-16' + else: + encoding = 'utf-8' + if self._downloader.params.get('dump_intermediate_pages', False): + try: + url = url_or_request.get_full_url() + except AttributeError: + url = url_or_request + self.to_screen(u'Dumping request to ' + url) + dump = base64.b64encode(webpage_bytes).decode('ascii') + self._downloader.to_screen(dump) + if self._downloader.params.get('write_pages', False): + try: + url = url_or_request.get_full_url() + except AttributeError: + url = url_or_request + if len(url) > 200: 
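+ # keep the --write-pages dump filename manageable: replace the tail + # of an overlong URL with an md5 digest of the full URL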
+ h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() + url = url[:200 - len(h)] + h + raw_filename = ('%s_%s.dump' % (video_id, url)) + filename = sanitize_filename(raw_filename, restricted=True) + self.to_screen(u'Saving request to ' + filename) + with open(filename, 'wb') as outf: + outf.write(webpage_bytes) + + content = webpage_bytes.decode(encoding, 'replace') + return (content, urlh) + + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns the data of the page as a string """ + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + if res is False: + return res + else: + content, _ = res + return content + + def _download_xml(self, url_or_request, video_id, + note=u'Downloading XML', errnote=u'Unable to download XML', + transform_source=None): + """Return the xml as an xml.etree.ElementTree.Element""" + xml_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + xml_string = transform_source(xml_string) + return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + + def _download_json(self, url_or_request, video_id, + note=u'Downloading JSON metadata', + errnote=u'Unable to download JSON metadata', + transform_source=None): + json_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + json_string = transform_source(json_string) + try: + return json.loads(json_string) + except ValueError as ve: + raise ExtractorError('Failed to download JSON', cause=ve) + + def report_warning(self, msg, video_id=None): + idstr = u'' if video_id is None else u'%s: ' % video_id + self._downloader.report_warning( + u'[%s] %s%s' % (self.IE_NAME, idstr, msg)) + + def to_screen(self, msg): + """Print msg to screen, prefixing it with '[ie_name]'""" + self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) + + def report_extraction(self, id_or_name): + """Report information extraction.""" + self.to_screen(u'%s: Extracting information' % id_or_name) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self.to_screen(u'Confirming age') + + def report_login(self): + """Report attempt to log in.""" + self.to_screen(u'Logging in') + + #Methods for following #608 + @staticmethod + def url_result(url, ie=None, video_id=None): + """Returns a url that points to a page that should be processed""" + #TODO: ie should be the class used for getting the info + video_info = {'_type': 'url', + 'url': url, + 'ie_key': ie} + if video_id is not None: + video_info['id'] = video_id + return video_info + @staticmethod + def playlist_result(entries, playlist_id=None, playlist_title=None): + """Returns a playlist""" + video_info = {'_type': 'playlist', + 'entries': entries} + if playlist_id: + video_info['id'] = playlist_id + if playlist_title: + video_info['title'] = playlist_title + return video_info + + def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): + """ + Perform a regex search on the given string, using a single or a list of + patterns returning the first matching group. + In case of failure return a default value or raise a WARNING or a + RegexNotFoundError, depending on fatal, specifying the field name. 
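+ + An illustrative example: + self._search_regex(r'id="(\d+)"', webpage, u'video id') + returns the first captured group, raising RegexNotFoundError on + failure since fatal defaults to True.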
+ """ + if isinstance(pattern, (str, compat_str, compiled_regex_type)): + mobj = re.search(pattern, string, flags) + else: + for p in pattern: + mobj = re.search(p, string, flags) + if mobj: break + + if os.name != 'nt' and sys.stderr.isatty(): + _name = u'\033[0;34m%s\033[0m' % name + else: + _name = name + + if mobj: + # return the first matching group + return next(g for g in mobj.groups() if g is not None) + elif default is not _NO_DEFAULT: + return default + elif fatal: + raise RegexNotFoundError(u'Unable to extract %s' % _name) + else: + self._downloader.report_warning(u'unable to extract %s; ' + u'please report this issue on http://yt-dl.org/bug' % _name) + return None + + def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): + """ + Like _search_regex, but strips HTML tags and unescapes entities. + """ + res = self._search_regex(pattern, string, name, default, fatal, flags) + if res: + return clean_html(res).strip() + else: + return res + + def _get_login_info(self): + """ + Get the the login info as (username, password) + It will look in the netrc file using the _NETRC_MACHINE value + If there's no info available, return (None, None) + """ + if self._downloader is None: + return (None, None) + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError) as err: + self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) + + return (username, password) + + # Helper functions for extracting OpenGraph info + @staticmethod + def _og_regexes(prop): + content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' + property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) + template = r'<meta[^>]+?%s[^>]+?%s' + return [ + template % (property_re, content_re), + template % (content_re, property_re), + ] + + def _og_search_property(self, prop, html, name=None, **kargs): + if name is None: + name = 'OpenGraph %s' % prop + escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) + if escaped is None: + return None + return unescapeHTML(escaped) + + def _og_search_thumbnail(self, html, **kargs): + return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) + + def _og_search_description(self, html, **kargs): + return self._og_search_property('description', html, fatal=False, **kargs) + + def _og_search_title(self, html, **kargs): + return self._og_search_property('title', html, **kargs) + + def _og_search_video_url(self, html, name='video url', secure=True, **kargs): + regexes = self._og_regexes('video') + if secure: regexes = self._og_regexes('video:secure_url') + regexes + return self._html_search_regex(regexes, html, name, **kargs) + + def _html_search_meta(self, name, html, display_name=None): + if display_name is None: + display_name = name + return self._html_search_regex( + r'''(?ix)<meta + (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) + [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), + html, display_name, fatal=False) + + def 
_dc_search_uploader(self, html): + return self._html_search_meta('dc.creator', html, 'uploader') + + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)<meta\s+name="rating"\s+' + r' content="RTA-5042-1996-1400-1577-RTA"', + html): + return 18 + return 0 + + def _media_rating_search(self, html): + # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ + rating = self._html_search_meta('rating', html) + + if not rating: + return None + + RATING_TABLE = { + 'safe for kids': 0, + 'general': 8, + '14 years': 14, + 'mature': 17, + 'restricted': 19, + } + return RATING_TABLE.get(rating.lower(), None) + + def _twitter_search_player(self, html): + return self._html_search_meta('twitter:player', html, + 'twitter card player') + + def _sort_formats(self, formats): + if not formats: + raise ExtractorError(u'No video formats found') + + def _formats_key(f): + # TODO remove the following workaround + from ..utils import determine_ext + if not f.get('ext') and 'url' in f: + f['ext'] = determine_ext(f['url']) + + preference = f.get('preference') + if preference is None: + proto = f.get('protocol') + if proto is None: + proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme + + preference = 0 if proto in ['http', 'https'] else -0.1 + if f.get('ext') in ['f4f', 'f4m']: # Not yet supported + preference -= 0.5 + + if f.get('vcodec') == 'none': # audio only + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] + else: + ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] + ext_preference = 0 + try: + audio_ext_preference = ORDER.index(f['ext']) + except ValueError: + audio_ext_preference = -1 + else: + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'flv', u'mp4', u'webm'] + else: + ORDER = [u'webm', u'flv', u'mp4'] + try: + ext_preference = ORDER.index(f['ext']) + except ValueError: + ext_preference = -1 + audio_ext_preference = 0 + + return ( + preference, + f.get('quality') if f.get('quality') is not None else -1, + f.get('height') if f.get('height') is not None else -1, + f.get('width') if f.get('width') is not None else -1, + ext_preference, + f.get('tbr') if f.get('tbr') is not None else -1, + f.get('vbr') if f.get('vbr') is not None else -1, + f.get('abr') if f.get('abr') is not None else -1, + audio_ext_preference, + f.get('filesize') if f.get('filesize') is not None else -1, + f.get('format_id'), + ) + formats.sort(key=_formats_key) + + +class SearchInfoExtractor(InfoExtractor): + """ + Base class for paged search queries extractors. + They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query} + Instances should define _SEARCH_KEY and _MAX_RESULTS. 
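+ For example, with _SEARCH_KEY = 'ytsearch', "ytsearch3:query" fetches + the first three results, "ytsearch:query" only the first, and + "ytsearchall:query" up to _MAX_RESULTS.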
+ """ + + @classmethod + def _make_valid_url(cls): + return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY + + @classmethod + def suitable(cls, url): + return re.match(cls._make_valid_url(), url) is not None + + def _real_extract(self, query): + mobj = re.match(self._make_valid_url(), query) + if mobj is None: + raise ExtractorError(u'Invalid search query "%s"' % query) + + prefix = mobj.group('prefix') + query = mobj.group('query') + if prefix == '': + return self._get_n_results(query, 1) + elif prefix == 'all': + return self._get_n_results(query, self._MAX_RESULTS) + else: + n = int(prefix) + if n <= 0: + raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query)) + elif n > self._MAX_RESULTS: + self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) + n = self._MAX_RESULTS + return self._get_n_results(query, n) + + def _get_n_results(self, query, n): + """Get a specified number of results for a query""" + raise NotImplementedError("This method must be implemented by subclasses") + + @property + def SEARCH_KEY(self): + return self._SEARCH_KEY diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/condenast.py youtube-dl-2014.02.17/youtube_dl/extractor/condenast.py --- youtube-dl-2012.09.27/youtube_dl/extractor/condenast.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/condenast.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + orderedSet, + compat_urllib_parse_urlparse, + compat_urlparse, +) + + +class CondeNastIE(InfoExtractor): + """ + Condé Nast is a media group, some of its sites use a custom HTML5 player + that works the same in all of them. + """ + + # The keys are the supported sites and the values are the name to be shown + # to the user and in the extractor description. + _SITES = { + 'wired': 'WIRED', + 'gq': 'GQ', + 'vogue': 'Vogue', + 'glamour': 'Glamour', + 'wmagazine': 'W Magazine', + 'vanityfair': 'Vanity Fair', + } + + _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) + IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) + + _TEST = { + 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', + 'file': '5171b343c2b4c00dd0c1ccb3.mp4', + 'md5': '1921f713ed48aabd715691f774c451f7', + 'info_dict': { + 'title': '3D Printed Speakers Lit With LED', + 'description': 'Check out these beautiful 3D printed LED speakers. 
You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', + } + } + + def _extract_series(self, url, webpage): + title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', + webpage, 'series title', flags=re.DOTALL) + url_object = compat_urllib_parse_urlparse(url) + base_url = '%s://%s' % (url_object.scheme, url_object.netloc) + m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', + webpage, flags=re.DOTALL) + paths = orderedSet(m.group(1) for m in m_paths) + build_url = lambda path: compat_urlparse.urljoin(base_url, path) + entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] + return self.playlist_result(entries, playlist_title=title) + + def _extract_video(self, webpage): + description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>', + r'<div class="video-post-content">(.+?)</div>', + ], + webpage, 'description', + fatal=False, flags=re.DOTALL) + params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, + 'player params', flags=re.DOTALL) + video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') + player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') + target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') + data = compat_urllib_parse.urlencode({'videoId': video_id, + 'playerId': player_id, + 'target': target, + }) + base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', + webpage, 'base info url', + default='http://player.cnevids.com/player/loader.js?') + info_url = base_info_url + data + info_page = self._download_webpage(info_url, video_id, + 'Downloading video info') + video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') + video_info = json.loads(video_info) + + formats = [{ + 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), + 'url': fdata['src'], + 'ext': fdata['type'].split('/')[-1], + 'quality': 1 if fdata['quality'] == 'high' else 0, + } for fdata in video_info['sources'][0]] + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_info['title'], + 'thumbnail': video_info['poster_frame'], + 'description': description, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site = mobj.group('site') + url_type = mobj.group('type') + id = mobj.group('id') + + self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) + webpage = self._download_webpage(url, id) + + if url_type == 'series': + return self._extract_series(url, webpage) + else: + return self._extract_video(webpage) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/criterion.py youtube-dl-2014.02.17/youtube_dl/extractor/criterion.py --- youtube-dl-2012.09.27/youtube_dl/extractor/criterion.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/criterion.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import InfoExtractor +from ..utils import determine_ext + +class CriterionIE(InfoExtractor): + _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _TEST = { + u'url': u'http://www.criterion.com/films/184-le-samourai', + u'file': u'184.mp4', + u'md5': u'bc51beba55685509883a9a7830919ec3', + u'info_dict': { + u"title": u"Le Samouraï", + u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + } + } + + def _real_extract(self, url): + mobj = 
re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + + final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', + webpage, 'video url') + title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', + webpage, 'video title') + description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', + webpage, 'video description') + thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') + + return {'id': video_id, + 'url' : final_url, + 'title': title, + 'ext': determine_ext(final_url), + 'description': description, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/crunchyroll.py youtube-dl-2014.02.17/youtube_dl/extractor/crunchyroll.py --- youtube-dl-2012.09.27/youtube_dl/extractor/crunchyroll.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/crunchyroll.py 2014-01-30 04:30:16.000000000 +0000 @@ -0,0 +1,178 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re, base64, zlib +from hashlib import sha1 +from math import pow, sqrt, floor +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + bytes_to_intlist, + intlist_to_bytes, + unified_strdate, + clean_html, +) +from ..aes import ( + aes_cbc_decrypt, + inc, +) + +class CrunchyrollIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _TESTS = [{ + 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', + 'file': '645513.flv', + #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', + 'info_dict': { + 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', + 'description': 'md5:2d17137920c64f2f49981a7797d275ef', + 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', + 'uploader': 'Yomiuri Telecasting Corporation (YTV)', + 'upload_date': '20131013', + }, + 'params': { + # rtmp + 'skip_download': True, + }, + }] + + _FORMAT_IDS = { + '360': ('60', '106'), + '480': ('61', '106'), + '720': ('62', '106'), + '1080': ('80', '108'), + } + + def _decrypt_subtitles(self, data, iv, id): + data = bytes_to_intlist(data) + iv = bytes_to_intlist(iv) + id = int(id) + + def obfuscate_key_aux(count, modulo, start): + output = list(start) + for _ in range(count): + output.append(output[-1] + output[-2]) + # cut off start values + output = output[2:] + output = list(map(lambda x: x % modulo + 33, output)) + return output + + def obfuscate_key(key): + num1 = int(floor(pow(2, 25) * sqrt(6.9))) + num2 = (num1 ^ key) << 5 + num3 = key ^ num1 + num4 = num3 ^ (num3 >> 3) ^ num2 + prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) + shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) + # Extend 160 Bit hash to 256 Bit + return shaHash + [0] * 12 + + key = obfuscate_key(id) + class Counter: + __value = iv + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) + return zlib.decompress(decrypted_data) + + def _convert_subtitles_to_srt(self, subtitles): + i=1 + output = '' + for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" 
[^>]*?text="([^"]+)"[^>]*?>', subtitles): + start = start.replace('.', ',') + end = end.replace('.', ',') + text = clean_html(text) + text = text.replace('\\N', '\n') + if not text: + continue + output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) + i+=1 + return output + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + if mobj.group('prefix') == 'm': + mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') + webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url') + else: + webpage_url = 'http://www.' + mobj.group('url') + + webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') + note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='') + if note_m: + raise ExtractorError(note_m) + + video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) + video_title = re.sub(r' {2,}', ' ', video_title) + video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') + if not video_description: + video_description = None + video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) + if video_upload_date: + video_upload_date = unified_strdate(video_upload_date) + video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) + + playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) + playerdata_req = compat_urllib_request.Request(playerdata_url) + playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) + playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') + + stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') + video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) + + formats = [] + for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): + stream_quality, stream_format = self._FORMAT_IDS[fmt] + video_format = fmt+'p' + streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') + # urlencode doesn't work! 
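+ # (presumably the endpoint insists on the literal %5F-escaped parameter + # names below, which urlencode would not produce for underscores)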
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format + streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) + streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) + video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') + video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') + formats.append({ + 'url': video_url, + 'play_path': video_play_path, + 'ext': 'flv', + 'format': video_format, + 'format_id': video_format, + }) + + subtitles = {} + for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): + sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ + video_id, note='Downloading subtitles for '+sub_name) + id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) + iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) + data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) + if not id or not iv or not data: + continue + id = int(id) + iv = base64.b64decode(iv) + data = base64.b64decode(data) + + subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) + if not lang_code: + continue + subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) + + return { + 'id': video_id, + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + 'upload_date': video_upload_date, + 'subtitles': subtitles, + 'formats': formats, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cspan.py youtube-dl-2014.02.17/youtube_dl/extractor/cspan.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cspan.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cspan.py 2014-02-02 17:36:13.000000000 +0000 @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + find_xpath_attr, +) + + +class CSpanIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)' + IE_DESC = 'C-SPAN' + _TEST = { + 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', + 'md5': '8e44ce11f0f725527daccc453f553eb0', + 'info_dict': { + 'id': '315139', + 'ext': 'mp4', + 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', + 'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. 
Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', + }, + 'skip': 'Regularly fails on travis, for unknown reasons', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_id = mobj.group('id') + webpage = self._download_webpage(url, page_id) + video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id') + + description = self._html_search_regex( + [ + # The full description + r'<div class=\'expandable\'>(.*?)<a href=\'#\'', + # If the description is small enough the other div is not + # present, otherwise this is a stripped version + r'<p class=\'initial\'>(.*?)</p>' + ], + webpage, 'description', flags=re.DOTALL) + + info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id + data = self._download_json(info_url, video_id) + + url = unescapeHTML(data['video']['files'][0]['path']['#text']) + + doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id, + video_id) + + def find_string(s): + return find_xpath_attr(doc, './/string', 'name', s).text + + return { + 'id': video_id, + 'title': find_string('title'), + 'url': url, + 'description': description, + 'thumbnail': find_string('poster'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/d8.py youtube-dl-2014.02.17/youtube_dl/extractor/d8.py --- youtube-dl-2012.09.27/youtube_dl/extractor/d8.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/d8.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,25 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .canalplus import CanalplusIE + + +class D8IE(CanalplusIE): + _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)' + _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s' + IE_NAME = 'd8.tv' + + _TEST = { + 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'file': '966289.flv', + 'info_dict': { + 'title': 'Campagne intime - Documentaire exceptionnel', + 'description': 'md5:d2643b799fb190846ae09c61e59a859f', + 'upload_date': '20131108', + }, + 'params': { + # rtmp + 'skip_download': True, + }, + 'skip': 'videos get deleted after a while', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dailymotion.py youtube-dl-2014.02.17/youtube_dl/extractor/dailymotion.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dailymotion.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dailymotion.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,231 @@ +import re +import json +import itertools + +from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor + +from ..utils import ( + compat_urllib_request, + compat_str, + get_element_by_attribute, + get_element_by_id, + orderedSet, + str_to_int, + + ExtractorError, +) + +class DailymotionBaseInfoExtractor(InfoExtractor): + @staticmethod + def _build_request(url): + """Build a request with the family filter disabled""" + request = compat_urllib_request.Request(url) + request.add_header('Cookie', 'family_filter=off') + request.add_header('Cookie', 'ff=off') + return request + +class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' + IE_NAME = 
u'dailymotion' + + _FORMATS = [ + (u'stream_h264_ld_url', u'ld'), + (u'stream_h264_url', u'standard'), + (u'stream_h264_hq_url', u'hq'), + (u'stream_h264_hd_url', u'hd'), + (u'stream_h264_hd1080_url', u'hd180'), + ] + + _TESTS = [ + { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Amphora Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + }, + # Vevo video + { + u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', + u'file': u'USUV71301934.mp4', + u'info_dict': { + u'title': u'Roar (Official)', + u'uploader': u'Katy Perry', + u'upload_date': u'20130905', + }, + u'params': { + u'skip_download': True, + }, + u'skip': u'VEVO is only available in some countries', + }, + # age-restricted video + { + u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', + u'file': u'xyh2zz.mp4', + u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', + u'info_dict': { + u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', + u'uploader': 'HotWaves1012', + u'age_limit': 18, + } + + } + ] + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + + url = 'http://www.dailymotion.com/video/%s' % video_id + + # Retrieve video webpage to extract further information + request = self._build_request(url) + webpage = self._download_webpage(request, video_id) + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + + # It may just embed a vevo video: + m_vevo = re.search( + r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)', + webpage) + if m_vevo is not None: + vevo_id = m_vevo.group('id') + self.to_screen(u'Vevo video detected: %s' % vevo_id) + return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') + + age_limit = self._rta_search(webpage) + + video_upload_date = None + mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) + if mobj is not None: + video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + + embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id + embed_page = self._download_webpage(embed_url, video_id, + u'Downloading embed page') + info = self._search_regex(r'var info = ({.*?}),$', embed_page, + 'video info', flags=re.MULTILINE) + info = json.loads(info) + if info.get('error') is not None: + msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] + raise ExtractorError(msg, expected=True) + + formats = [] + for (key, format_id) in self._FORMATS: + video_url = info.get(key) + if video_url is not None: + m_size = re.search(r'H264-(\d+)x(\d+)', video_url) + if m_size is not None: + width, height = m_size.group(1), m_size.group(2) + else: + width, height = None, None + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': format_id, + 'width': width, + 'height': height, + }) + if not formats: + raise ExtractorError(u'Unable to extract video URL') + + # subtitles + video_subtitles = self.extract_subtitles(video_id, webpage) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, webpage) + return + + view_count = self._search_regex( + r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', 
fatal=False) + if view_count is not None: + view_count = str_to_int(view_count) + + return { + 'id': video_id, + 'formats': formats, + 'uploader': info['owner_screenname'], + 'upload_date': video_upload_date, + 'title': self._og_search_title(webpage), + 'subtitles': video_subtitles, + 'thumbnail': info['thumbnail_url'], + 'age_limit': age_limit, + 'view_count': view_count, + } + + def _get_available_subtitles(self, video_id, webpage): + try: + sub_list = self._download_webpage( + 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, + video_id, note=False) + except ExtractorError as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + info = json.loads(sub_list) + if (info['total'] > 0): + sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + return sub_lang_list + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + + +class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): + IE_NAME = u'dailymotion:playlist' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' + _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' + _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' + + def _extract_entries(self, id): + video_ids = [] + for pagenum in itertools.count(1): + request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum)) + webpage = self._download_webpage(request, + id, u'Downloading page %s' % pagenum) + + playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) + video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) + + if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: + break + return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + for video_id in orderedSet(video_ids)] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + return {'_type': 'playlist', + 'id': playlist_id, + 'title': get_element_by_id(u'playlist_name', webpage), + 'entries': self._extract_entries(playlist_id), + } + + +class DailymotionUserIE(DailymotionPlaylistIE): + IE_NAME = u'dailymotion:user' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>' + _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + webpage = self._download_webpage(url, user) + full_user = self._html_search_regex( + r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user), + webpage, u'user', flags=re.DOTALL) + + return { + '_type': 'playlist', + 'id': user, + 'title': full_user, + 'entries': self._extract_entries(user), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/daum.py youtube-dl-2014.02.17/youtube_dl/extractor/daum.py --- youtube-dl-2012.09.27/youtube_dl/extractor/daum.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/daum.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,68 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, +) + + +class DaumIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' + IE_NAME = u'daum.net' + + _TEST = { 
+ u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', + u'file': u'52554690.mp4', + u'info_dict': { + u'title': u'DOTA 2GETHER 시즌2 6회 - 2부', + u'description': u'DOTA 2GETHER 시즌2 6회 - 2부', + u'upload_date': u'20130831', + u'duration': 3868, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + canonical_url = 'http://tvpot.daum.net/v/%s' % video_id + webpage = self._download_webpage(canonical_url, video_id) + full_id = self._search_regex( + r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', + webpage, u'full id') + query = compat_urllib_parse.urlencode({'vid': full_id}) + info = self._download_xml( + 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, + u'Downloading video info') + urls = self._download_xml( + 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, + video_id, u'Downloading video formats info') + + self.to_screen(u'%s: Getting video urls' % video_id) + formats = [] + for format_el in urls.findall('result/output_list/output_list'): + profile = format_el.attrib['profile'] + format_query = compat_urllib_parse.urlencode({ + 'vid': full_id, + 'profile': profile, + }) + url_doc = self._download_xml( + 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, + video_id, note=False) + format_url = url_doc.find('result/url').text + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format_id': profile, + }) + + return { + 'id': video_id, + 'title': info.find('TITLE').text, + 'formats': formats, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': info.find('CONTENTS').text, + 'duration': int(info.find('DURATION').text), + 'upload_date': info.find('REGDTTM').text[:8], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/defense.py youtube-dl-2014.02.17/youtube_dl/extractor/defense.py --- youtube-dl-2012.09.27/youtube_dl/extractor/defense.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/defense.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class DefenseGouvFrIE(InfoExtractor): + IE_NAME = 'defense.gouv.fr' + _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' + r'ligthboxvideo/base-de-medias/webtv/(.*)') + + _TEST = { + 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', + 'file': '11213.mp4', + 'md5': '75bba6124da7e63d2d60b5244ec9430c', + "info_dict": { + "title": "attaque-chimique-syrienne-du-21-aout-2013-1" + } + } + + def _real_extract(self, url): + title = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, title) + video_id = self._search_regex( + r"flashvars.pvg_id=\"(\d+)\";", + webpage, 'ID') + + json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' + + video_id) + info = self._download_webpage(json_url, title, + 'Downloading JSON config') + video_url = json.loads(info)['renditions'][0]['url'] + + return {'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/depositfiles.py youtube-dl-2014.02.17/youtube_dl/extractor/depositfiles.py --- youtube-dl-2012.09.27/youtube_dl/extractor/depositfiles.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/depositfiles.py 
2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,60 @@ +import re +import os +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_str, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + + ExtractorError, +) + + +class DepositFilesIE(InfoExtractor): + """Information extractor for depositfiles.com""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' + + def _real_extract(self, url): + file_id = url.split('/')[-1] + # Rebuild url in english locale + url = 'http://depositfiles.com/en/files/' + file_id + + # Retrieve file webpage with 'Free download' button pressed + free_download_indication = {'gateway_result' : '1'} + request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) + try: + self.report_download_webpage(file_id) + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) + + # Search for the real file URL + mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) + if (mobj is None) or (mobj.group(1) is None): + # Try to figure out reason of the error. + mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) + if (mobj is not None) and (mobj.group(1) is not None): + restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() + raise ExtractorError(u'%s' % restriction_message) + else: + raise ExtractorError(u'Unable to extract download URL from: %s' % url) + + file_url = mobj.group(1) + file_extension = os.path.splitext(file_url)[1][1:] + + # Search for file title + file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') + + return [{ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': None, + 'upload_date': None, + 'title': file_title, + 'ext': file_extension.decode('utf-8'), + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/discovery.py youtube-dl-2014.02.17/youtube_dl/extractor/discovery.py --- youtube-dl-2012.09.27/youtube_dl/extractor/discovery.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/discovery.py 2014-01-27 13:43:16.000000000 +0000 @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class DiscoveryIE(InfoExtractor): + _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' + _TEST = { + 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'file': '614784.mp4', + 'md5': 'e12614f9ee303a6ccef415cb0793eba2', + 'info_dict': { + 'title': 'MythBusters: Mission Impossible Outtakes', + 'description': ('Watch Jamie Hyneman and Adam Savage practice being' + ' each other -- to the point of confusing Jamie\'s dog -- and ' + 'don\'t miss Adam moon-walking as Jamie ... 
behind Jamie\'s' + ' back.'), + 'duration': 156, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_list_json = self._search_regex(r'var videoListJSON = ({.*?});', + webpage, 'video list', flags=re.DOTALL) + video_list = json.loads(video_list_json) + info = video_list['clips'][0] + formats = [] + for f in info['mp4']: + formats.append( + {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])}) + + return { + 'id': info['contentId'], + 'title': video_list['name'], + 'formats': formats, + 'description': info['videoCaption'], + 'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'), + 'duration': info['duration'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dotsub.py youtube-dl-2014.02.17/youtube_dl/extractor/dotsub.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dotsub.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dotsub.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor + + +class DotsubIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'md5': '0914d4d69605090f623b7ac329fea66e', + 'info_dict': { + 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'ext': 'flv', + 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', + 'uploader': '4v4l0n42', + 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', + 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', + 'upload_date': '20101213', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = "https://dotsub.com/api/media/%s/metadata" % video_id + info = self._download_json(info_url, video_id) + date = time.gmtime(info['dateCreated']/1000) # The timestamp is in milliseconds + + return { + 'id': video_id, + 'url': info['mediaURI'], + 'ext': 'flv', + 'title': info['title'], + 'thumbnail': info['screenshotURI'], + 'description': info['description'], + 'uploader': info['user'], + 'view_count': info['numberOfViews'], + 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dreisat.py youtube-dl-2014.02.17/youtube_dl/extractor/dreisat.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dreisat.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dreisat.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class DreiSatIE(InfoExtractor): + IE_NAME = '3sat' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _TEST = { + u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", + u'file': u'36983.mp4', + u'md5': u'9dcfe344732808dbfcc901537973c922', + u'info_dict': { +
u"title": u"Kaffeeland Schweiz", + u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", + u"uploader": u"3sat", + u"upload_date": u"20130622" + } + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id + details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details') + + thumbnail_els = details_doc.findall('.//teaserimage') + thumbnails = [{ + 'width': te.attrib['key'].partition('x')[0], + 'height': te.attrib['key'].partition('x')[2], + 'url': te.text, + } for te in thumbnail_els] + + information_el = details_doc.find('.//information') + video_title = information_el.find('./title').text + video_description = information_el.find('./detail').text + + details_el = details_doc.find('.//details') + video_uploader = details_el.find('./channel').text + upload_date = unified_strdate(details_el.find('./airtime').text) + + format_els = details_doc.findall('.//formitaet') + formats = [{ + 'format_id': fe.attrib['basetype'], + 'width': int(fe.find('./width').text), + 'height': int(fe.find('./height').text), + 'url': fe.find('./url').text, + 'filesize': int(fe.find('./filesize').text), + 'video_bitrate': int(fe.find('./videoBitrate').text), + } for fe in format_els + if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] + + self._sort_formats(formats) + + return { + '_type': 'video', + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'description': video_description, + 'thumbnails': thumbnails, + 'thumbnail': thumbnails[-1]['url'], + 'uploader': video_uploader, + 'upload_date': upload_date, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dropbox.py youtube-dl-2014.02.17/youtube_dl/extractor/dropbox.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dropbox.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dropbox.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from .common import InfoExtractor + + +class DropboxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' + _TEST = { + 'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4', + 'md5': '8ae17c51172fb7f93bdd6a214cc8c896', + 'info_dict': { + 'id': '0qr9sai2veej4f8', + 'ext': 'mp4', + 'title': 'THE_DOCTOR_GAMES' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + title = os.path.splitext(mobj.group('title'))[0] + video_url = url + '?dl=1' + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ebaumsworld.py youtube-dl-2014.02.17/youtube_dl/extractor/ebaumsworld.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ebaumsworld.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ebaumsworld.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,35 @@ +import re + +from .common import InfoExtractor +from ..utils import determine_ext + + +class EbaumsWorldIE(InfoExtractor): + _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)' + + _TEST = { + u'url': 
u'http://www.ebaumsworld.com/video/watch/83367677/', + u'file': u'83367677.mp4', + u'info_dict': { + u'title': u'A Giant Python Opens The Door', + u'description': u'This is how nightmares start...', + u'uploader': u'jihadpizza', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + config = self._download_xml( + 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) + video_url = config.find('file').text + + return { + 'id': video_id, + 'title': config.find('title').text, + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': config.find('description').text, + 'thumbnail': config.find('image').text, + 'uploader': config.find('username').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ehow.py youtube-dl-2014.02.17/youtube_dl/extractor/ehow.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ehow.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ehow.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,46 @@ +import re + +from ..utils import ( + compat_urllib_parse, + determine_ext +) +from .common import InfoExtractor + + +class EHowIE(InfoExtractor): + IE_NAME = u'eHow' + _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)' + _TEST = { + u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', + u'file': u'12245069.flv', + u'md5': u'9809b4e3f115ae2088440bcb4efbf371', + u'info_dict': { + u"title": u"Hardwood Flooring Basics", + u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...", + u"uploader": u"Erick Nathan" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', + webpage, u'video URL') + final_url = compat_urllib_parse.unquote(video_url) + uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />', + webpage, u'uploader') + title = self._og_search_title(webpage).replace(' | eHow', '') + ext = determine_ext(final_url) + + return { + '_type': 'video', + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + } + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/eighttracks.py youtube-dl-2014.02.17/youtube_dl/extractor/eighttracks.py --- youtube-dl-2012.09.27/youtube_dl/extractor/eighttracks.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/eighttracks.py 2014-02-02 10:54:19.000000000 +0000 @@ -0,0 +1,119 @@ +import json +import random +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class EightTracksIE(InfoExtractor): + IE_NAME = '8tracks' + _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' + _TEST = { + u"name": u"EightTracks", + u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + u"playlist": [ + { + u"file": u"11885610.m4a", + u"md5": u"96ce57f24389fc8734ce47f4c1abcc55", + u"info_dict": { + u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885608.m4a", + u"md5": u"4ab26f05c1f7291ea460a3920be8021f", + 
u"info_dict": { + u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885679.m4a", + u"md5": u"d30b5b5f74217410f4689605c35d1fd7", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885680.m4a", + u"md5": u"4eb0a669317cd725f6bbd336a29f923a", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885682.m4a", + u"md5": u"1893e872e263a2705558d1d319ad19e8", + u"info_dict": { + u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885683.m4a", + u"md5": u"b673c46f47a216ab1741ae8836af5899", + u"info_dict": { + u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885684.m4a", + u"md5": u"1d74534e95df54986da7f5abf7d842b7", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885685.m4a", + u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + } + ] + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group('id') + + webpage = self._download_webpage(url, playlist_id) + + json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL) + data = json.loads(json_like) + + session = str(random.randint(0, 1000000000)) + mix_id = data['id'] + track_count = data['tracks_count'] + first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) + next_url = first_url + res = [] + for i in range(track_count): + api_json = self._download_webpage(next_url, playlist_id, + note=u'Downloading song information %s/%s' % (str(i+1), track_count), + errnote=u'Failed to download song information') + api_data = json.loads(api_json) + track_data = api_data[u'set']['track'] + info = { + 'id': track_data['id'], + 'url': track_data['track_file_stream_url'], + 'title': track_data['performer'] + u' - ' + track_data['name'], + 'raw_title': track_data['name'], + 'uploader_id': data['user']['login'], + 'ext': 'm4a', + } + res.append(info) + next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) + return res diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/eitb.py youtube-dl-2014.02.17/youtube_dl/extractor/eitb.py --- youtube-dl-2012.09.27/youtube_dl/extractor/eitb.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/eitb.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveIE +from ..utils import ExtractorError + + +class EitbIE(InfoExtractor): + IE_NAME = u'eitb.tv' + _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)' + + _TEST = { + u'add_ie': ['Brightcove'], + u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', + u'md5': u'edf4436247185adee3ea18ce64c47998', + u'info_dict': { + u'id': u'2743577154001', + 
u'ext': u'mp4', + u'title': u'60 minutos (Lasa y Zabala, 30 años)', + # All videos from eitb have this description in the brightcove info + u'description': u'.', + u'uploader': u'Euskal Telebista', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id = mobj.group('chapter_id') + webpage = self._download_webpage(url, chapter_id) + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is None: + raise ExtractorError(u'Could not extract the Brightcove URL') + # The BrightcoveExperience object doesn't contain the video id, so we set + # it manually + bc_url += '&%40videoPlayer={0}'.format(chapter_id) + return self.url_result(bc_url, BrightcoveIE.ie_key()) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/elpais.py youtube-dl-2014.02.17/youtube_dl/extractor/elpais.py --- youtube-dl-2012.09.27/youtube_dl/extractor/elpais.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/elpais.py 2014-02-06 21:42:15.000000000 +0000 @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class ElPaisIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' + IE_DESC = 'El País' + + _TEST = { + 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', + 'md5': '98406f301f19562170ec071b83433d55', + 'info_dict': { + 'id': 'tiempo-nuevo-recetas-viejas', + 'ext': 'mp4', + 'title': 'Tiempo nuevo, recetas viejas', + 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', + 'upload_date': '20140206', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + prefix = self._html_search_regex( + r'var url_cache = "([^"]+)";', webpage, 'URL prefix') + video_suffix = self._search_regex( + r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') + video_url = prefix + video_suffix + thumbnail_suffix = self._search_regex( + r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', + fatal=False) + thumbnail = ( + None if thumbnail_suffix is None + else prefix + thumbnail_suffix) + title = self._html_search_regex( + '<h2 class="entry-header entry-title.*?>(.*?)</h2>', + webpage, 'title') + date_str = self._search_regex( + r'<p class="date-header date-int updated"\s+title="([^"]+)">', + webpage, 'upload date', fatal=False) + upload_date = (None if date_str is None else unified_strdate(date_str)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/escapist.py youtube-dl-2014.02.17/youtube_dl/extractor/escapist.py --- youtube-dl-2012.09.27/youtube_dl/extractor/escapist.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/escapist.py 2014-02-13 15:32:34.000000000 +0000 @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + + ExtractorError, +) + + +class EscapistIE(InfoExtractor): + _VALID_URL = r'^https?://(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' + _TEST = { + 'url':
'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', + 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', + 'info_dict': { + 'id': '6618', + 'ext': 'mp4', + 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", + 'uploader': 'the-escapist-presents', + 'title': "Breaking Down Baldur's Gate", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + showName = mobj.group('showname') + video_id = mobj.group('id') + + self.report_extraction(video_id) + webpage = self._download_webpage(url, video_id) + + videoDesc = self._html_search_regex( + r'<meta name="description" content="([^"]*)"', + webpage, 'description', fatal=False) + + playerUrl = self._og_search_video_url(webpage, name=u'player URL') + + title = self._html_search_regex( + r'<meta name="title" content="([^"]*)"', + webpage, 'title').split(' : ')[-1] + + configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL') + configUrl = compat_urllib_parse.unquote(configUrl) + + formats = [] + + def _add_format(name, cfgurl, quality): + config = self._download_json( + cfgurl, video_id, + 'Downloading ' + name + ' configuration', + 'Unable to download ' + name + ' configuration', + transform_source=lambda s: s.replace("'", '"')) + + playlist = config['playlist'] + formats.append({ + 'url': playlist[1]['url'], + 'format_id': name, + 'quality': quality, + }) + + _add_format('normal', configUrl, quality=0) + # append the hq flag with whichever separator the config URL needs + hq_url = (configUrl + + ('&hq=1' if '?' in configUrl else '?hq=1')) + try: + _add_format('hq', hq_url, quality=1) + except ExtractorError: + pass # That's fine, we'll just use normal quality + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'uploader': showName, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': videoDesc, + 'player_url': playerUrl, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/everyonesmixtape.py youtube-dl-2014.02.17/youtube_dl/extractor/everyonesmixtape.py --- youtube-dl-2012.09.27/youtube_dl/extractor/everyonesmixtape.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/everyonesmixtape.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + ExtractorError, +) + + +class EveryonesMixtapeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$' + + _TEST = { + 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', + 'file': '5bfseWNmlds.mp4', + "info_dict": { + "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)", + "uploader": "FKR.TV", + "uploader_id": "frenchkissrecords", + "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", + "upload_date": "20081015" + }, + 'params': { + 'skip_download': True, # This is simply YouTube + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + + pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id + pllist_req =
compat_urllib_request.Request(pllist_url) + pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') + + playlist_list = self._download_json( + pllist_req, playlist_id, note='Downloading playlist metadata') + try: + playlist_no = next(playlist['id'] + for playlist in playlist_list + if playlist['code'] == playlist_id) + except StopIteration: + raise ExtractorError('Playlist id not found') + + pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no + pl_req = compat_urllib_request.Request(pl_url) + pl_req.add_header('X-Requested-With', 'XMLHttpRequest') + playlist = self._download_json( + pl_req, playlist_id, note='Downloading playlist info') + + entries = [{ + '_type': 'url', + 'url': t['url'], + 'title': t['title'], + } for t in playlist['tracks']] + + if mobj.group('songnr'): + songnr = int(mobj.group('songnr')) - 1 + return entries[songnr] + + playlist_title = playlist['mixData']['name'] + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/exfm.py youtube-dl-2014.02.17/youtube_dl/extractor/exfm.py --- youtube-dl-2012.09.27/youtube_dl/extractor/exfm.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/exfm.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ExfmIE(InfoExtractor): + IE_NAME = 'exfm' + IE_DESC = 'ex.fm' + _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' + _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' + _TESTS = [ + { + 'url': 'http://ex.fm/song/eh359', + 'md5': 'e45513df5631e6d760970b14cc0c11e7', + 'info_dict': { + 'id': '44216187', + 'ext': 'mp3', + 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', + 'uploader': 'deadjournalist', + 'upload_date': '20120424', + 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', + }, + 'note': 'Soundcloud song', + 'skip': 'The site is down too often', + }, + { + 'url': 'http://ex.fm/song/wddt8', + 'md5': '966bd70741ac5b8570d8e45bfaed3643', + 'info_dict': { + 'id': 'wddt8', + 'ext': 'mp3', + 'title': 'Safe and Sound', + 'uploader': 'Capital Cities', + }, + 'skip': 'The site is down too often', + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + info_url = "http://ex.fm/api/v3/song/%s" % song_id + info = self._download_json(info_url, song_id)['song'] + song_url = info['url'] + if re.match(self._SOUNDCLOUD_URL, song_url) is not None: + self.to_screen('Soundcloud song detected') + return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') + return { + 'id': song_id, + 'url': song_url, + 'ext': 'mp3', + 'title': info['title'], + 'thumbnail': info['image']['large'], + 'uploader': info['artist'], + 'view_count': info['loved_count'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/extremetube.py youtube-dl-2014.02.17/youtube_dl/extractor/extremetube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/extremetube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/extremetube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,50 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): + 
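# A minimal sketch of what _VALID_URL below matches (doctest-style,
+ # illustration only; the URL is the one from the _TEST entry further down):
+ #   >>> m = re.match(ExtremeTubeIE._VALID_URL,
+ #   ...     'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431')
+ #   >>> m.group('videoid')
+ #   '652431' +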
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' + _TEST = { + u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + u'file': u'652431.mp4', + u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', + u'info_dict': { + u"title": u"Music Video 14 british euro brit european cumshots swallow", + u"uploader": u"unknown", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') + uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join(format) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': uploader, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': 18, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/facebook.py youtube-dl-2014.02.17/youtube_dl/extractor/facebook.py --- youtube-dl-2012.09.27/youtube_dl/extractor/facebook.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/facebook.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,137 @@ +import json +import re +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_str, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + + ExtractorError, +) + + +class FacebookIE(InfoExtractor): + """Information Extractor for Facebook""" + + _VALID_URL = r'''(?x) + (?:https?://)?(?:\w+\.)?facebook\.com/ + (?:[^#?]*\#!/)? + (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) 
+ (?:v|video_id)=(?P<id>[0-9]+) + (?:.*)''' + _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' + _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' + _NETRC_MACHINE = 'facebook' + IE_NAME = u'facebook' + _TEST = { + u'url': u'https://www.facebook.com/photo.php?v=120708114770723', + u'file': u'120708114770723.mp4', + u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', + u'info_dict': { + u"duration": 279, + u"title": u"PEOPLE ARE AWESOME 2013" + } + } + + def report_login(self): + """Report attempt to log in.""" + self.to_screen(u'Logging in') + + def _login(self): + (useremail, password) = self._get_login_info() + if useremail is None: + return + + login_page_req = compat_urllib_request.Request(self._LOGIN_URL) + login_page_req.add_header('Cookie', 'locale=en_US') + self.report_login() + login_page = self._download_webpage(login_page_req, None, note=False, + errnote=u'Unable to download login page') + lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') + lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') + + login_form = { + 'email': useremail, + 'pass': password, + 'lsd': lsd, + 'lgnrnd': lgnrnd, + 'next': 'http://facebook.com/home.php', + 'default_persistent': '0', + 'legacy_return': '1', + 'timezone': '-60', + 'trynum': '1', + } + request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + try: + login_results = compat_urllib_request.urlopen(request).read() + if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: + self._downloader.report_warning(u'unable to log in: bad username/password, or exceeded login rate limit (~3/min).
Check credentials or wait.') + return + + check_form = { + 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), + 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), + 'name_action_selected': 'dont_save', + 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), + } + check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) + check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + check_response = compat_urllib_request.urlopen(check_req).read() + if re.search(r'id="checkpointSubmitButton"', check_response) is not None: + self._downloader.report_warning(u'Unable to confirm login, you have to log in with your browser and authorize the login.') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) + return + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group('id') + + url = 'https://www.facebook.com/video/video.php?v=%s' % video_id + webpage = self._download_webpage(url, video_id) + + BEFORE = '{swf.addParam(param[0], param[1]);});\n' + AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' + m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + if not m: + m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) + if m_msg is not None: + raise ExtractorError( + u'The video is not available, Facebook said: "%s"' % m_msg.group(1), + expected=True) + else: + raise ExtractorError(u'Cannot parse data') + data = dict(json.loads(m.group(1))) + params_raw = compat_urllib_parse.unquote(data['params']) + params = json.loads(params_raw) + video_data = params['video_data'][0] + video_url = video_data.get('hd_src') + if not video_url: + video_url = video_data['sd_src'] + if not video_url: + raise ExtractorError(u'Cannot find video URL') + video_duration = int(video_data['video_duration']) + thumbnail = video_data['thumbnail_src'] + + video_title = self._html_search_regex( + r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') + + info = { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': 'mp4', + 'duration': video_duration, + 'thumbnail': thumbnail, + } + return [info] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/faz.py youtube-dl-2014.02.17/youtube_dl/extractor/faz.py --- youtube-dl-2012.09.27/youtube_dl/extractor/faz.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/faz.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class FazIE(InfoExtractor): + IE_NAME = u'faz.net' + _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html' + + _TEST = { + u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', + u'file': u'12610585.mp4', + u'info_dict': { + u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', + u'description': u'md5:1453fbf9a0d041d985a47306192ea253', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') +
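# The article page embeds a call of the form writeFLV('<config XML URL>', ...);
+ # the _search_regex below pulls that URL out. The shape sketched here is an
+ # assumption for illustration, not captured from a live page:
+ #   writeFLV('http://www.faz.net/.../video-config.xml', ...) +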
self.to_screen(video_id) + webpage = self._download_webpage(url, video_id) + config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, + u'config xml url') + config = self._download_xml(config_xml_url, video_id, + u'Downloading config xml') + + encodings = config.find('ENCODINGS') + formats = [] + for code in ['LOW', 'HIGH', 'HQ']: + encoding = encodings.find(code) + if encoding is None: + continue + encoding_url = encoding.find('FILENAME').text + formats.append({ + 'url': encoding_url, + 'ext': determine_ext(encoding_url), + 'format_id': code.lower(), + }) + + descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': descr, + 'thumbnail': config.find('STILL/STILL_BIG').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/firstpost.py youtube-dl-2014.02.17/youtube_dl/extractor/firstpost.py --- youtube-dl-2012.09.27/youtube_dl/extractor/firstpost.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/firstpost.py 2014-02-05 20:21:45.000000000 +0000 @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FirstpostIE(InfoExtractor): + IE_NAME = 'Firstpost.com' + _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' + + _TEST = { + 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', + 'md5': 'ee9114957692f01fb1263ed87039112a', + 'info_dict': { + 'id': '1025403', + 'ext': 'mp4', + 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', + 'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._html_search_regex( + r'<div.*?name="div_video".*?flashvars="([^"]+)">', + webpage, 'video URL') + + return { + 'id': video_id, + 'url': video_url, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/firsttv.py youtube-dl-2014.02.17/youtube_dl/extractor/firsttv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/firsttv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/firsttv.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,60 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class FirstTVIE(InfoExtractor): + IE_NAME = 'firsttv' + IE_DESC = 'Видеоархив - Первый канал' + _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.1tv.ru/videoarchive/73390', + 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', + 'info_dict': { + 'id': '73390', + 'ext': 'mp4', + 'title': 'Олимпийские канатные дороги', + 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', + 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', + 'duration': 149, + }, + 'skip': 'Only works from Russia', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = 
self._download_webpage(url, video_id, 'Downloading page') + + video_url = self._html_search_regex( + r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') + + title = self._html_search_regex( + r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') + description = self._html_search_regex( + r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) + + thumbnail = self._og_search_thumbnail(webpage) + duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) + + like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', + webpage, 'like count', fatal=False) + dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', + webpage, 'dislike count', fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'thumbnail': thumbnail, + 'title': title, + 'description': description, + 'duration': int_or_none(duration), + 'like_count': int_or_none(like_count), + 'dislike_count': int_or_none(dislike_count), + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/fktv.py youtube-dl-2014.02.17/youtube_dl/extractor/fktv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/fktv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/fktv.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,78 @@ +import re +import random +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + get_element_by_id, + clean_html, +) + + +class FKTVIE(InfoExtractor): + IE_NAME = u'fernsehkritik.tv' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' + + _TEST = { + u'url': u'http://fernsehkritik.tv/folge-1', + u'file': u'00011.flv', + u'info_dict': { + u'title': u'Folge 1 vom 10. 
April 2007', + u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2, 4) + video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode + start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, + episode) + playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, + u'playlist', flags=re.DOTALL) + files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) + # TODO: return a single multipart video + videos = [] + for i, _ in enumerate(files, 1): + video_id = '%04d%d' % (episode, i) + video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) + videos.append({ + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': clean_html(get_element_by_id('eptitle', start_webpage)), + 'description': clean_html(get_element_by_id('contentlist', start_webpage)), + 'thumbnail': video_thumbnail + }) + return videos + + +class FKTVPosteckeIE(InfoExtractor): + IE_NAME = u'fernsehkritik.tv:postecke' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' + _TEST = { + u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', + u'file': u'0120.flv', + u'md5': u'262f0adbac80317412f7e57b4808e5c4', + u'info_dict': { + u"title": u"Postecke 120" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2, 4) + video_id = '%04d' % episode + video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) + video_title = 'Postecke %d' % episode + return { + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': video_title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/flickr.py youtube-dl-2014.02.17/youtube_dl/extractor/flickr.py --- youtube-dl-2012.09.27/youtube_dl/extractor/flickr.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/flickr.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unescapeHTML, +) + + +class FlickrIE(InfoExtractor): + """Information Extractor for Flickr videos""" + _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' + _TEST = { + 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + 'file': '5645318632.mp4', + 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', + 'info_dict': { + "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. 
They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + "uploader_id": "forestwander-nature-pictures", + "title": "Dark Hollow Waterfalls" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + video_uploader_id = mobj.group('uploader_id') + webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret') + + first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' + first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') + + node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>', + first_xml, 'node_id') + + second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' + second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') + + self.report_extraction(video_id) + + mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml) + if mobj is None: + raise ExtractorError('Unable to extract video url') + video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader_id': video_uploader_id, + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/fourtube.py youtube-dl-2014.02.17/youtube_dl/extractor/fourtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/fourtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/fourtube.py 2014-02-17 10:22:30.000000000 +0000 @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + unified_strdate, + str_to_int, + parse_duration, + clean_html, +) + + +class FourTubeIE(InfoExtractor): + IE_NAME = '4tube' + _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '209733', + 'ext': 'mp4', + 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', + 'uploader': 'WCP Club', + 'uploader_id': 'wcp-club', + 'upload_date': '20131031', + 'duration': 583, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.4tube.com/videos/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + self.report_extraction(video_id) + + playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist') + media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id') + sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',') + title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title') + thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False) + + uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage,
'uploader', fatal=False) + mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str) + (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None) + + upload_date = None + view_count = None + duration = None + description = self._html_search_meta('description', webpage, 'description') + if description: + upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date', + fatal=False) + if upload_date: + upload_date = unified_strdate(upload_date) + view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False) + if view_count: + view_count = str_to_int(view_count) + duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False)) + + token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources)) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + b'Origin': b'http://www.4tube.com', + } + token_req = compat_urllib_request.Request(token_url, b'{}', headers) + tokens = self._download_json(token_req, video_id) + + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail_url, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'view_count': view_count, + 'duration': duration, + 'age_limit': 18, + 'webpage_url': webpage_url, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/franceinter.py youtube-dl-2014.02.17/youtube_dl/extractor/franceinter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/franceinter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/franceinter.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FranceInterIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})' + _TEST = { + 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'file': '793962.mp3', + 'md5': '4764932e466e6f6c79c317d2e74f6884', + "info_dict": { + "title": "L’Histoire dans les jeux vidéo", + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex( + r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title') + path = self._search_regex( + r'&urlAOD=(.*?)&startTime', webpage, 'video url') + video_url = 'http://www.franceinter.fr/' + path + + return { + 'id': video_id, + 'formats': [{ + 'url': video_url, + 'vcodec': 'none', + }], + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/francetv.py youtube-dl-2014.02.17/youtube_dl/extractor/francetv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/francetv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/francetv.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,223 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, +) + + +class FranceTVBaseInfoExtractor(InfoExtractor): + def 
_extract_video(self, video_id): + info = self._download_xml( + 'http://www.francetvinfo.fr/appftv/webservices/video/' + 'getInfosOeuvre.php?id-diffusion=' + + video_id, video_id, 'Downloading XML config') + + manifest_url = info.find('videos/video/url').text + video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') + video_url = video_url.replace('/z/', '/i/') + thumbnail_path = info.find('image').text + + return {'id': video_id, + 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4', + 'url': video_url, + 'title': info.find('titre').text, + 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), + 'description': info.find('synopsis').text, + } + + +class PluzzIE(FranceTVBaseInfoExtractor): + IE_NAME = 'pluzz.francetv.fr' + _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' + + # Can't use tests, videos expire in 7 days + + def _real_extract(self, url): + title = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, title) + video_id = self._search_regex( + r'data-diffusion="(\d+)"', webpage, 'ID') + return self._extract_video(video_id) + + +class FranceTvInfoIE(FranceTVBaseInfoExtractor): + IE_NAME = 'francetvinfo.fr' + _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html' + + _TEST = { + 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', + 'file': '84981923.mp4', + 'info_dict': { + 'title': 'Soir 3', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_title = mobj.group('title') + webpage = self._download_webpage(url, page_title) + video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id') + return self._extract_video(video_id) + + +class FranceTVIE(FranceTVBaseInfoExtractor): + IE_NAME = 'francetv' + IE_DESC = 'France 2, 3, 4, 5 and Ô' + _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/ + (?: + emissions/.*?/(videos|emissions)/(?P<id>[^/?]+) + | (emissions?|jt)/(?P<key>[^/?]+) + )''' + + _TESTS = [ + # france2 + { + 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', + 'file': '75540104.mp4', + 'info_dict': { + 'title': '13h15, le samedi...', + 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + # france3 + { + 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', + 'info_dict': { + 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', + 'ext': 'flv', + 'title': 'Le scandale du prix des médicaments', + 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + # france4 + { + 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + 'info_dict': { + 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + 'ext': 'flv', + 'title': 'Hero Corp Making of - Extrait 1', + 'description': 'md5:c87d54871b1790679aec1197e73d650a', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + # france5 + { + 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', + 'info_dict': { + 'id': '92837968', + 'ext': 'mp4', + 'title': 'C à dire ?!', + 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + # franceo + { + 'url': 
'http://www.franceo.fr/jt/info-afrique/04-12-2013', + 'info_dict': { + 'id': '92327925', + 'ext': 'mp4', + 'title': 'Infô-Afrique', + 'description': 'md5:ebf346da789428841bee0fd2a935ea55', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'The id changes frequently', + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj.group('key'): + webpage = self._download_webpage(url, mobj.group('key')) + id_res = [ + (r'''(?x)<div\s+class="video-player">\s* + <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ + class="francetv-video-player">'''), + (r'<a id="player_direct" href="http://info\.francetelevisions' + '\.fr/\?id-video=([^"/&]+)'), + (r'<a class="video" id="ftv_player_(.+?)"'), + ] + video_id = self._html_search_regex(id_res, webpage, 'video ID') + else: + video_id = mobj.group('id') + return self._extract_video(video_id) + + +class GenerationQuoiIE(InfoExtractor): + IE_NAME = 'france2.fr:generation-quoi' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' + + _TEST = { + 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', + 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', + 'info_dict': { + 'title': 'Génération Quoi - Garde à Vous', + 'uploader': 'Génération Quoi', + }, + 'params': { + # It uses Dailymotion + 'skip_download': True, + }, + 'skip': 'Only available from France', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) + info_json = self._download_webpage(info_url, name) + info = json.loads(info_json) + return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], + ie='Dailymotion') + + +class CultureboxIE(FranceTVBaseInfoExtractor): + IE_NAME = 'culturebox.francetvinfo.fr' + _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' + + _TEST = { + 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', + 'info_dict': { + 'id': 'EV_6785', + 'ext': 'mp4', + 'title': 'Einstein on the beach au Théâtre du Châtelet', + 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id') + return self._extract_video(video_id) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/freesound.py youtube-dl-2014.02.17/youtube_dl/extractor/freesound.py --- youtube-dl-2012.09.27/youtube_dl/extractor/freesound.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/freesound.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FreesoundIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', + 'md5': '12280ceb42c81f19a515c745eae07650', + 'info_dict': { + 'id': '194503', + 'ext': 'mp3', + 'title': 'gulls in the city.wav', + 'uploader': 'miklovan', + 'description': 'the sounds of seagulls in the city', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + music_id = mobj.group('id') + webpage = 
self._download_webpage(url, music_id) + title = self._html_search_regex( + r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>', + webpage, 'music title', flags=re.DOTALL) + description = self._html_search_regex( + r'<div id="sound_description">(.*?)</div>', webpage, 'description', + fatal=False, flags=re.DOTALL) + + return { + 'id': music_id, + 'title': title, + 'url': self._og_search_property('audio', webpage, 'music url'), + 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/freespeech.py youtube-dl-2014.02.17/youtube_dl/extractor/freespeech.py --- youtube-dl-2012.09.27/youtube_dl/extractor/freespeech.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/freespeech.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class FreespeechIE(InfoExtractor): + IE_NAME = 'freespeech.org' + _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)' + _TEST = { + 'add_ie': ['Youtube'], + 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'info_dict': { + 'id': 'poKsVCZ64uU', + 'ext': 'mp4', + 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'uploader': 'freespeechtv', + 'uploader_id': 'freespeechtv', + 'upload_date': '20121002', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info = json.loads(info_json) + + return { + '_type': 'url', + 'url': info['jw_player']['basic_video_node_player']['file'], + 'ie_key': 'Youtube', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/funnyordie.py youtube-dl-2014.02.17/youtube_dl/extractor/funnyordie.py --- youtube-dl-2012.09.27/youtube_dl/extractor/funnyordie.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/funnyordie.py 2014-01-28 20:45:24.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FunnyOrDieIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' + _TEST = { + 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', + 'file': '0732f586d7.mp4', + 'md5': 'f647e9e90064b53b6e046e75d0241fbd', + 'info_dict': { + 'description': ('Lyrics changed to match the video. Spoken cameo ' + 'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a ' + 'concept by Dustin McLean (DustFilms.com). Performed, edited, ' + 'and written by David A. 
Scott.'), + 'title': 'Heart-Shaped Box: Literal Video Version', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_url = self._search_regex( + [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], + webpage, 'video URL', flags=re.DOTALL) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gamekings.py youtube-dl-2014.02.17/youtube_dl/extractor/gamekings.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gamekings.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gamekings.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,38 @@ +import re + +from .common import InfoExtractor + + +class GamekingsIE(InfoExtractor): + _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' + _TEST = { + u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", + u'file': u'20130811.mp4', + # MD5 is flaky, seems to change regularly + #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', + u'info_dict': { + u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", + u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", + } + } + + def _real_extract(self, url): + + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + video_url = self._og_search_video_url(webpage) + + video = re.search(r'[0-9]+', video_url) + video_id = video.group(0) + + # Todo: add medium format + video_url = video_url.replace(video_id, 'large/' + video_id) + + return { + 'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gamespot.py youtube-dl-2014.02.17/youtube_dl/extractor/gamespot.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gamespot.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gamespot.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urlparse, + unescapeHTML, + get_meta_content, +) + + +class GameSpotIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' + _TEST = { + "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", + "file": "gs-2300-6410818.mp4", + "md5": "b2a30deaa8654fcccd43713a6b6a4825", + "info_dict": { + "title": "Arma 3 - Community Guide: SITREP I", + 'description': 'Check out this video where some of the basics of Arma 3 is explained.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_id = mobj.group('page_id') + webpage = self._download_webpage(url, page_id) + data_video_json = self._search_regex(r'data-video=["\'](.*?)["\']', webpage, 'data video') + data_video = json.loads(unescapeHTML(data_video_json)) + + # Transform the manifest url to a link to the mp4 files + # they are used in mobile devices. 
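# [Editor's note] A minimal, self-contained sketch of the manifest-to-MP4
# rewrite that the comment above describes. The f4m path below is a made-up
# example of the expected shape (an embedded ",400,800,1600," quality list);
# only the video.gamespotcdn.com host is taken from the code that follows.
import re

QUALITIES_RE = r'((,\d+)+,?)'
f4m_path = '/gamespot/2014/02/gs-2300-6410818_,400,800,1600,.mp4.csmil/manifest.f4m'
# The quality list is the comma-separated run of bitrates inside the path.
qualities = re.search(QUALITIES_RE, f4m_path).group(0).strip(',').split(',')
# Drop the leading CDN mount point, turn the quality run into a '%s' slot,
# and strip the f4m suffix to obtain one direct MP4 URL template.
http_path = f4m_path[1:].split('/', 1)[1]
http_template = re.sub(QUALITIES_RE, '%s', http_path)
http_template = http_template.replace('.csmil/manifest.f4m', '')
http_template = 'http://video.gamespotcdn.com/' + http_template
print([http_template % q for q in qualities])
# -> ['http://video.gamespotcdn.com/2014/02/gs-2300-6410818_400.mp4', ...]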
+ f4m_url = data_video['videoStreams']['f4m_stream'] + f4m_path = compat_urlparse.urlparse(f4m_url).path + QUALITIES_RE = r'((,\d+)+,?)' + qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') + http_path = f4m_path[1:].split('/', 1)[1] + http_template = re.sub(QUALITIES_RE, r'%s', http_path) + http_template = http_template.replace('.csmil/manifest.f4m', '') + http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template) + formats = [] + for q in qualities: + formats.append({ + 'url': http_template % q, + 'ext': 'mp4', + 'format_id': q, + }) + + return { + 'id': data_video['guid'], + 'title': compat_urllib_parse.unquote(data_video['title']), + 'formats': formats, + 'description': get_meta_content('description', webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gametrailers.py youtube-dl-2014.02.17/youtube_dl/extractor/gametrailers.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gametrailers.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gametrailers.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,19 @@ +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor + + +class GametrailersIE(MTVServicesInfoExtractor): + _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' + _TEST = { + 'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', + 'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7', + 'info_dict': { + 'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d', + 'ext': 'mp4', + 'title': 'E3 2013: Debut Trailer', + 'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', + }, + } + + _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/generic.py youtube-dl-2014.02.17/youtube_dl/extractor/generic.py --- youtube-dl-2012.09.27/youtube_dl/extractor/generic.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/generic.py 2014-02-03 13:51:03.000000000 +0000 @@ -0,0 +1,403 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + + ExtractorError, + HEADRequest, + smuggle_url, + unescapeHTML, + unified_strdate, + url_basename, +) +from .brightcove import BrightcoveIE +from .ooyala import OoyalaIE + + +class GenericIE(InfoExtractor): + IE_DESC = 'Generic downloader that works on some sites' + _VALID_URL = r'.*' + IE_NAME = 'generic' + _TESTS = [ + { + 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', + 'file': '13601338388002.mp4', + 'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd', + 'info_dict': { + 'uploader': 'www.hodiho.fr', + 'title': 'R\u00e9gis plante sa Jeep', + } + }, + # bandcamp page with custom domain + { + 'add_ie': ['Bandcamp'], + 'url': 'http://bronyrock.com/track/the-pony-mash', + 'file': '3235767654.mp3', + 'info_dict': { + 'title': 'The Pony Mash', + 'uploader': 'M_Pallante', + }, + 'skip': 'There is a limit of 200 free downloads / month for the test song', + }, + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' in the + # http requests + { + 'add_ie': ['Brightcove'], + 'url': 
'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', + 'info_dict': { + 'id': '2765128793001', + 'ext': 'mp4', + 'title': 'Le cours de bourse : l’analyse technique', + 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', + 'uploader': 'BFM BUSINESS', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # https://github.com/rg3/youtube-dl/issues/2253 + 'url': 'http://bcove.me/i6nfkrc3', + 'file': '3101154703001.mp4', + 'md5': '0ba9446db037002366bab3b3eb30c88c', + 'info_dict': { + 'title': 'Still no power', + 'uploader': 'thestar.com', + 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', + }, + 'add_ie': ['Brightcove'], + }, + # Direct link to a video + { + 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', + 'file': 'trailer.mp4', + 'md5': '67d406c2bcb6af27fa886f31aa934bbe', + 'info_dict': { + 'id': 'trailer', + 'title': 'trailer', + 'upload_date': '20100513', + } + }, + # ooyala video + { + 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', + 'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4', + 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'info_dict': { + 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', + 'ext': 'mp4', + 'title': '2cc213299525360.mov', # that's what we get + }, + }, + ] + + def report_download_webpage(self, video_id): + """Report webpage download.""" + if not self._downloader.params.get('test', False): + self._downloader.report_warning('Falling back on generic information extractor.') + super(GenericIE, self).report_download_webpage(video_id) + + def report_following_redirect(self, new_url): + """Report information extraction.""" + self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) + + def _send_head(self, url): + """Check if it is a redirect, like url shorteners, in case return the new url.""" + + class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + """ + Subclass the HTTPRedirectHandler to make it use our + HEADRequest also on the redirected URL + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + if code in (301, 302, 303, 307): + newurl = newurl.replace(' ', '%20') + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type")) + return HEADRequest(newurl, + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + else: + raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) + + class HTTPMethodFallback(compat_urllib_request.BaseHandler): + """ + Fallback to GET if HEAD is not allowed (405 HTTP error) + """ + def http_error_405(self, req, fp, code, msg, headers): + fp.read() + fp.close() + + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type")) + return self.parent.open(compat_urllib_request.Request(req.get_full_url(), + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True)) + + # Build our opener + opener = compat_urllib_request.OpenerDirector() + for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler, + HTTPMethodFallback, HEADRedirectHandler, + compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: + opener.add_handler(handler()) + + response = 
opener.open(HEADRequest(url)) + if response is None: + raise ExtractorError('Invalid URL protocol') + return response + + def _real_extract(self, url): + parsed_url = compat_urlparse.urlparse(url) + if not parsed_url.scheme: + default_search = self._downloader.params.get('default_search') + if default_search is None: + default_search = 'auto' + + if default_search == 'auto': + if '/' in url: + self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') + return self.url_result('http://' + url) + else: + return self.url_result('ytsearch:' + url) + else: + assert ':' in default_search + return self.url_result(default_search + url) + video_id = os.path.splitext(url.split('/')[-1])[0] + + self.to_screen('%s: Requesting header' % video_id) + + try: + response = self._send_head(url) + + # Check for redirect + new_url = response.geturl() + if url != new_url: + self.report_following_redirect(new_url) + return self.url_result(new_url) + + # Check for direct link to a video + content_type = response.headers.get('Content-Type', '') + m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) + if m: + upload_date = response.headers.get('Last-Modified') + if upload_date: + upload_date = unified_strdate(upload_date) + return { + 'id': video_id, + 'title': os.path.splitext(url_basename(url))[0], + 'formats': [{ + 'format_id': m.group('format_id'), + 'url': url, + 'vcodec': 'none' if m.group('type') == 'audio' else None + }], + 'upload_date': upload_date, + } + + except compat_urllib_error.HTTPError: + # This may be a stupid server that doesn't like HEAD, our UA, or so + pass + + try: + webpage = self._download_webpage(url, video_id) + except ValueError: + # since this is the last-resort InfoExtractor, if + # this error is thrown, it'll be thrown here + raise ExtractorError('Failed to download URL: %s' % url) + + self.report_extraction(video_id) + + # it's tempting to parse this further, but you would + # have to take into account all the variations like + # Video Title - Site Name + # Site Name | Video Title + # Video Title - Tagline | Site Name + # and so on and so forth; it's just not practical + video_title = self._html_search_regex( + r'(?s)<title>(.*?)', webpage, 'video title', + default='video') + + # video uploader is domain name + video_uploader = self._search_regex( + r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + + # Look for BrightCove: + bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) + if bc_urls: + self.to_screen('Brightcove video detected.') + entries = [{ + '_type': 'url', + 'url': smuggle_url(bc_url, {'Referer': url}), + 'ie_key': 'Brightcove' + } for bc_url in bc_urls] + + return { + '_type': 'playlist', + 'title': video_title, + 'id': video_id, + 'entries': entries, + } + + # Look for embedded (iframe) Vimeo player + mobj = re.search( + r']+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) + if mobj: + player_url = unescapeHTML(mobj.group(1)) + surl = smuggle_url(player_url, {'Referer': url}) + return self.url_result(surl, 'Vimeo') + + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) + if mobj: + return self.url_result(mobj.group(1), 'Vimeo') + + # Look for embedded YouTube player + matches = re.findall(r'''(?x) + (?:]+?src=|embedSWF\(\s*) + (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?:embed|v)/.+?) 
+ \1''', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') + for tuppl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + + # Look for embedded Dailymotion player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') + for tuppl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + + # Look for embedded Wistia player + match = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) + if match: + return { + '_type': 'url_transparent', + 'url': unescapeHTML(match.group('url')), + 'ie_key': 'Wistia', + 'uploader': video_uploader, + 'title': video_title, + 'id': video_id, + } + + # Look for embedded blip.tv player + mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) + if mobj: + return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage) + if mobj: + return self.url_result(mobj.group(1), 'BlipTV') + + # Look for Bandcamp pages with custom domain + mobj = re.search(r']*?content="(.*?bandcamp\.com.*?)"', webpage) + if mobj is not None: + burl = unescapeHTML(mobj.group(1)) + # Don't set the extractor because it can be a track url or an album + return self.url_result(burl) + + # Look for embedded Vevo player + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + + # Look for Ooyala videos + mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage) + if mobj is not None: + return OoyalaIE._build_url_result(mobj.group(1)) + + # Look for Aparat videos + mobj = re.search(r'', webpage) + if mobj: + embedded_url = mobj.group(1) + return self.url_result(embedded_url) + + video_title = self._html_search_regex(r'
<h1 [^>
    ]*>([^<]+)', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + if webpage.find('encrypted=true')!=-1: + password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') + video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[4].split('_')[:2] + format = "-".join(format) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/khanacademy.py youtube-dl-2014.02.17/youtube_dl/extractor/khanacademy.py --- youtube-dl-2012.09.27/youtube_dl/extractor/khanacademy.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/khanacademy.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class KhanAcademyIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P[^/]+)/(?:[^/]+/){,2}(?P[^?#/]+)(?:$|[?#])' + IE_NAME = 'KhanAcademy' + + _TEST = { + 'url': 'http://www.khanacademy.org/video/one-time-pad', + 'file': 'one-time-pad.mp4', + 'md5': '7021db7f2d47d4fff89b13177cb1e8f4', + 'info_dict': { + 'title': 'The one-time pad', + 'description': 'The perfect cipher', + 'duration': 176, + 'uploader': 'Brit Cruise', + 'upload_date': '20120411', + } + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + if m.group('key') == 'video': + data = self._download_json( + 'http://api.khanacademy.org/api/v1/videos/' + video_id, + video_id, 'Downloading video info') + + upload_date = unified_strdate(data['date_added']) + uploader = ', '.join(data['author_names']) + return { + '_type': 'url_transparent', + 'url': data['url'], + 'id': video_id, + 'title': data['title'], + 'thumbnail': data['image_url'], + 'duration': data['duration'], + 'description': data['description'], + 'uploader': uploader, + 'upload_date': upload_date, + } + else: + # topic + data = self._download_json( + 'http://api.khanacademy.org/api/v1/topic/' + video_id, + video_id, 'Downloading topic info') + + entries = [ + { + '_type': 'url', + 'url': c['url'], + 'id': c['id'], + 'title': c['title'], + } + for c in data['children'] if c['kind'] in ('Video', 'Topic')] + + return { + '_type': 'playlist', + 'id': video_id, + 'title': data['title'], + 'description': data['description'], + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/kickstarter.py youtube-dl-2014.02.17/youtube_dl/extractor/kickstarter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/kickstarter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/kickstarter.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class KickStarterIE(InfoExtractor): + _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P\d*)/.*' + _TEST = { + u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", + u"file": u"1404461844.mp4", + u"md5": u"c81addca81327ffa66c642b5d8b08cab", + u"info_dict": { + u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", 
+ }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + webpage_src = self._download_webpage(url, video_id) + + video_url = self._search_regex(r'data-video="(.*?)">', + webpage_src, u'video URL') + if 'mp4' in video_url: + ext = 'mp4' + else: + ext = 'flv' + video_title = self._html_search_regex(r"(.*?)", + webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() + + results = [{ + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': ext, + }] + return results diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/kontrtube.py youtube-dl-2014.02.17/youtube_dl/extractor/kontrtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/kontrtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/kontrtube.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,66 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class KontrTubeIE(InfoExtractor): + IE_NAME = 'kontrtube' + IE_DESC = 'KontrTube.ru - Труба зовёт' + _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P\d+)/.+' + + _TEST = { + 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', + 'md5': '975a991a4926c9a85f383a736a2e6b80', + 'info_dict': { + 'id': '2678', + 'ext': 'mp4', + 'title': 'Над олимпийской деревней в Сочи поднят российский флаг', + 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', + 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg', + 'duration': 270, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id, 'Downloading page') + + video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') + thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) + title = self._html_search_regex(r'(.+?) - Труба зовёт - Интересный видеохостинг', webpage, + 'video title') + description = self._html_search_meta('description', webpage, 'video description') + + mobj = re.search(r'
Длительность: (?P<minutes>\d+)м:(?P<seconds>\d+)с', + webpage) + duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + + view_count = self._html_search_regex(r'Просмотров: (\d+)
    ', webpage, + 'view count', fatal=False) + view_count = int(view_count) if view_count is not None else None + + comment_count = None + comment_str = self._html_search_regex(r'Комментарии: ([^<]+)', webpage, 'comment count', + fatal=False) + if comment_str.startswith('комментариев нет'): + comment_count = 0 + else: + mobj = re.search(r'\d+ из (?P\d+) комментариев', comment_str) + if mobj: + comment_count = int(mobj.group('total')) + + return { + 'id': video_id, + 'url': video_url, + 'thumbnail': thumbnail, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/la7.py youtube-dl-2014.02.17/youtube_dl/extractor/la7.py --- youtube-dl-2012.09.27/youtube_dl/extractor/la7.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/la7.py 2014-01-27 22:47:05.000000000 +0000 @@ -0,0 +1,63 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, +) + + +class LA7IE(InfoExtractor): + IE_NAME = 'la7.tv' + _VALID_URL = r'''(?x) + https?://(?:www\.)?la7\.tv/ + (?: + richplayer/\?assetid=| + \?contentId= + ) + (?P[0-9]+)''' + + _TEST = { + 'url': 'http://www.la7.tv/richplayer/?assetid=50355319', + 'file': '50355319.mp4', + 'md5': 'ec7d1f0224d20ba293ab56cf2259651f', + 'info_dict': { + 'title': 'IL DIVO', + 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', + 'duration': 6254, + }, + 'skip': 'Blocked in the US', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id + doc = self._download_xml(xml_url, video_id) + + video_title = doc.find('title').text + description = doc.find('description').text + duration = parse_duration(doc.find('duration').text) + thumbnail = doc.find('img').text + view_count = int(doc.find('views').text) + + prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') + + formats = [{ + 'format': vnode.find('quality').text, + 'tbr': int(vnode.find('quality').text), + 'url': vnode.find('fms').text.strip().replace('mp4:', prefix), + } for vnode in doc.findall('.//videos/video')] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'view_count': view_count, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/lifenews.py youtube-dl-2014.02.17/youtube_dl/extractor/lifenews.py --- youtube-dl-2012.09.27/youtube_dl/extractor/lifenews.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/lifenews.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,69 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate +) + + +class LifeNewsIE(InfoExtractor): + IE_NAME = 'lifenews' + IE_DESC = 'LIFE | NEWS' + _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P\d+)' + + _TEST = { + 'url': 'http://lifenews.ru/news/126342', + 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a', + 'info_dict': { + 'id': '126342', + 'ext': 'mp4', + 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', + 'description': 'Камеры наблюдения гипермаркета зафиксировали троих 
мужчин, спрятавших оружейный арсенал в камере хранения.', + 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', + 'upload_date': '20140130', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') + + video_url = self._html_search_regex( + r'', webpage, 'video URL') + + thumbnail = self._html_search_regex( + r'', webpage, 'video thumbnail') + + title = self._og_search_title(webpage) + TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' + if title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)] + + description = self._og_search_description(webpage) + + view_count = self._html_search_regex( + r'
(\d+)', webpage, 'view count', fatal=False) + comment_count = self._html_search_regex( + r'(\d+)', webpage, 'comment count', fatal=False) + + upload_date = self._html_search_regex( + r'

    ", + webpage, u'title') + + return [{ + 'id': video_id, + 'url': video_url, + 'tc_url': video_url, + 'uploader': None, + 'upload_date': None, + 'title': video_title, + 'ext': u'flv', + 'play_path': video_playpath, + 'video_file': video_file, + 'video_hls_playlist': video_hls_playlist, + 'player_url': video_swfobj, + }] + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/naver.py youtube-dl-2014.02.17/youtube_dl/extractor/naver.py --- youtube-dl-2012.09.27/youtube_dl/extractor/naver.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/naver.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,67 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + ExtractorError, +) + + +class NaverIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + + _TEST = { + u'url': u'http://tvcast.naver.com/v/81652', + u'file': u'81652.mp4', + u'info_dict': { + u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', + u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + u'upload_date': u'20130903', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + webpage) + if m_id is None: + raise ExtractorError(u'couldn\'t extract vid and key') + vid = m_id.group(1) + key = m_id.group(2) + query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) + query_urls = compat_urllib_parse.urlencode({ + 'masterVid': vid, + 'protocol': 'p2p', + 'inKey': key, + }) + info = self._download_xml( + 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, + video_id, u'Downloading video info') + urls = self._download_xml( + 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' 
+ query_urls, + video_id, u'Downloading video formats info') + + formats = [] + for format_el in urls.findall('EncodingOptions/EncodingOption'): + domain = format_el.find('Domain').text + if domain.startswith('rtmp'): + continue + formats.append({ + 'url': domain + format_el.find('uri').text, + 'ext': 'mp4', + 'width': int(format_el.find('width').text), + 'height': int(format_el.find('height').text), + }) + + return { + 'id': video_id, + 'title': info.find('Subject').text, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'upload_date': info.find('WriteDate').text.replace('.', ''), + 'view_count': int(info.find('PlayCount').text), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nba.py youtube-dl-2014.02.17/youtube_dl/extractor/nba.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nba.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nba.py 2014-01-29 20:16:25.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NBAIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' + _TEST = { + 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', + 'file': u'0021200253-okc-bkn-recap.nba.mp4', + 'md5': u'c0edcfc37607344e2ff8f13c378c88a4', + 'info_dict': { + 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', + 'title': 'Thunder vs. Nets', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + webpage = self._download_webpage(url, video_id) + + video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' + + shortened_video_id = video_id.rpartition('/')[2] + title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '') + + description = self._html_search_regex(r'', webpage, 'description', fatal=False) + + return { + 'id': shortened_video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nbc.py youtube-dl-2014.02.17/youtube_dl/extractor/nbc.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nbc.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nbc.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,32 @@ +import re + +from .common import InfoExtractor +from ..utils import find_xpath_attr, compat_str + + +class NBCNewsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P\d+)' + + _TEST = { + u'url': u'http://www.nbcnews.com/video/nbc-news/52753292', + u'file': u'52753292.flv', + u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179', + u'info_dict': { + u'title': u'Crew emerges after four-month Mars food study', + u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) + info = all_info.find('video') + + return {'id': video_id, + 'title': info.find('headline').text, + 'ext': 'flv', + 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, + 'description': compat_str(info.find('caption').text), + 'thumbnail': find_xpath_attr(info, 'media', 'type', 
'thumbnail').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ndr.py youtube-dl-2014.02.17/youtube_dl/extractor/ndr.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ndr.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ndr.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,89 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class NDRIE(InfoExtractor): + IE_NAME = 'ndr' + IE_DESC = 'NDR.de - Mediathek' + _VALID_URL = r'https?://www\.ndr\.de/.+?(?P\d+)\.html' + + _TESTS = [ + { + 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', + 'md5': 'e7a6079ca39d3568f4996cb858dd6708', + 'note': 'Video file', + 'info_dict': { + 'id': '7959', + 'ext': 'mp4', + 'title': 'Markt - die ganze Sendung', + 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', + 'duration': 2655, + }, + }, + { + 'url': 'http://www.ndr.de/info/audio51535.html', + 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', + 'note': 'Audio file', + 'info_dict': { + 'id': '51535', + 'ext': 'mp3', + 'title': 'La Valette entgeht der Hinrichtung', + 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', + 'duration': 884, + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, 'Downloading page') + + title = self._og_search_title(page) + description = self._og_search_description(page) + + mobj = re.search( + r'
(?P<minutes>\d+):(?P<seconds>\d+)', + page) + duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + + formats = [] + + mp3_url = re.search(r'''{src:'(?P
    ([^<]+)', + page, 'director name', fatal=False) + + request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, + compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') + + config = self._download_xml(request, video_id, 'Downloading player config XML') + + title = None + description = None + thumbnail = None + duration = None + formats = [] + + def extract_thumbnail(media): + thumbnails = {} + for asset in media.findall('assets/asset'): + thumbnails[asset.get('quality')] = asset.find('default/url').text + if not thumbnails: + return None + if 'high' in thumbnails: + return thumbnails['high'] + return list(thumbnails.values())[0] + + for media in config.findall('./player/stream/media'): + if media.get('type') == 'posterImage': + thumbnail = extract_thumbnail(media) + elif media.get('type') == 'video': + duration = int(media.get('duration')) + title = media.find('title').text + description = media.find('description').text + # It seems assets always go from lower to better quality, so no need to sort + formats = [{ + 'url': x.find('default/streamerURI').text, + 'app': x.find('default/streamerURI').text.split('/', 3)[3], + 'play_path': x.find('default/url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': x.get('quality'), + } for x in media.findall('assets/asset')] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nhl.py youtube-dl-2014.02.17/youtube_dl/extractor/nhl.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nhl.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nhl.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,118 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_urllib_parse, + determine_ext, + unified_strdate, +) + + +class NHLBaseInfoExtractor(InfoExtractor): + @staticmethod + def _fix_json(json_string): + return json_string.replace('\\\'', '\'') + + def _extract_video(self, info): + video_id = info['id'] + self.report_extraction(video_id) + + initial_video_url = info['publishPoint'] + data = compat_urllib_parse.urlencode({ + 'type': 'fvod', + 'path': initial_video_url.replace('.mp4', '_sd.mp4'), + }) + path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' 
+ data + path_doc = self._download_xml(path_url, video_id, + u'Downloading final video url') + video_url = path_doc.find('path').text + + join = compat_urlparse.urljoin + return { + 'id': video_id, + 'title': info['name'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': info['description'], + 'duration': int(info['duration']), + 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), + 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), + } + + +class NHLIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com' + _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P\d+)' + + _TEST = { + u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', + u'file': u'453614.mp4', + u'info_dict': { + u'title': u'Quick clip: Weise 4-3 goal vs Flames', + u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', + u'duration': 18, + u'upload_date': u'20131006', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + info_json = self._download_webpage(json_url, video_id, + u'Downloading info json') + info_json = self._fix_json(info_json) + info = json.loads(info_json)[0] + return self._extract_video(info) + + +class NHLVideocenterIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com:videocenter' + IE_DESC = u'NHL videocenter category' + _VALID_URL = r'https?://video\.(?P[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P[^&]+))?' + + @classmethod + def suitable(cls, url): + if NHLIE.suitable(url): + return False + return super(NHLVideocenterIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + team = mobj.group('team') + webpage = self._download_webpage(url, team) + cat_id = self._search_regex( + [r'var defaultCatId = "(.+?)";', + r'{statusIndex:0,index:0,.*?id:(.*?),'], + webpage, u'category id') + playlist_title = self._html_search_regex( + r'tab0"[^>]*?>(.*?)', + webpage, u'playlist title', flags=re.DOTALL).lower().capitalize() + + data = compat_urllib_parse.urlencode({ + 'cid': cat_id, + # This is the default value + 'count': 12, + 'ptrs': 3, + 'format': 'json', + }) + path = '/videocenter/servlets/browse?' 
+ data + request_url = compat_urlparse.urljoin(url, path) + response = self._download_webpage(request_url, playlist_title) + response = self._fix_json(response) + if not response.strip(): + self._downloader.report_warning(u'Got an empty response, retrying with ' + u'the "newvideos" parameter') + response = self._download_webpage(request_url + '&newvideos=true', + playlist_title) + response = self._fix_json(response) + videos = json.loads(response) + + return { + '_type': 'playlist', + 'title': playlist_title, + 'id': cat_id, + 'entries': [self._extract_video(i) for i in videos], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/niconico.py youtube-dl-2014.02.17/youtube_dl/extractor/niconico.py --- youtube-dl-2012.09.27/youtube_dl/extractor/niconico.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/niconico.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,127 @@ +# encoding: utf-8 + +import re +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + compat_str, + + ExtractorError, + unified_strdate, +) + + +class NiconicoIE(InfoExtractor): + IE_NAME = u'niconico' + IE_DESC = u'ニコニコ動画' + + _TEST = { + u'url': u'http://www.nicovideo.jp/watch/sm22312215', + u'file': u'sm22312215.mp4', + u'md5': u'd1a75c0823e2f629128c43e1212760f9', + u'info_dict': { + u'title': u'Big Buck Bunny', + u'uploader': u'takuya0301', + u'uploader_id': u'2698420', + u'upload_date': u'20131123', + u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + }, + u'params': { + u'username': u'ydl.niconico@gmail.com', + u'password': u'youtube-dl', + }, + } + + _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _NETRC_MACHINE = 'niconico' + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = True + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + # No authentication to be performed + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) + return False + + # Log in + login_form_strs = { + u'mail': username, + u'password': password, + } + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + u'https://secure.nicovideo.jp/secure/login', login_data) + login_results = self._download_webpage( + request, u'', note=u'Logging in', errnote=u'Unable to log in') + if re.search(r'(?i)
<h1 class="mb8p4">Log in error</h1>
    ', login_results) is not None: + self._downloader.report_warning(u'unable to log in: bad username or password') + return False + return True + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + # Get video webpage. We are not actually interested in it, but need + # the cookies in order to be able to download the info webpage + self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) + + video_info = self._download_xml( + 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, + note=u'Downloading video info page') + + # Get flv info + flv_info_webpage = self._download_webpage( + u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, u'Downloading flv info') + video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] + + # Start extracting information + video_title = video_info.find('.//title').text + video_extension = video_info.find('.//movie_type').text + video_format = video_extension.upper() + video_thumbnail = video_info.find('.//thumbnail_url').text + video_description = video_info.find('.//description').text + video_uploader_id = video_info.find('.//user_id').text + video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + video_view_count = video_info.find('.//view_counter').text + video_webpage_url = video_info.find('.//watch_url').text + + # uploader + video_uploader = video_uploader_id + url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id + try: + user_info = self._download_xml( + url, video_id, note=u'Downloading user information') + video_uploader = user_info.find('.//nickname').text + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err)) + + return { + 'id': video_id, + 'url': video_real_url, + 'title': video_title, + 'ext': video_extension, + 'format': video_format, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'uploader': video_uploader, + 'upload_date': video_upload_date, + 'uploader_id': video_uploader_id, + 'view_count': video_view_count, + 'webpage_url': video_webpage_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ninegag.py youtube-dl-2014.02.17/youtube_dl/extractor/ninegag.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ninegag.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ninegag.py 2014-01-28 20:45:24.000000000 +0000 @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor + + +class NineGagIE(InfoExtractor): + IE_NAME = '9gag' + _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P[0-9]+)' + + _TEST = { + "url": "http://9gag.tv/v/1912", + "file": "1912.mp4", + "info_dict": { + "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. 
(Thanks, Dino!)", + "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" + }, + 'add_ie': ['Youtube'] + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + data_json = self._html_search_regex(r'''(?x) + [0-9a-z-]*)/?$' + _TEST = { + u'url': u'http://normalboots.com/video/home-alone-games-jontron/', + u'file': u'home-alone-games-jontron.mp4', + u'md5': u'8bf6de238915dd501105b44ef5f1e0f6', + u'info_dict': { + u'title': u'Home Alone Games - JonTron - NormalBoots', + u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/', + u'uploader': u'JonTron', + u'upload_date': u'20140125', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group('videoid') + + info = { + 'id': video_id, + 'uploader': None, + 'upload_date': None, + } + + if url[:4] != 'http': + url = 'http://' + url + + webpage = self._download_webpage(url, video_id) + video_title = self._og_search_title(webpage) + video_description = self._og_search_description(webpage) + video_thumbnail = self._og_search_thumbnail(webpage) + video_uploader = self._html_search_regex(r'Posted\sby\s(?P[A-Za-z]*)\s
</a>', + webpage, 'uploader') + raw_upload_date = self._html_search_regex('[A-Za-z]+, (?P<date>.*)', + webpage, 'date') + video_upload_date = unified_strdate(raw_upload_date) + + player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') + player_page = self._download_webpage(player_url, video_id) + video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file') + + info['url'] = video_url + info['title'] = video_title + info['description'] = video_description + info['thumbnail'] = video_thumbnail + info['uploader'] = video_uploader + info['upload_date'] = video_upload_date + + return info diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/novamov.py youtube-dl-2014.02.17/youtube_dl/extractor/novamov.py --- youtube-dl-2012.09.27/youtube_dl/extractor/novamov.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/novamov.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,63 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urlparse +) + + +class NovamovIE(InfoExtractor): + _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})' + + _TEST = { + 'url': 'http://www.novamov.com/video/4rurhn9x446jj', + 'file': '4rurhn9x446jj.flv', + 'md5': '7205f346a52bbeba427603ba10d4b935', + 'info_dict': { + 'title': 'search engine optimization', + 'description': 'search engine optimization is used to rank the web page in the google search engine' + }, + 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + + page = self._download_webpage('http://www.novamov.com/video/%s' % video_id, + video_id, 'Downloading video page') + + if re.search(r'This file no longer exists on our servers!', page) is not None: + raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) + + filekey = self._search_regex( + r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey') + + title = self._html_search_regex( + r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>', + page, 'title', fatal=False) + + description = self._html_search_regex( + r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p></p><p>([^<]+)</p>
    ', + page, 'description', fatal=False) + + api_response = self._download_webpage( + 'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id), + video_id, 'Downloading video api response') + + response = compat_urlparse.parse_qs(api_response) + + if 'error_msg' in response: + raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True) + + video_url = response['url'][0] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nowness.py youtube-dl-2014.02.17/youtube_dl/extractor/nowness.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nowness.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nowness.py 2014-02-03 13:40:50.000000000 +0000 @@ -0,0 +1,49 @@ +from __future__ import unicode_literals + +import re + +from .brightcove import BrightcoveIE +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class NownessIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P[0-9]+)/(?P[^/]+?)(?:$|[?#])' + + _TEST = { + 'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', + 'file': '2520295746001.mp4', + 'md5': '0ece2f70a7bd252c7b00f3070182d418', + 'info_dict': { + 'description': 'Candor: The Art of Gesticulation', + 'uploader': 'Nowness', + 'title': 'Candor: The Art of Gesticulation', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('slug') + + webpage = self._download_webpage(url, video_id) + player_url = self._search_regex( + r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL') + real_id = self._search_regex( + r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID') + + player_code = self._download_webpage( + player_url, video_id, + note='Downloading player JavaScript', + errnote='Player download failed') + player_code = player_code.replace("'+d+'", real_id) + + bc_url = BrightcoveIE._extract_brightcove_url(player_code) + if bc_url is None: + raise ExtractorError('Could not find player definition') + return { + '_type': 'url', + 'url': bc_url, + 'ie_key': 'Brightcove', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nowvideo.py youtube-dl-2014.02.17/youtube_dl/extractor/nowvideo.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nowvideo.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nowvideo.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,46 @@ +import re + +from .common import InfoExtractor +from ..utils import compat_urlparse + + +class NowVideoIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P\w+)' + _TEST = { + u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', + u'file': u'0mw0yow7b6dxa.flv', + u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', + u'info_dict': { + u"title": u"youtubedl test video _BaW_jenozKc.mp4" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.nowvideo.ch/video/' + video_id + embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id + webpage = self._download_webpage(webpage_url, video_id) + embed_page = self._download_webpage(embed_url, video_id, + u'Downloading embed page') + + self.report_extraction(video_id) + + video_title = self._html_search_regex(r'
<h4>(.*)</h4>
    ', + webpage, u'video title') + + video_key = self._search_regex(r'var fkzd="(.*)";', + embed_page, u'video key') + + api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) + api_response = self._download_webpage(api_call, video_id, + u'Downloading API page') + video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ooyala.py youtube-dl-2014.02.17/youtube_dl/extractor/ooyala.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ooyala.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ooyala.py 2014-01-29 20:16:25.000000000 +0000 @@ -0,0 +1,63 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unescapeHTML + +class OoyalaIE(InfoExtractor): + _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P.+?)(&|$)' + + _TEST = { + # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video + u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', + u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', + u'info_dict': { + u'title': u'Explaining Data Recovery from Hard Drives and SSDs', + u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + }, + } + + @staticmethod + def _url_for_embed_code(embed_code): + return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code + + @classmethod + def _build_url_result(cls, embed_code): + return cls.url_result(cls._url_for_embed_code(embed_code), + ie=cls.ie_key()) + + def _extract_result(self, info, more_info): + return {'id': info['embedCode'], + 'ext': 'mp4', + 'title': unescapeHTML(info['title']), + 'url': info.get('ipad_url') or info['url'], + 'description': unescapeHTML(more_info['description']), + 'thumbnail': more_info['promo'], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + embedCode = mobj.group('id') + player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode + player = self._download_webpage(player_url, embedCode) + mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', + player, u'mobile player url') + mobile_player = self._download_webpage(mobile_url, embedCode) + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, u'info').replace('\\"','"') + videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') + videos_info = json.loads(videos_info) + videos_more_info =json.loads(videos_more_info) + + if videos_more_info.get('lineup'): + videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] + return {'_type': 'playlist', + 'id': embedCode, + 'title': unescapeHTML(videos_more_info['title']), + 'entries': videos, + } + else: + return self._extract_result(videos_info[0], videos_more_info) + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/orf.py youtube-dl-2014.02.17/youtube_dl/extractor/orf.py --- youtube-dl-2012.09.27/youtube_dl/extractor/orf.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/orf.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,98 @@ +# coding: utf-8 
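# [Editor's note] The Novamov and NowVideo extractors above talk to the same
# "player.api.php" backend: a file key scraped from the page is exchanged for
# a urlencoded response whose url= field carries the direct video link. A
# minimal sketch of parsing that exchange; the response string is fabricated
# for illustration, and the error check mirrors the Novamov code above.
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2, as reached via compat_urlparse
api_response = 'url=http://example.com/dl/video.flv&title=demo+video'
response = parse_qs(api_response)
if 'error_msg' in response:
    raise RuntimeError('API error: %s' % response['error_msg'][0])
video_url = response['url'][0]
print(video_url)  # -> http://example.com/dl/video.flv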
+from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + unified_strdate, +) + + +class ORFIE(InfoExtractor): + _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P\d+)' + + _TEST = { + 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', + 'file': '7319747.mp4', + 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', + 'info_dict': { + 'title': 'Was Sie schon immer über Klassik wissen wollten', + 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', + 'duration': 3508, + 'upload_date': '20140105', + }, + 'skip': 'Blocked outside of Austria', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + data_json = self._search_regex( + r'initializeAdworx\((.+?)\);\n', webpage, 'video info') + all_data = json.loads(data_json) + sdata = all_data[0]['values']['segments'] + + def quality_to_int(s): + m = re.search('([0-9]+)', s) + if m is None: + return -1 + return int(m.group(1)) + + entries = [] + for sd in sdata: + video_id = sd['id'] + formats = [{ + 'preference': -10 if fd['delivery'] == 'hls' else None, + 'format_id': '%s-%s-%s' % ( + fd['delivery'], fd['quality'], fd['quality_string']), + 'url': fd['src'], + 'protocol': fd['protocol'], + 'quality': quality_to_int(fd['quality']), + } for fd in sd['playlist_item_array']['sources']] + + # Check for geoblocking. + # There is a property is_geoprotection, but that's always false + geo_str = sd.get('geoprotection_string') + if geo_str: + try: + http_url = next( + f['url'] + for f in formats + if re.match(r'^https?://.*\.mp4$', f['url'])) + except StopIteration: + pass + else: + req = HEADRequest(http_url) + self._request_webpage( + req, video_id, + note='Testing for geoblocking', + errnote=(( + 'This video seems to be blocked outside of %s. ' + 'You may want to try the streaming-* formats.') + % geo_str), + fatal=False) + + self._sort_formats(formats) + + upload_date = unified_strdate(sd['created_date']) + entries.append({ + '_type': 'video', + 'id': video_id, + 'title': sd['header'], + 'formats': formats, + 'description': sd.get('description'), + 'duration': int(sd['duration_in_seconds']), + 'upload_date': upload_date, + 'thumbnail': sd.get('image_full_url'), + }) + + return { + '_type': 'playlist', + 'entries': entries, + 'id': playlist_id, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pbs.py youtube-dl-2014.02.17/youtube_dl/extractor/pbs.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pbs.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pbs.py 2014-02-08 17:34:59.000000000 +0000 @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class PBSIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?: + # Direct video URL + video\.pbs\.org/(?:viralplayer|video)/(?P[0-9]+)/? 
| + # Article with embedded player + (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+)/?(?:$|[?\#]) | + # Player + video\.pbs\.org/partnerplayer/(?P[^/]+)/ + ) + ''' + + _TEST = { + 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', + 'md5': 'ce1888486f0908d555a8093cac9a7362', + 'info_dict': { + 'id': '2365006249', + 'ext': 'mp4', + 'title': 'A More Perfect Union', + 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', + 'duration': 3190, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + presumptive_id = mobj.group('presumptive_id') + display_id = presumptive_id + if presumptive_id: + webpage = self._download_webpage(url, display_id) + url = self._search_regex( + r'', + webpage, 'player URL') + mobj = re.match(self._VALID_URL, url) + + player_id = mobj.group('player_id') + if not display_id: + display_id = player_id + if player_id: + player_page = self._download_webpage( + url, display_id, note='Downloading player page', + errnote='Could not download player page') + video_id = self._search_regex( + r'.*)\.(?P(flv)|(mp4))' + IE_NAME = u'photobucket' + _TEST = { + u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + u'file': u'zpsc0c3b9fa.mp4', + u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', + u'info_dict': { + u"upload_date": u"20130504", + u"uploader": u"rachaneronas", + u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" + } + } + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + + video_id = mobj.group('id') + + video_extension = mobj.group('ext') + + # Retrieve video webpage to extract further information + webpage = self._download_webpage(url, video_id) + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + # We try first by looking the javascript code: + mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P.*?)\);', webpage) + if mobj is not None: + info = json.loads(mobj.group('json')) + return [{ + 'id': video_id, + 'url': info[u'downloadUrl'], + 'uploader': info[u'username'], + 'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), + 'title': info[u'title'], + 'ext': video_extension, + 'thumbnail': info[u'thumbUrl'], + }] + + # We try looking in other parts of the webpage + video_url = self._search_regex(r'', + webpage, u'video URL') + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract title') + video_title = mobj.group(1).decode('utf-8') + video_uploader = mobj.group(2).decode('utf-8') + + return [{ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'upload_date': None, + 'title': video_title, + 'ext': video_extension.decode('utf-8'), + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/podomatic.py youtube-dl-2014.02.17/youtube_dl/extractor/podomatic.py --- youtube-dl-2012.09.27/youtube_dl/extractor/podomatic.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/podomatic.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,49 @@ +import json +import re + +from .common import InfoExtractor + + +class PodomaticIE(InfoExtractor): + IE_NAME = 'podomatic' + _VALID_URL = 
r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' + + _TEST = { + u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", + u"file": u"2009-01-02T16_03_35-08_00.mp3", + u"md5": u"84bb855fcf3429e6bf72460e1eed782d", + u"info_dict": { + u"uploader": u"Science Teaching Tips", + u"uploader_id": u"scienceteachingtips", + u"title": u"64. When the Moon Hits Your Eye", + u"duration": 446, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + channel = mobj.group('channel') + + json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + + '?permalink=true&rtmp=0') % + (mobj.group('proto'), channel, video_id)) + data_json = self._download_webpage( + json_url, video_id, note=u'Downloading video info') + data = json.loads(data_json) + + video_url = data['downloadLink'] + uploader = data['podcast'] + title = data['title'] + thumbnail = data['imageLocation'] + duration = int(data['length'] / 1000.0) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'uploader': uploader, + 'uploader_id': channel, + 'thumbnail': thumbnail, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornhd.py youtube-dl-2014.02.17/youtube_dl/extractor/pornhd.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornhd.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornhd.py 2014-01-28 02:52:52.000000000 +0000 @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import compat_urllib_parse + + +class PornHdIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' + _TEST = { + 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', + 'file': '1962.flv', + 'md5': '35272469887dca97abd30abecc6cdf75', + 'info_dict': { + "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", + "age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('video_id') + video_title = mobj.group('video_title') + + webpage = self._download_webpage(url, video_id) + + next_url = self._html_search_regex( + r'&hd=(http.+?)&', webpage, 'video URL') + next_url = compat_urllib_parse.unquote(next_url) + + video_url = self._download_webpage( + next_url, video_id, note='Retrieving video URL', + errnote='Could not retrieve video URL') + age_limit = 18 + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + 'age_limit': age_limit, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornhub.py youtube-dl-2014.02.17/youtube_dl/extractor/pornhub.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornhub.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornhub.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) +from ..aes import ( + aes_decrypt_text +) + + +class PornHubIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))' + _TEST = { + 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', + 'file': '648719015.mp4', + 'md5': '882f488fa1f0026f023f33576004a2ed', + 'info_dict': { + "uploader":
"BABES-COM", + "title": "Seductive Indian beauty strips down and fingers her pink pussy", + "age_limit": 18 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'
<h1 [^>
    ]+>([^<]+)', webpage, 'title') + video_uploader = self._html_search_regex(r'From: (?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False) + thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = compat_urllib_parse.unquote(thumbnail) + + video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + if webpage.find('"encrypted":true') != -1: + password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ') + video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) + + formats = [] + for video_url in video_urls: + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join(format) + + m = re.match(r'^(?P[0-9]+)P-(?P[0-9]+)K$', format) + if m is None: + height = None + tbr = None + else: + height = int(m.group('height')) + tbr = int(m.group('tbr')) + + formats.append({ + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'tbr': tbr, + 'height': height, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'thumbnail': thumbnail, + 'formats': formats, + 'age_limit': 18, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornotube.py youtube-dl-2014.02.17/youtube_dl/extractor/pornotube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornotube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornotube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + + unified_strdate, +) + + +class PornotubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P[0-9]+))?(/m/(?P[0-9]+))(/(?P.+))$' + _TEST = { + u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + u'file': u'1689755.flv', + u'md5': u'374dd6dcedd24234453b295209aa69b6', + u'info_dict': { + u"upload_date": u"20090708", + u"title": u"Marilyn-Monroe-Bathing", + u"age_limit": 18 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + video_title = mobj.group('title') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + # Get the video URL + VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",' + video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url') + video_url = compat_urllib_parse.unquote(video_url) + + #Get the uploaded date + VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' + upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) + if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) + + info = {'id': video_id, + 'url': video_url, + 'uploader': None, + 'upload_date': upload_date, + 'title': video_title, + 'ext': 'flv', + 'format': 'flv', + 'age_limit': age_limit} + + return [info] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pyvideo.py youtube-dl-2014.02.17/youtube_dl/extractor/pyvideo.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pyvideo.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pyvideo.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,51 @@ +import re +import os + +from 
.common import InfoExtractor + + +class PyvideoIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' + _TESTS = [{ + u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', + u'file': u'24_4WWkSmNo.mp4', + u'md5': u'de317418c8bc76b1fd8633e4f32acbc6', + u'info_dict': { + u"title": u"Become a logging expert in 30 minutes", + u"description": u"md5:9665350d466c67fb5b1598de379021f7", + u"upload_date": u"20130320", + u"uploader": u"NextDayVideo", + u"uploader_id": u"NextDayVideo", + }, + u'add_ie': ['Youtube'], + }, + { + u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', + u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12', + u'info_dict': { + u'id': u'2542', + u'ext': u'm4v', + u'title': u'Gloriajw-SpotifyWithErikBernhardsson182', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) + + if m_youtube is not None: + return self.url_result(m_youtube.group(1), 'Youtube') + + title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>', + webpage, u'title', flags=re.DOTALL) + video_url = self._search_regex([r'<source src="(.*?)"', + r'<dt>Download</dt>.*?<a href="(.+?)"'], + webpage, u'video url', flags=re.DOTALL) + return { + 'id': video_id, + 'title': os.path.splitext(title)[0], + 'url': video_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/radiofrance.py youtube-dl-2014.02.17/youtube_dl/extractor/radiofrance.py --- youtube-dl-2012.09.27/youtube_dl/extractor/radiofrance.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/radiofrance.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,55 @@ +# coding: utf-8 +import re + +from .common import InfoExtractor + + +class RadioFranceIE(InfoExtractor): + _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' + IE_NAME = u'radiofrance' + + _TEST = { + u'url': u'http://maison.radiofrance.fr/radiovisions/one-one', + u'file': u'one-one.ogg', + u'md5': u'bdbb28ace95ed0e04faab32ba3160daf', + u'info_dict': { + u"title": u"One to one", + u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", + u"uploader": u"Thomas Hercouët", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title') + description = self._html_search_regex( + r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', + webpage, u'description', fatal=False) + uploader = self._html_search_regex( + r'<div class="credit">  © (.*?)</div>', + webpage, u'uploader', fatal=False) + + formats_str = self._html_search_regex( + r'class="jp-jplayer[^"]*" data-source="([^"]+)">', + webpage, u'audio URLs') + formats = [ + { + 'format_id': fm[0], + 'url': fm[1], + 'vcodec': 'none', + } + for fm in + re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str) + ] + # No sorting, we don't know any more about these formats + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + } diff -Nru 
youtube-dl-2012.09.27/youtube_dl/extractor/rbmaradio.py youtube-dl-2014.02.17/youtube_dl/extractor/rbmaradio.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rbmaradio.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rbmaradio.py 2014-02-02 11:03:18.000000000 +0000 @@ -0,0 +1,55 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class RBMARadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' + _TEST = { + 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', + 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', + 'info_dict': { + 'id': 'ford-lopatin-live-at-primavera-sound-2011', + 'ext': 'mp3', + "uploader_id": "ford-lopatin", + "location": "Spain", + "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", + "uploader": "Ford & Lopatin", + "title": "Live at Primavera Sound 2011", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('videoID') + + webpage = self._download_webpage(url, video_id) + + json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', + webpage, 'json data', flags=re.MULTILINE) + + try: + data = json.loads(json_data) + except ValueError as e: + raise ExtractorError('Invalid JSON: ' + str(e)) + + video_url = data['akamai_url'] + '&cbr=256' + + return { + 'id': video_id, + 'url': video_url, + 'title': data['title'], + 'description': data.get('teaser_text'), + 'location': data.get('country_of_origin'), + 'uploader': data.get('host', {}).get('name'), + 'uploader_id': data.get('host', {}).get('slug'), + 'thumbnail': data.get('image', {}).get('large_url_2x'), + 'duration': data.get('duration'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/redtube.py youtube-dl-2014.02.17/youtube_dl/extractor/redtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/redtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/redtube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RedTubeIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.redtube.com/66418', + 'file': '66418.mp4', + # md5 varies from time to time, as in + # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295 + #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', + 'info_dict': { + "title": "Sucked on a toilet", + "age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + video_extension = 'mp4' + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + + video_url = self._html_search_regex( + r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') + + video_title = self._html_search_regex( + r'<h1 class="videoTitle[^"]*">(.+?)</h1>', + webpage, u'title') + + video_thumbnail = self._html_search_regex( + r'playerInnerHTML.+?<img\s+src="(.+?)"', + webpage, u'thumbnail', fatal=False) + + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + + return { + 'id': video_id, + 'url': video_url, + 'ext': video_extension, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'age_limit': age_limit, 
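+ # 'url' and 'title' above come from the two page regexes earlier in + # _real_extract; 'ext' is the static 'mp4' assigned to video_extension + # at the top of the method.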
+ } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ringtv.py youtube-dl-2014.02.17/youtube_dl/extractor/ringtv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ringtv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ringtv.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RingTVIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' + _TEST = { + "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30", + "file": "857645.mp4", + "md5": "d25945f5df41cdca2d2587165ac28720", + "info_dict": { + "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV', + "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id').split('-')[0] + webpage = self._download_webpage(url, video_id) + + if mobj.group('type') == 'news': + video_id = self._search_regex( + r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/ + embed_iframe/[0-9]+/video/([0-9]+)/''', + webpage, 'real video ID') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'addthis:description="([^"]+)"', + webpage, 'description', fatal=False) + final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % video_id + thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % video_id + + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'thumbnail': thumbnail_url, + 'description': description, + } + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ro220.py youtube-dl-2014.02.17/youtube_dl/extractor/ro220.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ro220.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ro220.py 2014-02-02 10:54:05.000000000 +0000 @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + compat_parse_qs, +) + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' + _TEST = { + "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", + 'file': 'LYV6doKo7f.mp4', + 'md5': '03af18b73a07b4088753930db7a34add', + 'info_dict': { + "title": "Luati-le Banii sez 4 ep 1", + "description": "Iata-ne reveniti dupa o binemeritata vacanta. 
Va astept si pe Facebook cu pareri si comentarii.", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + flashVars_str = self._search_regex( + r'<param name="flashVars" value="([^"]+)"', + webpage, 'flashVars') + flashVars = compat_parse_qs(flashVars_str) + + return { + '_type': 'video', + 'id': video_id, + 'ext': 'mp4', + 'url': flashVars['videoURL'][0], + 'title': flashVars['title'][0], + 'description': clean_html(flashVars['desc'][0]), + 'thumbnail': flashVars['preview'][0], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rottentomatoes.py youtube-dl-2014.02.17/youtube_dl/extractor/rottentomatoes.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rottentomatoes.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rottentomatoes.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,18 @@ +from __future__ import unicode_literals + +from .videodetective import VideoDetectiveIE + + +# It just uses the same method as videodetective.com, +# the internetvideoarchive.com is extracted from the og:video property +class RottenTomatoesIE(VideoDetectiveIE): + _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + 'file': '613340.mp4', + 'info_dict': { + 'title': 'TOY STORY 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', + }, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/roxwel.py youtube-dl-2014.02.17/youtube_dl/extractor/roxwel.py --- youtube-dl-2012.09.27/youtube_dl/extractor/roxwel.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/roxwel.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,49 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unified_strdate, determine_ext + + +class RoxwelIE(InfoExtractor): + _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + + _TEST = { + u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html', + u'file': u'passionpittakeawalklive.flv', + u'md5': u'd9dea8360a1e7d485d2206db7fe13035', + u'info_dict': { + u'title': u'Take A Walk (live)', + u'uploader': u'Passion Pit', + u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. 
', + }, + u'skip': u'Requires rtmpdump', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + filename = mobj.group('filename') + info_url = 'http://www.roxwel.com/api/videos/%s' % filename + info_page = self._download_webpage(info_url, filename, + u'Downloading video info') + + self.report_extraction(filename) + info = json.loads(info_page) + rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) + best_rate = rtmp_rates[-1] + url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) + rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url') + ext = determine_ext(rtmp_url) + if ext == 'f4v': + rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) + + return {'id': filename, + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': info['description'], + 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), + 'uploader': info['artist'], + 'uploader_id': info['artistname'], + 'upload_date': unified_strdate(info['dbdate']), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rtlnow.py youtube-dl-2014.02.17/youtube_dl/extractor/rtlnow.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rtlnow.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rtlnow.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,148 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, +) + + +class RTLnowIE(InfoExtractor): + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + _TESTS = [{ + 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + 'file': '90419.flv', + 'info_dict': { + 'upload_date': '20070416', + 'title': 'Ahornallee - Folge 1 - Der Einzug', + 'description': 'Folge 1 - Der Einzug', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }, + { + 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + 'file': '69756.flv', + 'info_dict': { + 'upload_date': '20120519', + 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + 'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }, + { + 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + 'file': '13883.flv', + 'info_dict': { + 'upload_date': '20090627', + 'title': 'Voxtours - Südafrika-Reporter II', + 'description': 'Südafrika-Reporter II', + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', + 'file': '99205.flv', + 'info_dict': { + 'upload_date': '20080928', + 'title': 'Medicopter 117 - Angst!', + 'description': 'Angst!', + 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' 
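+ # Only the RTL2 and Super RTL test entries assert a thumbnail; both + # point at the static autoimg.static-fra.de image host.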
+ }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', + 'file': '124903.flv', + 'info_dict': { + 'upload_date': '20130101', + 'title': 'Top Gear vom 01.01.2013', + 'description': 'Episode 1', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = 'http://' + mobj.group('url') + video_page_url = 'http://' + mobj.group('domain') + '/' + video_id = mobj.group('video_id') + + webpage = self._download_webpage(webpage_url, video_id) + + note_m = re.search(r'''(?sx) + <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) + <div[ ]id="playerteaser">''', webpage) + if note_m: + msg = clean_html(note_m.group(1)) + raise ExtractorError(msg) + + video_title = self._html_search_regex( + r'<title>(?P<title>[^<]+?)( \| [^<]*)?', + webpage, 'title') + playerdata_url = self._html_search_regex( + r'\'playerdata\': \'(?P[^\']+)\'', + webpage, 'playerdata_url') + + playerdata = self._download_webpage(playerdata_url, video_id) + mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]>', playerdata) + if mobj: + video_description = mobj.group('description') + if mobj.group('upload_date_Y'): + video_upload_date = mobj.group('upload_date_Y') + elif mobj.group('upload_date_y'): + video_upload_date = '20' + mobj.group('upload_date_y') + else: + video_upload_date = None + if video_upload_date: + video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') + else: + video_description = None + video_upload_date = None + self._downloader.report_warning('Unable to extract description and upload date') + + # Thumbnail: not every video has an thumbnail + mobj = re.search(r'', webpage) + if mobj: + video_thumbnail = mobj.group('thumbnail') + else: + video_thumbnail = None + + mobj = re.search(r']+>rtmpe://(?:[^/]+/){2})(?P[^\]]+)\]\]>', playerdata) + if mobj is None: + raise ExtractorError('Unable to extract media URL') + video_url = mobj.group('url') + video_play_path = 'mp4:' + mobj.group('play_path') + video_player_url = video_page_url + 'includes/vodplayer.swf' + + return { + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, + 'description': video_description, + 'upload_date': video_upload_date, + 'thumbnail': video_thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rutube.py youtube-dl-2014.02.17/youtube_dl/extractor/rutube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rutube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rutube.py 2014-01-28 02:32:22.000000000 +0000 @@ -0,0 +1,124 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json +import itertools + +from .common import InfoExtractor +from ..utils import ( + compat_str, + unified_strdate, + ExtractorError, +) + + +class RutubeIE(InfoExtractor): + IE_NAME = 'rutube' + IE_DESC = 'Rutube videos' + _VALID_URL = r'https?://rutube\.ru/video/(?P[\da-z]{32})' + + _TEST = { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', + 'info_dict': { + 'title': 'Раненный 
кенгуру забежал в аптеку', + 'description': 'http://www.ntdtv.ru ', + 'duration': 80, + 'uploader': 'NTDRussian', + 'uploader_id': '29790', + 'upload_date': '20131016', + }, + 'params': { + # It requires ffmpeg (m3u8 download) + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, + video_id, 'Downloading video JSON') + video = json.loads(api_response) + + api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, + video_id, 'Downloading trackinfo JSON') + trackinfo = json.loads(api_response) + + # Some videos don't have the author field + author = trackinfo.get('author') or {} + m3u8_url = trackinfo['video_balancer'].get('m3u8') + if m3u8_url is None: + raise ExtractorError('Couldn\'t find m3u8 manifest url') + + return { + 'id': video['id'], + 'title': video['title'], + 'description': video['description'], + 'duration': video['duration'], + 'view_count': video['hits'], + 'url': m3u8_url, + 'ext': 'mp4', + 'thumbnail': video['thumbnail_url'], + 'uploader': author.get('name'), + 'uploader_id': compat_str(author['id']) if author else None, + 'upload_date': unified_strdate(video['created_ts']), + 'age_limit': 18 if video['is_adult'] else 0, + } + + +class RutubeChannelIE(InfoExtractor): + IE_NAME = 'rutube:channel' + IE_DESC = 'Rutube channels' + _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + + _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + + def _extract_videos(self, channel_id, channel_title=None): + entries = [] + for pagenum in itertools.count(1): + api_response = self._download_webpage( + self._PAGE_TEMPLATE % (channel_id, pagenum), + channel_id, 'Downloading page %s' % pagenum) + page = json.loads(api_response) + results = page['results'] + if not results: + break + entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) + if not page['has_next']: + break + return self.playlist_result(entries, channel_id, channel_title) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + return self._extract_videos(channel_id) + + +class RutubeMovieIE(RutubeChannelIE): + IE_NAME = 'rutube:movie' + IE_DESC = 'Rutube movies' + _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + + _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie_id = mobj.group('id') + api_response = self._download_webpage( + self._MOVIE_TEMPLATE % movie_id, movie_id, + 'Downloading movie JSON') + movie = json.loads(api_response) + movie_name = movie['name'] + return self._extract_videos(movie_id, movie_name) + + +class RutubePersonIE(RutubeChannelIE): + IE_NAME = 'rutube:person' + IE_DESC = 'Rutube person videos' + _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' + + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/servingsys.py youtube-dl-2014.02.17/youtube_dl/extractor/servingsys.py --- youtube-dl-2012.09.27/youtube_dl/extractor/servingsys.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/servingsys.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + + +class ServingSysIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', + 'playlist': [{ + 'file': '29955898.flv', + 'md5': 'baed851342df6846eb8677a60a011a0f', + 'info_dict': { + 'title': 'AdAPPter_Hyundai_demo (1)', + 'duration': 74, + 'tbr': 1378, + 'width': 640, + 'height': 400, + }, + }, { + 'file': '29907998.flv', + 'md5': '979b4da2655c4bc2d81aeb915a8c5014', + 'info_dict': { + 'title': 'AdAPPter_Hyundai_demo (2)', + 'duration': 34, + 'width': 854, + 'height': 480, + 'tbr': 516, + }, + }], + 'params': { + 'playlistend': 2, + }, + 'skip': 'Blocked in the US [sic]', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + pl_id = mobj.group('id') + + vast_doc = self._download_xml(url, pl_id) + title = vast_doc.find('.//AdTitle').text + media = vast_doc.find('.//MediaFile').text + info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') + + doc = self._download_xml(info_url, pl_id, 'Downloading video info') + entries = [{ + '_type': 'video', + 'id': a.attrib['id'], + 'title': '%s (%s)' % (title, a.attrib['assetID']), + 'url': a.attrib['URL'], + 'duration': int_or_none(a.attrib.get('length')), + 'tbr': int_or_none(a.attrib.get('bitrate')), + 'height': int_or_none(a.attrib.get('height')), + 'width': int_or_none(a.attrib.get('width')), + } for a in doc.findall('.//AdditionalAssets/asset')] + + return { + '_type': 'playlist', + 'id': pl_id, + 'title': title, + 'entries': entries, + } + + \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/sina.py youtube-dl-2014.02.17/youtube_dl/extractor/sina.py --- youtube-dl-2012.09.27/youtube_dl/extractor/sina.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/sina.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + compat_urllib_parse, +) + + +class SinaIE(InfoExtractor): + _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ + ( + (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) + | + # This is used by external sites like Weibo + (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf) + ) + ''' + + _TESTS = [ + { + 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', + 'file': '110028898.flv', + 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', + 'info_dict': { + 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + } + }, + { + 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html', + 'info_dict': { + 'id': '101314253', + 'ext': 'flv', + 'title': '军方提高对朝情报监视级别', + }, + }, + ] + + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None + + def _extract_video(self, video_id): + data = compat_urllib_parse.urlencode({'vid': video_id}) + url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, + video_id, 'Downloading video url') + image_page = self._download_webpage( + 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, + video_id, 'Downloading thumbnail info') + + return {'id': video_id, + 'url': url_doc.find('./durl/url').text, + 'ext': 'flv', + 'title':
url_doc.find('./vname').text, + 'thumbnail': image_page.split('=')[1], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + video_id = mobj.group('id') + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = compat_urllib_request.Request(url) + request.get_method = lambda: 'HEAD' + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + elif video_id is None: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id') + + return self._extract_video(video_id) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/slashdot.py youtube-dl-2014.02.17/youtube_dl/extractor/slashdot.py --- youtube-dl-2012.09.27/youtube_dl/extractor/slashdot.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/slashdot.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,24 @@ +import re + +from .common import InfoExtractor + + +class SlashdotIE(InfoExtractor): + _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P.*?)(&|$)' + + _TEST = { + u'add_ie': ['Ooyala'], + u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', + u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', + u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', + u'info_dict': { + u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + ooyala_url = self._search_regex(r'', + webpage, 'json data') + info = json.loads(json_data) + desc = self._html_search_regex(r'
    .*?(.*?)

    ', + webpage, 'description', flags = re.DOTALL) + + thumbnail = self._search_regex(r'[\s.]*
    [\s.]*)(.*?)(?:)', webpage, 'subtitles_language_select', flags=re.DOTALL) + languages = re.findall(r'(?: