diff -Nru youtube-dl-2012.09.27/bin/youtube-dl youtube-dl-2014.02.17/bin/youtube-dl
--- youtube-dl-2012.09.27/bin/youtube-dl	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/bin/youtube-dl	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import youtube_dl
+
+if __name__ == '__main__':
+    youtube_dl.main()
diff -Nru youtube-dl-2012.09.27/build_exe.py youtube-dl-2014.02.17/build_exe.py
--- youtube-dl-2012.09.27/build_exe.py	2012-09-27 09:25:46.000000000 +0000
+++ youtube-dl-2014.02.17/build_exe.py	1970-01-01 00:00:00.000000000 +0000
@@ -1,48 +0,0 @@
-from distutils.core import setup
-import py2exe
-import sys, os
-
-"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
-
-# If run without args, build executables
-if len(sys.argv) == 1:
-    sys.argv.append("py2exe")
-
-# os.chdir(os.path.dirname(os.path.abspath(sys.argv[0]))) # conflict with wine-py2exe.sh
-sys.path.append('./youtube_dl')
-
-options = {
-    "bundle_files": 1,
-    "compressed": 1,
-    "optimize": 2,
-    "dist_dir": '.',
-    "dll_excludes": ['w9xpopen.exe']
-}
-
-console = [{
-    "script":"./youtube_dl/__main__.py",
-    "dest_base": "youtube-dl",
-}]
-
-init_file = open('./youtube_dl/__init__.py')
-for line in init_file.readlines():
-    if line.startswith('__version__'):
-        version = line[11:].strip(" ='\n")
-        break
-else:
-    version = ''
-
-setup(name='youtube-dl',
-      version=version,
-      description='Small command-line program to download videos from YouTube.com and other video sites',
-      url='https://github.com/rg3/youtube-dl',
-      packages=['youtube_dl'],
-
-      console = console,
-      options = {"py2exe": options},
-      zipfile = None,
-)
-
-import shutil
-shutil.rmtree("build")
-
diff -Nru youtube-dl-2012.09.27/CHANGELOG youtube-dl-2014.02.17/CHANGELOG
--- youtube-dl-2012.09.27/CHANGELOG	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/CHANGELOG	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,14 @@
+2013.01.02  Codename: GIULIA
+
+    * Add support for ComedyCentral clips
+    * Corrected Vimeo description fetching
+    * Added the --no-post-overwrites argument
+    * --verbose offers more environment info
+    * New info_dict field: uploader_id
+    * New updates system, with signature checking
+    * New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream
+    * Fixed IEs: BlipTv
+    * Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ
+    * Simplified IEs and test code
+    * Various (Python 3 and other) fixes
+    * Revamped and expanded tests
diff -Nru youtube-dl-2012.09.27/debian/changelog youtube-dl-2014.02.17/debian/changelog
--- youtube-dl-2012.09.27/debian/changelog	2012-10-10 21:23:23.000000000 +0000
+++ youtube-dl-2014.02.17/debian/changelog	2014-02-20 02:45:15.000000000 +0000
@@ -1,8 +1,333 @@
-youtube-dl (2012.09.27-1~ppa1) precise; urgency=low
+youtube-dl (2014.02.17-1~ppa1) precise; urgency=low
 
   * Backport from Debian unstable
 
- -- Francois Marier  Thu, 11 Oct 2012 10:22:56 +1300
+ -- Francois Marier  Thu, 20 Feb 2014 15:44:57 +1300
+
+youtube-dl (2014.02.17-1) unstable; urgency=medium
+
+  * The "Way too many changes release".
+  * Imported Upstream version 2013.12.23
+  * Imported Upstream version 2014.01.17.2
+  * Imported Upstream version 2014.02.17.
+    + There are too many goodies that this new release brings us,
+      including that it is possible to combine/merge/multiplex audio and
+      video formats that Youtube now offers separately (See the previous
+      notes about Youtube using DASH for video and audio).
+      .
+      Now, if you want a 480p video in H.264 format, High profile, with
+      128kbps AAC audio (this used to be Youtube's format 35), you can
+      specify format `-f 135+140` on the command-line, and so on. Only your
+      imagination is the limit.
+    + Fixes "using --list-subs triggers downloading video from vimeo".
+      Closes: #734647.
+  * debian/NEWS:
+    + Write news about new muxing feature of youtube-dl.
+  * debian/control:
+    + Massive update of supported sites in long description.
+    + Package complies with policy 3.9.5.
+
+ -- Rogério Brito  Mon, 17 Feb 2014 19:41:53 -0300
+
+youtube-dl (2013.12.04-1) unstable; urgency=low
+
+  * The "I still have a flu (but am getting better) release".
+  * Imported Upstream version 2013.12.04.
+    + Fixes "more youtube encrypted signature problems". Closes: #730556.
+    + Fixes "collegehumor.com - unicode encode error". Closes: #729730.
+    + Fixes listing 'youtube:search' twice in extractor list.
+      Thanks Jaime Marquínez Ferrándiz for the fix. Closes: #730730.
+  * debian/control: Update list of extractors.
+
+ -- Rogério Brito  Thu, 05 Dec 2013 02:24:18 -0200
+
+youtube-dl (2013.11.11-2) unstable; urgency=low
+
+  * debian/control: Add dependency on python-pkg-resources.
+    Closes: #729416. Thanks Josh Triplett.
+
+ -- Rogério Brito  Tue, 12 Nov 2013 16:34:16 -0200
+
+youtube-dl (2013.11.11-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.11.11. Highlights:
+    + Add support for many sites (see long description of the package).
+    + Fix support for many sites (Closes: #728424).
+    + Improve support for many sites (including CNN, Youtube).
+    + Improve support for download of subtitles.
+    + Allow downloading tracks marked as not 'streamable' via rtmpdump in
+      soundcloud.
+    + Avoid reencoding videos that are downloaded via HTTP Live Streaming
+      (Apple's version of DASH). These videos have to be downloaded with
+      ffmpeg (or avconv), as youtube-dl doesn't have a downloader for that
+      particular protocol.
+      .
+      See https://en.wikipedia.org/wiki/HTTP_Live_Streaming for more
+      information.
+  * debian/control: Update list of extractors.
+
+ -- Rogério Brito  Mon, 11 Nov 2013 23:06:01 -0200
+
+youtube-dl (2013.10.23-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.10.23.
+    + Fix time display for > 60 minutes. Closes: #724834.
+    + All adult-related sites should respect the age being set via the
+      `--age-limit` option (which can be put in a global configuration file
+      under the /etc directory), thus making youtube-dl safe for Debian Edu
+      and Debian Jr. Closes: #715407.
+    + Document that the output template accepts specification of
+      downloaded formats (template: %(format)s). In particular, this:
+      - Avoids resuming downloads with different formats. Closes: #689294.
+      - Allows downloading all available formats. Closes: #643831.
+  * debian/control:
+    + Update long description with list of supported sites.
+  * debian/NEWS:
+    + Update with notes about split audio/video downloads.
+
+ -- Rogério Brito  Fri, 25 Oct 2013 01:33:56 -0200
+
+youtube-dl (2013.10.04-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.10.04. Closes: #723592.
+
+ -- Rogério Brito  Fri, 04 Oct 2013 09:44:46 -0300
+
+youtube-dl (2013.10.01-1) unstable; urgency=low
+
+  * The "In a hurry release".
+  * Imported Upstream version 2013.10.01.
+    + Closes: #724321, #722343, #722298.
+
+ -- Rogério Brito  Thu, 03 Oct 2013 01:28:58 -0300
+
+youtube-dl (2013.08.29-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.29. Highlights:
+    + Fix subtitle downloading. (Closes: #721257)
+    + Add/improve support for cryptographic signatures of lengths 80, 82,
+      84, 86, 88 on youtube.
+    + Add/improve support for the following sites:
+      - Youtube: Add support for DASH videos. See:
+        https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+      - Slashdot: tv.slashdot.org
+      - News sites: CNN, NBC news, PBS.
+      - MIT sites: video.mit.edu, techtv.mit.edu
+      - Other sites: Youporn, hark.com, AddAnime, RTLnow, jeuxvideo.com,
+        VOXnow, collegehumor, the generic extractor, Vimeo, funnyordie,
+        statigram, utv.unistra.fr, canalc2.tv, XHamster, 220.ro,
+        trilulilu.ro, canalplus, appletrailers, addanime, orf.at,
+        kankan.com.
+    + Allow embedding of subtitles in mp4 videos.
+    + Allow the user to specify languages for subtitles.
+  * debian/changelog:
+    + Fix long line in the previous uploaded entry, to please lintian and
+      other tools.
+  * debian/control:
+    + Remove upper limit on the python version that we support.
+
+ -- Rogério Brito  Thu, 29 Aug 2013 21:36:16 -0300
+
+youtube-dl (2013.08.17-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.17. Highlights:
+    + Add/improve support for cryptographic signatures of lengths 83, 85,
+      86, 87, 89. (Closes: #719309).
+    + Fixing the signature of the lengths listed above makes it possible
+      to download many music videos (including those from VEVO). For
+      those (like me) that don't know what VEVO is/was, here is a snippet
+      from Wikipedia:
+
+      "Vevo, LLC (stylized vevo) is a joint venture music video website
+      operated by Sony Music Entertainment, Universal Music Group, Google
+      and Abu Dhabi Media with EMI licensing its content to the group
+      without taking an ownership stake."
+
+ -- Rogério Brito  Sun, 18 Aug 2013 08:03:58 -0300
+
+youtube-dl (2013.08.08-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.08.02
+  * Imported Upstream version 2013.08.08. Highlights:
+    + Improve GenericIE for compatibility with more sites.
+    + Youtube:
+      - Add/improve support for cryptographic signatures of lengths 79,
+        81, 83, 84, 85, 86, 87, 90, 92. (Closes: #718391, #717537, #716987)
+      - Support signatures with more than 2 parts.
+      - Add support for downloading recommended videos (via :ytrec)
+      - Support downloading videos from the user's "Watch Later" playlist.
+      - Add better support for "age protected videos".
+      - Add explicit support for 3D videos.
+    + Add support for:
+      - IGNE
+      - criterion.com
+      - Canalplus
+      - Livestream
+      - freesound.org
+      - thisav
+      - CondeNast
+      - ex.fm
+      - video.sina.com.cn
+      - 56.com
+      - Weibo
+      - roxwell.com
+      - kankan.com
+      - Ooyala
+      - videofy.me
+      - muzu.tv
+    + Improve support for:
+      - ComedyCentral.
+      - Instagram.
+      - MTV.
+      - Metacafe: support AnyClip videos.
+      - Soundcloud.
+      - traileraddict.
+      - keek.
+      - videos from VEVO (they just keep changing stuff to prevent people
+        downloading music videos).
+    + Fix support for:
+      - CollegeHumor.
+      - Break
+  * debian/control: Update long description with list of supported sites.
+
+ -- Rogério Brito  Wed, 07 Aug 2013 20:40:48 -0300
+
+youtube-dl (2013.07.10-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.07.10. (Closes: #716092)
+
+ -- Rogério Brito  Wed, 10 Jul 2013 18:53:05 -0300
+
+youtube-dl (2013.07.02-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.07.02. Highlights:
+    + Add support for hotnewhiphop.com.
+    + Add support for auengine.com.
+    + Add support for gamespot.com.
+    + Add support for RingTV.
+    + Add support for wat.tv.
+    + Add support for traileraddict.com.
+    + Add support for tu.tv.
+    + Add support for instagram.com.
+    + Improve support for encrypted signatures in Youtube.
+    + Improve support for videos from ArteTv.
+    + Minor improvements to the generic fallback information extractor.
+    + Change the default naming of videos to the template:
+      `%(title)s-%(id)s.%(ext)s`
+  * debian/control:
+    + Revise that package complies with standards version 3.9.4.
+      (No changes needed)
+
+ -- Rogério Brito  Thu, 04 Jul 2013 10:00:59 -0300
+
+youtube-dl (2013.06.34-1) unstable; urgency=low
+
+  * Imported Upstream version 2013.06.26
+  * Imported Upstream version 2013.06.33. Highlights:
+    + VimeoIE: allow downloading password-protected videos.
+      (Closes: #523326)
+    + YoutubeIE: A lot of work for downloading VEVO videos.
+    + Add support for Statigr.am.
+    + Add support for break.com.
+    + Add support for tudou.com. (Closes: #657148)
+    + Add support for Jukebox.
+    + Add specific code for VEVO videos.
+    + Modularize the code so that each site has its code in a
+      respective file (usable as python modules, as, for instance,
+      `youtube_dl.extractor.youtube`).
+    + Modularize the code so that the downloading class is split in two:
+      - YoutubeDL is the class that coordinates everything.
+      - FileDownloader gets a filename and an info dict and downloads the
+        video.
+    + Fix downloading from Google+ videos (new URL format).
+  * Imported Upstream version 2013.06.34. Highlights:
+    + Add support for downloading automatic transcribed subtitles.
+    + Add support for downloading subtitles in WebVTT (vtt) format.
+    + Add support for Wimp.com.
+    + Add support for CSpan. (Closes: #659623)
+    + Improve support for Youtube's crypto signing fields.
+  * debian/control:
+    + Add mplayer{,2} as recommends, needed for mms:// or rtsp:// schemes.
+    + Make explicit version of Python that we require.
+  * debian/rules: Use python2 plugin with debhelper.
+  * debian/control:
+    + Make determination of python versions automatic.
+    + Fix syntax error in Depends: field.
+
+ -- Rogério Brito  Sun, 23 Jun 2013 20:10:54 -0300
+
+youtube-dl (2013.06.21-2) unstable; urgency=low
+
+  * debian/control:
+    + Add missing Build-Dependency on python-pkg-resources.
+      Thanks to Sven Joachim for the hint (Closes: #713835)
+  * debian/rules:
+    + Fix misplaced README.txt.gz.
+      Thanks to Sven Joachim for the patch (Closes: #713834)
+
+ -- Rogério Brito  Sun, 23 Jun 2013 18:59:25 -0300
+
+youtube-dl (2013.06.21-1) unstable; urgency=low
+
+  * New upstream version.
+    Closes: #711733, #697086, #697594, #645925, #683795, #659515.
+  * Imported Upstream version 2013.05.14
+  * debian/watch:
+    + Update to track github tags. Thanks to Bart Martens for the rewrite.
+  * debian/README.source:
+    + Update my own instructions.
+  * Imported Upstream version 2013.05.23
+  * debian/watch:
+    + Download version specific for distributors.
+  * debian/control:
+    + Bump build-dep on debhelper to >= 9.
+  * debian/compat:
+    + Use level 9 of debhelper.
+  * Imported Upstream version 2013.06.21
+  * debian/install:
+    + Remove, as we let setup.py install things. (Closes: #699043)
+  * debian/rules:
+    + Simplify rules file with use of buildsystem.
+  * debian/control:
+    + Remove B-D on pandoc.
+    + Remove B-D-I on perl.
+    + Remove obsolete DMUA flag.
+  * debian/rules:
+    + Move incorrectly (bash completion) installed file to its right place.
+    + Remove egg-info directory in the clean target.
+
+ -- Rogério Brito  Wed, 22 May 2013 21:01:15 -0300
+
+youtube-dl (2012.12.11-1) unstable; urgency=low
+
+  * debian/watch:
+    + Adapt to new upstream distribution scheme.
+  * Imported Upstream version 2012.12.11
+  * debian/rules:
+    + Disable new test suites. They depend on network access.
+    + Remove repackaging need: Upstream adopted a new strategy.
+    + Leave it as vanilla as possible.
+    + Simplify the build process by not calling setup.py.
+  * debian/{control,manpages,rules,youtube-dl.pod}:
+    + Use manpage from upstream.
+
+ -- Rogério Brito  Tue, 18 Dec 2012 09:45:19 -0200
+
+youtube-dl (2012.09.27+dfsg1-1) unstable; urgency=low
+
+  * Urgency set to high to fix an RC bug.
+  * Imported Upstream version 2012.09.27+dfsg1:
+    + Remove binaries without sources. Thanks to Ansgar Burchardt for
+      notifying me. (Closes: #689493)
+  * debian/copyright:
+    + Rewrite in Copyright Format 1.0 for readability.
+  * debian/rules:
+    + Add a `get-orig-source` target to remove binaries without sources.
+  * debian/clean:
+    + Remove `*.pyc` files due to new upstream changes.
+  * debian/watch:
+    + Adapt for mangled debian version.
+
+ -- Rogério Brito  Sat, 17 Nov 2012 15:01:05 -0200
 
 youtube-dl (2012.09.27-1) unstable; urgency=low
diff -Nru youtube-dl-2012.09.27/debian/clean youtube-dl-2014.02.17/debian/clean
--- youtube-dl-2012.09.27/debian/clean	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/clean	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1,2 @@
 debian/youtube-dl.1
+youtube_dl/*.pyc
diff -Nru youtube-dl-2012.09.27/debian/compat youtube-dl-2014.02.17/debian/compat
--- youtube-dl-2012.09.27/debian/compat	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/compat	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1 @@
-7
+9
diff -Nru youtube-dl-2012.09.27/debian/control youtube-dl-2014.02.17/debian/control
--- youtube-dl-2012.09.27/debian/control	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/control	2014-02-17 22:46:55.000000000 +0000
@@ -3,13 +3,12 @@
 Priority: extra
 Maintainer: Rogério Brito
 Build-Depends:
- debhelper (>= 7.0.50~)
+ debhelper (>= 9),
+ python-pkg-resources
 Build-Depends-Indep:
- perl,
- python,
- zip
-Standards-Version: 3.9.3
-DM-Upload-Allowed: yes
+ python
+X-Python-Version: >= 2.6
+Standards-Version: 3.9.5
 Homepage: http://rg3.github.com/youtube-dl/
 Vcs-Git: git://anonscm.debian.org/collab-maint/youtube-dl.git
 Vcs-Browser: http://anonscm.debian.org/gitweb/?p=collab-maint/youtube-dl.git
@@ -17,11 +16,13 @@
 Package: youtube-dl
 Architecture: all
 Depends:
- python (>= 2.6),
- ${misc:Depends}
+ python-pkg-resources,
+ ${misc:Depends},
+ ${python:Depends}
 Recommends:
  libav-tools | ffmpeg,
  libav-tools | ffmpeg (>= 4:0.6) | ffprobe,
+ mplayer2 | mplayer,
  rtmpdump
 Description: downloader of videos from YouTube and other sites
  youtube-dl is a small command-line program to download videos from
@@ -33,7 +34,62 @@
  determine the best (or worst) quality video to grab. It supports
  downloading entire playlists and all videos from a given user.
  .
- Currently supported sites are: CollegeHumor, Comedy Central, Dailymotion,
- Facebook, Metacafe, MyVideo, Photobucket, The Escapist, Vimeo, Yahoo!,
- YouTube, blip.tv, depositfiles.com, video.google.com, xvideos, Soundcloud,
- InfoQ, Mixcloud, OpenClassRoom.
+ Currently supported sites (or features of sites) are:
+ .
+ 1up.com, 220.ro, 3sat, 4tube, 56.com, 8tracks, 9gag,
+ AcademicEarth:Course, AddAnime, anitube.se, Aparat, AppleTrailers,
+ archive.org videos, ARD, arte.tv, arte.tv:+7, arte.tv:creative,
+ arte.tv:ddc, arte.tv:future, AUEngine, bambuser, bambuser:channel,
+ Bandcamp, Bandcamp:album, BBC iPlayer, blinkx, blip.tv:user, BlipTV,
+ Bloomberg, Break, Brightcove, canalc2.tv, canalplus.fr, CBS, Channel 9,
+ Chilloutzone, Cinemassacre, clipfish, cliphunter, Clipsyndicate,
+ cmt.com, CNN, CNNBlogs, CollegeHumor, ComedyCentral, The Daily Show /
+ Colbert Report, Condé Nast media group: GQ, Glamour, Vanity Fair,
+ Vogue, W Magazine, WIRED, Criterion, Crunchyroll, C-SPAN,
+ culturebox.francetvinfo.fr, d8.tv, dailymotion, dailymotion:playlist,
+ dailymotion:user, daum.net, defense.gouv.fr, DepositFiles, Discovery,
+ Dotsub, Dropbox, EbaumsWorld, eHow, eitb.tv, El País, Escapist,
+ EveryonesMixtape, ex.fm, ExtremeTube, facebook, faz.net,
+ fernsehkritik.tv, fernsehkritik.tv:postecke, Firstpost.com,
+ Видеоархив - Первый канал, Flickr, france2.fr:generation-quoi,
+ FranceInter, France 2, 3, 4, 5 and Ô, francetvinfo.fr, Freesound,
+ freespeech.org, FunnyOrDie, Gamekings, GameSpot, Gametrailers,
+ Generic downloader that works on some sites, Hark, helsinki.fi,
+ HotNewHipHop, Howcast, Huffington Post, Hypem, ign.com,
+ Internet Movie Database trailers, Internet Movie Database lists, Ina,
+ InfoQ, Instagram, InternetVideoArchive, IPrima, ivi.ru,
+ ivi.ru compilations, JadoreCettePub, JeuxVideo, jpopsuki.tv, Jukebox,
+ justin.tv, Kankan, keek, KeezMovies, KhanAcademy, KickStarter,
+ KontrTube.ru - Труба зовёт, la7.tv, LIFE | NEWS, LiveLeak, livestream,
+ livestream:original, lynda.com videos, lynda.com online courses, m6,
+ MacGameStore trailers, Malemotion, MDR, metacafe, Metacritic, mixcloud,
+ Mofosex, Mooshare.biz, MPORA, MTV, mtviggy.com, muzu.tv, MySpace,
+ MySpass, myvideo, Naver, NBA, NBCNews, NDR.de - Mediathek, NDTV,
+ Newgrounds, National Film Board of Canada, nhl.com, NHL videocenter
+ category, ニコニコ動画, Normalboots, Novamov, Nowness, NowVideo, Ooyala,
+ ORF, PBS, photobucket, Google Plus, pluzz.francetv.fr, podomatic,
+ PornHd, PornHub, Pornotube, Pyvideo, radiofrance, RBMARadio, RedTube,
+ RingTV, RottenTomatoes, Roxwel, RTLnow, Rutube videos, Rutube channels,
+ Rutube movies, Rutube person videos, Yahoo screen search (Example:
+ "yvsearchall:purple fish"), ServingSys, Sina, Slashdot, Slideshare,
+ Smotri.com, Smotri.com broadcasts, Smotri.com community videos,
+ Smotri.com user videos, Sohu, soundcloud, soundcloud:set,
+ soundcloud:user, southpark.de, southparkstudios.com, Space, Spankwire,
+ Spiegel, Spike, Stanford Open ClassRoom, Statigram, Steam,
+ streamcloud.eu, StreamCZ, Syfy, SztvHu, Teamcoco, TechTalks,
+ techtv.mit.edu, TED, TF1, ThePlatform, ThisAV, tinypic.com videos,
+ tou.tv, Trilulilu, Tube8, Tudou, Tumblr, Tutv, tvp.pl, Unistra,
+ ustream, ustream:channel, Vbox7, VeeHD, Veoh, Вести.Ru, Vevo, Vice,
+ Viddler, Google Video search (Example: "gvsearch5:falling cat"),
+ video.mit.edu, VideoDetective, videofy.me, VideoPremium, viki, vimeo,
+ vimeo:album, vimeo:channel, vimeo:group, Review pages on vimeo,
+ vimeo:user, Vine, vk.com, Vube.com, wat.tv, Weibo, Wimp, Wistia,
+ WorldStarHipHop, XHamster, XNXX, XTube, XVideos, Yahoo screen,
+ Yahoo screen, YouJizz, Youku, YouPorn, YouTube.com, YouTube.com
+ channels, YouTube.com favourite videos, "ytfav" keyword (requires
+ authentication), Youtube watch history, "ythistory" keyword (requires
+ authentication), YouTube.com playlists,
+ YouTube.com recommended videos, "ytrec" keyword (requires
+ authentication), YouTube.com searches (Example: "ytsearch10:running
+ tortoise"), YouTube.com searches, newest videos first (Example:
+ "ytsearchdateall:falling cat"), YouTube.com (multi-season) shows,
+ YouTube.com subscriptions feed, "ytsubs" keyword (requires
+ authentication), YouTube.com top lists, "yttoplist:{channel}:{list
+ title}" (Example: "yttoplist:music:Top Tracks"), YouTube.com user
+ videos (URL or "ytuser" keyword), Youtube watch later list,
+ "ytwatchlater" keyword (requires authentication), ZDF.
diff -Nru youtube-dl-2012.09.27/debian/copyright youtube-dl-2014.02.17/debian/copyright
--- youtube-dl-2012.09.27/debian/copyright	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/copyright	2014-02-17 21:15:54.000000000 +0000
@@ -1,30 +1,33 @@
-This package was debianized by Robert S. Edmonds on
-Sun, 3 Sep 2006 19:43:27 -0400.
-
-The current maintainer is Rogério Brito.
-
-It was downloaded from:
-
-http://bitbucket.org/rg3/youtube-dl/raw/2010.04.04/youtube-dl
-
-Authors:
-
- * Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor,
-   metacafe.com InfoExtractor and YouTube playlist InfoExtractor.
- * Danny Colligan: YouTube search InfoExtractor, ideas and patches.
- * Many other people contributing patches, code, ideas and kind messages.
-   Too many to be listed here. You know who you are. Thank you very much.
-
-Copyright © 2006-2010 Ricardo Garcia Gonzalez
-
-License:
-
- The program is in the Public Domain.
-
- The packaging is licensed under the GNU GPL License:
-
- Copyright © 2006, Robert S. Edmonds.
- Copyright © 2009-2012, Rogério Brito.
-
-For the text of the GPL License in a Debian system, please see
-`/usr/share/common-licenses/GPL-2'.
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: youtube-dl
+Upstream-Contact: https://github.com/rg3/youtube-dl
+Source: git://github.com/rg3/youtube-dl.git
+
+Files: *
+Copyright: Not applicable.
+License: Public-domain
+
+Files: debian/*
+Copyright: © 2006, Robert S. Edmonds.
+           © 2009-2012, Rogério Brito.
+License: GPL-2+
+
+License: Public-domain
+ youtube-dl is released into the public domain by the copyright holders.
+
+License: GPL-2+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
diff -Nru youtube-dl-2012.09.27/debian/install youtube-dl-2014.02.17/debian/install
--- youtube-dl-2012.09.27/debian/install	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/install	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-youtube-dl usr/bin
diff -Nru youtube-dl-2012.09.27/debian/manpages youtube-dl-2014.02.17/debian/manpages
--- youtube-dl-2012.09.27/debian/manpages	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/manpages	2014-02-17 21:15:54.000000000 +0000
@@ -1 +1 @@
-debian/youtube-dl.1
+youtube-dl.1
diff -Nru youtube-dl-2012.09.27/debian/NEWS youtube-dl-2014.02.17/debian/NEWS
--- youtube-dl-2012.09.27/debian/NEWS	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/NEWS	2014-02-17 22:44:52.000000000 +0000
@@ -1,3 +1,79 @@
+youtube-dl (2014.02.17-1) unstable; urgency=medium
+
+  There are too many goodies that this new release brings us,
+  including that it is possible to combine/merge/multiplex audio and
+  video formats that Youtube now offers separately (See the previous
+  notes about Youtube using DASH for video and audio).
+
+  Now, if you want a 480p video in H.264 format, High profile, with
+  128kbps AAC audio (this used to be Youtube's format 35), you can
+  specify format `-f 135+140` on the command-line, and so on. Only your
+  imagination is the limit.
+
+  See http://cynic.cc/blog/posts/2014-02-17-youtube-dl_news/ for more
+  verbose news.
+
+ -- Rogério Brito  Mon, 17 Feb 2014 18:24:20 -0300
+
+youtube-dl (2013.10.23-1) unstable; urgency=low
+
+  From http://cynic.cc/blog/posts/2013-10-23-assorted_news/:
+
+  [13]Some people may have noticed, others may not, but when downloading
+  videos from Youtube, they apparently are getting more aggressive with
+  the use of [14]Dynamic Adaptive Streaming over HTTP (also called DASH)
+  and, as a result, some (perhaps going to be all in the near future?) of
+  the videos may not be available in the resolution/formats that you used
+  to like (like me, with format number 35).
+
+  By the way, one thing that is interesting with youtube videos provided
+  via DASH is that they are available in different streams: one for the
+  video and another for the audio.
+
+  What does this mean in practical terms for users of youtube-dl? Well,
+  if you wanted to download videos in resolutions like the 480p (format
+  35) that I mentioned, then you will probably have to change your way of
+  doing things, until a more automated solution is in place.
+
+  You will have to download both the audio and the video and, then,
+  "combine" them (that is, multiplex them) to create one "normal" video
+  file with both the audio and the video. I usually do this via:
+
+      ffmpeg -i audio.m4a -i video.mp4 -vcodec copy -acodec copy combined.mp4
+
+  If you prefer having a Matroska container instead of an mp4 container
+  (which, BTW, results in smaller muxing overhead), then you can use the
+  command line:
+
+      mkvmerge -o combined.mkv audio.m4a video.mp4
+
+  Oh, those m4a and mp4 extensions are a new addition that [15]I just
+  sent upstream (in the past, both would have been named with an
+  extension of mp4).
+
+  As an aside, I like formats 135 for video and 140 for audio, for the
+  reasons that I mentioned in a comment on issue 1612:
+
+      Otherwise, to download 480p videos (which I do for lectures and so
+      on with other projects of mine, like edx-dl) I have to call
+      youtube-dl twice: once for format 135 and another for format 140,
+      since the old (?) format 35 files are much smaller than the lower
+      resolution 360p files (due to the former being encoded in High
+      profile vs. the latter being encoded in Constrained Baseline
+      profile).
+
+  While this is unfortunate for some, this is a good thing for others: I
+  once had a blind user of youtube-dl asking me if he could avoid
+  downloading the whole video just to extract the audio, so that he saves
+  on bandwidth. Well, now this is possible.
+
+
+  13. https://github.com/rg3/youtube-dl/issues/1612
+  14. https://en.wikipedia.org/wiki/Dynamic%20Adaptive%20Streaming%20over%20HTTP
+  15. https://github.com/rg3/youtube-dl/pull/1622
+
+ -- Rogério Brito  Fri, 25 Oct 2013 01:25:41 -0200
+
 youtube-dl (2010.07.22-1) unstable; urgency=medium
 
   The upstream author has removed support for the -b ("best format") and
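With the merge support introduced in 2014.02.17 (described in the changelog
and NEWS entries above), the two-step download-then-mux procedure collapses
into a single invocation. A hypothetical session, where the watch URL is
only a placeholder, would be:

    youtube-dl -f 135+140 'https://www.youtube.com/watch?v=PLACEHOLDER'

youtube-dl fetches the 480p H.264 video stream (format 135) and the 128kbps
AAC audio stream (format 140) and multiplexes them with ffmpeg (or avconv).
The audio-only use case mentioned in the 2013.10.23 entry needs no muxing
at all:

    youtube-dl -f 140 'https://www.youtube.com/watch?v=PLACEHOLDER'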
diff -Nru youtube-dl-2012.09.27/debian/README.source youtube-dl-2014.02.17/debian/README.source
--- youtube-dl-2012.09.27/debian/README.source	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/README.source	2014-02-17 21:15:54.000000000 +0000
@@ -5,7 +5,7 @@
 to, assuming that one already has the packages `devscripts`,
 `git-buildpackage`, and `pristine-tar` installed:
 
-    gbp-clone ssh://git.debian.org/git/collab-maint/youtube-dl.git
+    gbp-clone --all ssh://git.debian.org/git/collab-maint/youtube-dl.git
     cd youtube-dl
     git-import-orig --pristine-tar --uscan
     [ Make here some adaptations, like updating debian/patches, if needed ]
@@ -15,4 +15,4 @@
 deviations, say, in the release process of upstream releases (e.g., if
 upstream changes the location where the tarballs reside).
 
- -- Rogério Brito, Sat, 17 Mar 2012 12:45:03 -0300
+ -- Rogério Brito, Wed, 22 May 2013 20:58:07 -0300
diff -Nru youtube-dl-2012.09.27/debian/rules youtube-dl-2014.02.17/debian/rules
--- youtube-dl-2012.09.27/debian/rules	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/rules	2014-02-17 21:15:54.000000000 +0000
@@ -1,11 +1,16 @@
 #!/usr/bin/make -f
 
 %:
-	dh $@
+	dh $@ --with python2 --buildsystem=python_distutils
 
-override_dh_installman:
-	pod2man -r "youtube-dl" -c "User commands" debian/youtube-dl.pod > debian/youtube-dl.1
-	dh_installman
+override_dh_install:
+	mv debian/youtube-dl/usr/etc debian/youtube-dl/
+	dh_install
 
-override_dh_python2:
+override_dh_installdocs:
+	dh_installdocs
+	mv debian/youtube-dl/usr/share/doc/youtube_dl/* debian/youtube-dl/usr/share/doc/youtube-dl
+	rmdir debian/youtube-dl/usr/share/doc/youtube_dl
 
-override_dh_pysupport:
+override_dh_clean:
+	dh_clean
+	rm -rf youtube_dl.egg-info
diff -Nru youtube-dl-2012.09.27/debian/watch youtube-dl-2014.02.17/debian/watch
--- youtube-dl-2012.09.27/debian/watch	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/watch	2014-02-17 21:15:54.000000000 +0000
@@ -1,2 +1,3 @@
 version=3
-http://githubredir.debian.net/github/rg3/youtube-dl (?:.*/)?(\d+\.\d+\.\d+)\.tar\.gz
+opts=dversionmangle=s/\+dfsg\d*$// \
+http://youtube-dl.org/downloads/(\d.*)/youtube-dl-(:?.*)\.tar\.gz
diff -Nru youtube-dl-2012.09.27/debian/youtube-dl.pod youtube-dl-2014.02.17/debian/youtube-dl.pod
--- youtube-dl-2012.09.27/debian/youtube-dl.pod	2012-09-28 05:26:26.000000000 +0000
+++ youtube-dl-2014.02.17/debian/youtube-dl.pod	1970-01-01 00:00:00.000000000 +0000
@@ -1,297 +0,0 @@
-# -*- pod -*-
-=head1 NAME
-
-youtube-dl - download videos from youtube.com or other video platforms
-
-=head1 SYNOPSIS
-
- youtube-dl [options] url ...
-
-=head1 DESCRIPTION
-
-youtube-dl is a script to download videos from youtube.com or any other of
-the supported video platforms.
-
-Currently supported sites are: CollegeHumor, Comedy Central, Dailymotion,
-Facebook, Metacafe, MyVideo, Photobucket, The Escapist, Vimeo, Yahoo!,
-YouTube, blip.tv, depositfiles.com, video.google.com, xvideos, Soundcloud,
-InfoQ, Mixcloud, OpenClassRoom.
-
-Once it is installed in your system, you should be able to call it from
-the command line. Usage instructions are easy. Use youtube-dl followed
-by a video URL or identifier. As an example, consider:
-
- youtube-dl "http://www.youtube.com/watch?v=foobar"
-
-The video will be saved to the file C<foobar.flv> in that example. Many
-YouTube.com videos are in Flash Video format and their extension would
-be C<flv>. Other videos are encoded in H.264 and these usually have the
-extension C<mp4>. In Linux and other unices, video players using a
-recent version of ffmpeg can play them. That includes MPlayer, VLC,
-xine, among others.
-
-=head1 OPTIONS
-
-B<youtube-dl> accepts options in the following categories
-
-=over
-
-=item -h, --help
-
-Print help text and exit.
-
-=item -v, --version
-
-Print program version and exit.
-
-=item -U, --update
-
-Update this program to the latest stable version.
-
-=item -i, --ignore-errors
-
-Ignore errors during download and continue processing.
-
-=item -r B<LIMIT>, --rate-limit=B<LIMIT>
-
-Limit the download speed to the specified maximum B<LIMIT> (e.g., 50k or 44.6m).
-
-=item -R B<RETRIES>, --retries=B<RETRIES>
-
-Number B<RETRIES> of retries for a given download (default is 10).
-
-=item --playlist-start=B<NUMBER>
-
-The number B<NUMBER> of the video in a playlist where we should start downloading
-(default is 1).
-
-=item --playlist-end=B<NUMBER>
-
-The number B<NUMBER> of the video in a playlist where we should stop downloading
-(default is -1, which stands for the last video in the playlist).
-
-=item --dump-user-agent
-
-Display how youtube-dl will identify itself (the User-Agent string) to the
-remote server.
-
-=item -u B<USERNAME>, --username=B<USERNAME>
-
-Specify the youtube account username B<USERNAME>. Some videos require an
-account to be downloaded, mostly because they're flagged as mature
-content.
-
-=item -p B<PASSWORD>, --password=B<PASSWORD>
-
-Like the username, specifies the account password to be B<PASSWORD>.
-
-=item -n, --netrc
-
-Get authentication data from the standard unix .netrc file in the user's
-home directory. The machine name for this usage is youtube.
-
-=item -f B<FORMAT>, --format=B<FORMAT>
-
-Specify the video format (quality) in which to download the video.
-
-For youtube.com, in particular, the meaning of the format codes is given as:
-
-=over
-
-=item WebM video at 480p: 43
-
-=item WebM video at 720p: 45
-
-=item H264 video in MP4 container at 480p: 18
-
-=item H264 video in MP4 container at 720p: 22
-
-=item H264 video in MP4 container at 1080p: 37
-
-=item H264 video in FLV container at 360p: 34
-
-=item H264 video in FLV container at 480p: 35
-
-=item H263 video at 240p: 5
-
-=item 3GP video: 17
-
-=back
-
-Note that not all videos are available in all formats and that other
-sites supported by B<youtube-dl> may have different conventions for
-their video formats.
-
-By default, youtube-dl will download the best available format; if you want
-to download the worst format to save bandwidth and time, use C<-f> I<worst>.
-
-=item --all-formats
-
-Downloads all formats for which a video may be available.
-
-=item --max-quality=B<QUALITY>
-
-Limit the maximum quality of the videos to download to B<QUALITY>.
-
-=item -q, --quiet
-
-Activates quiet mode, avoiding many messages being written to the
-terminal.
-
-=item -s, --simulate
-
-Simulate the operation, but do not download the video. Useful for
-testing.
-
-=item -g, --get-url
-
-Simulate the operation, like quiet mode, but show the URL that would be
-used to download the video. Can be used with other download tools like
-wget or aria2c.
-
-=item -e, --get-title
-
-Simulate the operation, like quiet mode, but show the title of the video
-that would be downloaded.
-
-=item --get-thumbnail
-
-Simulate the operation, like quiet mode, but print the URL of the video's
-thumbnail.
-
-=item --get-description
-
-Simulate the operation, like quiet mode, but print the description of the
-video.
-
-=item --get-filename
-
-Simulate the operation, like quiet mode, but print the output filename.
-
-=item --no-progress
-
-Do not print the progress bar during downloads.
-
-=item --console-title
-
-If possible, set the title of the console window with the progress of the
-download.
-
-=item -t, --title
-
-Use the title of the video in the file name used to download the video.
-
-=item -l, --literal
-
-Use the literal title of the video in file name used to download the
-video. Can contain "weird" characters that are not filtered like with
-the -t option.
-
-=item -A, --auto-number
-
-When downloading multiple videos from a playlist, automatically number them,
-in sequence, starting from 00000.
-
-=item -o B<TEMPLATE>, --output=B<TEMPLATE>
-
-Specify a template B<TEMPLATE> for the names of the files to be created when
-they are downloaded. The default filename is video_id.flv. But you can
-also use the video title in the filename with the C<-t> or C<--title>
-option, or preserve the literal title in the filename with the C<-l> or
-C<--literal> option.
-
-=item -a B<FILE>, --batch-file=B<FILE>
-
-Specify the name of a file containing URLs of videos to download from
-youtube in batch mode. The file must contain one URL per line.
-
-=item -w, --no-overwrites
-
-Do not overwrite already existing files.
-
-=item -c, --continue
-
-Resume partially downloaded files.
-
-=item --cookies=B<FILE>
-
-Store the received cookies to file B<FILE> (the "cookie jar").
-
-=item --no-part
-
-Do not append the I<.part> suffix to files that have not yet been completed.
-
-=item --no-mtime
-
-Do not use the I<Last-modified> header to set the file modification time.
-
-=item --extract-audio
-
-Create an audio-only file extracted from the video downloaded. Requires that
-ffmpeg and ffprobe be installed.
-
-=item --audio-format=B<FORMAT>
-
-Set the audio format to be used for the extraction. Possible values are
-I<best>, I<aac>, I<mp3>, with I<best> being the default.
-
-=back
-
-=head1 OUTPUT TEMPLATE
-
-The -o option allows users to indicate a template for the output file
-names. The basic usage is not to set any template arguments when
-downloading a single file, like in youtube-dl -o funny_video.flv
-"http://some/video". However, it may contain special sequences that will
-be replaced when downloading each video.
-
-The special sequences have the format C<%(NAME)s>. To clarify, that's a
-percent symbol followed by a name in parentheses, followed by a
-lowercase S. Allowed names are:
-
-=over
-
-=item id
-
-The sequence will be replaced by the video identifier.
-
-=item url
-
-The sequence will be replaced by the video URL.
-
-=item uploader
-
-The sequence will be replaced by the nickname of the person who uploaded
-the video.
-
-=item title
-
-The sequence will be replaced by the literal video title.
-
-=item stitle
-
-The sequence will be replaced by a simplified video title, restricted to
-alphanumeric characters and dashes.
-
-=item ext
-
-The sequence will be replaced by the appropriate extension (like C<flv>
-or C<mp4>).
-
-=item epoch
-
-The sequence will be replaced by the Unix epoch when creating the file.
-
-=back
-
-As you may have guessed, the default template is C<%(id)s.%(ext)s>. When
-some command line options are used, it's replaced by other templates
-like C<%(title)s-%(id)s.%(ext)s>. You can specify your own.
-
-=head1 AUTHOR
-
-youtube-dl was written by Ricardo Garcia Gonzalez and many contributors
-from all around the internet. This manpage was written by Rogerio Brito.
-
-=cut
diff -Nru youtube-dl-2012.09.27/devscripts/bash-completion.in youtube-dl-2014.02.17/devscripts/bash-completion.in
--- youtube-dl-2012.09.27/devscripts/bash-completion.in	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/bash-completion.in	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,29 @@
+__youtube_dl()
+{
+    local cur prev opts fileopts diropts keywords
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
+    opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
+    diropts="--cache-dir"
+
+    if [[ ${prev} =~ ${fileopts} ]]; then
+        COMPREPLY=( $(compgen -f -- ${cur}) )
+        return 0
+    elif [[ ${prev} =~ ${diropts} ]]; then
+        COMPREPLY=( $(compgen -d -- ${cur}) )
+        return 0
+    fi
+
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
+        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+        return 0
+    fi
+}
+
+complete -F __youtube_dl youtube-dl
diff -Nru youtube-dl-2012.09.27/devscripts/bash-completion.py youtube-dl-2014.02.17/devscripts/bash-completion.py
--- youtube-dl-2012.09.27/devscripts/bash-completion.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/bash-completion.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
+import youtube_dl
+
+BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
+BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
+
+def build_completion(opt_parser):
+    opts_flag = []
+    for group in opt_parser.option_groups:
+        for option in group.option_list:
+            # for every long flag
+            opts_flag.append(option.get_opt_string())
+    with open(BASH_COMPLETION_TEMPLATE) as f:
+        template = f.read()
+    with open(BASH_COMPLETION_FILE, "w") as f:
+        # just using the special char
+        filled_template = template.replace("{{flags}}", " ".join(opts_flag))
+        f.write(filled_template)
+
+parser = youtube_dl.parseOpts()[0]
+build_completion(parser)
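The only templating step in bash-completion.py above is the literal
{{flags}} substitution into bash-completion.in. A reduced, self-contained
sketch of that mechanism follows; the option strings are stand-ins for the
real parseOpts() output, not part of the package:

    #!/usr/bin/env python
    # Illustrative reduction of devscripts/bash-completion.py: collect the
    # long option strings and splice them into the {{flags}} placeholder.
    template = 'opts="{{flags}}"'
    opts_flag = ['--help', '--version', '--update']  # stand-ins only
    print(template.replace('{{flags}}', ' '.join(opts_flag)))
    # prints: opts="--help --version --update"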
diff -Nru youtube-dl-2012.09.27/devscripts/buildserver.py youtube-dl-2014.02.17/devscripts/buildserver.py
--- youtube-dl-2012.09.27/devscripts/buildserver.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/buildserver.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import functools
+import sys
+import threading
+import traceback
+import os.path
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+    allow_reuse_address = True
+
+
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+    _fields_ = [
+        ('lpServiceName', LPTSTR),
+        ('lpServiceProc', START_CALLBACK)
+    ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+    ctypes.c_int,     # return
+    ctypes.c_int,     # dwControl
+    ctypes.c_int,     # dwEventType
+    ctypes.c_void_p,  # lpEventData,
+    ctypes.c_void_p,  # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+    ar = (c_type * len(py_array))()
+    ar[:] = py_array
+    return ar
+
+
+def win_OpenSCManager():
+    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+    if not res:
+        raise Exception('Opening service manager failed - '
+                        'are you running this as administrator?')
+    return res
+
+
+def win_install_service(service_name, cmdline):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.CreateServiceW(
+            manager, service_name, None,
+            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+            cmdline, None, None, None, None, None)
+        if not h:
+            raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+        advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.OpenServiceW(manager, service_name, DELETE)
+        if not h:
+            raise OSError('Could not find service %s: %s' % (
+                service_name, ctypes.FormatError()))
+
+        try:
+            if not advapi32.DeleteService(h):
+                raise OSError('Deletion failed: %s' % ctypes.FormatError())
+        finally:
+            advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+        f.write(msg + '\n')
+
+    event_log = advapi32.RegisterEventSourceW(None, service_name)
+    if not event_log:
+        raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+    try:
+        type_id = 0x0001 if is_error else 0x0004
+        event_id = 0xc0000000 if is_error else 0x40000000
+        lines = _ctypes_array(LPTSTR, [msg])
+
+        if not advapi32.ReportEventW(
+                event_log, type_id, 0, event_id, None, len(lines), 0,
+                lines, None):
+            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+    finally:
+        advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+    try:
+        raise ValueError('Handler called with args ' + repr(args))
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_set_status(handle, status_code):
+    svcStatus = SERVICE_STATUS()
+    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+    svcStatus.dwCurrentState = status_code
+    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+    svcStatus.dwServiceSpecificExitCode = 0
+
+    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+    try:
+        # args = [argv_raw[i].value for i in range(argc)]
+        stop_event = threading.Event()
+        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
+        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+        if not h:
+            raise OSError('Handler registration failed: %s' %
+                          ctypes.FormatError())
+
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_start(service_name, real_main):
+    try:
+        cb = START_CALLBACK(
+            functools.partial(win_service_main, service_name, real_main))
+        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+            SERVICE_TABLE_ENTRY(
+                service_name,
+                cb
+            ),
+            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+        ])
+
+        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--install',
+                        action='store_const', dest='action', const='install',
+                        help='Launch at Windows startup')
+    parser.add_argument('-u', '--uninstall',
+                        action='store_const', dest='action', const='uninstall',
+                        help='Remove Windows service')
+    parser.add_argument('-s', '--service',
+                        action='store_const', dest='action', const='service',
+                        help='Run as a Windows service')
+    parser.add_argument('-b', '--bind', metavar='',
+                        action='store', default='localhost:8142',
+                        help='Bind to host:port (default %default)')
+    options = parser.parse_args(args=args)
+
+    if options.action == 'install':
+        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+        win_install_service(SVCNAME, cmdline)
+        return
+
+    if options.action == 'uninstall':
+        win_uninstall_service(SVCNAME)
+        return
+
+    if options.action == 'service':
+        win_service_start(SVCNAME, main)
+        return
+
+    host, port_str = options.bind.split(':')
+    port = int(port_str)
+
+    print('Listening on %s:%d' % (host, port))
+    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+    thr = threading.Thread(target=srv.serve_forever)
+    thr.start()
+    input('Press ENTER to shut down')
+    srv.shutdown()
+    thr.join()
+
+
+def rmtree(path):
+    for name in os.listdir(path):
+        fname = os.path.join(path, name)
+        if os.path.isdir(fname):
+            rmtree(fname)
+        else:
+            os.chmod(fname, 0o666)
+            os.remove(fname)
+    os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+    def __init__(self, output, code=500):
+        self.output = output
+        self.code = code
+
+    def __str__(self):
+        return self.output
+
+
+class HTTPError(BuildError):
+    pass
+
+
+class PythonBuilder(object):
+    def __init__(self, **kwargs):
+        pythonVersion = kwargs.pop('python', '2.7')
+        try:
+            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+            try:
+                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+            finally:
+                _winreg.CloseKey(key)
+        except Exception:
+            raise BuildError('No such Python version: %s' % pythonVersion)
+
+        super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+    def __init__(self, **kwargs):
+        try:
+            self.user, self.repoName = kwargs['path'][:2]
+            self.rev = kwargs.pop('rev')
+        except ValueError:
+            raise BuildError('Invalid path')
+        except KeyError as e:
+            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.basePath = tempfile.mkdtemp(dir=path)
+        self.buildPath = os.path.join(self.basePath, 'build')
+
+        super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+    def build(self):
+        try:
+            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+    def __init__(self, **kwargs):
+        if self.repoName != 'youtube-dl':
+            raise BuildError('Invalid repository "%s"' % self.repoName)
+        if self.user not in self.authorizedUsers:
+            raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+        super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        try:
+            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+                                    cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+    def __init__(self, **kwargs):
+        self.handler = kwargs.pop('handler')
+        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+        if not self.srcPath.startswith(self.buildPath):
+            raise HTTPError(self.srcPath, 401)
+
+        super(DownloadBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        if not os.path.exists(self.srcPath):
+            raise HTTPError('No such file', 404)
+        if os.path.isdir(self.srcPath):
+            raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+        self.handler.send_response(200)
+        self.handler.send_header('Content-Type', 'application/octet-stream')
+        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+        self.handler.end_headers()
+
+        with open(self.srcPath, 'rb') as src:
+            shutil.copyfileobj(src, self.handler.wfile)
+
+        super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+    def build(self):
+        try:
+            rmtree(self.basePath)
+        except Exception as e:
+            print('WARNING deleting "%s": %s' % (self.basePath, e))
+
+        super(CleanupTempDir, self).build()
+
+
+class Null(object):
+    def __init__(self, **kwargs):
+        pass
+
+    def start(self):
+        pass
+
+    def close(self):
+        pass
+
+    def build(self):
+        pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+    pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+    actionDict = { 'build': Builder, 'download': Builder }  # They're the same, no more caching.
+
+    def do_GET(self):
+        path = urlparse.urlparse(self.path)
+        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+        action, _, path = path.path.strip('/').partition('/')
+        if path:
+            path = path.split('/')
+            if action in self.actionDict:
+                try:
+                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
+                    builder.start()
+                    try:
+                        builder.build()
+                    finally:
+                        builder.close()
+                except BuildError as e:
+                    self.send_response(e.code)
+                    msg = unicode(e).encode('UTF-8')
+                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+                    self.send_header('Content-Length', len(msg))
+                    self.end_headers()
+                    self.wfile.write(msg)
+                except HTTPError as e:
+                    self.send_response(e.code, str(e))
+            else:
+                self.send_response(500, 'Unknown build method "%s"' % action)
+        else:
+            self.send_response(500, 'Malformed URL')
+
+#==============================================================================
+
+if __name__ == '__main__':
+    main()
diff -Nru youtube-dl-2012.09.27/devscripts/check-porn.py youtube-dl-2014.02.17/devscripts/check-porn.py
--- youtube-dl-2012.09.27/devscripts/check-porn.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/check-porn.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+
+A second approach implemented relies on a list of porn domains, to activate it
+pass the list filename as the only argument
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_parse_urlparse
+from youtube_dl.utils import compat_urllib_request
+
+if len(sys.argv) > 1:
+    METHOD = 'LIST'
+    LIST = open(sys.argv[1]).read().decode('utf8').strip()
+else:
+    METHOD = 'EURISTIC'
+
+for test in get_testcases():
+    if METHOD == 'EURISTIC':
+        try:
+            webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+        except:
+            print('\nFail: {0}'.format(test['name']))
+            continue
+
+        webpage = webpage.decode('utf8', 'replace')
+
+        RESULT = 'porn' in webpage.lower()
+
+    elif METHOD == 'LIST':
+        domain = compat_urllib_parse_urlparse(test['url']).netloc
+        if not domain:
+            print('\nFail: {0}'.format(test['name']))
+            continue
+        domain = '.'.join(domain.split('.')[-2:])
+
+        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
+
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+                   or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+                         and test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+print()
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/add-version.py youtube-dl-2014.02.17/devscripts/gh-pages/add-version.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/add-version.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/add-version.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+import json
+import sys
+import hashlib
+import os.path
+
+
+if len(sys.argv) <= 1:
+    print('Specify the version number as parameter')
+    sys.exit()
+version = sys.argv[1]
+
+with open('update/LATEST_VERSION', 'w') as f:
+    f.write(version)
+
+versions_info = json.load(open('update/versions.json'))
+if 'signature' in versions_info:
+    del versions_info['signature']
+
+new_version = {}
+
+filenames = {
+    'bin': 'youtube-dl',
+    'exe': 'youtube-dl.exe',
+    'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
+for key, filename in filenames.items():
+    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
+    sha256sum = hashlib.sha256(data).hexdigest()
+    new_version[key] = (url, sha256sum)
+
+versions_info['versions'][version] = new_version
+versions_info['latest'] = version
+
+with open('update/versions.json', 'w') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py	1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py	2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import hashlib
+import shutil
+import subprocess
+import tempfile
+import urllib.request
+import json
+
+versions_info = json.load(open('update/versions.json'))
+version = versions_info['latest']
+URL = versions_info['versions'][version]['bin'][0]
+
+data = urllib.request.urlopen(URL).read()
+
+# Read template page
+with open('download.html.in', 'r', encoding='utf-8') as tmplf:
+    template = tmplf.read()
+
+md5sum = hashlib.md5(data).hexdigest()
+sha1sum = hashlib.sha1(data).hexdigest()
+sha256sum = hashlib.sha256(data).hexdigest()
+template = template.replace('@PROGRAM_VERSION@', version)
+template = template.replace('@PROGRAM_URL@', URL)
+template = template.replace('@PROGRAM_MD5SUM@', md5sum)
+template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
+template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
+template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
+template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/generate-download.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/generate-download.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import hashlib
+import shutil
+import subprocess
+import tempfile
+import urllib.request
+import json
+
+versions_info = json.load(open('update/versions.json'))
+version = versions_info['latest']
+URL = versions_info['versions'][version]['bin'][0]
+
+data = urllib.request.urlopen(URL).read()
+
+# Read template page
+with open('download.html.in', 'r', encoding='utf-8') as tmplf:
+    template = tmplf.read()
+
+md5sum = hashlib.md5(data).hexdigest()
+sha1sum = hashlib.sha1(data).hexdigest()
+sha256sum = hashlib.sha256(data).hexdigest()
+template = template.replace('@PROGRAM_VERSION@', version)
+template = template.replace('@PROGRAM_URL@', URL)
+template = template.replace('@PROGRAM_MD5SUM@', md5sum)
+template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
+template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
+template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
+template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
+with open('download.html', 'w', encoding='utf-8') as dlf:
+    dlf.write(template)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/sign-versions.py youtube-dl-2014.02.17/devscripts/gh-pages/sign-versions.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/sign-versions.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/sign-versions.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+import rsa
+import json
+from binascii import hexlify
+
+try:
+    input = raw_input
+except NameError:
+    pass
+
+versions_info = json.load(open('update/versions.json'))
+if 'signature' in versions_info:
+    del versions_info['signature']
+
+print('Enter the PKCS1 private key, followed by a blank line:')
+privkey = b''
+while True:
+    try:
+        line = input()
+    except EOFError:
+        break
+    if line == '':
+        break
+    privkey += line.encode('ascii') + b'\n'
+privkey = rsa.PrivateKey.load_pkcs1(privkey)
+
+signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
+print('signature: ' + signature)
+
+versions_info['signature'] = signature
+json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
\ No newline at end of file
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-copyright.py youtube-dl-2014.02.17/devscripts/gh-pages/update-copyright.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-copyright.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-copyright.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import with_statement
+
+import datetime
+import glob
+import io  # For Python 2 compatibility
+import os
+import re
+
+year = str(datetime.datetime.now().year)
+for fn in glob.glob('*.html*'):
+    with io.open(fn, encoding='utf-8') as f:
+        content = f.read()
+    newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
+    if content != newc:
+        tmpFn = fn + '.part'
+        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
+            outf.write(newc)
+        os.rename(tmpFn, fn)
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-feed.py youtube-dl-2014.02.17/devscripts/gh-pages/update-feed.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-feed.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-feed.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import datetime
+import io
+import json
+import textwrap
+
+
+atom_template = textwrap.dedent("""\
+    <?xml version="1.0" encoding="utf-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+        <link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
+        <title>youtube-dl releases</title>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
+        <updated>@TIMESTAMP@</updated>
+        @ENTRIES@
+    </feed>""")
+
+entry_template = textwrap.dedent("""
+    <entry>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
+        <title>New version @VERSION@</title>
+        <link href="http://rg3.github.io/youtube-dl" />
+        <content type="xhtml">
+            <div xmlns="http://www.w3.org/1999/xhtml">
+                Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
+            </div>
+        </content>
+        <author>
+            <name>The youtube-dl maintainers</name>
+        </author>
+        <updated>@TIMESTAMP@</updated>
+    </entry>
+    """)
+
+now = datetime.datetime.now()
+now_iso = now.isoformat() + 'Z'
+
+atom_template = atom_template.replace('@TIMESTAMP@', now_iso)
+
+versions_info = json.load(open('update/versions.json'))
+versions = list(versions_info['versions'].keys())
+versions.sort()
+
+entries = []
+for v in versions:
+    fields = v.split('.')
+    year, month, day = map(int, fields[:3])
+    faked = 0
+    patchlevel = 0
+    while True:
+        try:
+            datetime.date(year, month, day)
+        except ValueError:
+            day -= 1
+            faked += 1
+            assert day > 0
+            continue
+        break
+    if len(fields) >= 4:
+        try:
+            patchlevel = int(fields[3])
+        except ValueError:
+            patchlevel = 1
+    timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel)
+
+    entry = entry_template.replace('@TIMESTAMP@', timestamp)
+    entry = entry.replace('@VERSION@', v)
+    entries.append(entry)
+
+entries_str = textwrap.indent(''.join(entries), '\t')
+atom_template = atom_template.replace('@ENTRIES@', entries_str)
+
+with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
+    atom_file.write(atom_template)
+
diff -Nru youtube-dl-2012.09.27/devscripts/gh-pages/update-sites.py youtube-dl-2014.02.17/devscripts/gh-pages/update-sites.py
--- youtube-dl-2012.09.27/devscripts/gh-pages/update-sites.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/gh-pages/update-sites.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
+        ie_desc = getattr(ie, 'IE_DESC', None)
+        if ie_desc is False:
+            continue
+        elif ie_desc is not None:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('<li>{}</li>'.format(ie_html))
+
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+    main()
diff -Nru youtube-dl-2012.09.27/devscripts/make_readme.py youtube-dl-2014.02.17/devscripts/make_readme.py
--- youtube-dl-2012.09.27/devscripts/make_readme.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/devscripts/make_readme.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,24 @@
+import io
+import sys
+import re
+
+README_FILE = 'README.md'
+helptext = sys.stdin.read()
+
+if isinstance(helptext, bytes):
+    helptext = helptext.decode('utf-8')
+
+with io.open(README_FILE, encoding='utf-8') as f:
+    oldreadme = f.read()
+
+header = oldreadme[:oldreadme.index('# OPTIONS')]
+footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+
+options = helptext[helptext.index(' General Options:') + 19:]
+options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M)
+options = '# OPTIONS\n' + options + '\n'
+
+with io.open(README_FILE, 'w', encoding='utf-8') as f:
+    f.write(header)
+    f.write(options)
+    f.write(footer)
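+
+# This script expects youtube-dl's --help text on stdin; the Makefile
+# invokes it as:
+#   COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py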
(y/n) " -n 1 +wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +mkdir -p "build/$version" +mv youtube-dl youtube-dl.exe "build/$version" +mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" +RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" +(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS) +(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS) +(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) +(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) +git checkout HEAD -- youtube-dl youtube-dl.exe + +/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done +scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ +ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" +ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" + +/bin/echo -e "\n### Now switching to gh-pages..." +git clone --branch gh-pages --single-branch . build/gh-pages +ROOT=$(pwd) +( + set -e + ORIGIN_URL=$(git config --get remote.origin.url) + cd build/gh-pages + "$ROOT/devscripts/gh-pages/add-version.py" $version + "$ROOT/devscripts/gh-pages/update-feed.py" + "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" + "$ROOT/devscripts/gh-pages/generate-download.py" + "$ROOT/devscripts/gh-pages/update-copyright.py" + "$ROOT/devscripts/gh-pages/update-sites.py" + git add *.html *.html.in update + git commit -m "release $version" + git push "$ROOT" gh-pages + git push "$ORIGIN_URL" gh-pages +) +rm -rf build + +make pypi-files +echo "Uploading to PyPi ..." +python setup.py sdist upload +make clean + +/bin/echo -e "\n### DONE!" diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper_exe/setup.py youtube-dl-2014.02.17/devscripts/transition_helper_exe/setup.py --- youtube-dl-2012.09.27/devscripts/transition_helper_exe/setup.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper_exe/setup.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,12 @@ +from distutils.core import setup +import py2exe + +py2exe_options = { + "bundle_files": 1, + "compressed": 1, + "optimize": 2, + "dist_dir": '.', + "dll_excludes": ['w9xpopen.exe'] +} + +setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None) \ No newline at end of file diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper_exe/youtube-dl.py youtube-dl-2014.02.17/devscripts/transition_helper_exe/youtube-dl.py --- youtube-dl-2012.09.27/devscripts/transition_helper_exe/youtube-dl.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper_exe/youtube-dl.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +import sys, os +import urllib2 +import json, hashlib + +def rsa_verify(message, signature, key): + from struct import pack + from hashlib import sha256 + from sys import version_info + def b(x): + if version_info[0] == 2: return x + else: return x.encode('latin1') + assert(type(message) == type(b(''))) + block_size = 0 + n = key[0] + while n: + block_size += 1 + n >>= 8 + signature = pow(int(signature, 16), key[1], key[0]) + raw_bytes = [] + while signature: + raw_bytes.insert(0, pack("B", signature & 0xFF)) + signature >>= 8 + signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes) + if signature[0:2] != b('\x00\x01'): return False + signature = signature[2:] + if not b('\x00') in signature: return False + signature = 
signature[signature.index(b('\x00'))+1:] + if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False + signature = signature[19:] + if signature != sha256(message).digest(): return False + return True + +sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n') +sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') +sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n') + +raw_input() + +filename = sys.argv[0] + +UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" +VERSION_URL = UPDATE_URL + 'LATEST_VERSION' +JSON_URL = UPDATE_URL + 'versions.json' +UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + +if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + +exe = os.path.abspath(filename) +directory = os.path.dirname(exe) +if not os.access(directory, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % directory) + +try: + versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8') + versions_info = json.loads(versions_info) +except: + sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.') +if not 'signature' in versions_info: + sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.') +signature = versions_info['signature'] +del versions_info['signature'] +if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY): + sys.exit(u'ERROR: the versions file signature is invalid. Aborting.') + +version = versions_info['versions'][versions_info['latest']] + +try: + urlh = urllib2.urlopen(version['exe'][0]) + newcontent = urlh.read() + urlh.close() +except (IOError, OSError) as err: + sys.exit('ERROR: unable to download latest version') + +newcontent_hash = hashlib.sha256(newcontent).hexdigest() +if newcontent_hash != version['exe'][1]: + sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.') + +try: + with open(exe + '.new', 'wb') as outf: + outf.write(newcontent) +except (IOError, OSError) as err: + sys.exit(u'ERROR: unable to write the new version') + +try: + bat = os.path.join(directory, 'youtube-dl-updater.bat') + b = open(bat, 'w') + b.write(""" +echo Updating youtube-dl... +ping 127.0.0.1 -n 5 -w 1000 > NUL +move /Y "%s.new" "%s" +del "%s" + \n""" %(exe, exe, bat)) + b.close() + + os.startfile(bat) +except (IOError, OSError) as err: + sys.exit('ERROR: unable to overwrite current version') + +sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff -Nru youtube-dl-2012.09.27/devscripts/transition_helper.py youtube-dl-2014.02.17/devscripts/transition_helper.py --- youtube-dl-2012.09.27/devscripts/transition_helper.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/devscripts/transition_helper.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import sys, os + +try: + import urllib.request as compat_urllib_request +except ImportError: # Python 2 + import urllib2 as compat_urllib_request + +sys.stderr.write(u'Hi! 
We changed distribution method and now youtube-dl needs to update itself one more time.\n') +sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n') +sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n') + +try: + raw_input() +except NameError: # Python 3 + input() + +filename = sys.argv[0] + +API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads" +BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl" + +if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + +try: + urlh = compat_urllib_request.urlopen(BIN_URL) + newcontent = urlh.read() + urlh.close() +except (IOError, OSError) as err: + sys.exit('ERROR: unable to download latest version') + +try: + with open(filename, 'wb') as outf: + outf.write(newcontent) +except (IOError, OSError) as err: + sys.exit('ERROR: unable to overwrite current version') + +sys.stderr.write(u'Done! Now you can run youtube-dl.\n') diff -Nru youtube-dl-2012.09.27/.gitignore youtube-dl-2014.02.17/.gitignore --- youtube-dl-2012.09.27/.gitignore 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/.gitignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -*.pyc -*.pyo -*~ -wine-py2exe/ -py2exe.log diff -Nru youtube-dl-2012.09.27/LATEST_VERSION youtube-dl-2014.02.17/LATEST_VERSION --- youtube-dl-2012.09.27/LATEST_VERSION 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/LATEST_VERSION 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -2012.09.27 diff -Nru youtube-dl-2012.09.27/LICENSE youtube-dl-2014.02.17/LICENSE --- youtube-dl-2012.09.27/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/LICENSE 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+
+For more information, please refer to <http://unlicense.org/>
diff -Nru youtube-dl-2012.09.27/Makefile youtube-dl-2014.02.17/Makefile
--- youtube-dl-2012.09.27/Makefile 2012-09-27 09:25:46.000000000 +0000
+++ youtube-dl-2014.02.17/Makefile 2014-01-27 02:06:44.000000000 +0000
@@ -1,26 +1,79 @@
-default: update
+all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
-update: compile update-readme update-latest
+clean:
+	rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
-update-latest:
-	./youtube-dl.dev --version > LATEST_VERSION
+cleanall: clean
+	rm -f youtube-dl youtube-dl.exe
-update-readme:
-	@options=$$(COLUMNS=80 ./youtube-dl.dev --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \
-		header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \
-		footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \
-		echo "$${header}" > README.md && \
-		echo >> README.md && \
-		echo '## OPTIONS' >> README.md && \
-		echo "$${options}" >> README.md&& \
-		echo >> README.md && \
-		echo '## FAQ' >> README.md && \
-		echo "$${footer}" >> README.md
-
-compile:
-	zip --quiet --junk-paths youtube-dl youtube_dl/*.py
-	echo '#!/usr/bin/env python' > youtube-dl
+PREFIX=/usr/local
+BINDIR=$(PREFIX)/bin
+MANDIR=$(PREFIX)/man
+PYTHON=/usr/bin/env python
+
+# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
+ifeq ($(PREFIX),/usr)
+	SYSCONFDIR=/etc
+else
+	ifeq ($(PREFIX),/usr/local)
+		SYSCONFDIR=/etc
+	else
+		SYSCONFDIR=$(PREFIX)/etc
+	endif
+endif
+
+install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
+	install -d $(DESTDIR)$(BINDIR)
+	install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
+	install -d $(DESTDIR)$(MANDIR)/man1
+	install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
+	install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
+	install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+
+test:
+	#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
+	nosetests --verbose test
+
+tar: youtube-dl.tar.gz
+
+.PHONY: all clean install test tar bash-completion pypi-files
+
+pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
+
+youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
+	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
+	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip
+	chmod a+x youtube-dl
+
+README.md: youtube_dl/*.py youtube_dl/*/*.py
+	COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+
+README.txt: README.md
+	pandoc -f markdown -t plain README.md -o README.txt
+
+youtube-dl.1: README.md
+	pandoc -s -f markdown -t man README.md -o youtube-dl.1
+
+youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
+	python devscripts/bash-completion.py
+
+bash-completion: youtube-dl.bash-completion
-.PHONY: default compile update update-latest update-readme
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
+		--exclude '*.DS_Store' \
+		--exclude '*.kate-swp' \
+		--exclude '*.pyc' \
+		--exclude '*.pyo' \
+		--exclude '*~' \
+		--exclude '__pycache' \
+		--exclude '.git' \
+		--exclude 'testdata' \
+		-- \
+		bin devscripts test youtube_dl \
+		CHANGELOG LICENSE README.md README.txt \
+		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
+		
youtube-dl diff -Nru youtube-dl-2012.09.27/MANIFEST.in youtube-dl-2014.02.17/MANIFEST.in --- youtube-dl-2012.09.27/MANIFEST.in 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/MANIFEST.in 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,5 @@ +include README.md +include test/*.py +include test/*.json +include youtube-dl.bash-completion +include youtube-dl.1 diff -Nru youtube-dl-2012.09.27/README.md youtube-dl-2014.02.17/README.md --- youtube-dl-2012.09.27/README.md 2012-09-27 09:25:46.000000000 +0000 +++ youtube-dl-2014.02.17/README.md 2014-02-17 10:33:13.000000000 +0000 @@ -1,109 +1,302 @@ -# youtube-dl +% YOUTUBE-DL(1) -## USAGE -youtube-dl [options] url [url...] +# NAME +youtube-dl - download videos from youtube.com or other video platforms -## DESCRIPTION +# SYNOPSIS +**youtube-dl** [OPTIONS] URL [URL...] + +# DESCRIPTION **youtube-dl** is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version -2.x (x being at least 6), and it is not platform specific. It should work in -your Unix box, in Windows or in Mac OS X. It is released to the public domain, +2.6, 2.7, or 3.3+, and it is not platform specific. It should work on +your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. -## OPTIONS - -h, --help print this help text and exit - --version print program version and exit - -U, --update update this program to latest version - -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --dump-user-agent display the current browser identification - --list-extractors List all supported extractors and the URLs they - would handle - -### Video Selection: - --playlist-start NUMBER playlist video to start at (default is 1) - --playlist-end NUMBER playlist video to end at (default is last) - --match-title REGEX download only matching titles (regex or caseless - sub-string) - --reject-title REGEX skip download for matching titles (regex or - caseless sub-string) - --max-downloads NUMBER Abort after downloading NUMBER files - -### Filesystem Options: - -t, --title use title in file name - -l, --literal use literal title in file name - -A, --auto-number number downloaded files starting from 00000 - -o, --output TEMPLATE output filename template. Use %(stitle)s to get the - title, %(uploader)s for the uploader name, - %(autonumber)s to get an automatically incremented - number, %(ext)s for the filename extension, - %(upload_date)s for the upload date (YYYYMMDD), and - %% for a literal percent. Use - to output to - stdout. 
- -a, --batch-file FILE file containing URLs to download ('-' for stdin) - -w, --no-overwrites do not overwrite files - -c, --continue resume partially downloaded files - --no-continue do not resume partially downloaded files (restart - from beginning) - --cookies FILE file to read cookies from and dump cookie jar in - --no-part do not use .part files - --no-mtime do not use the Last-modified header to set the file - modification time - --write-description write video description to a .description file - --write-info-json write video metadata to a .info.json file - -### Verbosity / Simulation Options: - -q, --quiet activates quiet mode - -s, --simulate do not download the video and do not write anything - to disk - --skip-download do not download the video - -g, --get-url simulate, quiet but print URL - -e, --get-title simulate, quiet but print title - --get-thumbnail simulate, quiet but print thumbnail URL - --get-description simulate, quiet but print video description - --get-filename simulate, quiet but print output filename - --get-format simulate, quiet but print output format - --no-progress do not print progress bar - --console-title display progress in console titlebar - -v, --verbose print various debugging information - -### Video Format Options: - -f, --format FORMAT video format code - --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific one is - requested - --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats (currently youtube only) - --write-srt write video closed captions to a .srt file - (currently youtube only) - --srt-lang LANG language of the closed captions to download - (optional) use IETF language tags like 'en' - -### Authentication Options: - -u, --username USERNAME account username - -p, --password PASSWORD account password - -n, --netrc use .netrc authentication data - -### Post-processing Options: - --extract-audio convert video files to audio-only files (requires - ffmpeg or avconv and ffprobe or avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; - best by default - --audio-quality QUALITY ffmpeg/avconv audio bitrate specification, 128k by - default - -k, --keep-video keeps the video file on disk after the post- - processing; the video is erased by default +# OPTIONS + -h, --help print this help text and exit + --version print program version and exit + -U, --update update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to + to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error + occurs + --dump-user-agent display the current browser identification + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --list-extractors List all supported extractors and the URLs + they would handle + --extractor-descriptions Output descriptions of all supported + extractors + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in + an empty string (--proxy "") for direct + connection + --no-check-certificate Suppress HTTPS certificate validation. + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . 
At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --socket-timeout None Time to wait before giving up, in seconds + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for youtube-dl "large + apple". By default (with value "auto") + youtube-dl guesses. + --ignore-config Do not read configuration files. When given + in the global configuration file /etc + /youtube-dl.conf: do not read the user + configuration in ~/.config/youtube-dl.conf + (%APPDATA%/youtube-dl/config.txt on + Windows) + +## Video Selection: + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --match-title REGEX download only matching titles (regex or + caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or + caseless sub-string) + --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE download only videos uploaded in this date + --datebefore DATE download only videos uploaded on or before + this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after + this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than + COUNT views + --max-views COUNT Do not download any videos with more than + COUNT views + --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it. + --include-ads Download advertisements as well + (experimental) + --youtube-include-dash-manifest Try to download the DASH manifest on + YouTube videos (experimental) + +## Download Options: + -r, --rate-limit LIMIT maximum download rate in bytes per second + (e.g. 50K or 4.2M) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer + size. By default, the buffer size is + automatically resized from an initial value + of SIZE. + +## Filesystem Options: + -t, --title use title in file name (default) + --id use only video ID in file name + -l, --literal [deprecated] alias of --title + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template. Use %(title)s to + get the title, %(uploader)s for the + uploader name, %(uploader_id)s for the + uploader nickname if different, + %(autonumber)s to get an automatically + incremented number, %(ext)s for the + filename extension, %(format)s for the + format description (like "22 - 1280x720" or + "HD"), %(format_id)s for the unique id of + the format (like Youtube's itags: "137"), + %(upload_date)s for the upload date + (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the + video id, %(playlist)s for the playlist the + video is in, %(playlist_index)s for the + position in the playlist and %% for a + literal percent. 
Use - to output to stdout. + Can also be used to download to a different + directory, for example with -o '/my/downloa + ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + --autonumber-size NUMBER Specifies the number of digits in + %(autonumber)s when it is present in output + filename template or --auto-number option + is given + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) + --load-info FILE json file containing the video information + (created with the "--write-json" option) + -w, --no-overwrites do not overwrite files + -c, --continue force resume of partially downloaded files. + By default, youtube-dl will resume + downloads if possible. + --no-continue do not resume partially downloaded files + (restart from beginning) + --cookies FILE file to read cookies from and dump cookie + jar in + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set + the file modification time + --write-description write video description to a .description + file + --write-info-json write video metadata to a .info.json file + --write-annotations write video annotations to a .annotation + file + --write-thumbnail write thumbnail image to disk + +## Verbosity / Simulation Options: + -q, --quiet activates quiet mode + -s, --simulate do not download the video and do not write + anything to disk + --skip-download do not download the video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-id simulate, quiet but print id + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-duration simulate, quiet but print video length + --get-filename simulate, quiet but print output filename + --get-format simulate, quiet but print output format + -j, --dump-json simulate, quiet but print JSON information + --newline output progress bar as new lines + --no-progress do not print progress bar + --console-title display progress in console titlebar + -v, --verbose print various debugging information + --dump-intermediate-pages print downloaded pages to debug problems + (very verbose) + --write-pages Write downloaded intermediary pages to + files in the current directory to debug + problems + --print-traffic Display sent and read HTTP traffic + +## Video Format Options: + -f, --format FORMAT video format code, specify the order of + preference using slashes: "-f 22/17/18". + "-f mp4" and "-f flv" are also supported. + You can also use the special names "best", + "bestaudio", "worst", and "worstaudio". By + default, youtube-dl will pick the best + quality. 
+ --all-formats download all available video formats + --prefer-free-formats prefer free video formats unless a specific + one is requested + --max-quality FORMAT highest quality format to download + -F, --list-formats list all available formats + +## Subtitle Options: + --write-sub write subtitle file + --write-auto-sub write automatic subtitle file (youtube + only) + --all-subs downloads all the available subtitles of + the video + --list-subs lists all available subtitles for the video + --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] + youtube only) + --sub-lang LANGS languages of the subtitles to download + (optional) separated by commas, use IETF + language tags like 'en,pt' + +## Authentication Options: + -u, --username USERNAME account username + -p, --password PASSWORD account password + -n, --netrc use .netrc authentication data + --video-password PASSWORD video password (vimeo, smotri) + +## Post-processing Options: + -x, --extract-audio convert video files to audio-only files + (requires ffmpeg or avconv and ffprobe or + avprobe) + --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", + "opus", or "wav"; best by default + --audio-quality QUALITY ffmpeg/avconv audio quality specification, + insert a value between 0 (better) and 9 + (worse) for VBR or a specific bitrate like + 128K (default 5) + --recode-video FORMAT Encode the video to another format if + necessary (currently supported: + mp4|flv|ogg|webm) + -k, --keep-video keeps the video file on disk after the + post-processing; the video is erased by + default + --no-post-overwrites do not overwrite post-processed files; the + post-processed files are overwritten by + default + --embed-subs embed subtitles in the video (only for mp4 + videos) + --add-metadata write metadata to the video file + --xattrs write metadata to the video file's xattrs + (using dublin core and xdg standards) + --prefer-avconv Prefer avconv over ffmpeg for running the + postprocessors (default) + --prefer-ffmpeg Prefer ffmpeg over avconv for running the + postprocessors + +# CONFIGURATION + +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. + +# OUTPUT TEMPLATE + +The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are: + + - `id`: The sequence will be replaced by the video identifier. + - `url`: The sequence will be replaced by the video URL. + - `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video. + - `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format. + - `title`: The sequence will be replaced by the video title. + - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). + - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. 
+ - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. + - `playlist`: The name or the id of the playlist that contains the video. + - `playlist_index`: The index of the video in the playlist, a five-digit number. + +The current default template is `%(title)s-%(id)s.%(ext)s`. + +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: + + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc + youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters + $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames + youtube-dl_test_video_.mp4 # A simple file name + +# VIDEO SELECTION + +Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats: + + - Absolute dates: Dates in the format `YYYYMMDD`. + - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` + +Examples: + + $ # Download only the videos uploaded in the last 6 months + $ youtube-dl --dateafter now-6months + $ # Download only the videos uploaded on January 1, 1970 + $ youtube-dl --date 19700101 + $ # will only download the videos uploaded in the 200x decade + $ youtube-dl --dateafter 20000101 --datebefore 20091231 -## FAQ +# FAQ ### Can you please put the -b option back? -Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? @@ -117,19 +310,113 @@ youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. 
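+
+For example, a quick check that the update took effect (assuming a system-wide install; omit `sudo` for a per-user copy):
+
+    $ sudo youtube-dl --update
+    $ youtube-dl --version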
-## COPYRIGHT +### ERROR: unable to download video ### -youtube-dl is released into the public domain by the copyright holders. +youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. -This README file was originally written by Daniel Bolton () and is likewise released into the public domain. +### SyntaxError: Non-ASCII character ### + +The error + + File "youtube-dl", line 2 + SyntaxError: Non-ASCII character '\x93' ... + +means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. + +### What is this binary file? Where has the code gone? + +Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. + +### The exe throws a *Runtime error from Visual C++* + +To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). + +# DEVELOPER INSTRUCTIONS + +Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. + +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make +* pandoc +* zip +* nosetests + +### Adding support for a new site + +If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). + +# BUGS + +Bugs and suggestions should be reported at: . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. + +Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. + +For discussions, join us in the irc channel #youtube-dl on freenode. + +When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist): -## BUGS +### Is the description of the issue itself sufficient? 
-Bugs and suggestions should be reported at: +We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. -Please include: +So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious -* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. -* The output of `youtube-dl --version` -* The output of `python --version` -* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). +- What the problem is +- How it could be fixed +- How your proposed solution would look like + +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. + +For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. + +Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. + +### Are you using the latest version? + +Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. + +### Is the issue already documented? + +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. + +### Why are existing options not enough? + +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. + +### Is there enough context in your bug report? + +People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). 
However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+
+# COPYRIGHT
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain.
diff -Nru youtube-dl-2012.09.27/README.txt youtube-dl-2014.02.17/README.txt
--- youtube-dl-2012.09.27/README.txt 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/README.txt 2014-02-17 10:33:26.000000000 +0000
@@ -0,0 +1,582 @@
+NAME
+====
+
+youtube-dl - download videos from youtube.com or other video platforms
+
+SYNOPSIS
+========
+
+youtube-dl OPTIONS URL [URL...]
+
+DESCRIPTION
+===========
+
+youtube-dl is a small command-line program to download videos from
+YouTube.com and a few more sites. It requires the Python interpreter,
+version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+work on your Unix box, on Windows or on Mac OS X. It is released to the
+public domain, which means you can modify it, redistribute it or use it
+however you like.
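+
+For example, the following downloads a single video to the current
+directory under the default file naming scheme (BaW_jenozKc is
+youtube-dl's designated test video, also used as the example URL
+elsewhere in this document):
+
+    youtube-dl "http://www.youtube.com/watch?v=BaW_jenozKc"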
+ +OPTIONS +======= + + -h, --help print this help text and exit + --version print program version and exit + -U, --update update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to + to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error + occurs + --dump-user-agent display the current browser identification + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --list-extractors List all supported extractors and the URLs + they would handle + --extractor-descriptions Output descriptions of all supported + extractors + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in + an empty string (--proxy "") for direct + connection + --no-check-certificate Suppress HTTPS certificate validation. + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --socket-timeout None Time to wait before giving up, in seconds + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for youtube-dl "large + apple". By default (with value "auto") + youtube-dl guesses. + --ignore-config Do not read configuration files. When given + in the global configuration file /etc + /youtube-dl.conf: do not read the user + configuration in ~/.config/youtube-dl.conf + (%APPDATA%/youtube-dl/config.txt on + Windows) + +Video Selection: +---------------- + + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --match-title REGEX download only matching titles (regex or + caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or + caseless sub-string) + --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE download only videos uploaded in this date + --datebefore DATE download only videos uploaded on or before + this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after + this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than + COUNT views + --max-views COUNT Do not download any videos with more than + COUNT views + --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it. + --include-ads Download advertisements as well + (experimental) + --youtube-include-dash-manifest Try to download the DASH manifest on + YouTube videos (experimental) + +Download Options: +----------------- + + -r, --rate-limit LIMIT maximum download rate in bytes per second + (e.g. 
50K or 4.2M) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer + size. By default, the buffer size is + automatically resized from an initial value + of SIZE. + +Filesystem Options: +------------------- + + -t, --title use title in file name (default) + --id use only video ID in file name + -l, --literal [deprecated] alias of --title + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template. Use %(title)s to + get the title, %(uploader)s for the + uploader name, %(uploader_id)s for the + uploader nickname if different, + %(autonumber)s to get an automatically + incremented number, %(ext)s for the + filename extension, %(format)s for the + format description (like "22 - 1280x720" or + "HD"), %(format_id)s for the unique id of + the format (like Youtube's itags: "137"), + %(upload_date)s for the upload date + (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the + video id, %(playlist)s for the playlist the + video is in, %(playlist_index)s for the + position in the playlist and %% for a + literal percent. Use - to output to stdout. + Can also be used to download to a different + directory, for example with -o '/my/downloa + ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + --autonumber-size NUMBER Specifies the number of digits in + %(autonumber)s when it is present in output + filename template or --auto-number option + is given + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) + --load-info FILE json file containing the video information + (created with the "--write-json" option) + -w, --no-overwrites do not overwrite files + -c, --continue force resume of partially downloaded files. + By default, youtube-dl will resume + downloads if possible. 
+    --no-continue                    do not resume partially downloaded files
+                                     (restart from beginning)
+    --cookies FILE                   file to read cookies from and dump
+                                     cookie jar in
+    --no-part                        do not use .part files
+    --no-mtime                       do not use the Last-modified header to
+                                     set the file modification time
+    --write-description              write video description to a
+                                     .description file
+    --write-info-json                write video metadata to a .info.json
+                                     file
+    --write-annotations              write video annotations to a
+                                     .annotation file
+    --write-thumbnail                write thumbnail image to disk
+
+Verbosity / Simulation Options:
+-------------------------------
+
+    -q, --quiet                      activate quiet mode
+    -s, --simulate                   do not download the video and do not
+                                     write anything to disk
+    --skip-download                  do not download the video
+    -g, --get-url                    simulate, quiet but print URL
+    -e, --get-title                  simulate, quiet but print title
+    --get-id                         simulate, quiet but print id
+    --get-thumbnail                  simulate, quiet but print thumbnail URL
+    --get-description                simulate, quiet but print video
+                                     description
+    --get-duration                   simulate, quiet but print video length
+    --get-filename                   simulate, quiet but print output
+                                     filename
+    --get-format                     simulate, quiet but print output format
+    -j, --dump-json                  simulate, quiet but print JSON
+                                     information
+    --newline                        output progress bar as new lines
+    --no-progress                    do not print progress bar
+    --console-title                  display progress in console titlebar
+    -v, --verbose                    print various debugging information
+    --dump-intermediate-pages        print downloaded pages to debug problems
+                                     (very verbose)
+    --write-pages                    Write downloaded intermediary pages to
+                                     files in the current directory to debug
+                                     problems
+    --print-traffic                  Display sent and read HTTP traffic
+
+Video Format Options:
+---------------------
+
+    -f, --format FORMAT              video format code, specify the order of
+                                     preference using slashes: "-f 22/17/18".
+                                     "-f mp4" and "-f flv" are also
+                                     supported. You can also use the special
+                                     names "best", "bestaudio", "worst", and
+                                     "worstaudio". By default, youtube-dl
+                                     will pick the best quality.
+    --all-formats                    download all available video formats
+    --prefer-free-formats            prefer free video formats unless a
+                                     specific one is requested
+    --max-quality FORMAT             highest quality format to download
+    -F, --list-formats               list all available formats
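+
+For instance, you can first list a video's formats and then request one
+by its code, with fallbacks (a sketch: the numeric format codes differ
+from video to video, and BaW_jenozKc is the youtube-dl test video):
+
+    youtube-dl -F "http://www.youtube.com/watch?v=BaW_jenozKc"
+    youtube-dl -f 22/18/best "http://www.youtube.com/watch?v=BaW_jenozKc"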
+
+Subtitle Options:
+-----------------
+
+    --write-sub                      write subtitle file
+    --write-auto-sub                 write automatic subtitle file (YouTube
+                                     only)
+    --all-subs                       download all the available subtitles of
+                                     the video
+    --list-subs                      list all available subtitles for the
+                                     video
+    --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt]
+                                     YouTube only)
+    --sub-lang LANGS                 languages of the subtitles to download
+                                     (optional), separated by commas, use
+                                     IETF language tags like 'en,pt'
+
+Authentication Options:
+-----------------------
+
+    -u, --username USERNAME          account username
+    -p, --password PASSWORD          account password
+    -n, --netrc                      use .netrc authentication data
+    --video-password PASSWORD        video password (vimeo, smotri)
+
+Post-processing Options:
+------------------------
+
+    -x, --extract-audio              convert video files to audio-only files
+                                     (requires ffmpeg or avconv and ffprobe
+                                     or avprobe)
+    --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a",
+                                     "opus", or "wav"; "best" by default
+    --audio-quality QUALITY          ffmpeg/avconv audio quality
+                                     specification, insert a value between 0
+                                     (better) and 9 (worse) for VBR or a
+                                     specific bitrate like 128K (default 5)
+    --recode-video FORMAT            Encode the video to another format if
+                                     necessary (currently supported:
+                                     mp4|flv|ogg|webm)
+    -k, --keep-video                 keep the video file on disk after the
+                                     post-processing; the video is erased by
+                                     default
+    --no-post-overwrites             do not overwrite post-processed files;
+                                     the post-processed files are overwritten
+                                     by default
+    --embed-subs                     embed subtitles in the video (only for
+                                     mp4 videos)
+    --add-metadata                   write metadata to the video file
+    --xattrs                         write metadata to the video file's
+                                     xattrs (using Dublin Core and XDG
+                                     standards)
+    --prefer-avconv                  Prefer avconv over ffmpeg for running
+                                     the postprocessors (default)
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running
+                                     the postprocessors
+
+CONFIGURATION
+=============
+
+You can configure youtube-dl by placing default arguments (such as
+--extract-audio --no-mtime to always extract the audio and not copy the
+mtime) into /etc/youtube-dl.conf and/or ~/.config/youtube-dl.conf. On
+Windows, the configuration file locations are
+%APPDATA%\youtube-dl\config.txt and C:\Users\<user name>\youtube-dl.conf.
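+
+As a sketch, a ~/.config/youtube-dl.conf that always extracts audio,
+leaves the file modification time alone, and restricts filenames to
+ASCII could contain (these are the same flags documented above):
+
+    --extract-audio
+    --no-mtime
+    --restrict-filenames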
+
+OUTPUT TEMPLATE
+===============
+
+The -o option allows users to indicate a template for the output file
+names. The basic usage is not to set any template arguments when
+downloading a single file, like in
+youtube-dl -o funny_video.flv "http://some/video". However, it may
+contain special sequences that will be replaced when downloading each
+video. The special sequences have the format %(NAME)s. To clarify, that
+is a percent symbol followed by a name in parentheses, followed by a
+lowercase S. Allowed names are:
+
+- id: The sequence will be replaced by the video identifier.
+- url: The sequence will be replaced by the video URL.
+- uploader: The sequence will be replaced by the nickname of the
+  person who uploaded the video.
+- upload_date: The sequence will be replaced by the upload date in
+  YYYYMMDD format.
+- title: The sequence will be replaced by the video title.
+- ext: The sequence will be replaced by the appropriate extension
+  (like flv or mp4).
+- epoch: The sequence will be replaced by the Unix epoch when creating
+  the file.
+- autonumber: The sequence will be replaced by a five-digit number
+  that will be increased with each download, starting at zero.
+- playlist: The name or the id of the playlist that contains the
+  video.
+- playlist_index: The index of the video in the playlist, a five-digit
+  number.
+
+The current default template is %(title)s-%(id)s.%(ext)s.
+
+In some cases, you don't want special characters such as 中, spaces, or
+&, such as when transferring the downloaded filename to a Windows system
+or the filename through an 8bit-unsafe channel. In these cases, add the
+--restrict-filenames flag to get a shorter title:
+
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+    youtube-dl_test_video_.mp4          # A simple file name
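+
+For example, to sort the videos of a playlist into a directory named
+after the playlist, prefixing each file name with its position in the
+playlist (the URL below is a placeholder):
+
+    youtube-dl -o '%(playlist)s/%(playlist_index)s-%(title)s.%(ext)s' "http://www.youtube.com/playlist?list=..."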
+
+VIDEO SELECTION
+===============
+
+Videos can be filtered by their upload date using the options --date,
+--datebefore or --dateafter. They accept dates in two formats:
+
+- Absolute dates: Dates in the format YYYYMMDD.
+- Relative dates: Dates in the format
+  (now|today)[+-][0-9](day|week|month|year)(s)?
+
+Examples:
+
+    $ # Download only the videos uploaded in the last 6 months
+    $ youtube-dl --dateafter now-6months
+
+    $ # Download only the videos uploaded on January 1, 1970
+    $ youtube-dl --date 19700101
+
+    $ # Download only the videos uploaded in the 200x decade
+    $ youtube-dl --dateafter 20000101 --datebefore 20091231
+
+FAQ
+===
+
+Can you please put the -b option back?
+
+Most people asking this question are not aware that youtube-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the -b option. For some specific videos, maybe YouTube does not
+report them to be available in a specific high quality format you're
+interested in. In that case, simply request it with the -f option and
+youtube-dl will try to download it.
+
+I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much. We are considering providing a way to let you solve the
+CAPTCHA, but at the moment, your best course of action is to point a
+web browser at the YouTube URL, solve the CAPTCHA, and then restart
+youtube-dl.
+
+I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as vlc or
+mplayer.
+
+The links provided by youtube-dl -g are not working anymore
+
+The URLs youtube-dl outputs require the downloader to have the correct
+cookies. Use the --cookies option to write the required cookies into a
+file, and advise your downloader to read cookies from that file. Some
+sites also require a common user agent to be used; use
+--dump-user-agent to see the one in use by youtube-dl.
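+
+For example (a sketch: cookies.txt is an arbitrary file name, and wget
+stands in for whatever downloader you use):
+
+    youtube-dl --cookies cookies.txt -g "http://www.youtube.com/watch?v=BaW_jenozKc"
+    wget --load-cookies cookies.txt "<URL printed by the previous command>"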
+
+ERROR: no fmt_url_map or conn information found in video info
+
+YouTube switched to a new video info format in July 2011, which is not
+supported by old versions of youtube-dl. You can update youtube-dl with
+sudo youtube-dl --update.
+
+ERROR: unable to download video
+
+Since September 2012, YouTube requires an additional signature, which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+SyntaxError: Non-ASCII character
+
+The error
+
+    File "youtube-dl", line 2
+    SyntaxError: Non-ASCII character '\x93' ...
+
+means you're using an outdated version of Python. Please update to
+Python 2.6 or 2.7.
+
+What is this binary file? Where has the code gone?
+
+Since June 2012 (#342), youtube-dl is packed as an executable zipfile.
+Simply unzip it (you might need to rename it to youtube-dl.zip first on
+some systems) or clone the git repository, as laid out above. If you
+modify the code, you can run it by executing the __main__.py file. To
+recompile the executable, run make youtube-dl.
+
+The exe throws a Runtime error from Visual C++
+
+To run the exe you first need to install the Microsoft Visual C++ 2008
+Redistributable Package.
+
+DEVELOPER INSTRUCTIONS
+======================
+
+Most users do not need to build youtube-dl and can download the builds
+or get them from their distribution.
+
+To run youtube-dl as a developer, you don't need to build anything
+either. Simply execute
+
+    python -m youtube_dl
+
+To run the tests, simply invoke your favorite test runner, or execute a
+test file directly; any of the following work:
+
+    python -m unittest discover
+    python test/test_download.py
+    nosetests
+
+If you want to create a build of youtube-dl yourself, you'll need
+
+- python
+- make
+- pandoc
+- zip
+- nosetests
+
+Adding support for a new site
+
+If you want to add support for a new site, copy any recently modified
+file in youtube_dl/extractor and add an import in
+youtube_dl/extractor/__init__.py. Have a look at
+youtube_dl/extractor/common.py for possible helper methods and a
+detailed description of what your extractor should return; a minimal
+skeleton is sketched below. Don't forget to run the tests with
+python test/test_download.py TestDownload.test_YourExtractor! For a
+detailed tutorial, refer to this blog post.
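+
+The following hypothetical extractor shows the rough shape of the code
+(a sketch only: the site, URL pattern, and page regex are invented for
+illustration; the helper methods used are those that InfoExtractor in
+youtube_dl/extractor/common.py provides):
+
+    # somesite.py - a hypothetical extractor skeleton, not a real site
+    import re
+
+    from .common import InfoExtractor
+
+
+    class SomeSiteIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?somesite\.example/watch/(?P<id>[0-9]+)'
+        _TEST = {
+            'url': 'http://somesite.example/watch/42',
+            'file': '42.mp4',
+            'info_dict': {
+                'title': 'Some video title',
+            },
+        }
+
+        def _real_extract(self, url):
+            video_id = re.match(self._VALID_URL, url).group('id')
+            webpage = self._download_webpage(url, video_id)
+            # The regex below is invented; inspect the real page instead
+            video_url = self._html_search_regex(
+                r'<source src="([^"]+)"', webpage, 'video URL')
+            return {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'mp4',
+                'title': self._og_search_title(webpage),
+            }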
+
+BUGS
+====
+
+Bugs and suggestions should be reported at:
+https://github.com/rg3/youtube-dl/issues . Unless you were prompted to
+do so or there is another pertinent reason (e.g. GitHub fails to accept
+the bug report), please do not send bug reports via personal email.
+
+Please include the full output of the command when run with --verbose.
+The output (including the first lines) contains important debugging
+information. Issues without the full output are often not reproducible
+and therefore do not get solved in short order, if ever.
+
+For discussions, join us in the IRC channel #youtube-dl on freenode.
+
+When you submit a request, please re-read it once to avoid a couple of
+mistakes (you can and should use this as a checklist):
+
+Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most
+cases we eventually get the required information after asking back
+multiple times, this poses an unnecessary drain on our resources. Many
+contributors, including myself, are also not native speakers, so we may
+misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you
+want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing
+some of these, which makes it hard for us to respond to it. We're often
+too polite to close the issue outright, but the missing info makes
+misinterpretation likely. As a committer myself, I often get frustrated
+by these issues, since the only possible way for me to move forward on
+them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the complete
+output of youtube-dl when called with the -v flag. The error message you
+get for (most) bugs even says so, but you would not believe how many of
+our bug reports do not contain this information.
+
+Site support requests must contain an example URL. An example URL is a
+URL you might want to download, like
+http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious
+video present. Except under very special circumstances, the main page of
+a video service (e.g. http://www.youtube.com/ ) is not an example URL.
+
+Are you using the latest version?
+
+Before reporting any issue, type youtube-dl -U. This should report that
+you're up-to-date. About 20% of the reports we receive concern issues
+that have already been fixed, but people are using outdated versions.
+This goes for feature requests as well.
+
+Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to
+open. Search at the top of the window or at
+https://github.com/rg3/youtube-dl/search?type=Issues . If there is an
+issue, feel free to write something along the lines of "This affects me
+as well, with version 2015.01.01. Here is some more information on the
+issue: ...". While some issues may be old, a new post into them often
+spurs rapid activity.
+
+Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at the list of
+supported options. Many feature requests are for features that actually
+exist already! Please, absolutely do show off your work in the issue
+report and detail how the existing similar options do not solve your
+problem.
+
+Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by
+breaking down their larger problems (e.g. wanting to skip already
+downloaded files) to a specific request (e.g. requesting us to look
+whether the file exists before downloading the info page). However, what
+often happens is that they break down the problem into two steps: one
+simple, and one impossible (or extremely complicated) one.
+
+We are then presented with a very complicated request when the original
+problem could be solved far more easily, e.g. by recording the
+downloaded video IDs in a separate file. To avoid this, you must include
+the greater context where it is non-obvious. In particular, every
+feature request that does not consist of adding support for a new site
+should contain a use case scenario that explains in what situation the
+missing feature would be useful.
+
+Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or
+should open. There is no limit of issues they can or should open. While
+it may seem appealing to be able to dump all your issues into one
+ticket, that means that someone who solves one of your issues cannot
+mark the issue as closed. Typically, reporting a bunch of issues leads
+to the ticket lingering since nobody wants to attack that behemoth,
+until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to
+services at one site (generally under a common domain, but always using
+the same backend technology). Do not request support for vimeo user
+videos, Whitehouse podcasts, and Google Plus pages in the same issue.
+Also, make sure that you don't post bug reports alongside feature
+requests. As a rule of thumb, a feature request does not include outputs
+of youtube-dl that are not immediately related to the feature at hand.
+Do not post reports of a network error alongside the request for a new
+video service.
+
+Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can
+personally talk to) require. Do not post features because they seem like
+a good idea. If they are really useful, they will be requested by
+someone who requires them.
+
+Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely
+unrelated to youtube-dl and relate to a different or even the reporter's
+own application. Please make sure that you are actually using
+youtube-dl. If you are using a UI for youtube-dl, report the bug to the
+maintainer of the actual application providing the UI. On the other
+hand, if your UI for youtube-dl fails in some way you believe is related
+to youtube-dl, by all means, go ahead and report the bug.
+
+COPYRIGHT
+=========
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton
+(https://github.com/dbbolton) and is likewise released into the public
+domain.
diff -Nru youtube-dl-2012.09.27/setup.py youtube-dl-2014.02.17/setup.py
--- youtube-dl-2012.09.27/setup.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/setup.py 2014-01-27 05:21:14.000000000 +0000
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function
+
+import os.path
+import pkg_resources
+import warnings
+import sys
+
+try:
+    from setuptools import setup
+    setuptools_available = True
+except ImportError:
+    from distutils.core import setup
+    setuptools_available = False
+
+try:
+    # This will create an exe that needs Microsoft Visual C++ 2008
+    # Redistributable Package
+    import py2exe
+except ImportError:
+    if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+        print("Cannot import py2exe", file=sys.stderr)
+        exit(1)
+
+py2exe_options = {
+    "bundle_files": 1,
+    "compressed": 1,
+    "optimize": 2,
+    "dist_dir": '.',
+    "dll_excludes": ['w9xpopen.exe'],
+}
+
+py2exe_console = [{
+    "script": "./youtube_dl/__main__.py",
+    "dest_base": "youtube-dl",
+}]
+
+py2exe_params = {
+    'console': py2exe_console,
+    'options': {"py2exe": py2exe_options},
+    'zipfile': None
+}
+
+if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
+    params = py2exe_params
+else:
+    files_spec = [
+        ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('share/doc/youtube_dl', ['README.txt']),
+        ('share/man/man1', ['youtube-dl.1'])
+    ]
+    root = os.path.dirname(os.path.abspath(__file__))
+    data_files = []
+    for dirname, files in files_spec:
+        resfiles = []
+        for fn in files:
+            if not os.path.exists(fn):
+                warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.'
% fn) + else: + resfiles.append(fn) + data_files.append((dirname, resfiles)) + + params = { + 'data_files': data_files, + } + if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} + else: + params['scripts'] = ['bin/youtube-dl'] + +# Get the version from youtube_dl/version.py without importing the package +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + +setup( + name='youtube_dl', + version=__version__, + description='YouTube video downloader', + long_description='Small command-line program to download videos from' + ' YouTube.com and other video sites.', + url='https://github.com/rg3/youtube-dl', + author='Ricardo Garcia', + author_email='ytdl@yt-dl.org', + maintainer='Philipp Hagemeister', + maintainer_email='phihag@phihag.de', + packages=[ + 'youtube_dl', + 'youtube_dl.extractor', 'youtube_dl.downloader', + 'youtube_dl.postprocessor'], + + # Provokes warning on most systems (why?!) + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], + + classifiers=[ + "Topic :: Multimedia :: Video", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "License :: Public Domain", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.3" + ], + + **params +) diff -Nru youtube-dl-2012.09.27/test/helper.py youtube-dl-2014.02.17/test/helper.py --- youtube-dl-2012.09.27/test/helper.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/helper.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,85 @@ +import errno +import io +import hashlib +import json +import os.path +import re +import types +import sys + +import youtube_dl.extractor +from youtube_dl import YoutubeDL +from youtube_dl.utils import preferredencoding + + +def get_params(override=None): + PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "parameters.json") + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + if override: + parameters.update(override) + return parameters + + +def try_rm(filename): + """ Remove a file if it exists """ + try: + os.remove(filename) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + +def report_warning(message): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if sys.stderr.isatty() and os.name != 'nt': + _msg_header = u'\033[0;33mWARNING:\033[0m' + else: + _msg_header = u'WARNING:' + output = u'%s %s\n' % (_msg_header, message) + if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + output = output.encode(preferredencoding()) + sys.stderr.write(output) + + +class FakeYDL(YoutubeDL): + def __init__(self, override=None): + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. 
+ params = get_params(override=override) + super(FakeYDL, self).__init__(params) + self.result = [] + + def to_screen(self, s, skip_eol=None): + print(s) + + def trouble(self, s, tb=None): + raise Exception(s) + + def download(self, x): + self.result.append(x) + + def expect_warning(self, regex): + # Silence an expected warning matching a regex + old_report_warning = self.report_warning + def report_warning(self, message): + if re.match(regex, message): return + old_report_warning(message) + self.report_warning = types.MethodType(report_warning, self) + +def get_testcases(): + for ie in youtube_dl.extractor.gen_extractors(): + t = getattr(ie, '_TEST', None) + if t: + t['name'] = type(ie).__name__[:-len('IE')] + yield t + for t in getattr(ie, '_TESTS', []): + t['name'] = type(ie).__name__[:-len('IE')] + yield t + + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff -Nru youtube-dl-2012.09.27/test/parameters.json youtube-dl-2014.02.17/test/parameters.json --- youtube-dl-2012.09.27/test/parameters.json 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/parameters.json 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +{ + "consoletitle": false, + "continuedl": true, + "forcedescription": false, + "forcefilename": false, + "forceformat": false, + "forcethumbnail": false, + "forcetitle": false, + "forceurl": false, + "format": null, + "format_limit": null, + "ignoreerrors": false, + "listformats": null, + "logtostderr": false, + "matchtitle": null, + "max_downloads": null, + "nooverwrites": false, + "nopart": false, + "noprogress": false, + "outtmpl": "%(id)s.%(ext)s", + "password": null, + "playlistend": -1, + "playliststart": 1, + "prefer_free_formats": false, + "quiet": false, + "ratelimit": null, + "rejecttitle": null, + "retries": 10, + "simulate": false, + "skip_download": false, + "subtitleslang": null, + "subtitlesformat": "srt", + "test": true, + "updatetime": true, + "usenetrc": false, + "username": null, + "verbose": true, + "writedescription": false, + "writeinfojson": true, + "writesubtitles": false, + "allsubtitles": false, + "listssubtitles": false, + "socket_timeout": 20 +} diff -Nru youtube-dl-2012.09.27/test/test_age_restriction.py youtube-dl-2014.02.17/test/test_age_restriction.py --- youtube-dl-2012.09.27/test/test_age_restriction.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_age_restriction.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import try_rm + + +from youtube_dl import YoutubeDL + + +def _download_restricted(url, filename, age): + """ Returns true iff the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + "outtmpl": "%(id)s.%(ext)s", + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = os.path.splitext(filename)[0] + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + 
self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + def test_pornotube(self): + self._assert_restricted( + 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + '1689755.flv', 13) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_all_urls.py youtube-dl-2014.02.17/test/test_all_urls.py --- youtube-dl-2012.09.27/test/test_all_urls.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_all_urls.py 2014-02-08 18:03:07.000000000 +0000 @@ -0,0 +1,137 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import get_testcases + +from youtube_dl.extractor import ( + FacebookIE, + gen_extractors, + JustinTVIE, + PBSIE, + YoutubeIE, +) + + +class TestAllURLsMatching(unittest.TestCase): + def setUp(self): + self.ies = gen_extractors() + + def matching_ies(self, url): + return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] + + def assertMatch(self, url, ie_list): + self.assertEqual(self.matching_ies(url), ie_list) + + def test_youtube_playlist_matching(self): + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585 + assertPlaylist('PL63F0C78739B09958') + assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 + self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) + # Top tracks + assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') + + def test_youtube_matching(self): + self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 + self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) + + def test_youtube_channel_matching(self): + assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + + def test_youtube_user_matching(self): + self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + + def test_youtube_feeds(self): + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) + self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) + self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) + self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) + + def test_youtube_show_matching(self): + self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) + + def test_justin_tv_channelid_matching(self): + 
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) + self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) + + def test_justintv_videoid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) + + def test_justin_tv_chapterid_matching(self): + self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) + + def test_youtube_extract(self): + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + + def test_facebook_matching(self): + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + + def test_no_duplicates(self): + ies = gen_extractors() + for tc in get_testcases(): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + + def test_keywords(self): + self.assertMatch(':ytsubs', ['youtube:subscriptions']) + self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) + self.assertMatch(':ythistory', ['youtube:history']) + self.assertMatch(':thedailyshow', ['ComedyCentralShows']) + self.assertMatch(':tds', ['ComedyCentralShows']) + self.assertMatch(':colbertreport', ['ComedyCentralShows']) + self.assertMatch(':cr', ['ComedyCentralShows']) + + def test_vimeo_matching(self): + self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) + + # https://github.com/rg3/youtube-dl/issues/1930 + def test_soundcloud_not_matching_sets(self): + self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) + + def test_tumblr(self): + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) + + def test_pbs(self): + # https://github.com/rg3/youtube-dl/issues/2350 + self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_div.py youtube-dl-2014.02.17/test/test_div.py --- youtube-dl-2012.09.27/test/test_div.py 2012-09-27 09:25:46.000000000 +0000 +++ 
youtube-dl-2014.02.17/test/test_div.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,29 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Various small unit tests
-
-import os,sys
-sys.path.append(os.path.dirname(os.path.dirname(__file__)))
-
-import youtube_dl
-
-def test_simplify_title():
-    assert youtube_dl._simplify_title(u'abc') == u'abc'
-    assert youtube_dl._simplify_title(u'abc_d-e') == u'abc_d-e'
-
-    assert youtube_dl._simplify_title(u'123') == u'123'
-
-    assert u'/' not in youtube_dl._simplify_title(u'abc/de')
-    assert u'abc' in youtube_dl._simplify_title(u'abc/de')
-    assert u'de' in youtube_dl._simplify_title(u'abc/de')
-    assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
-
-    assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
-    assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
-    assert u'de' in youtube_dl._simplify_title(u'abc\\de')
-
-    assert youtube_dl._simplify_title(u'ä') == u'ä'
-    assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
-
-    # Strip underlines
-    assert youtube_dl._simplify_title(u'\'a_') == u'a'
diff -Nru youtube-dl-2012.09.27/test/test_download.py youtube-dl-2014.02.17/test/test_download.py
--- youtube-dl-2012.09.27/test/test_download.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/test/test_download.py 2014-02-06 03:18:11.000000000 +0000
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+    get_params,
+    get_testcases,
+    try_rm,
+    md5,
+    report_warning
+)
+
+
+import hashlib
+import io
+import json
+import socket
+
+import youtube_dl.YoutubeDL
+from youtube_dl.utils import (
+    compat_http_client,
+    compat_str,
+    compat_urllib_error,
+    compat_HTTPError,
+    DownloadError,
+    ExtractorError,
+    UnavailableVideoError,
+)
+from youtube_dl.extractor import get_info_extractor
+
+RETRIES = 3
+
+class YoutubeDL(youtube_dl.YoutubeDL):
+    def __init__(self, *args, **kwargs):
+        self.to_stderr = self.to_screen
+        self.processed_info_dicts = []
+        super(YoutubeDL, self).__init__(*args, **kwargs)
+    def report_warning(self, message):
+        # Don't accept warnings during tests
+        raise ExtractorError(message)
+    def process_info(self, info_dict):
+        self.processed_info_dicts.append(info_dict)
+        return super(YoutubeDL, self).process_info(info_dict)
+
+def _file_md5(fn):
+    with open(fn, 'rb') as f:
+        return hashlib.md5(f.read()).hexdigest()
+
+defs = get_testcases()
+
+
+class TestDownload(unittest.TestCase):
+    maxDiff = None
+    def setUp(self):
+        self.defs = defs
+
+### Dynamically generate tests
+def generator(test_case):
+
+    def test_template(self):
+        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+        def print_skipping(reason):
+            print('Skipping %s: %s' % (test_case['name'], reason))
+        if not ie.working():
+            print_skipping('IE marked as not _WORKING')
+            return
+        if 'playlist' not in test_case:
+            info_dict = test_case.get('info_dict', {})
+            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+                print_skipping('The output file cannot be known, the "file" '
+                    'key is missing or the info_dict is incomplete')
+                return
+        if 'skip' in test_case:
+            print_skipping(test_case['skip'])
+            return
+        for other_ie in other_ies:
+            if not other_ie.working():
+                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                return
+
+        params = 
get_params(test_case.get('params', {})) + + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + finished_hook_called = set() + def _hook(status): + if status['status'] == 'finished': + finished_hook_called.add(status['filename']) + ydl.add_progress_hook(_hook) + + def get_tc_filename(tc): + return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) + + test_cases = test_case.get('playlist', [test_case]) + def try_rm_tcs_files(): + for tc in test_cases: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(os.path.splitext(tc_filename)[0] + '.info.json') + try_rm_tcs_files() + try: + try_num = 1 + while True: + try: + ydl.download([test_case['url']]) + except (DownloadError, ExtractorError) as err: + # Check if the exception is not a network related one + if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): + raise + + if try_num == RETRIES: + report_warning(u'Failed due to network errors, skipping...') + return + + print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + + try_num += 1 + else: + break + + for tc in test_cases: + tc_filename = get_tc_filename(tc) + if not test_case.get('params', {}).get('skip_download', False): + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' + self.assertTrue(os.path.exists(info_json_fn)) + if 'md5' in tc: + md5_for_file = _file_md5(tc_filename) + self.assertEqual(md5_for_file, tc['md5']) + with io.open(info_json_fn, encoding='utf-8') as infof: + info_dict = json.load(infof) + for (info_field, expected) in tc.get('info_dict', {}).items(): + if isinstance(expected, compat_str) and expected.startswith('md5:'): + got = 'md5:' + md5(info_dict.get(info_field)) + else: + got = info_dict.get(info_field) + self.assertEqual(expected, got, + u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) + + # If checkable fields are missing from the test case, print the info_dict + test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) + for key, value in info_dict.items() + if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) + if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): + sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') + + # Check for the presence of mandatory fields + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(key in info_dict.keys() and info_dict[key]) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) + finally: + try_rm_tcs_files() + + return test_template + +### And add them to TestDownload +for n, test_case in enumerate(defs): + test_method = generator(test_case) + tname = 'test_' + str(test_case['name']) + i = 1 + while hasattr(TestDownload, tname): + tname = 'test_' + str(test_case['name']) + '_' + str(i) + i += 1 + test_method.__name__ = tname + setattr(TestDownload, test_method.__name__, test_method) + del test_method + + +if __name__ == '__main__': + unittest.main() diff -Nru 
youtube-dl-2012.09.27/test/test_execution.py youtube-dl-2014.02.17/test/test_execution.py --- youtube-dl-2012.09.27/test/test_execution.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_execution.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,26 @@ +import unittest + +import sys +import os +import subprocess + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +try: + _DEV_NULL = subprocess.DEVNULL +except AttributeError: + _DEV_NULL = open(os.devnull, 'wb') + +class TestExecution(unittest.TestCase): + def test_import(self): + subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) + + def test_module_exec(self): + if sys.version_info >= (2,7): # Python 2.6 doesn't support package execution + subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + + def test_main_exec(self): + subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_playlists.py youtube-dl-2014.02.17/test/test_playlists.py --- youtube-dl-2012.09.27/test/test_playlists.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_playlists.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# encoding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL + + +from youtube_dl.extractor import ( + AcademicEarthCourseIE, + DailymotionPlaylistIE, + DailymotionUserIE, + VimeoChannelIE, + VimeoUserIE, + VimeoAlbumIE, + VimeoGroupsIE, + UstreamChannelIE, + SoundcloudSetIE, + SoundcloudUserIE, + LivestreamIE, + NHLVideocenterIE, + BambuserChannelIE, + BandcampAlbumIE, + SmotriCommunityIE, + SmotriUserIE, + IviCompilationIE, + ImdbListIE, + KhanAcademyIE, + EveryonesMixtapeIE, + RutubeChannelIE, + GoogleSearchIE, + GenericIE, +) + + +class TestPlaylists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_dailymotion_playlist(self): + dl = FakeYDL() + ie = DailymotionPlaylistIE(dl) + result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'SPORT') + self.assertTrue(len(result['entries']) > 20) + + def test_dailymotion_user(self): + dl = FakeYDL() + ie = DailymotionUserIE(dl) + result = ie.extract('https://www.dailymotion.com/user/nqtv') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Rémi Gaillard') + self.assertTrue(len(result['entries']) >= 100) + + def test_vimeo_channel(self): + dl = FakeYDL() + ie = VimeoChannelIE(dl) + result = ie.extract('http://vimeo.com/channels/tributes') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Vimeo Tributes') + self.assertTrue(len(result['entries']) > 24) + + def test_vimeo_user(self): + dl = FakeYDL() + ie = VimeoUserIE(dl) + result = ie.extract('http://vimeo.com/nkistudio/videos') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Nki') + self.assertTrue(len(result['entries']) > 65) + + def test_vimeo_album(self): + dl = FakeYDL() + ie = VimeoAlbumIE(dl) + result = ie.extract('http://vimeo.com/album/2632481') + 
self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Staff Favorites: November 2013') + self.assertTrue(len(result['entries']) > 12) + + def test_vimeo_groups(self): + dl = FakeYDL() + ie = VimeoGroupsIE(dl) + result = ie.extract('http://vimeo.com/groups/rolexawards') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Rolex Awards for Enterprise') + self.assertTrue(len(result['entries']) > 72) + + def test_ustream_channel(self): + dl = FakeYDL() + ie = UstreamChannelIE(dl) + result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '5124905') + self.assertTrue(len(result['entries']) >= 11) + + def test_soundcloud_set(self): + dl = FakeYDL() + ie = SoundcloudSetIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'The Royal Concept EP') + self.assertTrue(len(result['entries']) >= 6) + + def test_soundcloud_user(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 12) + + def test_livestream_event(self): + dl = FakeYDL() + ie = LivestreamIE(dl) + result = ie.extract('http://new.livestream.com/tedx/cityenglish') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'TEDCity2.0 (English)') + self.assertTrue(len(result['entries']) >= 4) + + def test_nhl_videocenter(self): + dl = FakeYDL() + ie = NHLVideocenterIE(dl) + result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '999') + self.assertEqual(result['title'], 'Highlights') + self.assertEqual(len(result['entries']), 12) + + def test_bambuser_channel(self): + dl = FakeYDL() + ie = BambuserChannelIE(dl) + result = ie.extract('http://bambuser.com/channel/pixelversity') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'pixelversity') + self.assertTrue(len(result['entries']) >= 60) + + def test_bandcamp_album(self): + dl = FakeYDL() + ie = BandcampAlbumIE(dl) + result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'Nightmare Night EP') + self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_community(self): + dl = FakeYDL() + ie = SmotriCommunityIE(dl) + result = ie.extract('http://smotri.com/community/video/kommuna') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'kommuna') + self.assertEqual(result['title'], 'КПРФ') + self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_user(self): + dl = FakeYDL() + ie = SmotriUserIE(dl) + result = ie.extract('http://smotri.com/user/inspector') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'inspector') + self.assertEqual(result['title'], 'Inspector') + self.assertTrue(len(result['entries']) >= 9) + + def test_AcademicEarthCourse(self): + dl = FakeYDL() + ie = AcademicEarthCourseIE(dl) + result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'building-dynamic-websites') + self.assertEqual(result['title'], 'Building Dynamic Websites') + self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. 
Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") + self.assertEqual(len(result['entries']), 10) + + def test_ivi_compilation(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'dezhurnyi_angel') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') + self.assertTrue(len(result['entries']) >= 36) + + def test_ivi_compilation_season(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'dezhurnyi_angel/season2') + self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') + self.assertTrue(len(result['entries']) >= 20) + + def test_imdb_list(self): + dl = FakeYDL() + ie = ImdbListIE(dl) + result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'JFs9NWw6XI0') + self.assertEqual(result['title'], 'March 23, 2012 Releases') + self.assertEqual(len(result['entries']), 7) + + def test_khanacademy_topic(self): + dl = FakeYDL() + ie = KhanAcademyIE(dl) + result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'cryptography') + self.assertEqual(result['title'], 'Journey into cryptography') + self.assertEqual(result['description'], 'How have humans protected their secret messages through history? 
What has changed today?') + self.assertTrue(len(result['entries']) >= 3) + + def test_EveryonesMixtape(self): + dl = FakeYDL() + ie = EveryonesMixtapeIE(dl) + result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'm7m0jJAbMQi') + self.assertEqual(result['title'], 'Driving') + self.assertEqual(len(result['entries']), 24) + + def test_rutube_channel(self): + dl = FakeYDL() + ie = RutubeChannelIE(dl) + result = ie.extract('http://rutube.ru/tags/video/1409') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '1409') + self.assertTrue(len(result['entries']) >= 34) + + def test_multiple_brightcove_videos(self): + # https://github.com/rg3/youtube-dl/issues/2283 + dl = FakeYDL() + ie = GenericIE(dl) + result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') + self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') + self.assertEqual(len(result['entries']), 3) + + def test_GoogleSearch(self): + dl = FakeYDL() + ie = GoogleSearchIE(dl) + result = ie.extract('gvsearch15:python language') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'python language') + self.assertEqual(result['title'], 'python language') + self.assertTrue(len(result['entries']) == 15) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_subtitles.py youtube-dl-2014.02.17/test/test_subtitles.py --- youtube-dl-2012.09.27/test/test_subtitles.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_subtitles.py 2014-02-03 14:18:07.000000000 +0000 @@ -0,0 +1,283 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, md5 + + +from youtube_dl.extractor import ( + BlipTVIE, + YoutubeIE, + DailymotionIE, + TEDIE, + VimeoIE, +) + + +class BaseTestSubtitles(unittest.TestCase): + url = None + IE = None + def setUp(self): + self.DL = FakeYDL() + self.ie = self.IE(self.DL) + + def getInfoDict(self): + info_dict = self.ie.extract(self.url) + return info_dict + + def getSubtitles(self): + info_dict = self.getInfoDict() + return info_dict['subtitles'] + + +class TestYoutubeSubtitles(BaseTestSubtitles): + url = 'QRS8MkLhQmM' + IE = YoutubeIE + + def test_youtube_no_writesubtitles(self): + self.DL.params['writesubtitles'] = False + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_youtube_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') + + def test_youtube_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['it'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') + + def test_youtube_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 13) + + def test_youtube_subtitles_sbv_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'sbv' + subtitles = self.getSubtitles() + 
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') + + def test_youtube_subtitles_vtt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'vtt' + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') + + def test_youtube_list_subtitles(self): + self.DL.expect_warning(u'Video doesn\'t have automatic captions') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_youtube_automatic_captions(self): + self.url = '8YoUxe5ncPo' + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslangs'] = ['it'] + subtitles = self.getSubtitles() + self.assertTrue(subtitles['it'] is not None) + + def test_youtube_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'sAjKT8FhjI8' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_youtube_multiple_langs(self): + self.url = 'QRS8MkLhQmM' + self.DL.params['writesubtitles'] = True + langs = ['it', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestDailymotionSubtitles(BaseTestSubtitles): + url = 'http://www.dailymotion.com/video/xczg00' + IE = DailymotionIE + + def test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 5) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + + def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestTedSubtitles(BaseTestSubtitles): + url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' + IE = TEDIE + + def 
test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 28) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +class TestBlipTVSubtitles(BaseTestSubtitles): + url = 'http://blip.tv/a/a-6603250' + IE = BlipTVIE + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_allsubtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4') + + +class TestVimeoSubtitles(BaseTestSubtitles): + url = 'http://vimeo.com/76979871' + IE = VimeoIE + + def test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') + + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) + + def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + 
+ def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') + self.url = 'http://vimeo.com/56015672' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_unicode_literals.py youtube-dl-2014.02.17/test/test_unicode_literals.py --- youtube-dl-2012.09.27/test/test_unicode_literals.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_unicode_literals.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,47 @@ +from __future__ import unicode_literals + +import io +import os +import re +import unittest + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +IGNORED_FILES = [ + 'setup.py', # http://bugs.python.org/issue13943 +] + + +class TestUnicodeLiterals(unittest.TestCase): + def test_all_files(self): + print('Skipping this test (not yet fully implemented)') + return + + for dirpath, _, filenames in os.walk(rootDir): + for basename in filenames: + if not basename.endswith('.py'): + continue + if basename in IGNORED_FILES: + continue + + fn = os.path.join(dirpath, basename) + with io.open(fn, encoding='utf-8') as inf: + code = inf.read() + + if "'" not in code and '"' not in code: + continue + imps = 'from __future__ import unicode_literals' + self.assertTrue( + imps in code, + ' %s missing in %s' % (imps, fn)) + + m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code) + if m is not None: + self.assertTrue( + m is None, + 'u present in %s, around %s' % ( + fn, code[m.start() - 10:m.end() + 10])) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_utils.py youtube-dl-2014.02.17/test/test_utils.py --- youtube-dl-2012.09.27/test/test_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_utils.py 2014-02-17 10:22:30.000000000 +0000 @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# Various small unit tests +import xml.etree.ElementTree + +#from youtube_dl.utils import htmlentity_transform +from youtube_dl.utils import ( + DateRange, + encodeFilename, + find_xpath_attr, + fix_xml_ampersands, + get_meta_content, + orderedSet, + PagedList, + parse_duration, + sanitize_filename, + shell_quote, + smuggle_url, + str_to_int, + struct_unpack, + timeconvert, + unescapeHTML, + unified_strdate, + unsmuggle_url, + url_basename, + xpath_with_ns, +) + +if sys.version_info < (3, 0): + _compat_str = lambda b: b.decode('unicode-escape') +else: + _compat_str = lambda s: s + + +class TestUtil(unittest.TestCase): + def test_timeconvert(self): + self.assertTrue(timeconvert('') is None) + self.assertTrue(timeconvert('bougrg') is None) + + def test_sanitize_filename(self): + self.assertEqual(sanitize_filename('abc'), 'abc') + self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') + + self.assertEqual(sanitize_filename('123'), '123') + + self.assertEqual('abc_de', sanitize_filename('abc/de')) + self.assertFalse('/' in sanitize_filename('abc/de///')) 
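+ # Editorial aside, not in the diff: the assertions around this point pin
+ # down default-mode sanitize_filename(). Path separators map to '_', runs
+ # of '_' are merged, and leading/trailing underscores are stripped, so
+ # e.g. 'abc/de///' is expected to come out as 'abc_de'.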
+ + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de')) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|')) + self.assertEqual('yes no', sanitize_filename('yes? no')) + self.assertEqual('this - that', sanitize_filename('this: that')) + + self.assertEqual(sanitize_filename('AT&T'), 'AT&T') + aumlaut = _compat_str('\xe4') + self.assertEqual(sanitize_filename(aumlaut), aumlaut) + tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430') + self.assertEqual(sanitize_filename(tests), tests) + + forbidden = '"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename('abc', restricted=True), 'abc') + self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e') + + self.assertEqual(sanitize_filename('123', restricted=True), '123') + + self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True)) + self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True)) + + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True)) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True)) + self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) + self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) + + tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c') + self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') + self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '') # No empty filename + + forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) + + # Handle a common case more neatly + self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song') + self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech') + # .. 
but make sure the file name is never empty + self.assertTrue(sanitize_filename('-', restricted=True) != '') + self.assertTrue(sanitize_filename(':', restricted=True) != '') + + def test_sanitize_ids(self): + self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') + self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') + self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + + def test_ordered_set(self): + self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) + self.assertEqual(orderedSet([]), []) + self.assertEqual(orderedSet([1]), [1]) + #keep the list ordered + self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) + + def test_unescape_html(self): + self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) + + def test_daterange(self): + _20century = DateRange("19000101","20000101") + self.assertFalse("17890714" in _20century) + _ac = DateRange("00010101") + self.assertTrue("19690721" in _ac) + _firstmilenium = DateRange(end="10000101") + self.assertTrue("07110427" in _firstmilenium) + + def test_unified_dates(self): + self.assertEqual(unified_strdate('December 21, 2010'), '20101221') + self.assertEqual(unified_strdate('8/7/2009'), '20090708') + self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') + self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') + self.assertEqual(unified_strdate('1968-12-10'), '19681210') + + def test_find_xpath_attr(self): + testxml = u''' + + + + + ''' + doc = xml.etree.ElementTree.fromstring(testxml) + + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + + def test_meta_parser(self): + testhtml = u''' + + + + + ''' + get_meta = lambda name: get_meta_content(name, testhtml) + self.assertEqual(get_meta('description'), u'foo & bar') + self.assertEqual(get_meta('author'), 'Plato') + + def test_xpath_with_ns(self): + testxml = u''' + + The Author + http://server.com/download.mp3 + + ''' + doc = xml.etree.ElementTree.fromstring(testxml) + find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) + self.assertTrue(find('media:song') is not None) + self.assertEqual(find('media:song/media:author').text, u'The Author') + self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + def test_shell_quote(self): + args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] + self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") + + def test_str_to_int(self): + self.assertEqual(str_to_int('123,456'), 123456) + self.assertEqual(str_to_int('123.456'), 123456) + + def test_url_basename(self): + self.assertEqual(url_basename(u'http://foo.de/'), u'') + self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') + self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz') + self.assertEqual( + 
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), + u'trailer.mp4') + + def test_parse_duration(self): + self.assertEqual(parse_duration(None), None) + self.assertEqual(parse_duration('1'), 1) + self.assertEqual(parse_duration('1337:12'), 80232) + self.assertEqual(parse_duration('9:12:43'), 33163) + self.assertEqual(parse_duration('12:00'), 720) + self.assertEqual(parse_duration('00:01:01'), 61) + self.assertEqual(parse_duration('x:y'), None) + self.assertEqual(parse_duration('3h11m53s'), 11513) + self.assertEqual(parse_duration('62m45s'), 3765) + self.assertEqual(parse_duration('6m59s'), 419) + self.assertEqual(parse_duration('49s'), 49) + self.assertEqual(parse_duration('0h0m0s'), 0) + self.assertEqual(parse_duration('0m0s'), 0) + self.assertEqual(parse_duration('0s'), 0) + + def test_fix_xml_ampersands(self): + self.assertEqual( + fix_xml_ampersands('"&x=y&z=a'), '"&x=y&z=a') + self.assertEqual( + fix_xml_ampersands('"&x=y&wrong;&z=a'), + '"&x=y&wrong;&z=a') + self.assertEqual( + fix_xml_ampersands('&'><"'), + '&'><"') + self.assertEqual( + fix_xml_ampersands('Ӓ᪼'), 'Ӓ᪼') + self.assertEqual(fix_xml_ampersands('&#&#'), '&#&#') + + def test_paged_list(self): + def testPL(size, pagesize, sliceargs, expected): + def get_page(pagenum): + firstid = pagenum * pagesize + upto = min(size, pagenum * pagesize + pagesize) + for i in range(firstid, upto): + yield i + + pl = PagedList(get_page, pagesize) + got = pl.getslice(*sliceargs) + self.assertEqual(got, expected) + + testPL(5, 2, (), [0, 1, 2, 3, 4]) + testPL(5, 2, (1,), [1, 2, 3, 4]) + testPL(5, 2, (2,), [2, 3, 4]) + testPL(5, 2, (4,), [4]) + testPL(5, 2, (0, 3), [0, 1, 2]) + testPL(5, 2, (1, 4), [1, 2, 3]) + testPL(5, 2, (2, 99), [2, 3, 4]) + testPL(5, 2, (20, 99), []) + + def test_struct_unpack(self): + self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_write_annotations.py youtube-dl-2014.02.17/test/test_write_annotations.py --- youtube-dl-2012.09.27/test/test_write_annotations.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_write_annotations.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params, try_rm + + +import io + +import xml.etree.ElementTree + +import youtube_dl.YoutubeDL +import youtube_dl.extractor + + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + +class TestAnnotations(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + + def test_info_json(self): + expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text. 
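+ # Note (editorial, not in the diff): the test drives the pipeline by hand
+ # instead of going through main(): it instantiates the YouTube extractor,
+ # registers it on the YoutubeDL subclass above, and downloads by bare
+ # video id, which YoutubeIE accepts alongside full URLs.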
+ ie = youtube_dl.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) + annoxml = None + with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: + annoxml = xml.etree.ElementTree.parse(annof) + self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') + root = annoxml.getroot() + self.assertEqual(root.tag, 'document') + annotationsTag = root.find('annotations') + self.assertEqual(annotationsTag.tag, 'annotations') + annotations = annotationsTag.findall('annotation') + + #Not all the annotations have TEXT children and the annotations are returned unsorted. + for a in annotations: + self.assertEqual(a.tag, 'annotation') + if a.get('type') == 'text': + textTag = a.find('TEXT') + text = textTag.text + self.assertTrue(text in expected) #assertIn only added in Python 2.7 + #remove the first occurrence; there could be more than one annotation with the same text + expected.remove(text) + #We should have seen (and removed) all the expected annotation texts. + self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') + + + def tearDown(self): + try_rm(ANNOTATIONS_FILE) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_write_info_json.py youtube-dl-2014.02.17/test/test_write_info_json.py --- youtube-dl-2012.09.27/test/test_write_info_json.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_write_info_json.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# coding: utf-8 + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params + + +import io +import json + +import youtube_dl.YoutubeDL +import youtube_dl.extractor + + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + +params = get_params({ + 'writeinfojson': True, + 'skip_download': True, + 'writedescription': True, +}) + + +TEST_ID = 'BaW_jenozKc' +INFO_JSON_FILE = TEST_ID + '.info.json' +DESCRIPTION_FILE = TEST_ID + '.mp4.description' +EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 +test URL: https://github.com/rg3/youtube-dl/issues/1892 + +This is a test video for youtube-dl.
+ +For more information, contact phihag@phihag.de .''' + + +class TestInfoJSON(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + def test_info_json(self): + ie = youtube_dl.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(INFO_JSON_FILE)) + with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf: + jd = json.load(jsonf) + self.assertEqual(jd['upload_date'], u'20121002') + self.assertEqual(jd['description'], EXPECTED_DESCRIPTION) + self.assertEqual(jd['id'], TEST_ID) + self.assertEqual(jd['extractor'], 'youtube') + self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''') + self.assertEqual(jd['uploader'], 'Philipp Hagemeister') + + self.assertTrue(os.path.exists(DESCRIPTION_FILE)) + with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf: + descr = descf.read() + self.assertEqual(descr, EXPECTED_DESCRIPTION) + + def tearDown(self): + if os.path.exists(INFO_JSON_FILE): + os.remove(INFO_JSON_FILE) + if os.path.exists(DESCRIPTION_FILE): + os.remove(DESCRIPTION_FILE) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_YoutubeDL.py youtube-dl-2014.02.17/test/test_YoutubeDL.py --- youtube-dl-2012.09.27/test/test_YoutubeDL.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_YoutubeDL.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,249 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL +from youtube_dl import YoutubeDL +from youtube_dl.extractor import YoutubeIE + + +class YDL(FakeYDL): + def __init__(self, *args, **kwargs): + super(YDL, self).__init__(*args, **kwargs) + self.downloaded_info_dicts = [] + self.msgs = [] + + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + + def to_screen(self, msg): + self.msgs.append(msg) + + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 460}, + {'ext': 'mp4', 'height': 460}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 720}, + {'ext': 'mp4', 'height': 1080}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + # No prefer_free_formats => prefer mp4 and flv for greater compatibility + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'webm', 'height': 720}, + {'ext': 'mp4', 'height': 720}, + {'ext': 'flv', 'height': 720}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + ydl = YDL() + ydl.params['prefer_free_formats'] = False +
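# Fourth case (editorial annotation, not in the diff): with the
+ # free-format preference off, the flv container is expected to outrank
+ # webm at equal height, mirroring the mp4 case just above.
+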
formats = [ + {'ext': 'flv', 'height': 720}, + {'ext': 'webm', 'height': 720}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'flv') + + def test_format_limit(self): + formats = [ + {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1}, + {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2}, + {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, + {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, + ] + info_dict = { + 'formats': formats, 'extractor': 'test', 'id': 'testvid'} + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'excellent') + + ydl = YDL({'format_limit': 'good'}) + assert ydl.params['format_limit'] == 'good' + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'good') + + ydl = YDL({'format_limit': 'great', 'format': 'all'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh') + self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good') + self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great') + self.assertTrue('3' in ydl.msgs[0]) + + ydl = YDL() + ydl.params['format_limit'] = 'excellent' + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'excellent') + + def test_format_selection(self): + formats = [ + {'format_id': '35', 'ext': 'mp4', 'preference': 1}, + {'format_id': '45', 'ext': 'webm', 'preference': 2}, + {'format_id': '47', 'ext': 'webm', 'preference': 3}, + {'format_id': '2', 'ext': 'flv', 'preference': 4}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': '20/47'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '20/71/worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + ydl = YDL() + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '2') + + ydl = YDL({'format': 'webm/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '3gp/40/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + def test_format_selection_audio(self): + formats = [ + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': 'bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-high') + + ydl = YDL({'format': 'worstaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = 
ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-low') + + formats = [ + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, + ] + info_dict = {'formats': formats, 'extractor': 'test'} + + ydl = YDL({'format': 'bestaudio/worstaudio/best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-high') + + def test_youtube_format_selection(self): + order = [ + '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13', + # Apple HTTP Live Streaming + '96', '95', '94', '93', '92', '132', '151', + # 3D + '85', '84', '102', '83', '101', '82', '100', + # Dash video + '138', '137', '248', '136', '247', '135', '246', + '245', '244', '134', '243', '133', '242', '160', + # Dash audio + '141', '172', '140', '139', '171', + ] + + for f1id, f2id in zip(order, order[1:]): + f1 = YoutubeIE._formats[f1id].copy() + f1['format_id'] = f1id + f2 = YoutubeIE._formats[f2id].copy() + f2['format_id'] = f2id + + info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1id) + + info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} + ydl = YDL() + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1id) + + def test_add_extra_info(self): + test_dict = { + 'extractor': 'Foo', + } + extra_info = { + 'extractor': 'Bar', + 'playlist': 'funny videos', + } + YDL.add_extra_info(test_dict, extra_info) + self.assertEqual(test_dict['extractor'], 'Foo') + self.assertEqual(test_dict['playlist'], 'funny videos') + + def test_prepare_filename(self): + info = { + 'id': '1234', + 'ext': 'mp4', + 'width': None, + } + def fname(templ): + ydl = YoutubeDL({'outtmpl': templ}) + return ydl.prepare_filename(info) + self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4') + self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') + # Replace missing fields with 'NA' + self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_youtube_lists.py youtube-dl-2014.02.17/test/test_youtube_lists.py --- youtube-dl-2012.09.27/test/test_youtube_lists.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_youtube_lists.py 2014-02-08 18:00:32.000000000 +0000 @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL + + +from youtube_dl.extractor import ( + YoutubeUserIE, + YoutubePlaylistIE, + YoutubeIE, + YoutubeChannelIE, + YoutubeShowIE, + YoutubeTopListIE, +) + + +class TestYoutubeLists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_youtube_playlist(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'ytdl test PL') + 
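# Editorial note, not in the diff: each playlist entry is a url-type
+ # info dict, so extract_id() below maps every entry URL back to its
+ # 11-character video id before membership and order are asserted.
+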
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] + self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) + + def test_youtube_playlist_noplaylist(self): + dl = FakeYDL() + dl.params['noplaylist'] = True + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + self.assertEqual(result['_type'], 'url') + self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') + + def test_issue_673(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('PLBB231211A4F62143') + self.assertTrue(len(result['entries']) > 25) + + def test_youtube_playlist_long(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + self.assertIsPlaylist(result) + self.assertTrue(len(result['entries']) >= 799) + + def test_youtube_playlist_with_deleted(self): + #651 + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] + self.assertFalse('pElCt5oNDuI' in ytie_results) + self.assertFalse('KdPEApIVdWM' in ytie_results) + + def test_youtube_playlist_empty(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') + self.assertIsPlaylist(result) + self.assertEqual(len(result['entries']), 0) + + def test_youtube_course(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + # TODO find a > 100 (paginating?) videos course + result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + entries = result['entries'] + self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(len(entries), 25) + self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') + + def test_youtube_channel(self): + dl = FakeYDL() + ie = YoutubeChannelIE(dl) + #test paginated channel + result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') + self.assertTrue(len(result['entries']) > 90) + #test autogenerated channel + result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + self.assertTrue(len(result['entries']) >= 18) + + def test_youtube_user(self): + dl = FakeYDL() + ie = YoutubeUserIE(dl) + result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') + self.assertTrue(len(result['entries']) >= 320) + + def test_youtube_safe_search(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') + self.assertEqual(len(result['entries']), 2) + + def test_youtube_show(self): + dl = FakeYDL() + ie = YoutubeShowIE(dl) + result = ie.extract('http://www.youtube.com/show/airdisasters') + self.assertTrue(len(result) >= 3) + + def test_youtube_mix(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y') + entries = result['entries'] + self.assertTrue(len(entries) >= 20) + original_video = entries[0] + self.assertEqual(original_video['id'], 'rjFaenf1T-Y') + + def test_youtube_toptracks(self): + dl = FakeYDL() + ie = YoutubePlaylistIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=MCUS') + entries = result['entries'] + self.assertEqual(len(entries), 100) + + def 
test_youtube_toplist(self): + dl = FakeYDL() + ie = YoutubeTopListIE(dl) + result = ie.extract('yttoplist:music:Trending') + entries = result['entries'] + self.assertTrue(len(entries) >= 5) + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/test/test_youtube_signature.py youtube-dl-2014.02.17/test/test_youtube_signature.py --- youtube-dl-2012.09.27/test/test_youtube_signature.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/test/test_youtube_signature.py 2014-02-04 21:37:49.000000000 +0000 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import io +import re +import string + +from youtube_dl.extractor import YoutubeIE +from youtube_dl.utils import compat_str, compat_urlretrieve + +_TESTS = [ + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + u'js', + 86, + u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + u'js', + 85, + u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', + u'js', + 90, + u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', + ), +] + + +class TestSignature(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + if not os.path.exists(self.TESTDATA_DIR): + os.mkdir(self.TESTDATA_DIR) + + +def make_tfunc(url, stype, sig_length, expected_sig): + basename = url.rpartition('/')[2] + m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) + assert m, '%r should follow URL format' % basename + test_id = m.group(1) + + def test_func(self): + fn = os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + compat_urlretrieve(url, fn) + + ie = YoutubeIE() + if stype == 'js': + with io.open(fn, encoding='utf-8') as testf: + jscode = testf.read() + func = ie._parse_sig_js(jscode) + else: + assert stype == 'swf' + with open(fn, 'rb') as testf: + swfcode = testf.read() + func = ie._parse_sig_swf(swfcode) + src_sig = compat_str(string.printable[:sig_length]) + got_sig = func(src_sig) + self.assertEqual(got_sig, expected_sig) + + test_func.__name__ = str('test_signature_' + stype + '_' + test_id) + setattr(TestSignature, test_func.__name__, test_func) + +for test_spec in _TESTS: + make_tfunc(*test_spec) + + +if __name__ == '__main__': + unittest.main() diff -Nru youtube-dl-2012.09.27/youtube_dl/aes.py youtube-dl-2014.02.17/youtube_dl/aes.py --- youtube-dl-2012.09.27/youtube_dl/aes.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/aes.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,310 @@ +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] + +import base64 +from math import ceil + +from .utils import bytes_to_intlist, intlist_to_bytes + +BLOCK_SIZE_BYTES = 16 + +def aes_ctr_decrypt(data, key, counter): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block) + returns the next counter block + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = 
int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + for i in range(block_count): + counter_block = counter.next_value() + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + decrypted_data += xor(block, cipher_counter_block) + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def aes_cbc_decrypt(data, key, iv): + """ + Decrypt with aes in CBC mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + previous_cipher_block = iv + for i in range(block_count): + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + decrypted_block = aes_decrypt(block, expanded_key) + decrypted_data += xor(decrypted_block, previous_cipher_block) + previous_cipher_block = block + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds+1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = mix_columns(data) + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt(data, expanded_key): + """ + Decrypt one block with aes + + @param {int[]} data 16-Byte cipher + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte state + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + for i in range(rounds, 0, -1): + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + if i != rounds: + data = mix_columns_inv(data) + data = shift_rows_inv(data) + data = sub_bytes_inv(data) + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 
0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode('utf-8')) + + key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + class Counter: + __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + + decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) + plaintext = intlist_to_bytes(decrypted_data) + + return plaintext + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 
0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) +MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1), + (0x1,0x2,0x3,0x1), + (0x1,0x1,0x2,0x3), + (0x3,0x1,0x1,0x2)) +MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9), + (0x9,0xE,0xB,0xD), + (0xD,0x9,0xE,0xB), + (0xB,0xD,0x9,0xE)) +RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, + 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, + 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, + 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, + 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, + 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, + 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, + 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, + 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, + 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, + 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, + 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, + 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, + 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, + 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, + 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) +RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, + 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, + 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, + 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, + 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, + 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, + 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, + 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, + 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, + 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, + 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 
0xb7, + 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, + 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, + 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, + 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, + 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) + +def sub_bytes(data): + return [SBOX[x] for x in data] + +def sub_bytes_inv(data): + return [SBOX_INV[x] for x in data] + +def rotate(data): + return data[1:] + [data[0]] + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + +def xor(data1, data2): + return [x^y for x, y in zip(data1, data2)] + +def rijndael_mul(a, b): + if(a==0 or b==0): + return 0 + return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] + +def mix_column(data, matrix): + data_mixed = [] + for row in range(4): + mixed = 0 + for column in range(4): + # xor is (+) and (-) + mixed ^= rijndael_mul(data[column], matrix[row][column]) + data_mixed.append(mixed) + return data_mixed + +def mix_columns(data, matrix=MIX_COLUMN_MATRIX): + data_mixed = [] + for i in range(4): + column = data[i*4 : (i+1)*4] + data_mixed += mix_column(column, matrix) + return data_mixed + +def mix_columns_inv(data): + return mix_columns(data, MIX_COLUMN_MATRIX_INV) + +def shift_rows(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column + row) & 0b11) * 4 + row] ) + return data_shifted + +def shift_rows_inv(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column - row) & 0b11) * 4 + row] ) + return data_shifted + +def inc(data): + data = data[:] # copy + for i in range(len(data)-1,-1,-1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = data[i] + 1 + break + return data diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/common.py youtube-dl-2014.02.17/youtube_dl/downloader/common.py --- youtube-dl-2012.09.27/youtube_dl/downloader/common.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/common.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,316 @@ +import os +import re +import sys +import time + +from ..utils import ( + encodeFilename, + timeconvert, + format_bytes, +) + + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. + + Available options: + + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. 
+ test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + + Subclasses of this one must re-define the real_download method. + """ + + params = None + + def __init__(self, ydl, params): + """Create a FileDownloader object with the given options.""" + self.ydl = ydl + self._progress_hooks = [] + self.params = params + + @staticmethod + def format_seconds(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + if hours > 99: + return '--:--:--' + if hours == 0: + return '%02d:%02d' % (mins, secs) + else: + return '%02d:%02d:%02d' % (hours, mins, secs) + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % percent) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return None + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return None + rate = float(current) / dif + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: + return '--:--' + return FileDownloader.format_seconds(eta) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % format_bytes(speed)) + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into an integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return int(round(number * multiplier)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, **kargs) + + def to_stderr(self, message): + self.ydl.to_screen(message) + + def to_console_title(self, message): + self.ydl.to_console_title(message) + + def trouble(self, *args, **kargs): + self.ydl.trouble(*args, **kargs) + + def report_warning(self, *args, **kargs): + self.ydl.report_warning(*args, **kargs) + + def report_error(self, *args, **kargs): + self.ydl.report_error(*args, **kargs) + + def slow_down(self, start_time, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit', None) + if rate_limit is None or byte_counter == 0: + return + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == u'-' or \ + (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + return filename + 
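# Editorial note, not in the diff: everything else is written to
+ # '<filename>.part' first and renamed into place by try_rename() once
+ # the download completes, so an interrupted transfer never leaves a
+ # truncated file under the final name.
+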
return filename + u'.part' + + def undo_temp_name(self, filename): + if filename.endswith(u'.part'): + return filename[:-len(u'.part')] + return filename + + def try_rename(self, old_filename, new_filename): + try: + if old_filename == new_filename: + return + os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) + except (IOError, OSError) as err: + self.report_error(u'unable to rename file: %s' % str(err)) + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(encodeFilename(filename)): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return filetime + # Ignore obviously invalid dates + if filetime == 0: + return + try: + os.utime(filename, (time.time(), filetime)) + except: + pass + return filetime + + def report_destination(self, filename): + """Report destination filename.""" + self.to_screen(u'[download] Destination: ' + filename) + + def _report_progress_status(self, msg, is_last_line=False): + fullmsg = u'[download] ' + msg + if self.params.get('progress_with_newline', False): + self.to_screen(fullmsg) + else: + if os.name == 'nt': + prev_len = getattr(self, '_report_progress_prev_line_length', + 0) + if prev_len > len(fullmsg): + fullmsg += u' ' * (prev_len - len(fullmsg)) + self._report_progress_prev_line_length = len(fullmsg) + clear_line = u'\r' + else: + clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r') + self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) + self.to_console_title(u'youtube-dl ' + msg) + + def report_progress(self, percent, data_len_str, speed, eta): + """Report download progress.""" + if self.params.get('noprogress', False): + return + if eta is not None: + eta_str = self.format_eta(eta) + else: + eta_str = 'Unknown ETA' + if percent is not None: + percent_str = self.format_percent(percent) + else: + percent_str = 'Unknown %' + speed_str = self.format_speed(speed) + + msg = (u'%s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + self._report_progress_status(msg) + + def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): + if self.params.get('noprogress', False): + return + downloaded_str = format_bytes(downloaded_data_len) + speed_str = self.format_speed(speed) + elapsed_str = FileDownloader.format_seconds(elapsed) + msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) + self._report_progress_status(msg) + + def report_finish(self, data_len_str, tot_time): + """Report download finished.""" + if self.params.get('noprogress', False): + self.to_screen(u'[download] Download completed') + else: + self._report_progress_status( + (u'100%% of %s in %s' % + (data_len_str, self.format_seconds(tot_time))), + is_last_line=True) + + def report_resuming_byte(self, resume_len): + """Report attempt to resume at given byte.""" + self.to_screen(u'[download] Resuming download at byte %s' % resume_len) + + def report_retry(self, count, retries): + """Report retry in case of HTTP error 5xx""" + self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' 
% (count, retries)) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + try: + self.to_screen(u'[download] %s has already been downloaded' % file_name) + except UnicodeEncodeError: + self.to_screen(u'[download] The file has already been downloaded') + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_screen(u'[download] Unable to resume') + + def download(self, filename, info_dict): + """Download to a filename using the info from info_dict + Return True on success and False otherwise + """ + # Check file already present + if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) + return True + + return self.real_download(filename, info_dict) + + def real_download(self, filename, info_dict): + """Real download process. Redefine in subclasses.""" + raise NotImplementedError(u'This method must be implemented by subclasses') + + def _hook_progress(self, status): + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + """ ph gets called on download progress, with a dictionary with the entries + * filename: The final filename + * status: One of "downloading" and "finished" + + It can also have some of the following entries: + + * downloaded_bytes: Bytes on disk + * total_bytes: Total bytes, None if unknown + * tmpfilename: The filename we're currently writing to + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if unknown + + Hooks are guaranteed to be called at least once (with status "finished") + if the download is successful.
+ """ + self._progress_hooks.append(ph) diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/f4m.py youtube-dl-2014.02.17/youtube_dl/downloader/f4m.py --- youtube-dl-2012.09.27/youtube_dl/downloader/f4m.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/f4m.py 2014-02-15 19:45:12.000000000 +0000 @@ -0,0 +1,315 @@ +from __future__ import unicode_literals + +import base64 +import io +import itertools +import os +import time +import xml.etree.ElementTree as etree + +from .common import FileDownloader +from .http import HttpFD +from ..utils import ( + struct_pack, + struct_unpack, + compat_urllib_request, + compat_urlparse, + format_bytes, + encodeFilename, + sanitize_open, +) + + +class FlvReader(io.BytesIO): + """ + Reader for Flv files + The file format is documented in https://www.adobe.com/devnet/f4v.html + """ + + # Utility functions for reading numbers and strings + def read_unsigned_long_long(self): + return struct_unpack('!Q', self.read(8))[0] + + def read_unsigned_int(self): + return struct_unpack('!I', self.read(4))[0] + + def read_unsigned_char(self): + return struct_unpack('!B', self.read(1))[0] + + def read_string(self): + res = b'' + while True: + char = self.read(1) + if char == b'\x00': + break + res += char + return res + + def read_box_info(self): + """ + Read a box and return the info as a tuple: (box_size, box_type, box_data) + """ + real_size = size = self.read_unsigned_int() + box_type = self.read(4) + header_end = 8 + if size == 1: + real_size = self.read_unsigned_long_long() + header_end = 16 + return real_size, box_type, self.read(real_size-header_end) + + def read_asrt(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + quality_entry_count = self.read_unsigned_char() + # QualityEntryCount + for i in range(quality_entry_count): + self.read_string() + + segment_run_count = self.read_unsigned_int() + segments = [] + for i in range(segment_run_count): + first_segment = self.read_unsigned_int() + fragments_per_segment = self.read_unsigned_int() + segments.append((first_segment, fragments_per_segment)) + + return { + 'segment_run': segments, + } + + def read_afrt(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + # time scale + self.read_unsigned_int() + + quality_entry_count = self.read_unsigned_char() + # QualitySegmentUrlModifiers + for i in range(quality_entry_count): + self.read_string() + + fragments_count = self.read_unsigned_int() + fragments = [] + for i in range(fragments_count): + first = self.read_unsigned_int() + first_ts = self.read_unsigned_long_long() + duration = self.read_unsigned_int() + if duration == 0: + discontinuity_indicator = self.read_unsigned_char() + else: + discontinuity_indicator = None + fragments.append({ + 'first': first, + 'ts': first_ts, + 'duration': duration, + 'discontinuity_indicator': discontinuity_indicator, + }) + + return { + 'fragments': fragments, + } + + def read_abst(self): + # version + self.read_unsigned_char() + # flags + self.read(3) + # BootstrapinfoVersion + bootstrap_info_version = self.read_unsigned_int() + # Profile,Live,Update,Reserved + self.read(1) + # time scale + self.read_unsigned_int() + # CurrentMediaTime + self.read_unsigned_long_long() + # SmpteTimeCodeOffset + self.read_unsigned_long_long() + # MovieIdentifier + movie_identifier = self.read_string() + server_count = self.read_unsigned_char() + # ServerEntryTable + for i in range(server_count): + self.read_string() + quality_count = self.read_unsigned_char() + # 
QualityEntryTable + for i in range(quality_count): + self.read_string() + # DrmData + self.read_string() + # MetaData + self.read_string() + + segments_count = self.read_unsigned_char() + segments = [] + for i in range(segments_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'asrt' + segment = FlvReader(box_data).read_asrt() + segments.append(segment) + fragments_run_count = self.read_unsigned_char() + fragments = [] + for i in range(fragments_run_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'afrt' + fragments.append(FlvReader(box_data).read_afrt()) + + return { + 'segments': segments, + 'fragments': fragments, + } + + def read_bootstrap_info(self): + total_size, box_type, box_data = self.read_box_info() + assert box_type == b'abst' + return FlvReader(box_data).read_abst() + + +def read_bootstrap_info(bootstrap_bytes): + return FlvReader(bootstrap_bytes).read_bootstrap_info() + + +def build_fragments_list(boot_info): + """ Return a list of (segment, fragment) for each fragment in the video """ + res = [] + segment_run_table = boot_info['segments'][0] + # I've only found videos with one segment + segment_run_entry = segment_run_table['segment_run'][0] + n_frags = segment_run_entry[1] + fragment_run_entry_table = boot_info['fragments'][0]['fragments'] + first_frag_number = fragment_run_entry_table[0]['first'] + for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)): + res.append((1, frag_number)) + return res + + +def write_flv_header(stream, metadata): + """Writes the FLV header and the metadata to stream""" + # FLV header + stream.write(b'FLV\x01') + stream.write(b'\x05') + stream.write(b'\x00\x00\x00\x09') + # FLV File body + stream.write(b'\x00\x00\x00\x00') + # FLVTAG + # Script data + stream.write(b'\x12') + # Size of the metadata with 3 bytes + stream.write(struct_pack('!L', len(metadata))[1:]) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + # Magic numbers extracted from the output files produced by AdobeHDS.php + # (https://github.com/K-S-V/Scripts) + stream.write(b'\x00\x00\x01\x73') + + +def _add_ns(prop): + return '{http://ns.adobe.com/f4m/1.0}%s' % prop + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + +class F4mFD(FileDownloader): + """ + A downloader for f4m manifests or AdobeHDS. 
+ """ + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + self.to_screen('[download] Downloading f4m manifest') + manifest = self.ydl.urlopen(man_url).read() + self.report_destination(filename) + http_dl = HttpQuietDownloader(self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'test': self.params.get('test', False), + }) + + doc = etree.fromstring(manifest) + formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) + metadata = base64.b64decode(media.find(_add_ns('metadata')).text) + boot_info = read_bootstrap_info(bootstrap) + fragments_list = build_fragments_list(boot_info) + if self.params.get('test', False): + # We only download the first fragment + fragments_list = fragments_list[:1] + total_frags = len(fragments_list) + + tmpfilename = self.temp_name(filename) + (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream, metadata) + + # This dict stores the download progress, it's updated by the progress + # hook + state = { + 'downloaded_bytes': 0, + 'frag_counter': 0, + } + start = time.time() + + def frag_progress_hook(status): + frag_total_bytes = status.get('total_bytes', 0) + estimated_size = (state['downloaded_bytes'] + + (total_frags - state['frag_counter']) * frag_total_bytes) + if status['status'] == 'finished': + state['downloaded_bytes'] += frag_total_bytes + state['frag_counter'] += 1 + progress = self.calc_percent(state['frag_counter'], total_frags) + byte_counter = state['downloaded_bytes'] + else: + frag_downloaded_bytes = status['downloaded_bytes'] + byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes + frag_progress = self.calc_percent(frag_downloaded_bytes, + frag_total_bytes) + progress = self.calc_percent(state['frag_counter'], total_frags) + progress += frag_progress / float(total_frags) + + eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) + self.report_progress(progress, format_bytes(estimated_size), + status.get('speed'), eta) + http_dl.add_progress_hook(frag_progress_hook) + + frags_filenames = [] + for (seg_i, frag_i) in fragments_list: + name = 'Seg%d-Frag%d' % (seg_i, frag_i) + url = base_url + name + frag_filename = '%s-%s' % (tmpfilename, name) + success = http_dl.download(frag_filename, {'url': url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + down_data = down.read() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break + frags_filenames.append(frag_filename) + + self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + + self.try_rename(tmpfilename, filename) + for frag_file in frags_filenames: + os.remove(frag_file) + + fsize = os.path.getsize(encodeFilename(filename)) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + + return True diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/hls.py youtube-dl-2014.02.17/youtube_dl/downloader/hls.py --- youtube-dl-2012.09.27/youtube_dl/downloader/hls.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/hls.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +import os 
+import subprocess + +from .common import FileDownloader +from ..utils import ( + encodeFilename, +) + + +class HlsFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', tmpfilename] + + for program in ['avconv', 'ffmpeg']: + try: + subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + break + except (OSError, IOError): + pass + else: + self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') + return False + cmd = [program] + args + + retval = subprocess.call(cmd) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'ffmpeg exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/http.py youtube-dl-2014.02.17/youtube_dl/downloader/http.py --- youtube-dl-2012.09.27/youtube_dl/downloader/http.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/http.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,186 @@ +import os +import time + +from .common import FileDownloader +from ..utils import ( + compat_urllib_request, + compat_urllib_error, + ContentTooShortError, + + encodeFilename, + sanitize_open, + format_bytes, +) + + +class HttpFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + tmpfilename = self.temp_name(filename) + stream = None + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + if 'user_agent' in info_dict: + headers['Youtubedl-user-agent'] = info_dict['user_agent'] + basic_request = compat_urllib_request.Request(url, None, headers) + request = compat_urllib_request.Request(url, None, headers) + + if self.params.get('test', False): + request.add_header('Range', 'bytes=0-10240') + + # Establish possible resume length + if os.path.isfile(encodeFilename(tmpfilename)): + resume_len = os.path.getsize(encodeFilename(tmpfilename)) + else: + resume_len = 0 + + open_mode = 'wb' + if resume_len != 0: + if self.params.get('continuedl', False): + self.report_resuming_byte(resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) + open_mode = 'ab' + else: + resume_len = 0 + + count = 0 + retries = self.params.get('retries', 0) + while count <= retries: + # Establish connection + try: + data = compat_urllib_request.urlopen(request) + break + except (compat_urllib_error.HTTPError, ) as err: + if (err.code < 500 or err.code >= 600) and err.code != 416: + # Unexpected HTTP error + raise + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = compat_urllib_request.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (compat_urllib_error.HTTPError, ) as err: + if err.code < 500 or err.code >= 600: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < int(content_length) < resume_len + 100)): + # The file had already been fully downloaded. 
+ # Explanation of the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # a suggested change was implemented: consider the file completely + # downloaded if its size differs by less than 100 bytes from the size of + # the file already on the hard drive. + self.report_file_already_downloaded(filename) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + }) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.report_error(u'giving up after %s retries' % retries) + return False + + data_len = data.info().get('Content-length', None) + if data_len is not None: + data_len = int(data_len) + resume_len + min_data_len = self.params.get("min_filesize", None) + max_data_len = self.params.get("max_filesize", None) + if min_data_len is not None and data_len < min_data_len: + self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + return False + + data_len_str = format_bytes(data_len) + byte_counter = 0 + resume_len + block_size = self.params.get('buffersize', 1024) + start = time.time() + while True: + # Download and write + before = time.time() + data_block = data.read(block_size) + after = time.time() + if len(data_block) == 0: + break + byte_counter += len(data_block) + + # Open file just in time + if stream is None: + try: + (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + assert stream is not None + filename = self.undo_temp_name(tmpfilename) + self.report_destination(filename) + except (OSError, IOError) as err: + self.report_error(u'unable to open for writing: %s' % str(err)) + return False + try: + stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr(u"\n") + self.report_error(u'unable to write data: %s' % str(err)) + return False + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + # Progress message + speed = self.calc_speed(start, time.time(), byte_counter - resume_len) + if data_len is None: + eta = percent = None + else: + percent = self.calc_percent(byte_counter, data_len) + eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent, data_len_str, speed, eta) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) + + # Apply rate limit + self.slow_down(start, byte_counter - resume_len) + + if stream is None: + self.to_stderr(u"\n") + self.report_error(u'Did not get any data blocks') + return False + stream.close() + self.report_finish(data_len_str, (time.time() - start)) + if data_len is not None and byte_counter != data_len: + raise ContentTooShortError(byte_counter, int(data_len)) + self.try_rename(tmpfilename, filename) + + # Update file modification time + if self.params.get('updatetime', True): 
+ info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/__init__.py youtube-dl-2014.02.17/youtube_dl/downloader/__init__.py --- youtube-dl-2012.09.27/youtube_dl/downloader/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/__init__.py 2014-02-15 14:31:20.000000000 +0000 @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import FileDownloader +from .hls import HlsFD +from .http import HttpFD +from .mplayer import MplayerFD +from .rtmp import RtmpFD +from .f4m import F4mFD + +from ..utils import ( + determine_ext, +) + + +def get_suitable_downloader(info_dict): + """Get the downloader class that can handle the info dict.""" + url = info_dict['url'] + protocol = info_dict.get('protocol') + + if url.startswith('rtmp'): + return RtmpFD + if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): + return HlsFD + if url.startswith('mms') or url.startswith('rtsp'): + return MplayerFD + if determine_ext(url) == 'f4m': + return F4mFD + else: + return HttpFD diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/mplayer.py youtube-dl-2014.02.17/youtube_dl/downloader/mplayer.py --- youtube-dl-2012.09.27/youtube_dl/downloader/mplayer.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/mplayer.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +import os +import subprocess + +from .common import FileDownloader +from ..utils import ( + encodeFilename, +) + + +class MplayerFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] + # Check for mplayer first + try: + subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0]) + return False + + # Download using mplayer. 
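# The call below is equivalent to the shell invocation (URL and output
# name hypothetical):
#
#     mplayer -really-quiet -vo null -vc dummy -dumpstream \
#         -dumpfile video.mp4.part "mms://example.com/stream"
#
# -dumpstream/-dumpfile copy the raw stream bytes to disk, while the null
# video output and dummy codec keep mplayer from actually rendering anything.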
+ retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'mplayer exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/downloader/rtmp.py youtube-dl-2014.02.17/youtube_dl/downloader/rtmp.py --- youtube-dl-2012.09.27/youtube_dl/downloader/rtmp.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/downloader/rtmp.py 2014-02-08 15:23:28.000000000 +0000 @@ -0,0 +1,184 @@ +import os +import re +import subprocess +import sys +import time + +from .common import FileDownloader +from ..utils import ( + encodeFilename, + format_bytes, +) + + +class RtmpFD(FileDownloader): + def real_download(self, filename, info_dict): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = subprocess.Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + while not proc_stderr_closed: + # read line from stderr + line = u'' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True + continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent) + speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + data_len_str = u'~' + format_bytes(data_len) + self.report_progress(percent, data_len_str, speed, eta) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'eta': eta, + 'speed': speed, + }) + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'speed': speed, + }) + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen(u'') + cursor_in_new_line = True + self.to_screen(u'[rtmpdump] '+line) + proc.wait() + if not cursor_in_new_line: + self.to_screen(u'') + return proc.returncode + + url = info_dict['url'] + player_url = info_dict.get('player_url', None) + page_url = info_dict.get('page_url', None) + app = info_dict.get('app', None) + play_path = info_dict.get('play_path', None) + tc_url = info_dict.get('tc_url', None) + flash_version = info_dict.get('flash_version', None) 
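# A note on run_rtmpdump() above: rtmpdump reports progress on stderr with
# lines of the form (sample shape, not captured output):
#
#     3452.118 kB / 24.38 sec (4.1%)
#
# The first regex turns such a line into downloaded_data_len = 3534968 bytes
# and percent = 4.1; live streams print no percentage, which the second,
# percent-less regex handles.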
+ live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn', None) + + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) + + # Check for rtmpdump first + try: + subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'RTMP download detected but "rtmpdump" could not be run') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrupted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if app is not None: + basic_args += ['--app', app] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', tc_url] + if test: + basic_args += ['--stop', '1'] + if flash_version is not None: + basic_args += ['--flashVer', flash_version] + if live: + basic_args += ['--live'] + if conn: + basic_args += ['--conn', conn] + args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] + + if sys.platform == 'win32' and sys.version_info < (3, 0): + # Windows subprocess module does not actually support Unicode + # on Python 2.x + # See http://stackoverflow.com/a/9951851/35070 + subprocess_encoding = sys.getfilesystemencoding() + args = [a.encode(subprocess_encoding, 'ignore') for a in args] + else: + subprocess_encoding = None + + if self.params.get('verbose', False): + if subprocess_encoding: + str_args = [ + a.decode(subprocess_encoding) if isinstance(a, bytes) else a + for a in args] + else: + str_args = args + try: + import pipes + shell_quote = lambda args: ' '.join(map(pipes.quote, args)) + except ImportError: + shell_quote = repr + self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args)) + + retval = run_rtmpdump(args) + + while (retval == 2 or retval == 1) and not test: + prevsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'[rtmpdump] %s bytes' % prevsize) + time.sleep(5.0) # This seems to be needed + retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) + cursize = os.path.getsize(encodeFilename(tmpfilename)) + if prevsize == cursize and retval == 1: + break + # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == 2 and cursize > 1024: + self.to_screen(u'[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') + retval = 0 + break + if retval == 0 or (test and retval == 2): + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'[rtmpdump] %s bytes' % fsize) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'rtmpdump exited with code %d' % retval) + return False diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/academicearth.py youtube-dl-2014.02.17/youtube_dl/extractor/academicearth.py --- youtube-dl-2012.09.27/youtube_dl/extractor/academicearth.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/academicearth.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,32 @@ +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + + +class AcademicEarthCourseIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' + IE_NAME = 'AcademicEarth:Course' + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + playlist_id = m.group('id') + + webpage = self._download_webpage(url, playlist_id) + title = self._html_search_regex( + r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title') + description = self._html_search_regex( + r'<p class="excerpt"[^>]*?>(.*?)</p>', + webpage, u'description', fatal=False) + urls = re.findall( + r'<li class="lecture-preview">\s*<a target="_blank" href="([^"]+)">', + webpage) + entries = [self.url_result(u) for u in urls] + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': title, + 'description': description, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/addanime.py youtube-dl-2014.02.17/youtube_dl/extractor/addanime.py --- youtube-dl-2012.09.27/youtube_dl/extractor/addanime.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/addanime.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,86 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_HTTPError, + compat_str, + compat_urllib_parse, + compat_urllib_parse_urlparse, + + ExtractorError, +) + + +class AddAnimeIE(InfoExtractor): + + _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)' + IE_NAME = u'AddAnime' + _TEST = { + u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', + u'file': u'24MR3YO5SAS9.mp4', + u'md5': u'72954ea10bc979ab5e2eb288b21425a0', + u'info_dict': { + u"description": u"One Piece 606", + u"title": u"One Piece 606" + } + } + + def _real_extract(self, url): + try: + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + webpage = self._download_webpage(url, video_id) + except ExtractorError as ee: + if not isinstance(ee.cause, compat_HTTPError) or \ + ee.cause.code != 503: + raise + + redir_webpage = ee.cause.read().decode('utf-8') + action = self._search_regex( + r'<form id="skip-btn" action="(.*?)"', redir_webpage, u'Redirect form') + vc = self._search_regex( + r'<input type="hidden" name="jschl_vc" value="([^"]*)"
', + redir_webpage, u'redirect vc value') + av = re.search( + r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', + redir_webpage) + if av is None: + raise ExtractorError(u'Cannot find redirect math task') + av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3)) + + parsed_url = compat_urllib_parse_urlparse(url) + av_val = av_res + len(parsed_url.netloc) + confirm_url = ( + parsed_url.scheme + u'://' + parsed_url.netloc + + action + '?' + + compat_urllib_parse.urlencode({ + 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) + self._download_webpage( + confirm_url, video_id, + note=u'Confirming after redirect') + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id in ('normal', 'hq'): + rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) + video_url = self._search_regex(rex, webpage, u'video file URL', + fatal=False) + if not video_url: + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + }) + if not formats: + raise ExtractorError(u'Cannot find any video format!') + video_title = self._og_search_title(webpage) + video_description = self._og_search_description(webpage) + + return { + '_type': 'video', + 'id': video_id, + 'formats': formats, + 'title': video_title, + 'description': video_description + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/anitube.py youtube-dl-2014.02.17/youtube_dl/extractor/anitube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/anitube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/anitube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +import re + +from .common import InfoExtractor + + +class AnitubeIE(InfoExtractor): + IE_NAME = u'anitube.se' + _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.anitube.se/video/36621', + u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', + u'file': u'36621.mp4', + u'info_dict': { + u'id': u'36621', + u'ext': u'mp4', + u'title': u'Recorder to Randoseru 01', + }, + u'skip': u'Blocked in the US', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', + webpage, u'key') + + config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, + key) + + video_title = config_xml.find('title').text + + formats = [] + video_url = config_xml.find('file') + if video_url is not None: + formats.append({ + 'format_id': 'sd', + 'url': video_url.text, + }) + video_url = config_xml.find('filehd') + if video_url is not None: + formats.append({ + 'format_id': 'hd', + 'url': video_url.text, + }) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/aparat.py youtube-dl-2014.02.17/youtube_dl/extractor/aparat.py --- youtube-dl-2012.09.27/youtube_dl/extractor/aparat.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/aparat.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,56 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + HEADRequest, +) + + +class AparatIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' + + _TEST = { + u'url': u'http://www.aparat.com/v/wP8On', + u'file': u'wP8On.mp4', + u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', + 
u'info_dict': { + u"title": u"تیم گلکسی 11 - زومیت", + }, + #u'skip': u'Extremely unreliable', + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + + video_id + u'/vt/frame') + webpage = self._download_webpage(embed_url, video_id) + + video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) + for i, video_url in enumerate(video_urls): + req = HEADRequest(video_url) + res = self._request_webpage( + req, video_id, note=u'Testing video URL %d' % i, errnote=False) + if res: + break + else: + raise ExtractorError(u'No working video URLs found') + + title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title') + thumbnail = self._search_regex( + r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'ext': 'mp4', + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/appletrailers.py youtube-dl-2014.02.17/youtube_dl/extractor/appletrailers.py --- youtube-dl-2012.09.27/youtube_dl/extractor/appletrailers.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/appletrailers.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,135 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + determine_ext, +) + + +class AppleTrailersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' + _TEST = { + "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + "playlist": [ + { + "file": "manofsteel-trailer4.mov", + "md5": "d97a8e575432dbcb81b7c3acb741f8a8", + "info_dict": { + "duration": 111, + "title": "Trailer 4", + "upload_date": "20130523", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-trailer3.mov", + "md5": "b8017b7131b721fb4e8d6f49e1df908c", + "info_dict": { + "duration": 182, + "title": "Trailer 3", + "upload_date": "20130417", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-trailer.mov", + "md5": "d0f1e1150989b9924679b441f3404d48", + "info_dict": { + "duration": 148, + "title": "Trailer", + "upload_date": "20121212", + "uploader_id": "wb", + }, + }, + { + "file": "manofsteel-teaser.mov", + "md5": "5fe08795b943eb2e757fa95cb6def1cb", + "info_dict": { + "duration": 93, + "title": "Teaser", + "upload_date": "20120721", + "uploader_id": "wb", + }, + } + ] + } + + _JSON_RE = r'iTunes.playURL\((.*?)\);' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie = mobj.group('movie') + uploader_id = mobj.group('company') + + playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') + def fix_html(s): + s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s) + s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s) + # The ' in the onClick attributes are not escaped, so pages + # like http://trailers.apple.com/trailers/wb/gravity/ could not be parsed + def _clean_json(m): + return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') + s = re.sub(self._JSON_RE, _clean_json, s) + s = u'<html>' + s + u'</html>' + return s + doc = self._download_xml(playlist_url, movie, transform_source=fix_html) + + playlist = [] + for li in doc.findall('./div/ul/li'): + on_click = li.find('.//a').attrib['onClick'] + trailer_info_json = 
self._search_regex(self._JSON_RE, + on_click, u'trailer info') + trailer_info = json.loads(trailer_info_json) + title = trailer_info['title'] + video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() + thumbnail = li.find('.//img').attrib['src'] + upload_date = trailer_info['posted'].replace('-', '') + + runtime = trailer_info['runtime'] + m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime) + duration = None + if m: + duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) + + first_url = trailer_info['url'] + trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() + settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') + settings = json.loads(settings_json) + + formats = [] + for format in settings['metadata']['sizes']: + # The src is a file pointing to the real video file + format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format': format['type'], + 'width': format['width'], + 'height': int(format['height']), + }) + + self._sort_formats(formats) + + playlist.append({ + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'user_agent': 'QuickTime compatible (youtube-dl)', + }) + + return { + '_type': 'playlist', + 'id': movie, + 'entries': playlist, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/archiveorg.py youtube-dl-2014.02.17/youtube_dl/extractor/archiveorg.py --- youtube-dl-2012.09.27/youtube_dl/extractor/archiveorg.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/archiveorg.py 2014-02-09 17:07:22.000000000 +0000 @@ -0,0 +1,61 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class ArchiveOrgIE(InfoExtractor): + IE_NAME = 'archive.org' + IE_DESC = 'archive.org videos' + _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' + _TEST = { + "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", + 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', + 'md5': '8af1d4cf447933ed3c7f4871162602db', + 'info_dict': { + "title": "1968 Demo - FJCC Conference Presentation Reel #1", + "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also Doug's 1968 Demo page for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | Reel 2 | Reel 3", + "upload_date": "19681210", + "uploader": "SRI International" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + json_url = url + ('?' if '?' 
in url else '&') + 'output=json' + json_data = self._download_webpage(json_url, video_id) + data = json.loads(json_data) + + title = data['metadata']['title'][0] + description = data['metadata']['description'][0] + uploader = data['metadata']['creator'][0] + upload_date = unified_strdate(data['metadata']['date'][0]) + + formats = [ + { + 'format': fdata['format'], + 'url': 'http://' + data['server'] + data['dir'] + fn, + 'file_size': int(fdata['size']), + } + for fn, fdata in data['files'].items() + if 'Video' in fdata['format']] + + self._sort_formats(formats) + + return { + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'thumbnail': data.get('misc', {}).get('image'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ard.py youtube-dl-2014.02.17/youtube_dl/extractor/ard.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ard.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ard.py 2014-01-27 17:39:41.000000000 +0000 @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, +) + + +class ARDIE(InfoExtractor): + _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' + + _TEST = { + 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786', + 'file': '19288786.mp4', + 'md5': '515bf47ce209fb3f5a61b7aad364634c', + 'info_dict': { + 'title': 'Edward Snowden im Interview - Held oder Verräter?', + 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.', + 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037', + }, + 'skip': 'Blocked outside of Germany', + } + + def _real_extract(self, url): + # determine video id from url + m = re.match(self._VALID_URL, url) + + numid = re.search(r'documentId=([0-9]+)', url) + if numid: + video_id = numid.group(1) + else: + video_id = m.group('video_id') + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title') + description = self._html_search_meta( + 'dcterms.abstract', webpage, 'description') + thumbnail = self._og_search_thumbnail(webpage) + + streams = [ + mo.groupdict() + for mo in re.finditer( + r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)] + if not streams: + if '"fsk"' in webpage: + raise ExtractorError('This video is only available after 20:00') + + formats = [] + for s in streams: + format = { + 'quality': int(s['quality']), + } + if s.get('rtmp_url'): + format['protocol'] = 'rtmp' + format['url'] = s['rtmp_url'] + format['playpath'] = s['video_url'] + else: + format['url'] = s['video_url'] + + quality_name = self._search_regex( + r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'], + 'quality name', default='NA') + format['format_id'] = '%s-%s-%s-%s' % ( + determine_ext(format['url']), quality_name, s['media_type'], + s['quality']) + + formats.append(format) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/arte.py youtube-dl-2014.02.17/youtube_dl/extractor/arte.py --- youtube-dl-2012.09.27/youtube_dl/extractor/arte.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/arte.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,282 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + find_xpath_attr, + unified_strdate, + determine_ext, + get_element_by_id, + compat_str, + get_element_by_attribute, +) + +# There are different sources of video in arte.tv, the extraction process +# is different for each one. The videos usually expire in 7 days, so we can't +# add tests. + +class ArteTvIE(InfoExtractor): + _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html' + _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<cat>.+?)/(?P<name>.+)' + _LIVE_URL = r'index-[0-9]+\.html$' + + IE_NAME = 'arte.tv' + + @classmethod + def suitable(cls, url): + return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL)) + + # TODO implement Live Stream + # from ..utils import compat_urllib_parse + # def extractLiveStream(self, url): + # video_lang = url.split('/')[-4] + # info = self.grep_webpage( + # url, + # r'src="(.*?/videothek_js.*?\.js)', + # 0, + # [ + # (1, 'url', 'Invalid URL: %s' % url) + # ] + # ) + # http_host = url.split('/')[2] + # next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) + # info = self.grep_webpage( + # next_url, + # r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + + # '(http://.*?\.swf).*?' 
+ + '(rtmp://.*?)\'', + re.DOTALL, + [ + (1, 'path', 'could not extract video path: %s' % url), + (2, 'player', 'could not extract video player: %s' % url), + (3, 'url', 'could not extract video url: %s' % url) + ] + ) + # video_url = '%s/%s' % (info.get('url'), info.get('path')) + + def _real_extract(self, url): + mobj = re.match(self._VIDEOS_URL, url) + if mobj is not None: + id = mobj.group('id') + lang = mobj.group('lang') + return self._extract_video(url, id, lang) + + mobj = re.match(self._LIVEWEB_URL, url) + if mobj is not None: + name = mobj.group('name') + lang = mobj.group('lang') + return self._extract_liveweb(url, name, lang) + + if re.search(self._LIVE_URL, url) is not None: + raise ExtractorError(u'Arte live streams are not yet supported, sorry') + # self.extractLiveStream(url) + # return + + def _extract_video(self, url, video_id, lang): + """Extract from videos.arte.tv""" + ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') + ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') + ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') + config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) + config_xml_url = config_node.attrib['ref'] + config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') + + video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) + def _key(m): + quality = m.group('quality') + if quality == 'hd': + return 2 + else: + return 1 + # We pick the best quality + video_urls = sorted(video_urls, key=_key) + video_url = list(video_urls)[-1].group('url') + + title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title') + thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>', + config_xml, 'thumbnail') + return {'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'url': video_url, + 'ext': 'flv', + } + + def _extract_liveweb(self, url, name, lang): + """Extract from http://liveweb.arte.tv/""" + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') + config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, + video_id, 'Downloading information') + event_doc = config_doc.find('event') + url_node = event_doc.find('video').find('urlHd') + if url_node is None: + url_node = event_doc.find('urlSd') + + return {'id': video_id, + 'title': event_doc.find('name%s' % lang.capitalize()).text, + 'url': url_node.text.replace('MP4', 'mp4'), + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + } + + +class ArteTVPlus7IE(InfoExtractor): + IE_NAME = 'arte.tv:+7' + _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' 
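# With the named groups restored in _VALID_URL above, a URL like the AJT
# example cited below parses as (sketch):
#
#     m = re.match(ArteTVPlus7IE._VALID_URL,
#                  'http://www.arte.tv/guide/fr/emissions/AJT/arte-journal')
#     m.group('lang'), m.group('id')  # -> ('fr', 'AJT')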
+ + @classmethod + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) + lang = mobj.group('lang') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') + return video_id, lang + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, video_id) + return self._extract_from_webpage(webpage, video_id, lang) + + def _extract_from_webpage(self, webpage, video_id, lang): + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) + + def _extract_from_json_url(self, json_url, video_id, lang): + json_info = self._download_webpage(json_url, video_id, 'Downloading info json') + self.report_extraction(video_id) + info = json.loads(json_info) + player_info = info['videoJsonPlayer'] + + info_dict = { + 'id': player_info['VID'], + 'title': player_info['VTI'], + 'description': player_info.get('VDE'), + 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]), + 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), + } + + all_formats = player_info['VSR'].values() + # Some formats use the m3u8 protocol + all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) + def _match_lang(f): + if f.get('versionCode') is None: + return True + # Return true if that format is in the language of the url + if lang == 'fr': + l = 'F' + elif lang == 'de': + l = 'A' + regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] + return any(re.match(r, f['versionCode']) for r in regexes) + # Some formats may not be in the same language as the url + formats = filter(_match_lang, all_formats) + formats = list(formats) # in python3 filter returns an iterator + if not formats: + # Some videos are only available in the 'Originalversion' + # they aren't tagged as being in French or German + if all(f['versionCode'] == 'VO' for f in all_formats): + formats = all_formats + else: + raise ExtractorError(u'The formats list is empty') + + if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: + def sort_key(f): + return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) + else: + def sort_key(f): + return ( + # Sort first by quality + int(f.get('height',-1)), + int(f.get('bitrate',-1)), + # The original version with subtitles has lower relevance + re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, + # The version with sourds/mal subtitles has also lower relevance + re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, + ) + formats = sorted(formats, key=sort_key) + def _format(format_info): + quality = '' + height = format_info.get('height') + if height is not None: + quality = compat_str(height) + bitrate = format_info.get('bitrate') + if bitrate is not None: + quality += '-%d' % bitrate + if format_info.get('versionCode') is not None: + format_id = '%s-%s' % (quality, format_info['versionCode']) + else: + format_id = quality + info = { + 'format_id': format_id, + 'format_note': format_info.get('versionLibelle'), + 'width': format_info.get('width'), + 'height': height, + } + if format_info['mediaType'] == 'rtmp': + info['url'] = format_info['streamer'] + info['play_path'] = 'mp4:' + format_info['url'] + info['ext'] = 'flv' + else: + info['url'] = format_info['url'] + info['ext'] = determine_ext(info['url']) + return info + info_dict['formats'] = [_format(f) for f in formats] + + return info_dict + + 
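# A note on the format ordering in _extract_from_json_url() above: when the
# 'quality' values are not letter grades, sort_key orders formats ascending
# by height, then bitrate, and ranks subtitled versions below plain ones, so
# formats[-1] is the preferred variant. A toy illustration (field values
# hypothetical):
#
#     fmts = [{'height': 406, 'bitrate': 800, 'versionCode': 'VF'},
#             {'height': 720, 'bitrate': 2200, 'versionCode': 'VO-STF'},
#             {'height': 720, 'bitrate': 2200, 'versionCode': 'VF'}]
#     sorted(fmts, key=sort_key)[-1]  # -> the 720p 'VF' entry
#
# Because False sorts before True, the 'VO-STF' variant (whose "is None"
# checks yield False) loses the tie against the plain 'VF' one.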
+# It also uses the arte_vp_url url from the webpage to extract the information +class ArteTVCreativeIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:creative' + _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' + + _TEST = { + 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'file': '050489-002.mp4', + 'info_dict': { + 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', + }, + } + + +class ArteTVFutureIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:future' + _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' + + _TEST = { + 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', + 'file': '050940-003.mp4', + 'info_dict': { + 'title': 'Les champignons au secours de la planète', + }, + } + + def _real_extract(self, url): + anchor_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, anchor_id) + row = get_element_by_id(anchor_id, webpage) + return self._extract_from_webpage(row, anchor_id, lang) + + +class ArteTVDDCIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:ddc' + _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + if lang == 'folge': + lang = 'de' + elif lang == 'emission': + lang = 'fr' + webpage = self._download_webpage(url, video_id) + scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) + script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') + javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') + json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/auengine.py youtube-dl-2014.02.17/youtube_dl/extractor/auengine.py --- youtube-dl-2012.09.27/youtube_dl/extractor/auengine.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/auengine.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, + ExtractorError, +) + + +class AUEngineIE(InfoExtractor): + _TEST = { + 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + 'file': 'lfvlytY6.mp4', + 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', + 'info_dict': { + 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' + } + } + _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', + webpage, 'title') + title = title.strip() + links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) + links = map(compat_urllib_parse.unquote, links) + + thumbnail = None + video_url = None + for link in links: + if link.endswith('.png'): + thumbnail = link + elif '/videos/' in link: + video_url = link + if not video_url: + raise ExtractorError(u'Could not find video URL') + ext = '.' 
+ determine_ext(video_url) + if ext == title[-len(ext):]: + title = title[:-len(ext)] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bambuser.py youtube-dl-2014.02.17/youtube_dl/extractor/bambuser.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bambuser.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bambuser.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,88 @@ +from __future__ import unicode_literals + +import re +import json +import itertools + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, +) + + +class BambuserIE(InfoExtractor): + IE_NAME = 'bambuser' + _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' + _API_KEY = '005f64509e19a868399060af746a00aa' + + _TEST = { + 'url': 'http://bambuser.com/v/4050584', + # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 + #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + 'info_dict': { + 'id': '4050584', + 'ext': 'flv', + 'title': 'Education engineering days - lightning talks', + 'duration': 3741, + 'uploader': 'pixelversity', + 'uploader_id': '344706', + }, + 'params': { + # It doesn't respect the 'Range' header, it would download the whole video, + # which caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = ('http://player-c.api.bambuser.com/getVideo.json?' + '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json)['result'] + + return { + 'id': video_id, + 'title': info['title'], + 'url': info['url'], + 'thumbnail': info.get('preview'), + 'duration': int(info['length']), + 'view_count': int(info['views_total']), + 'uploader': info['username'], + 'uploader_id': info['uid'], + } + + +class BambuserChannelIE(InfoExtractor): + IE_NAME = 'bambuser:channel' + _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)' + # The maximum number we can get with each request + _STEP = 50 + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + urls = [] + last_id = '' + for i in itertools.count(1): + req_url = ('http://bambuser.com/xhr-api/index.php?username={user}' + '&sort=created&access_mode=0%2C1%2C2&limit={count}' + '&method=broadcast&format=json&vid_older_than={last}' + ).format(user=user, count=self._STEP, last=last_id) + req = compat_urllib_request.Request(req_url) + # Without setting this header, we wouldn't get any result + req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) + info_json = self._download_webpage(req, user, + 'Downloading page %d' % i) + results = json.loads(info_json)['result'] + if len(results) == 0: + break + last_id = results[-1]['vid'] + urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) + + return { + '_type': 'playlist', + 'title': user, + 'entries': urls, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bandcamp.py youtube-dl-2014.02.17/youtube_dl/extractor/bandcamp.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bandcamp.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bandcamp.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import 
InfoExtractor +from ..utils import ( + compat_str, + compat_urlparse, + ExtractorError, +) + + +class BandcampIE(InfoExtractor): + _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' + _TESTS = [{ + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + 'file': '1812978515.mp3', + 'md5': 'c557841d5e50261777a6585648adf439', + 'info_dict': { + "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + "duration": 10, + }, + '_skip': 'There is a limit of 200 free downloads / month for the test song' + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + # We get the link to the free download page + m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) + if m_download is None: + m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) + if m_trackinfo: + json_code = m_trackinfo.group(1) + data = json.loads(json_code) + d = data[0] + + duration = int(round(d['duration'])) + formats = [] + for format_id, format_url in d['file'].items(): + ext, _, abr_str = format_id.partition('-') + + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + 'abr': int(abr_str), + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(d['id']), + 'title': d['title'], + 'formats': formats, + 'duration': duration, + } + else: + raise ExtractorError('No free songs found') + + download_link = m_download.group(1) + video_id = re.search( + r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', + webpage, re.MULTILINE | re.DOTALL).group('id') + + download_webpage = self._download_webpage(download_link, video_id, + 'Downloading free downloads page') + # We get the dictionary of the track from some javascript code + info = re.search(r'items: (.*?),$', + download_webpage, re.MULTILINE).group(1) + info = json.loads(info)[0] + # We pick mp3-320 for now, until format selection can be easily implemented. 
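# The lines below swap the expired link's path from /download/track to
# /statdownload/track, replaying its query string; schematically
# (hypothetical values):
#
#     http://artist.bandcamp.com/download/track?enc=mp3-320&fsig=F&id=1&ts=T
#       -> http://artist.bandcamp.com/statdownload/track
#              ?enc=mp3-320&fsig=F&id=1&ts=T&.rand=665028774616&.vrs=1
#
# and the final track URL is then read out of the response's "retry_url" field.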
+ mp3_info = info['downloads']['mp3-320'] + # If we try to use this url it says the link has expired + initial_url = mp3_info['url'] + re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' + m_url = re.match(re_url, initial_url) + # We build the url we will use to get the final track url + # This url is built in Bandcamp in the script download_bunde_*.js + request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) + final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') + # If we could correctly generate the .rand field the url would be + # in the "download_url" key + final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) + + return { + 'id': video_id, + 'title': info['title'], + 'ext': 'mp3', + 'vcodec': 'none', + 'url': final_url, + 'thumbnail': info.get('thumb_url'), + 'uploader': info.get('artist'), + } + + +class BandcampAlbumIE(InfoExtractor): + IE_NAME = 'Bandcamp:album' + _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' + + _TEST = { + 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', + 'playlist': [ + { + 'file': '1353101989.mp3', + 'md5': '39bc1eded3476e927c724321ddf116cf', + 'info_dict': { + 'title': 'Intro', + } + }, + { + 'file': '38097443.mp3', + 'md5': '1a2c32e2691474643e912cc6cd4bffaa', + 'info_dict': { + 'title': 'Kero One - Keep It Alive (Blazo remix)', + } + }, + ], + 'params': { + 'playlistend': 2 + }, + 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) + if not tracks_paths: + raise ExtractorError('The page doesn\'t contain any tracks') + entries = [ + self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) + for t_path in tracks_paths] + title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') + return { + '_type': 'playlist', + 'title': title, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bbccouk.py youtube-dl-2014.02.17/youtube_dl/extractor/bbccouk.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bbccouk.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bbccouk.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,217 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from ..utils import ExtractorError + + +class BBCCoUkIE(SubtitlesInfoExtractor): + IE_NAME = 'bbc.co.uk' + IE_DESC = 'BBC iPlayer' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})' + + _TESTS = [ + { + 'url': 'http://www.bbc.co.uk/programmes/p01q7wz1', + 'info_dict': { + 'id': 'p01q7wz4', + 'ext': 'flv', + 'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix', + 'description': 'Blu Mar Ten deliver a Guest Mix for Friction.', + 'duration': 1936, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Man in Black: Series 3: The Printed Name', + 
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.", + 'duration': 1800, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Voice UK: Series 3: Blind Auditions 5', + 'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.", + 'duration': 5100, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + } + ] + + def _extract_asx_playlist(self, connection, programme_id): + asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') + return [ref.get('href') for ref in asx.findall('./Entry/ref')] + + def _extract_connection(self, connection, programme_id): + formats = [] + protocol = connection.get('protocol') + supplier = connection.get('supplier') + if protocol == 'http': + href = connection.get('href') + # ASX playlist + if supplier == 'asx': + for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): + formats.append({ + 'url': ref, + 'format_id': 'ref%s_%s' % (i, supplier), + }) + # Direct link + else: + formats.append({ + 'url': href, + 'format_id': supplier, + }) + elif protocol == 'rtmp': + application = connection.get('application', 'ondemand') + auth_string = connection.get('authString') + identifier = connection.get('identifier') + server = connection.get('server') + formats.append({ + 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'play_path': identifier, + 'app': '%s?%s' % (application, auth_string), + 'page_url': 'http://www.bbc.co.uk', + 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', + 'rtmp_live': False, + 'ext': 'flv', + 'format_id': supplier, + }) + return formats + + def _extract_items(self, playlist): + return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item') + + def _extract_medias(self, media_selection): + return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media') + + def _extract_connections(self, media): + return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection') + + def _extract_video(self, media, programme_id): + formats = [] + vbr = int(media.get('bitrate')) + vcodec = media.get('encoding') + service = media.get('service') + width = int(media.get('width')) + height = int(media.get('height')) + file_size = int(media.get('media_file_size')) + for connection in self._extract_connections(media): + conn_formats = self._extract_connection(connection, programme_id) + for format in conn_formats: + format.update({ + 'format_id': '%s_%s' % (service, format['format_id']), + 'width': width, + 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) + formats.extend(conn_formats) + return formats + + def _extract_audio(self, media, programme_id): + formats = [] + abr = int(media.get('bitrate')) + acodec = media.get('encoding') + service = media.get('service') + for connection in self._extract_connections(media): + conn_formats = self._extract_connection(connection, programme_id) + for format in conn_formats: + format.update({ + 'format_id': '%s_%s' % (service, 
format['format_id']), + 'abr': abr, + 'acodec': acodec, + }) + formats.extend(conn_formats) + return formats + + def _extract_captions(self, media, programme_id): + subtitles = {} + for connection in self._extract_connections(media): + captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') + ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) + srt = '' + for pos, p in enumerate(ps): + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), + p.text.strip() if p.text is not None else '') + subtitles[lang] = srt + return subtitles + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + group_id = mobj.group('id') + + playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, + 'Downloading playlist XML') + + no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') + if no_items is not None: + reason = no_items.get('reason') + if reason == 'preAvailability': + msg = 'Episode %s is not yet available' % group_id + elif reason == 'postAvailability': + msg = 'Episode %s is no longer available' % group_id + else: + msg = 'Episode %s is not available: %s' % (group_id, reason) + raise ExtractorError(msg, expected=True) + + formats = [] + subtitles = None + + for item in self._extract_items(playlist): + kind = item.get('kind') + if kind != 'programme' and kind != 'radioProgramme': + continue + title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text + description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text + + programme_id = item.get('identifier') + duration = int(item.get('duration')) + + media_selection = self._download_xml( + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, + programme_id, 'Downloading media selection XML') + + for media in self._extract_medias(media_selection): + kind = media.get('kind') + if kind == 'audio': + formats.extend(self._extract_audio(media, programme_id)) + elif kind == 'video': + formats.extend(self._extract_video(media, programme_id)) + elif kind == 'captions': + subtitles = self._extract_captions(media, programme_id) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(programme_id, subtitles) + return + + self._sort_formats(formats) + + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/blinkx.py youtube-dl-2014.02.17/youtube_dl/extractor/blinkx.py --- youtube-dl-2012.09.27/youtube_dl/extractor/blinkx.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/blinkx.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import datetime +import json +import re + +from .common import InfoExtractor +from ..utils import ( + remove_start, +) + + +class BlinkxIE(InfoExtractor): + _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + IE_NAME = 'blinkx' + + _TEST = { + 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', + 'file': '8aQUy7GV.mp4', + 'md5': '2e9a07364af40163a908edbf10bb2492', + 'info_dict': { + "title": "Police Car Rolls Away", + "uploader": 
"stupidvideos.com", + "upload_date": "20131215", + "description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!", + "duration": 14.886, + "thumbnails": [{ + "width": 100, + "height": 76, + "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg", + }], + }, + } + + def _real_extract(self, rl): + m = re.match(self._VALID_URL, rl) + video_id = m.group('id') + display_id = video_id[:8] + + api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) + data_json = self._download_webpage(api_url, display_id) + data = json.loads(data_json)['api']['results'][0] + dt = datetime.datetime.fromtimestamp(data['pubdate_epoch']) + pload_date = dt.strftime('%Y%m%d') + + duration = None + thumbnails = [] + formats = [] + for m in data['media']: + if m['type'] == 'jpg': + thumbnails.append({ + 'url': m['link'], + 'width': int(m['w']), + 'height': int(m['h']), + }) + elif m['type'] == 'original': + duration = m['d'] + elif m['type'] == 'youtube': + yt_id = m['link'] + self.to_screen(u'Youtube video detected: %s' % yt_id) + return self.url_result(yt_id, 'Youtube', video_id=yt_id) + elif m['type'] in ('flv', 'mp4'): + vcodec = remove_start(m['vcodec'], 'ff') + acodec = remove_start(m['acodec'], 'ff') + tbr = (int(m['vbr']) + int(m['abr'])) // 1000 + format_id = (u'%s-%sk-%s' % + (vcodec, + tbr, + m['w'])) + formats.append({ + 'format_id': format_id, + 'url': m['link'], + 'vcodec': vcodec, + 'acodec': acodec, + 'abr': int(m['abr']) // 1000, + 'vbr': int(m['vbr']) // 1000, + 'tbr': tbr, + 'width': int(m['w']), + 'height': int(m['h']), + }) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'fullid': video_id, + 'title': data['title'], + 'formats': formats, + 'uploader': data['channel_name'], + 'upload_date': pload_date, + 'description': data.get('description'), + 'thumbnails': thumbnails, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bliptv.py youtube-dl-2014.02.17/youtube_dl/extractor/bliptv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bliptv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bliptv.py 2014-02-04 09:25:00.000000000 +0000 @@ -0,0 +1,178 @@ +from __future__ import unicode_literals + +import datetime +import re + +from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( + compat_str, + compat_urllib_request, + + unescapeHTML, +) + + +class BlipTVIE(SubtitlesInfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$' + + _TESTS = [{ + 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + 'md5': 'c6934ad0b6acf2bd920720ec888eb812', + 'info_dict': { + 'id': '5779306', + 'ext': 'mov', + 'upload_date': '20111205', + 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', + 'uploader': 'Comic Book Resources - CBR TV', + 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', + } + }, { + # https://github.com/rg3/youtube-dl/pull/2274 + 'note': 'Video with subtitles', + 'url': 'http://blip.tv/play/h6Uag5OEVgI.html', + 'md5': '309f9d25b820b086ca163ffac8031806', + 'info_dict': { + 'id': '6586561', + 'ext': 'mp4', + 'uploader': 'Red vs. Blue', + 'description': 'One-Zero-One', + 'upload_date': '20130614', + 'title': 'Red vs. 
Blue Season 11 Episode 1', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + presumptive_id = mobj.group('presumptive_id') + + # See https://github.com/rg3/youtube-dl/issues/857 + embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) + if embed_mobj: + info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) + info_page = self._download_webpage(info_url, embed_mobj.group(1)) + video_id = self._search_regex( + r'data-episode-id="([0-9]+)', info_page, 'video_id') + return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV') + + cchar = '&' if '?' in url else '?' + json_url = url + cchar + 'skin=json&version=2&no_wrap=1' + request = compat_urllib_request.Request(json_url) + request.add_header('User-Agent', 'iTunes/10.6.1') + + json_data = self._download_json(request, video_id=presumptive_id) + + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data + + video_id = compat_str(data['item_id']) + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + subtitles = {} + formats = [] + if 'additionalMedia' in data: + for f in data['additionalMedia']: + if f.get('file_type_srt') == 1: + LANGS = { + 'english': 'en', + } + lang = f['role'].rpartition('-')[-1].strip().lower() + langcode = LANGS.get(lang, lang) + subtitles[langcode] = f['url'] + continue + if not int(f['media_width']): # filter m3u8 + continue + formats.append({ + 'url': f['url'], + 'format_id': f['role'], + 'width': int(f['media_width']), + 'height': int(f['media_height']), + }) + else: + formats.append({ + 'url': data['media']['url'], + 'width': int(data['media']['width']), + 'height': int(data['media']['height']), + }) + self._sort_formats(formats) + + # subtitles + video_subtitles = self.extract_subtitles(video_id, subtitles) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + return { + 'id': video_id, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'user_agent': 'iTunes/10.6.1', + 'formats': formats, + 'subtitles': video_subtitles, + } + + def _download_subtitle_url(self, sub_lang, url): + # For some weird reason, blip.tv serves a video instead of subtitles + # when we request with a common UA + req = compat_urllib_request.Request(url) + req.add_header('Youtubedl-user-agent', 'youtube-dl') + return self._download_webpage(req, None, note=False) + + +class BlipTVUserIE(InfoExtractor): + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' + _PAGE_SIZE = 12 + IE_NAME = 'blip.tv:user' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + username = mobj.group(1) + + page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' + + page = self._download_webpage(url, username, 'Downloading user page') + mobj = re.search(r'data-users-id="([^"]+)"', page) + page_base = page_base % mobj.group(1) + + # Download video ids using BlipTV Ajax calls. Result size per + # query is limited (currently to 12 videos) so we need to query + # page by page until there are no video ids - it means we got + # all of them. 
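+        # Sketch of the resulting request sequence (users_id assumed 123):
+        #     .../show_get_full_episode_list?users_id=123&lite=0&esi=1&page=1
+        #     .../show_get_full_episode_list?users_id=123&lite=0&esi=1&page=2
+        # and so on, stopping once a page returns fewer than _PAGE_SIZE ids.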
+ + video_ids = [] + pagenum = 1 + + while True: + url = page_base + "&page=" + str(pagenum) + page = self._download_webpage( + url, username, 'Downloading video ids from page %d' % pagenum) + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(r'href="/([^"]+)"', page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(unescapeHTML(mobj.group(1))) + + video_ids.extend(ids_in_page) + + # A little optimization - if current page is not + # "full", ie. does not contain PAGE_SIZE video ids then + # we can assume that this page is the last one - there + # are no more ids on further pages - no need to query + # again. + + if len(ids_in_page) < self._PAGE_SIZE: + break + + pagenum += 1 + + urls = ['http://blip.tv/%s' % video_id for video_id in video_ids] + url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] + return [self.playlist_result(url_entries, playlist_title=username)] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/bloomberg.py youtube-dl-2014.02.17/youtube_dl/extractor/bloomberg.py --- youtube-dl-2012.09.27/youtube_dl/extractor/bloomberg.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/bloomberg.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,30 @@ +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' + + _TEST = { + u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', + u'info_dict': { + u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', + u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + embed_code = self._search_regex( + r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage, + 'embed code') + return OoyalaIE._build_url_result(embed_code) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/breakcom.py youtube-dl-2014.02.17/youtube_dl/extractor/breakcom.py --- youtube-dl-2012.09.27/youtube_dl/extractor/breakcom.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/breakcom.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class BreakIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)' + _TEST = { + 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', + 'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b', + 'info_dict': { + 'id': '2468056', + 'ext': 'mp4', + 'title': 'When Girls Act Like D-Bags', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1).split("-")[-1] + embed_url = 'http://www.break.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) + info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, + 'info json', flags=re.DOTALL) + info = json.loads(info_json) + video_url = info['videoUri'] + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) + if m_youtube is not None: + return self.url_result(m_youtube.group(1), 'Youtube') + final_url = video_url + '?' 
+ info['AuthToken'] + return { + 'id': video_id, + 'url': final_url, + 'title': info['contentName'], + 'thumbnail': info['thumbUri'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/brightcove.py youtube-dl-2014.02.17/youtube_dl/extractor/brightcove.py --- youtube-dl-2012.09.27/youtube_dl/extractor/brightcove.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/brightcove.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,247 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + find_xpath_attr, + fix_xml_ampersands, + compat_urlparse, + compat_str, + compat_urllib_request, + compat_parse_qs, + + ExtractorError, + unsmuggle_url, + unescapeHTML, +) + + +class BrightcoveIE(InfoExtractor): + _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' + _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' + + _TESTS = [ + { + # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', + 'file': '2371591881001.mp4', + 'md5': '5423e113865d26e40624dce2e4b45d95', + 'note': 'Test Brightcove downloads and detection in GenericIE', + 'info_dict': { + 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + 'uploader': '8TV', + 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', + } + }, + { + # From http://medianetwork.oracle.com/video/player/1785452137001 + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', + 'file': '1785452137001.flv', + 'info_dict': { + 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', + 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', + 'uploader': 'Oracle', + }, + }, + { + # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ + 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', + 'info_dict': { + 'id': '2750934548001', + 'ext': 'mp4', + 'title': 'This Bracelet Acts as a Personal Thermostat', + 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', + 'uploader': 'Mashable', + }, + }, + { + # test that the default referer works + # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ + 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', + 'info_dict': { + 'id': '2878862109001', + 'ext': 'mp4', + 'title': 'Lost in Motion II', + 'description': 'md5:363109c02998fee92ec02211bd8000df', + 'uploader': 'National Ballet of Canada', + }, + } + ] + + @classmethod + def _build_brighcove_url(cls, object_str): + """ + Build a Brightcove url from a xml string containing + <object class="BrightcoveExperience">{params}</object> + """ + + # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', + lambda m: m.group(1) + '/>', object_str) + # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + object_str = object_str.replace('<--', '<!--') + 
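+        # Taken together with fix_xml_ampersands below, a hypothetical input
+        #     <param name="a" value="b"><-- note --> x & y
+        # should end up as well-formed XML:
+        #     <param name="a" value="b"/><!-- note --> x &amp; y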
object_str = fix_xml_ampersands(object_str) + + object_doc = xml.etree.ElementTree.fromstring(object_str) + + fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') + if fv_el is not None: + flashvars = dict( + (k, v[0]) + for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + else: + flashvars = {} + + def find_param(name): + if name in flashvars: + return flashvars[name] + node = find_xpath_attr(object_doc, './param', 'name', name) + if node is not None: + return node.attrib['value'] + return None + + params = {} + + playerID = find_param('playerID') + if playerID is None: + raise ExtractorError('Cannot find player ID') + params['playerID'] = playerID + + playerKey = find_param('playerKey') + # Not all pages define this value + if playerKey is not None: + params['playerKey'] = playerKey + # The three fields hold the id of the video + videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') + if videoPlayer is not None: + params['@videoPlayer'] = videoPlayer + linkBase = find_param('linkBaseURL') + if linkBase is not None: + params['linkBaseURL'] = linkBase + data = compat_urllib_parse.urlencode(params) + return cls._FEDERATED_URL_TEMPLATE % data + + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the brightcove url from the webpage, returns None + if it can't be found + """ + urls = cls._extract_brightcove_urls(webpage) + return urls[0] if urls else None + + @classmethod + def _extract_brightcove_urls(cls, webpage): + """Return a list of all Brightcove URLs from the webpage """ + + url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) + if url_m: + return [unescapeHTML(url_m.group(1))] + + matches = re.findall( + r'''(?sx)<object + (?: + [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | + [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ + ).+?</object>''', + webpage) + return [cls._build_brighcove_url(m) for m in matches] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + # Change the 'videoId' and others field to '@videoPlayer' + url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + # Change bckey (used by bcove.me urls) to playerKey + url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) + mobj = re.match(self._VALID_URL, url) + query_str = mobj.group('query') + query = compat_urlparse.parse_qs(query_str) + + videoPlayer = query.get('@videoPlayer') + if videoPlayer: + # We set the original url as the default 'Referer' header + referer = smuggled_data.get('Referer', url) + return self._get_video_info( + videoPlayer[0], query_str, query, referer=referer) + else: + player_key = query['playerKey'] + return self._get_playlist_info(player_key[0]) + + def _get_video_info(self, video_id, query_str, query, referer=None): + request_url = self._FEDERATED_URL_TEMPLATE % query_str + req = compat_urllib_request.Request(request_url) + linkBase = query.get('linkBaseURL') + if linkBase is not None: + referer = linkBase[0] + if referer is not None: + req.add_header('Referer', referer) + webpage = self._download_webpage(req, video_id) + + self.report_extraction(video_id) + info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') + info = json.loads(info)['data'] + video_info = info['programmedContent']['videoPlayer']['mediaDTO'] + video_info['_youtubedl_adServerURL'] = info.get('adServerURL') + + return self._extract_video_info(video_info) + + def _get_playlist_info(self, 
player_key): + info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key + playlist_info = self._download_webpage( + info_url, player_key, 'Downloading playlist information') + + json_data = json.loads(playlist_info) + if 'videoList' not in json_data: + raise ExtractorError('Empty playlist') + playlist_info = json_data['videoList'] + videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] + + return self.playlist_result(videos, playlist_id=playlist_info['id'], + playlist_title=playlist_info['mediaCollectionDTO']['displayName']) + + def _extract_video_info(self, video_info): + info = { + 'id': compat_str(video_info['id']), + 'title': video_info['displayName'].strip(), + 'description': video_info.get('shortDescription'), + 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), + 'uploader': video_info.get('publisherName'), + } + + renditions = video_info.get('renditions') + if renditions: + renditions = sorted(renditions, key=lambda r: r['size']) + info['formats'] = [{ + 'url': rend['defaultURL'], + 'height': rend.get('frameHeight'), + 'width': rend.get('frameWidth'), + } for rend in renditions] + elif video_info.get('FLVFullLengthURL') is not None: + info.update({ + 'url': video_info['FLVFullLengthURL'], + }) + + if self._downloader.params.get('include_ads', False): + adServerURL = video_info.get('_youtubedl_adServerURL') + if adServerURL: + ad_info = { + '_type': 'url', + 'url': adServerURL, + } + if 'url' in info: + return { + '_type': 'playlist', + 'title': info['title'], + 'entries': [ad_info, info], + } + else: + return ad_info + + if 'url' not in info and not info.get('formats'): + raise ExtractorError('Unable to extract video url for %s' % info['id']) + return info diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/c56.py youtube-dl-2014.02.17/youtube_dl/extractor/c56.py --- youtube-dl-2012.09.27/youtube_dl/extractor/c56.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/c56.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class C56IE(InfoExtractor): + _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' + IE_NAME = '56.com' + _TEST = { + 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', + 'file': '93440716.flv', + 'md5': 'e59995ac63d0457783ea05f93f12a866', + 'info_dict': { + 'title': '网事知多少 第32期:车怒', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + text_id = mobj.group('textid') + info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, + text_id, 'Downloading video info') + info = json.loads(info_page)['info'] + formats = [{ + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + } for f in info['rfiles']] + self._sort_formats(formats) + + return { + 'id': info['vid'], + 'title': info['Subject'], + 'formats': formats, + 'thumbnail': info.get('bimg') or info.get('img'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/canalc2.py youtube-dl-2014.02.17/youtube_dl/extractor/canalc2.py --- youtube-dl-2012.09.27/youtube_dl/extractor/canalc2.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/canalc2.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +# coding: utf-8 
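+# canalc2.tv pages expose the media as a flashvars-style 'file' value; the
+# extractor below joins it onto a fixed host (vod-flash.u-strasbg.fr) to
+# build the final video url.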
+import re + +from .common import InfoExtractor + + +class Canalc2IE(InfoExtractor): + IE_NAME = 'canalc2.tv' + _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + u'file': u'12163.mp4', + u'md5': u'060158428b650f896c542dfbb3d6487f', + u'info_dict': { + u'title': u'Terrasses du Numérique' + } + } + + def _real_extract(self, url): + video_id = re.match(self._VALID_URL, url).group('id') + # We need to set the voir field for getting the file name + url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id + webpage = self._download_webpage(url, video_id) + file_name = self._search_regex( + r"so\.addVariable\('file','(.*?)'\);", + webpage, 'file name') + video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name + + title = self._html_search_regex( + r'class="evenement8">(.*?)</a>', webpage, u'title') + + return {'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/canalplus.py youtube-dl-2014.02.17/youtube_dl/extractor/canalplus.py --- youtube-dl-2012.09.27/youtube_dl/extractor/canalplus.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/canalplus.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class CanalplusIE(InfoExtractor): + _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' + _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' + IE_NAME = u'canalplus.fr' + + _TEST = { + u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', + u'file': u'922470.flv', + u'info_dict': { + u'title': u'Zapping - 26/08/13', + u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', + u'upload_date': u'20130826', + }, + u'params': { + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.groupdict().get('id') + if video_id is None: + webpage = self._download_webpage(url, mobj.group('path')) + video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') + info_url = self._VIDEO_INFO_TEMPLATE % video_id + doc = self._download_xml(info_url,video_id, + u'Downloading video info') + + self.report_extraction(video_id) + video_info = [video for video in doc if video.find('ID').text == video_id][0] + infos = video_info.find('INFOS') + media = video_info.find('MEDIA') + formats = [media.find('VIDEOS/%s' % format) + for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']] + video_url = [format.text for format in formats if format is not None][-1] + + return {'id': video_id, + 'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text, + infos.find('TITRAGE/SOUS_TITRE').text), + 'url': video_url, + 'ext': 'flv', + 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), + 'thumbnail': media.find('IMAGES/GRAND').text, + 'description': infos.find('DESCRIPTION').text, + 'view_count': int(infos.find('NB_VUES').text), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cbs.py youtube-dl-2014.02.17/youtube_dl/extractor/cbs.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cbs.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cbs.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,30 @@ +import re + 
+from .common import InfoExtractor
+
+
+class CBSIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
+
+    _TEST = {
+        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+        u'file': u'4JUVEwq3wUT7.flv',
+        u'info_dict': {
+            u'title': u'Connect Chat feat. Garth Brooks',
+            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
+            u'duration': 1495,
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        real_id = self._search_regex(
+            r"video\.settings\.pid\s*=\s*'([^']+)';",
+            webpage, u'real video ID')
+        return self.url_result(u'theplatform:%s' % real_id)
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/channel9.py youtube-dl-2014.02.17/youtube_dl/extractor/channel9.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/channel9.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/channel9.py 2014-02-07 14:35:52.000000000 +0000
@@ -0,0 +1,273 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+class Channel9IE(InfoExtractor):
+    '''
+    Common extractor for channel9.msdn.com.
+
+    The type of a given URL (video or playlist) is determined from the
+    Search.PageType meta tag in the page HTML rather than from the URL
+    itself, since the URL alone does not always tell the two apart.
+    '''
+    IE_DESC = 'Channel 9'
+    IE_NAME = 'channel9'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
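+    # Matches entry and list pages alike, e.g. (taken from the tests below)
+    #     http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002
+    # where <contentpath> captures 'Events/TechEd/Australia/2013/KOS002'.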
+ + _TESTS = [ + { + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ], + }, + }, + { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'ext': 'mp4', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', + 'authors': [ 'Mike Wilmot' ], + }, + } + ] + + _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + + # Sorted by quality + _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4'] + + def _restore_bytes(self, formatted_size): + if not formatted_size: + return 0 + m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size) + if not m: + return 0 + units = m.group('units') + try: + exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper()) + except ValueError: + return 0 + size = float(m.group('size')) + return int(size * (1024 ** exponent)) + + def _formats_from_html(self, html): + FORMAT_REGEX = r''' + (?x) + <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s* + <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s* + (?:<div\s+class="popup\s+rounded">\s* + <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s* + </div>)? 
# File size part may be missing + ''' + # Extract known formats + formats = [{ + 'url': x.group('url'), + 'format_id': x.group('quality'), + 'format_note': x.group('note'), + 'format': '%s (%s)' % (x.group('quality'), x.group('note')), + 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate + 'preference': self._known_formats.index(x.group('quality')), + 'vcodec': 'none' if x.group('note') == 'Audio only' else None, + } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] + + self._sort_formats(formats) + + return formats + + def _extract_title(self, html): + title = self._html_search_meta('title', html, 'title') + if title is None: + title = self._og_search_title(html) + TITLE_SUFFIX = ' (Channel 9)' + if title is not None and title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)] + return title + + def _extract_description(self, html): + DESCRIPTION_REGEX = r'''(?sx) + <div\s+class="entry-content">\s* + <div\s+id="entry-body">\s* + (?P<description>.+?)\s* + </div>\s* + </div> + ''' + m = re.search(DESCRIPTION_REGEX, html) + if m is not None: + return m.group('description') + return self._html_search_meta('description', html, 'description') + + def _extract_duration(self, html): + m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) + return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None + + def _extract_slides(self, html): + m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html) + return m.group('slidesurl') if m is not None else None + + def _extract_zip(self, html): + m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html) + return m.group('zipurl') if m is not None else None + + def _extract_avg_rating(self, html): + m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html) + return float(m.group('avgrating')) if m is not None else 0 + + def _extract_rating_count(self, html): + m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html) + return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0 + + def _extract_view_count(self, html): + m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html) + return int(self._fix_count(m.group('viewcount'))) if m is not None else 0 + + def _extract_comment_count(self, html): + m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html) + return int(self._fix_count(m.group('commentcount'))) if m is not None else 0 + + def _fix_count(self, count): + return int(str(count).replace(',', '')) if count is not None else None + + def _extract_authors(self, html): + m = re.search(r'(?s)<li class="author">(.*?)</li>', html) + if m is None: + return None + return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1)) + + def _extract_session_code(self, html): + m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html) + return m.group('code') if m is not None else None + + def _extract_session_day(self, html): + m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) + return m.group('day') if m is not None else None + + def _extract_session_room(self, html): + m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html) + return m.group('room') if m is not None 
else None + + def _extract_session_speakers(self, html): + return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html) + + def _extract_content(self, html, content_path): + # Look for downloadable content + formats = self._formats_from_html(html) + slides = self._extract_slides(html) + zip_ = self._extract_zip(html) + + # Nothing to download + if len(formats) == 0 and slides is None and zip_ is None: + self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) + return + + # Extract meta + title = self._extract_title(html) + description = self._extract_description(html) + thumbnail = self._og_search_thumbnail(html) + duration = self._extract_duration(html) + avg_rating = self._extract_avg_rating(html) + rating_count = self._extract_rating_count(html) + view_count = self._extract_view_count(html) + comment_count = self._extract_comment_count(html) + + common = {'_type': 'video', + 'id': content_path, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'avg_rating': avg_rating, + 'rating_count': rating_count, + 'view_count': view_count, + 'comment_count': comment_count, + } + + result = [] + + if slides is not None: + d = common.copy() + d.update({ 'title': title + '-Slides', 'url': slides }) + result.append(d) + + if zip_ is not None: + d = common.copy() + d.update({ 'title': title + '-Zip', 'url': zip_ }) + result.append(d) + + if len(formats) > 0: + d = common.copy() + d.update({ 'title': title, 'formats': formats }) + result.append(d) + + return result + + def _extract_entry_item(self, html, content_path): + contents = self._extract_content(html, content_path) + if contents is None: + return contents + + authors = self._extract_authors(html) + + for content in contents: + content['authors'] = authors + + return contents + + def _extract_session(self, html, content_path): + contents = self._extract_content(html, content_path) + if contents is None: + return contents + + session_meta = {'session_code': self._extract_session_code(html), + 'session_day': self._extract_session_day(html), + 'session_room': self._extract_session_room(html), + 'session_speakers': self._extract_session_speakers(html), + } + + for content in contents: + content.update(session_meta) + + return contents + + def _extract_list(self, content_path): + rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') + entries = [self.url_result(session_url.text, 'Channel9') + for session_url in rss.findall('./channel/item/link')] + title_text = rss.find('./channel/title').text + return self.playlist_result(entries, content_path, title_text) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + content_path = mobj.group('contentpath') + + webpage = self._download_webpage(url, content_path, 'Downloading web page') + + page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage) + if page_type_m is None: + raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True) + + page_type = page_type_m.group('pagetype') + if page_type == 'List': # List page, may contain list of 'item'-like objects + return self._extract_list(content_path) + elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content + return self._extract_entry_item(webpage, content_path) + elif page_type == 'Session': # Event session page, may contain downloadable content + return self._extract_session(webpage, content_path) + 
else: + raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True) \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/chilloutzone.py youtube-dl-2014.02.17/youtube_dl/extractor/chilloutzone.py --- youtube-dl-2012.09.27/youtube_dl/extractor/chilloutzone.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/chilloutzone.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,97 @@ +from __future__ import unicode_literals + +import re +import base64 +import json + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError +) + + +class ChilloutzoneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html' + _TESTS = [{ + 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'md5': 'a76f3457e813ea0037e5244f509e66d1', + 'info_dict': { + 'id': 'enemene-meck-alle-katzen-weg', + 'ext': 'mp4', + 'title': 'Enemene Meck - Alle Katzen weg', + 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + }, + }, { + 'note': 'Video hosted at YouTube', + 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'info_dict': { + 'id': '1YVQaAgHyRU', + 'ext': 'mp4', + 'title': '16 Photos Taken 1 Second Before Disaster', + 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', + 'uploader': 'BuzzFeedVideo', + 'uploader_id': 'BuzzFeedVideo', + 'upload_date': '20131105', + }, + }, { + 'note': 'Video hosted at Vimeo', + 'url': 'http://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'info_dict': { + 'id': '85523671', + 'ext': 'mp4', + 'title': 'The Sunday Times - Icons', + 'description': 'md5:3e1c0dc6047498d6728dcdaad0891762', + 'uploader': 'Us', + 'uploader_id': 'usfilms', + 'upload_date': '20140131' + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + base64_video_info = self._html_search_regex( + r'var cozVidData = "(.+?)";', webpage, 'video data') + decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8") + video_info_dict = json.loads(decoded_video_info) + + # get video information from dict + video_url = video_info_dict['mediaUrl'] + description = clean_html(video_info_dict.get('description')) + title = video_info_dict['title'] + native_platform = video_info_dict['nativePlatform'] + native_video_id = video_info_dict['nativeVideoId'] + source_priority = video_info_dict['sourcePriority'] + + # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) + if native_platform is None: + youtube_url = self._html_search_regex( + r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', + webpage, 'fallback video URL', default=None) + if youtube_url is not None: + return self.url_result(youtube_url, ie='Youtube') + + # Non Fallback: Decide to use native source (e.g. 
youtube or vimeo) or + # the own CDN + if source_priority == 'native': + if native_platform == 'youtube': + return self.url_result(native_video_id, ie='Youtube') + if native_platform == 'vimeo': + return self.url_result( + 'http://vimeo.com/' + native_video_id, ie='Vimeo') + + if not video_url: + raise ExtractorError('No video found') + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cinemassacre.py youtube-dl-2014.02.17/youtube_dl/extractor/cinemassacre.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cinemassacre.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cinemassacre.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,90 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class CinemassacreIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' + _TESTS = [{ + u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', + u'file': u'19911.flv', + u'info_dict': { + u'upload_date': u'20121110', + u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', + u'description': u'md5:fb87405fcb42a331742a0dce2708560b', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }, + { + u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', + u'file': u'521be8ef82b16.flv', + u'info_dict': { + u'upload_date': u'20131002', + u'title': u'The Mummy’s Hand (1940)', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = u'http://' + mobj.group('url') + webpage = self._download_webpage(webpage_url, None) # Don't know video id yet + video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') + mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) + if not mobj: + raise ExtractorError(u'Can\'t extract embed url and video id') + playerdata_url = mobj.group(u'embed_url') + video_id = mobj.group(u'video_id') + + video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', + webpage, u'title') + video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', + webpage, u'description', flags=re.DOTALL, fatal=False) + if len(video_description) == 0: + video_description = None + + playerdata = self._download_webpage(playerdata_url, video_id) + url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') + + sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') + hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') + video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) + + formats = [ + { + 'url': url, + 'play_path': 'mp4:' + sd_file, + 'rtmp_live': True, # workaround + 'ext': 'flv', + 'format': 'sd', + 'format_id': 'sd', + }, + { + 'url': url, + 'play_path': 'mp4:' + hd_file, + 'rtmp_live': True, # workaround + 'ext': 'flv', + 'format': 'hd', + 'format_id': 'hd', + }, + ] + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'description': video_description, + 'upload_date': 
video_date, + 'thumbnail': video_thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/clipfish.py youtube-dl-2014.02.17/youtube_dl/extractor/clipfish.py --- youtube-dl-2012.09.27/youtube_dl/extractor/clipfish.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/clipfish.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,58 @@ +import re +import time +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class ClipfishIE(InfoExtractor): + IE_NAME = u'clipfish' + + _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/' + _TEST = { + u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', + u'file': u'3966754.mp4', + u'md5': u'2521cd644e862936cf2e698206e47385', + u'info_dict': { + u'title': u'FIFA 14 - E3 2013 Trailer', + u'duration': 82, + }, + u'skip': 'Blocked in the US' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % + (video_id, int(time.time()))) + doc = self._download_xml( + info_url, video_id, note=u'Downloading info page') + title = doc.find('title').text + video_url = doc.find('filename').text + if video_url is None: + xml_bytes = xml.etree.ElementTree.tostring(doc) + raise ExtractorError(u'Cannot find video URL in document %r' % + xml_bytes) + thumbnail = doc.find('imageurl').text + duration_str = doc.find('duration').text + m = re.match( + r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$', + duration_str) + if m: + duration = ( + (int(m.group('hours')) * 60 * 60) + + (int(m.group('minutes')) * 60) + + (int(m.group('seconds'))) + ) + else: + duration = None + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumbnail, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cliphunter.py youtube-dl-2014.02.17/youtube_dl/extractor/cliphunter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cliphunter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cliphunter.py 2014-02-02 10:56:50.000000000 +0000 @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +translation_table = { + 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', + 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', + 'y': 'l', 'z': 'i', + '$': ':', '&': '.', '(': '=', '^': '&', '=': '/', +} + + +class CliphunterIE(InfoExtractor): + IE_NAME = 'cliphunter' + + _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ + (?P<id>[0-9]+)/ + (?P<seo>.+?)(?:$|[#\?]) + ''' + _TEST = { + 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', + 'file': '1012420.flv', + 'md5': '15e7740f30428abf70f4223478dc1225', + 'info_dict': { + 'title': 'Fun Jynx Maze solo', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + pl_fiji = self._search_regex( + r'pl_fiji = \'([^\']+)\'', webpage, 'video data') + pl_c_qual = self._search_regex( + r'pl_c_qual = "(.)"', webpage, 'video quality') + video_title = self._search_regex( + r'mediaTitle = "([^"]+)"', webpage, 'title') + + video_url = ''.join(translation_table.get(c, c) for c in pl_fiji) + + formats = [{ + 'url': video_url, + 'format_id': pl_c_qual, + }] + + return { + 'id': 
video_id,
+            'title': video_title,
+            'formats': formats,
+        }
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/clipsyndicate.py youtube-dl-2014.02.17/youtube_dl/extractor/clipsyndicate.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/clipsyndicate.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/clipsyndicate.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,50 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    fix_xml_ampersands
+)
+
+
+class ClipsyndicateIE(InfoExtractor):
+    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
+        u'info_dict': {
+            u'id': u'4629301',
+            u'ext': u'mp4',
+            u'title': u'Brick Briscoe',
+            u'duration': 612,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        js_player = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
+            video_id, u'Downloading player')
+        # it includes a required token
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+
+        pdoc = self._download_xml(
+            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
+            video_id, u'Downloading video info',
+            transform_source=fix_xml_ampersands)
+
+        track_doc = pdoc.find('trackList/track')
+
+        def find_param(name):
+            node = find_xpath_attr(track_doc, './/param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+
+        return {
+            'id': video_id,
+            'title': find_param('title'),
+            'url': track_doc.find('location').text,
+            'thumbnail': find_param('thumbnail'),
+            'duration': int(find_param('duration')),
+        }
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cmt.py youtube-dl-2014.02.17/youtube_dl/extractor/cmt.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/cmt.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/cmt.py 2014-01-27 02:06:44.000000000 +0000
@@ -0,0 +1,19 @@
+from .mtv import MTVIE
+
+class CMTIE(MTVIE):
+    IE_NAME = u'cmt.com'
+    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
+    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
+            u'info_dict': {
+                u'id': u'989124',
+                u'ext': u'mp4',
+                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+                u'description': u'Blame It All On My Roots',
+            },
+        },
+    ]
diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cnn.py youtube-dl-2014.02.17/youtube_dl/extractor/cnn.py
--- youtube-dl-2012.09.27/youtube_dl/extractor/cnn.py 1970-01-01 00:00:00.000000000 +0000
+++ youtube-dl-2014.02.17/youtube_dl/extractor/cnn.py 2014-02-13 15:26:26.000000000 +0000
@@ -0,0 +1,126 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    url_basename,
+)
+
+
+class CNNIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+    _TESTS = [{
+        'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+        'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+        'md5': '3e6121ea48df7e2259fe73a0628605c4',
+        'info_dict': {
'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', + }, + }, + { + "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", + "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", + "md5": "b5cc60c60a3477d185af8f19a2a26f4e", + "info_dict": { + "title": "Student's epic speech stuns new freshmen", + "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + "upload_date": "20130821", + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + path = mobj.group('path') + page_title = mobj.group('title') + info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path + info = self._download_xml(info_url, page_title) + + formats = [] + rex = re.compile(r'''(?x) + (?P<width>[0-9]+)x(?P<height>[0-9]+) + (?:_(?P<bitrate>[0-9]+)k)? + ''') + for f in info.findall('files/file'): + video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) + fdct = { + 'format_id': f.attrib['bitrate'], + 'url': video_url, + } + + mf = rex.match(f.attrib['bitrate']) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mf = rex.search(f.text) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) + if mi: + if mi.group(1) == 'audio': + fdct['vcodec'] = 'none' + fdct['ext'] = 'm4a' + else: + fdct['tbr'] = int(mi.group(1)) + + formats.append(fdct) + + self._sort_formats(formats) + + thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) + thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] + + metas_el = info.find('metas') + upload_date = ( + metas_el.attrib.get('version') if metas_el is not None else None) + + duration_el = info.find('length') + duration = parse_duration(duration_el.text) + + return { + 'id': info.attrib['id'], + 'title': info.find('headline').text, + 'formats': formats, + 'thumbnail': thumbnails[-1][1], + 'thumbnails': thumbs_dict, + 'description': info.find('description').text, + 'duration': duration, + 'upload_date': upload_date, + } + + +class CNNBlogsIE(InfoExtractor): + _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' + _TEST = { + 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', + 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', + 'info_dict': { + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'ext': 'mp4', + 'title': 'Criminalizing journalism?', + 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', + 'upload_date': '20140209', + }, + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') + return { + '_type': 'url', + 'url': cnn_url, + 'ie_key': CNNIE.ie_key(), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/collegehumor.py 
youtube-dl-2014.02.17/youtube_dl/extractor/collegehumor.py --- youtube-dl-2012.09.27/youtube_dl/extractor/collegehumor.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/collegehumor.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,99 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class CollegeHumorIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' + + _TESTS = [{ + 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', + 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', + 'info_dict': { + 'id': '6902724', + 'ext': 'mp4', + 'title': 'Comic-Con Cosplay Catastrophe', + 'description': 'Fans get creative this year', + 'age_limit': 13, + }, + }, + { + 'url': 'http://www.collegehumor.com/video/3505939/font-conference', + 'md5': '72fa701d8ef38664a4dbb9e2ab721816', + 'info_dict': { + 'id': '3505939', + 'ext': 'mp4', + 'title': 'Font Conference', + 'description': 'This video wasn\'t long enough,', + 'age_limit': 10, + 'duration': 179, + }, + }, + # embedded youtube video + { + 'url': 'http://www.collegehumor.com/embed/6950457', + 'info_dict': { + 'id': 'W5gMp3ZjYg4', + 'ext': 'mp4', + 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', + 'uploader': 'Funnyplox TV', + 'uploader_id': 'funnyploxtv', + 'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', + 'upload_date': '20140128', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Youtube'], + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + + jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' + data = json.loads(self._download_webpage( + jsonUrl, video_id, 'Downloading info JSON')) + vdata = data['video'] + if vdata.get('youtubeId') is not None: + return { + '_type': 'url', + 'url': vdata['youtubeId'], + 'ie_key': 'Youtube', + } + + AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} + rating = vdata.get('rating') + if rating: + age_limit = AGE_LIMITS.get(rating.lower()) + else: + age_limit = None # None = No idea + + PREFS = {'high_quality': 2, 'low_quality': 0} + formats = [] + for format_key in ('mp4', 'webm'): + for qname, qurl in vdata.get(format_key, {}).items(): + formats.append({ + 'format_id': format_key + '_' + qname, + 'url': qurl, + 'format': format_key, + 'preference': PREFS.get(qname), + }) + self._sort_formats(formats) + + duration = int_or_none(vdata.get('duration'), 1000) + + return { + 'id': video_id, + 'title': vdata['title'], + 'description': vdata.get('description'), + 'thumbnail': vdata.get('thumbnail'), + 'formats': formats, + 'age_limit': age_limit, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/comedycentral.py youtube-dl-2014.02.17/youtube_dl/extractor/comedycentral.py --- youtube-dl-2012.09.27/youtube_dl/extractor/comedycentral.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/comedycentral.py 2014-01-30 03:51:52.000000000 +0000 @@ -0,0 +1,200 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .mtv import MTVServicesInfoExtractor +from ..utils import ( + compat_str, + compat_urllib_parse, + + ExtractorError, + unified_strdate, +) + + +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = 
r'''(?x)https?://(?:www\.)?comedycentral\.com/ + (video-clips|episodes|cc-studios|video-collections) + /(?P<title>.*)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TEST = { + 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', + 'md5': '4167875aae411f903b751a21f357f1ee', + 'info_dict': { + 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', + 'ext': 'mp4', + 'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother', + 'description': 'After a certain point, breastfeeding becomes c**kblocking.', + }, + } + + +class ComedyCentralShowsIE(InfoExtractor): + IE_DESC = 'The Daily Show / Colbert Report' + # urls can be abbreviations like :thedailyshow or :colbert + # urls for episodes like: + # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day + # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news + # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 + _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) + |(https?://)?(www\.)? + (?P<showname>thedailyshow|colbertnation)\.com/ + (full-episodes/(?P<episode>.*)| + (?P<clip> + (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) + |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))| + (?P<interview> + extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?))) + $""" + _TEST = { + 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', + 'file': '422212.mp4', + 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', + 'info_dict': { + "upload_date": "20121214", + "description": "Kristen Stewart", + "uploader": "thedailyshow", + "title": "thedailyshow-kristen-stewart part 1" + } + } + + _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] + + _video_extensions = { + '3500': 'mp4', + '2200': 'mp4', + '1700': 'mp4', + '1200': 'mp4', + '750': 'mp4', + '400': 'mp4', + } + _video_dimensions = { + '3500': (1280, 720), + '2200': (960, 540), + '1700': (768, 432), + '1200': (640, 360), + '750': (512, 288), + '400': (384, 216), + } + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + + @staticmethod + def _transform_rtmp_url(rtmp_video_url): + m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url) + if not m: + raise ExtractorError('Cannot transform RTMP url') + base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' + return base + m.group('finalid') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + if mobj is None: + raise ExtractorError('Invalid URL: %s' % url) + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + assert mobj is not None + + if mobj.group('clip'): + if mobj.group('showname') == 'thedailyshow': + epTitle = mobj.group('tdstitle') + else: + epTitle = mobj.group('cntitle') + dlNewest = False + elif mobj.group('interview'): + epTitle = mobj.group('interview_title') + dlNewest = False + else: + dlNewest = not mobj.group('episode') 
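+ # no specific episode was requested; the site redirects to the newest + # full episode, so the final URL is re-matched after the download below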
+ if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') + + self.report_extraction(epTitle) + webpage, htmlHandle = self._download_webpage_handle(url, epTitle) + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url, re.VERBOSE) + if mobj is None: + raise ExtractorError('Invalid redirected URL: ' + url) + if mobj.group('episode') == '': + raise ExtractorError('Redirected URL is still not specific: ' + url) + epTitle = mobj.group('episode') + + mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) + + if len(mMovieParams) == 0: + # The Colbert Report embeds the information without + # a URL prefix; so extract the alternate reference + # and then add the URL prefix manually. + + altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) + if len(altMovieParams) == 0: + raise ExtractorError('unable to find Flash URL in webpage ' + url) + else: + mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] + + uri = mMovieParams[0][1] + indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) + idoc = self._download_xml(indexUrl, epTitle, + 'Downloading show index', + 'unable to download episode index') + + results = [] + + itemEls = idoc.findall('.//item') + for partNum, itemEl in enumerate(itemEls): + mediaId = itemEl.findall('./guid')[0].text + shortMediaId = mediaId.split(':')[-1] + showId = mediaId.split(':')[-2].replace('.com', '') + officialTitle = itemEl.findall('./title')[0].text + officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) + + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?'
+ + compat_urllib_parse.urlencode({'uri': mediaId})) + cdoc = self._download_xml(configUrl, epTitle, + 'Downloading configuration for %s' % shortMediaId) + + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + if len(turls) == 0: + self._downloader.report_error('unable to download ' + mediaId + ': No videos found') + continue + + formats = [] + for format, rtmp_video_url in turls: + w, h = self._video_dimensions.get(format, (None, None)) + formats.append({ + 'url': self._transform_rtmp_url(rtmp_video_url), + 'ext': self._video_extensions.get(format, 'mp4'), + 'format_id': format, + 'height': h, + 'width': w, + }) + + effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1) + results.append({ + 'id': shortMediaId, + 'formats': formats, + 'uploader': showId, + 'upload_date': officialDate, + 'title': effTitle, + 'thumbnail': None, + 'description': compat_str(officialTitle), + }) + + return results diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/common.py youtube-dl-2014.02.17/youtube_dl/extractor/common.py --- youtube-dl-2012.09.27/youtube_dl/extractor/common.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/common.py 2014-02-09 16:42:58.000000000 +0000 @@ -0,0 +1,573 @@ +import base64 +import hashlib +import json +import os +import re +import socket +import sys +import netrc +import xml.etree.ElementTree + +from ..utils import ( + compat_http_client, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_str, + + clean_html, + compiled_regex_type, + ExtractorError, + RegexNotFoundError, + sanitize_filename, + unescapeHTML, +) +_NO_DEFAULT = object() + + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the FileDownloader. The FileDownloader processes this + information, possibly downloading the video to the file system, among + other possible outcomes. + + The dictionaries must include the following fields: + + id: Video identifier. + title: Video title, unescaped. + + Additionally, it must contain either a formats entry or a url one: + + formats: A list of dictionaries for each format available, ordered + from worst to best quality. + + Potential fields: + * url Mandatory. The URL of the video file + * ext Will be calculated from url if missing + * format A human-readable description of the format + ("mp4 container with h264/opus"). + Calculated from the format_id, width, height, + and format_note fields if missing. + * format_id A short description of the format + ("mp4_h264_opus" or "19"). + Technically optional, but strongly recommended.
+ * format_note Additional info about the format + ("3D" or "DASH video") + * width Width of the video, if known + * height Height of the video, if known + * resolution Textual description of width and height + * tbr Average bitrate of audio and video in KBit/s + * abr Average audio bitrate in KBit/s + * acodec Name of the audio codec in use + * asr Audio sampling rate in Hertz + * vbr Average video bitrate in KBit/s + * vcodec Name of the video codec in use + * container Name of the container format + * filesize The number of bytes, if known in advance + * player_url SWF Player URL (used for rtmpdump). + * protocol The protocol that will be used for the actual + download, lower-case. + "http", "https", "rtsp", "rtmp", "m3u8" or so. + * preference Order number of this format. If this field is + present and not None, the formats get sorted + by this field. + -1 for default (order by other properties), + -2 or smaller for less than default. + * quality Order number of the video quality of this + format, irrespective of the file format. + -1 for default (order by other properties), + -2 or smaller for less than default. + url: Final video URL. + ext: Video filename extension. + format: The video format, defaults to ext (used for --get-format) + player_url: SWF Player URL (used for rtmpdump). + + The following fields are optional: + + thumbnails: A list of dictionaries (with the entries "resolution" and + "url") for the varying thumbnails + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + uploader: Full name of the video uploader. + upload_date: Video upload date (YYYYMMDD). + uploader_id: Nickname or id of the video uploader. + location: Physical location of the video. + subtitles: The subtitle file contents as a dictionary in the format + {language: subtitles}. + duration: Length of the video in seconds, as an integer. + view_count: How many users have watched the video on the platform. + like_count: Number of positive ratings of the video + dislike_count: Number of negative ratings of the video + comment_count: Number of comments on the video + age_limit: Age restriction for the video, as an integer (years) + webpage_url: The url to the video webpage, if given to youtube-dl it + should allow to get the same result again. (It will be set + by YoutubeDL if it's missing) + + Unless mentioned otherwise, the fields should be Unicode strings. + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods and define a _VALID_URL regexp. + Probably, they should also be added to the list of extractors. + + _real_extract() must return a *list* of information dictionaries as + described above. + + Finally, the _WORKING attribute should be set to False for broken IEs + in order to warn the users and skip the tests. + """ + + _ready = False + _downloader = None + _WORKING = True + + def __init__(self, downloader=None): + """Constructor. 
Receives an optional downloader.""" + self._ready = False + self.set_downloader(downloader) + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + + # This does not use has/getattr intentionally - we want to know whether + # we have cached the regexp for *this* class, whereas getattr would also + # match the superclass + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + return cls._VALID_URL_RE.match(url) is not None + + @classmethod + def working(cls): + """Getter method for _WORKING.""" + return cls._WORKING + + def initialize(self): + """Initializes an instance (authentication, etc).""" + if not self._ready: + self._real_initialize() + self._ready = True + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + self.initialize() + return self._real_extract(url) + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return cls.__name__[:-2] + + @property + def IE_NAME(self): + return type(self).__name__[:-2] + + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns the response handle """ + if note is None: + self.report_download_webpage(video_id) + elif note is not False: + if video_id is None: + self.to_screen(u'%s' % (note,)) + else: + self.to_screen(u'%s: %s' % (video_id, note)) + try: + return self._downloader.urlopen(url_or_request) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + if errnote is False: + return False + if errnote is None: + errnote = u'Unable to download webpage' + errmsg = u'%s: %s' % (errnote, compat_str(err)) + if fatal: + raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + else: + self._downloader.report_warning(errmsg) + return False + + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns a tuple (page content as string, URL handle) """ + + # Strip hashes from the URL (#1038) + if isinstance(url_or_request, (compat_str, str)): + url_or_request = url_or_request.partition('#')[0] + + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) + if urlh is False: + assert not fatal + return False + content_type = urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + elif webpage_bytes.startswith(b'\xff\xfe'): + encoding = 'utf-16' + else: + encoding = 'utf-8' + if self._downloader.params.get('dump_intermediate_pages', False): + try: + url = url_or_request.get_full_url() + except AttributeError: + url = url_or_request + self.to_screen(u'Dumping request to ' + url) + dump = base64.b64encode(webpage_bytes).decode('ascii') + self._downloader.to_screen(dump) + if self._downloader.params.get('write_pages', False): + try: + url = url_or_request.get_full_url() + except AttributeError: + url = url_or_request + if len(url) > 200: 
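+ # keep the --write-pages dump filename manageable: replace the tail + # of an overlong URL with an md5 digest of the full URL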
+ h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() + url = url[:200 - len(h)] + h + raw_filename = ('%s_%s.dump' % (video_id, url)) + filename = sanitize_filename(raw_filename, restricted=True) + self.to_screen(u'Saving request to ' + filename) + with open(filename, 'wb') as outf: + outf.write(webpage_bytes) + + content = webpage_bytes.decode(encoding, 'replace') + return (content, urlh) + + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + """ Returns the data of the page as a string """ + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + if res is False: + return res + else: + content, _ = res + return content + + def _download_xml(self, url_or_request, video_id, + note=u'Downloading XML', errnote=u'Unable to download XML', + transform_source=None): + """Return the xml as an xml.etree.ElementTree.Element""" + xml_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + xml_string = transform_source(xml_string) + return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + + def _download_json(self, url_or_request, video_id, + note=u'Downloading JSON metadata', + errnote=u'Unable to download JSON metadata', + transform_source=None): + json_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + json_string = transform_source(json_string) + try: + return json.loads(json_string) + except ValueError as ve: + raise ExtractorError('Failed to download JSON', cause=ve) + + def report_warning(self, msg, video_id=None): + idstr = u'' if video_id is None else u'%s: ' % video_id + self._downloader.report_warning( + u'[%s] %s%s' % (self.IE_NAME, idstr, msg)) + + def to_screen(self, msg): + """Print msg to screen, prefixing it with '[ie_name]'""" + self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) + + def report_extraction(self, id_or_name): + """Report information extraction.""" + self.to_screen(u'%s: Extracting information' % id_or_name) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self.to_screen(u'Confirming age') + + def report_login(self): + """Report attempt to log in.""" + self.to_screen(u'Logging in') + + #Methods for following #608 + @staticmethod + def url_result(url, ie=None, video_id=None): + """Returns a url that points to a page that should be processed""" + #TODO: ie should be the class used for getting the info + video_info = {'_type': 'url', + 'url': url, + 'ie_key': ie} + if video_id is not None: + video_info['id'] = video_id + return video_info + @staticmethod + def playlist_result(entries, playlist_id=None, playlist_title=None): + """Returns a playlist""" + video_info = {'_type': 'playlist', + 'entries': entries} + if playlist_id: + video_info['id'] = playlist_id + if playlist_title: + video_info['title'] = playlist_title + return video_info + + def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): + """ + Perform a regex search on the given string, using a single or a list of + patterns returning the first matching group. + In case of failure return a default value or raise a WARNING or a + RegexNotFoundError, depending on fatal, specifying the field name. 
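+ + An illustrative example: + self._search_regex(r'id="(\d+)"', webpage, u'video id') + returns the first captured group, raising RegexNotFoundError on + failure since fatal defaults to True.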
+ """ + if isinstance(pattern, (str, compat_str, compiled_regex_type)): + mobj = re.search(pattern, string, flags) + else: + for p in pattern: + mobj = re.search(p, string, flags) + if mobj: break + + if os.name != 'nt' and sys.stderr.isatty(): + _name = u'\033[0;34m%s\033[0m' % name + else: + _name = name + + if mobj: + # return the first matching group + return next(g for g in mobj.groups() if g is not None) + elif default is not _NO_DEFAULT: + return default + elif fatal: + raise RegexNotFoundError(u'Unable to extract %s' % _name) + else: + self._downloader.report_warning(u'unable to extract %s; ' + u'please report this issue on http://yt-dl.org/bug' % _name) + return None + + def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): + """ + Like _search_regex, but strips HTML tags and unescapes entities. + """ + res = self._search_regex(pattern, string, name, default, fatal, flags) + if res: + return clean_html(res).strip() + else: + return res + + def _get_login_info(self): + """ + Get the the login info as (username, password) + It will look in the netrc file using the _NETRC_MACHINE value + If there's no info available, return (None, None) + """ + if self._downloader is None: + return (None, None) + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError) as err: + self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) + + return (username, password) + + # Helper functions for extracting OpenGraph info + @staticmethod + def _og_regexes(prop): + content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' + property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) + template = r'<meta[^>]+?%s[^>]+?%s' + return [ + template % (property_re, content_re), + template % (content_re, property_re), + ] + + def _og_search_property(self, prop, html, name=None, **kargs): + if name is None: + name = 'OpenGraph %s' % prop + escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) + if escaped is None: + return None + return unescapeHTML(escaped) + + def _og_search_thumbnail(self, html, **kargs): + return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) + + def _og_search_description(self, html, **kargs): + return self._og_search_property('description', html, fatal=False, **kargs) + + def _og_search_title(self, html, **kargs): + return self._og_search_property('title', html, **kargs) + + def _og_search_video_url(self, html, name='video url', secure=True, **kargs): + regexes = self._og_regexes('video') + if secure: regexes = self._og_regexes('video:secure_url') + regexes + return self._html_search_regex(regexes, html, name, **kargs) + + def _html_search_meta(self, name, html, display_name=None): + if display_name is None: + display_name = name + return self._html_search_regex( + r'''(?ix)<meta + (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) + [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), + html, display_name, fatal=False) + + def 
_dc_search_uploader(self, html): + return self._html_search_meta('dc.creator', html, 'uploader') + + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)<meta\s+name="rating"\s+' + r' content="RTA-5042-1996-1400-1577-RTA"', + html): + return 18 + return 0 + + def _media_rating_search(self, html): + # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ + rating = self._html_search_meta('rating', html) + + if not rating: + return None + + RATING_TABLE = { + 'safe for kids': 0, + 'general': 8, + '14 years': 14, + 'mature': 17, + 'restricted': 19, + } + return RATING_TABLE.get(rating.lower(), None) + + def _twitter_search_player(self, html): + return self._html_search_meta('twitter:player', html, + 'twitter card player') + + def _sort_formats(self, formats): + if not formats: + raise ExtractorError(u'No video formats found') + + def _formats_key(f): + # TODO remove the following workaround + from ..utils import determine_ext + if not f.get('ext') and 'url' in f: + f['ext'] = determine_ext(f['url']) + + preference = f.get('preference') + if preference is None: + proto = f.get('protocol') + if proto is None: + proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme + + preference = 0 if proto in ['http', 'https'] else -0.1 + if f.get('ext') in ['f4f', 'f4m']: # Not yet supported + preference -= 0.5 + + if f.get('vcodec') == 'none': # audio only + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] + else: + ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] + ext_preference = 0 + try: + audio_ext_preference = ORDER.index(f['ext']) + except ValueError: + audio_ext_preference = -1 + else: + if self._downloader.params.get('prefer_free_formats'): + ORDER = [u'flv', u'mp4', u'webm'] + else: + ORDER = [u'webm', u'flv', u'mp4'] + try: + ext_preference = ORDER.index(f['ext']) + except ValueError: + ext_preference = -1 + audio_ext_preference = 0 + + return ( + preference, + f.get('quality') if f.get('quality') is not None else -1, + f.get('height') if f.get('height') is not None else -1, + f.get('width') if f.get('width') is not None else -1, + ext_preference, + f.get('tbr') if f.get('tbr') is not None else -1, + f.get('vbr') if f.get('vbr') is not None else -1, + f.get('abr') if f.get('abr') is not None else -1, + audio_ext_preference, + f.get('filesize') if f.get('filesize') is not None else -1, + f.get('format_id'), + ) + formats.sort(key=_formats_key) + + +class SearchInfoExtractor(InfoExtractor): + """ + Base class for paged search queries extractors. + They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query} + Instances should define _SEARCH_KEY and _MAX_RESULTS. 
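+ For example, with _SEARCH_KEY = 'ytsearch', "ytsearch3:query" fetches + the first three results, "ytsearch:query" only the first, and + "ytsearchall:query" up to _MAX_RESULTS.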
+ """ + + @classmethod + def _make_valid_url(cls): + return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY + + @classmethod + def suitable(cls, url): + return re.match(cls._make_valid_url(), url) is not None + + def _real_extract(self, query): + mobj = re.match(self._make_valid_url(), query) + if mobj is None: + raise ExtractorError(u'Invalid search query "%s"' % query) + + prefix = mobj.group('prefix') + query = mobj.group('query') + if prefix == '': + return self._get_n_results(query, 1) + elif prefix == 'all': + return self._get_n_results(query, self._MAX_RESULTS) + else: + n = int(prefix) + if n <= 0: + raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query)) + elif n > self._MAX_RESULTS: + self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) + n = self._MAX_RESULTS + return self._get_n_results(query, n) + + def _get_n_results(self, query, n): + """Get a specified number of results for a query""" + raise NotImplementedError("This method must be implemented by subclasses") + + @property + def SEARCH_KEY(self): + return self._SEARCH_KEY diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/condenast.py youtube-dl-2014.02.17/youtube_dl/extractor/condenast.py --- youtube-dl-2012.09.27/youtube_dl/extractor/condenast.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/condenast.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + orderedSet, + compat_urllib_parse_urlparse, + compat_urlparse, +) + + +class CondeNastIE(InfoExtractor): + """ + Condé Nast is a media group, some of its sites use a custom HTML5 player + that works the same in all of them. + """ + + # The keys are the supported sites and the values are the name to be shown + # to the user and in the extractor description. + _SITES = { + 'wired': 'WIRED', + 'gq': 'GQ', + 'vogue': 'Vogue', + 'glamour': 'Glamour', + 'wmagazine': 'W Magazine', + 'vanityfair': 'Vanity Fair', + } + + _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) + IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) + + _TEST = { + 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', + 'file': '5171b343c2b4c00dd0c1ccb3.mp4', + 'md5': '1921f713ed48aabd715691f774c451f7', + 'info_dict': { + 'title': '3D Printed Speakers Lit With LED', + 'description': 'Check out these beautiful 3D printed LED speakers. 
You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', + } + } + + def _extract_series(self, url, webpage): + title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', + webpage, 'series title', flags=re.DOTALL) + url_object = compat_urllib_parse_urlparse(url) + base_url = '%s://%s' % (url_object.scheme, url_object.netloc) + m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', + webpage, flags=re.DOTALL) + paths = orderedSet(m.group(1) for m in m_paths) + build_url = lambda path: compat_urlparse.urljoin(base_url, path) + entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] + return self.playlist_result(entries, playlist_title=title) + + def _extract_video(self, webpage): + description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>', + r'<div class="video-post-content">(.+?)</div>', + ], + webpage, 'description', + fatal=False, flags=re.DOTALL) + params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, + 'player params', flags=re.DOTALL) + video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') + player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') + target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') + data = compat_urllib_parse.urlencode({'videoId': video_id, + 'playerId': player_id, + 'target': target, + }) + base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', + webpage, 'base info url', + default='http://player.cnevids.com/player/loader.js?') + info_url = base_info_url + data + info_page = self._download_webpage(info_url, video_id, + 'Downloading video info') + video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') + video_info = json.loads(video_info) + + formats = [{ + 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), + 'url': fdata['src'], + 'ext': fdata['type'].split('/')[-1], + 'quality': 1 if fdata['quality'] == 'high' else 0, + } for fdata in video_info['sources'][0]] + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_info['title'], + 'thumbnail': video_info['poster_frame'], + 'description': description, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site = mobj.group('site') + url_type = mobj.group('type') + id = mobj.group('id') + + self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) + webpage = self._download_webpage(url, id) + + if url_type == 'series': + return self._extract_series(url, webpage) + else: + return self._extract_video(webpage) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/criterion.py youtube-dl-2014.02.17/youtube_dl/extractor/criterion.py --- youtube-dl-2012.09.27/youtube_dl/extractor/criterion.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/criterion.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import InfoExtractor +from ..utils import determine_ext + +class CriterionIE(InfoExtractor): + _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _TEST = { + u'url': u'http://www.criterion.com/films/184-le-samourai', + u'file': u'184.mp4', + u'md5': u'bc51beba55685509883a9a7830919ec3', + u'info_dict': { + u"title": u"Le Samouraï", + u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + } + } + + def _real_extract(self, url): + mobj = 
re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + + final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', + webpage, 'video url') + title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', + webpage, 'video title') + description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', + webpage, 'video description') + thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') + + return {'id': video_id, + 'url' : final_url, + 'title': title, + 'ext': determine_ext(final_url), + 'description': description, + 'thumbnail': thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/crunchyroll.py youtube-dl-2014.02.17/youtube_dl/extractor/crunchyroll.py --- youtube-dl-2012.09.27/youtube_dl/extractor/crunchyroll.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/crunchyroll.py 2014-01-30 04:30:16.000000000 +0000 @@ -0,0 +1,178 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re, base64, zlib +from hashlib import sha1 +from math import pow, sqrt, floor +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + bytes_to_intlist, + intlist_to_bytes, + unified_strdate, + clean_html, +) +from ..aes import ( + aes_cbc_decrypt, + inc, +) + +class CrunchyrollIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _TESTS = [{ + 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', + 'file': '645513.flv', + #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', + 'info_dict': { + 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', + 'description': 'md5:2d17137920c64f2f49981a7797d275ef', + 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', + 'uploader': 'Yomiuri Telecasting Corporation (YTV)', + 'upload_date': '20131013', + }, + 'params': { + # rtmp + 'skip_download': True, + }, + }] + + _FORMAT_IDS = { + '360': ('60', '106'), + '480': ('61', '106'), + '720': ('62', '106'), + '1080': ('80', '108'), + } + + def _decrypt_subtitles(self, data, iv, id): + data = bytes_to_intlist(data) + iv = bytes_to_intlist(iv) + id = int(id) + + def obfuscate_key_aux(count, modulo, start): + output = list(start) + for _ in range(count): + output.append(output[-1] + output[-2]) + # cut off start values + output = output[2:] + output = list(map(lambda x: x % modulo + 33, output)) + return output + + def obfuscate_key(key): + num1 = int(floor(pow(2, 25) * sqrt(6.9))) + num2 = (num1 ^ key) << 5 + num3 = key ^ num1 + num4 = num3 ^ (num3 >> 3) ^ num2 + prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) + shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) + # Extend 160 Bit hash to 256 Bit + return shaHash + [0] * 12 + + key = obfuscate_key(id) + class Counter: + __value = iv + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) + return zlib.decompress(decrypted_data) + + def _convert_subtitles_to_srt(self, subtitles): + i=1 + output = '' + for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" 
[^>]*?text="([^"]+)"[^>]*?>', subtitles): + start = start.replace('.', ',') + end = end.replace('.', ',') + text = clean_html(text) + text = text.replace('\\N', '\n') + if not text: + continue + output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) + i+=1 + return output + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + if mobj.group('prefix') == 'm': + mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') + webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url') + else: + webpage_url = 'http://www.' + mobj.group('url') + + webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') + note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='') + if note_m: + raise ExtractorError(note_m) + + video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) + video_title = re.sub(r' {2,}', ' ', video_title) + video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') + if not video_description: + video_description = None + video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) + if video_upload_date: + video_upload_date = unified_strdate(video_upload_date) + video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) + + playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) + playerdata_req = compat_urllib_request.Request(playerdata_url) + playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) + playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') + + stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') + video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) + + formats = [] + for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): + stream_quality, stream_format = self._FORMAT_IDS[fmt] + video_format = fmt+'p' + streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') + # urlencode doesn't work! 
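+ # (presumably the endpoint insists on the literal %5F-escaped parameter + # names below, which urlencode would not produce for underscores)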
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format + streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) + streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) + video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') + video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') + formats.append({ + 'url': video_url, + 'play_path': video_play_path, + 'ext': 'flv', + 'format': video_format, + 'format_id': video_format, + }) + + subtitles = {} + for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): + sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\ + video_id, note='Downloading subtitles for '+sub_name) + id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) + iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) + data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) + if not id or not iv or not data: + continue + id = int(id) + iv = base64.b64decode(iv) + data = base64.b64decode(data) + + subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) + if not lang_code: + continue + subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) + + return { + 'id': video_id, + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + 'upload_date': video_upload_date, + 'subtitles': subtitles, + 'formats': formats, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/cspan.py youtube-dl-2014.02.17/youtube_dl/extractor/cspan.py --- youtube-dl-2012.09.27/youtube_dl/extractor/cspan.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/cspan.py 2014-02-02 17:36:13.000000000 +0000 @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + find_xpath_attr, +) + + +class CSpanIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)' + IE_DESC = 'C-SPAN' + _TEST = { + 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', + 'md5': '8e44ce11f0f725527daccc453f553eb0', + 'info_dict': { + 'id': '315139', + 'ext': 'mp4', + 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', + 'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. 
Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', + }, + 'skip': 'Regularly fails on travis, for unknown reasons', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_id = mobj.group('id') + webpage = self._download_webpage(url, page_id) + video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id') + + description = self._html_search_regex( + [ + # The full description + r'<div class=\'expandable\'>(.*?)<a href=\'#\'', + # If the description is small enough the other div is not + # present, otherwise this is a stripped version + r'<p class=\'initial\'>(.*?)</p>' + ], + webpage, 'description', flags=re.DOTALL) + + info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id + data = self._download_json(info_url, video_id) + + url = unescapeHTML(data['video']['files'][0]['path']['#text']) + + doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id, + video_id) + + def find_string(s): + return find_xpath_attr(doc, './/string', 'name', s).text + + return { + 'id': video_id, + 'title': find_string('title'), + 'url': url, + 'description': description, + 'thumbnail': find_string('poster'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/d8.py youtube-dl-2014.02.17/youtube_dl/extractor/d8.py --- youtube-dl-2012.09.27/youtube_dl/extractor/d8.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/d8.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,25 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .canalplus import CanalplusIE + + +class D8IE(CanalplusIE): + _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)' + _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s' + IE_NAME = 'd8.tv' + + _TEST = { + 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'file': '966289.flv', + 'info_dict': { + 'title': 'Campagne intime - Documentaire exceptionnel', + 'description': 'md5:d2643b799fb190846ae09c61e59a859f', + 'upload_date': '20131108', + }, + 'params': { + # rtmp + 'skip_download': True, + }, + 'skip': 'videos get deleted after a while', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dailymotion.py youtube-dl-2014.02.17/youtube_dl/extractor/dailymotion.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dailymotion.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dailymotion.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,231 @@ +import re +import json +import itertools + +from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor + +from ..utils import ( + compat_urllib_request, + compat_str, + get_element_by_attribute, + get_element_by_id, + orderedSet, + str_to_int, + + ExtractorError, +) + +class DailymotionBaseInfoExtractor(InfoExtractor): + @staticmethod + def _build_request(url): + """Build a request with the family filter disabled""" + request = compat_urllib_request.Request(url) + request.add_header('Cookie', 'family_filter=off') + request.add_header('Cookie', 'ff=off') + return request + +class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' + IE_NAME = 
u'dailymotion' + + _FORMATS = [ + (u'stream_h264_ld_url', u'ld'), + (u'stream_h264_url', u'standard'), + (u'stream_h264_hq_url', u'hq'), + (u'stream_h264_hd_url', u'hd'), + (u'stream_h264_hd1080_url', u'hd180'), + ] + + _TESTS = [ + { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Amphora Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + }, + # Vevo video + { + u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', + u'file': u'USUV71301934.mp4', + u'info_dict': { + u'title': u'Roar (Official)', + u'uploader': u'Katy Perry', + u'upload_date': u'20130905', + }, + u'params': { + u'skip_download': True, + }, + u'skip': u'VEVO is only available in some countries', + }, + # age-restricted video + { + u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', + u'file': u'xyh2zz.mp4', + u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', + u'info_dict': { + u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', + u'uploader': 'HotWaves1012', + u'age_limit': 18, + } + + } + ] + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + + url = 'http://www.dailymotion.com/video/%s' % video_id + + # Retrieve video webpage to extract further information + request = self._build_request(url) + webpage = self._download_webpage(request, video_id) + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + + # It may just embed a vevo video: + m_vevo = re.search( + r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)', + webpage) + if m_vevo is not None: + vevo_id = m_vevo.group('id') + self.to_screen(u'Vevo video detected: %s' % vevo_id) + return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') + + age_limit = self._rta_search(webpage) + + video_upload_date = None + mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) + if mobj is not None: + video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + + embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id + embed_page = self._download_webpage(embed_url, video_id, + u'Downloading embed page') + info = self._search_regex(r'var info = ({.*?}),$', embed_page, + 'video info', flags=re.MULTILINE) + info = json.loads(info) + if info.get('error') is not None: + msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] + raise ExtractorError(msg, expected=True) + + formats = [] + for (key, format_id) in self._FORMATS: + video_url = info.get(key) + if video_url is not None: + m_size = re.search(r'H264-(\d+)x(\d+)', video_url) + if m_size is not None: + width, height = m_size.group(1), m_size.group(2) + else: + width, height = None, None + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': format_id, + 'width': width, + 'height': height, + }) + if not formats: + raise ExtractorError(u'Unable to extract video URL') + + # subtitles + video_subtitles = self.extract_subtitles(video_id, webpage) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, webpage) + return + + view_count = self._search_regex( + r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', 
fatal=False) + if view_count is not None: + view_count = str_to_int(view_count) + + return { + 'id': video_id, + 'formats': formats, + 'uploader': info['owner_screenname'], + 'upload_date': video_upload_date, + 'title': self._og_search_title(webpage), + 'subtitles': video_subtitles, + 'thumbnail': info['thumbnail_url'], + 'age_limit': age_limit, + 'view_count': view_count, + } + + def _get_available_subtitles(self, video_id, webpage): + try: + sub_list = self._download_webpage( + 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, + video_id, note=False) + except ExtractorError as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + info = json.loads(sub_list) + if (info['total'] > 0): + sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + return sub_lang_list + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + + +class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): + IE_NAME = u'dailymotion:playlist' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' + _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' + _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' + + def _extract_entries(self, id): + video_ids = [] + for pagenum in itertools.count(1): + request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum)) + webpage = self._download_webpage(request, + id, u'Downloading page %s' % pagenum) + + playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) + video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) + + if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: + break + return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + for video_id in orderedSet(video_ids)] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + return {'_type': 'playlist', + 'id': playlist_id, + 'title': get_element_by_id(u'playlist_name', webpage), + 'entries': self._extract_entries(playlist_id), + } + + +class DailymotionUserIE(DailymotionPlaylistIE): + IE_NAME = u'dailymotion:user' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>' + _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + webpage = self._download_webpage(url, user) + full_user = self._html_search_regex( + r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user), + webpage, u'user', flags=re.DOTALL) + + return { + '_type': 'playlist', + 'id': user, + 'title': full_user, + 'entries': self._extract_entries(user), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/daum.py youtube-dl-2014.02.17/youtube_dl/extractor/daum.py --- youtube-dl-2012.09.27/youtube_dl/extractor/daum.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/daum.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,68 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, +) + + +class DaumIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' + IE_NAME = u'daum.net' + + _TEST = { 
+ u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', + u'file': u'52554690.mp4', + u'info_dict': { + u'title': u'DOTA 2GETHER 시즌2 6회 - 2부', + u'description': u'DOTA 2GETHER 시즌2 6회 - 2부', + u'upload_date': u'20130831', + u'duration': 3868, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + canonical_url = 'http://tvpot.daum.net/v/%s' % video_id + webpage = self._download_webpage(canonical_url, video_id) + full_id = self._search_regex( + r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', + webpage, u'full id') + query = compat_urllib_parse.urlencode({'vid': full_id}) + info = self._download_xml( + 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, + u'Downloading video info') + urls = self._download_xml( + 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, + video_id, u'Downloading video formats info') + + self.to_screen(u'%s: Getting video urls' % video_id) + formats = [] + for format_el in urls.findall('result/output_list/output_list'): + profile = format_el.attrib['profile'] + format_query = compat_urllib_parse.urlencode({ + 'vid': full_id, + 'profile': profile, + }) + url_doc = self._download_xml( + 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, + video_id, note=False) + format_url = url_doc.find('result/url').text + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format_id': profile, + }) + + return { + 'id': video_id, + 'title': info.find('TITLE').text, + 'formats': formats, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': info.find('CONTENTS').text, + 'duration': int(info.find('DURATION').text), + 'upload_date': info.find('REGDTTM').text[:8], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/defense.py youtube-dl-2014.02.17/youtube_dl/extractor/defense.py --- youtube-dl-2012.09.27/youtube_dl/extractor/defense.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/defense.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class DefenseGouvFrIE(InfoExtractor): + IE_NAME = 'defense.gouv.fr' + _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' + r'ligthboxvideo/base-de-medias/webtv/(.*)') + + _TEST = { + 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', + 'file': '11213.mp4', + 'md5': '75bba6124da7e63d2d60b5244ec9430c', + "info_dict": { + "title": "attaque-chimique-syrienne-du-21-aout-2013-1" + } + } + + def _real_extract(self, url): + title = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, title) + video_id = self._search_regex( + r"flashvars.pvg_id=\"(\d+)\";", + webpage, 'ID') + + json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' + + video_id) + info = self._download_webpage(json_url, title, + 'Downloading JSON config') + video_url = json.loads(info)['renditions'][0]['url'] + + return {'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/depositfiles.py youtube-dl-2014.02.17/youtube_dl/extractor/depositfiles.py --- youtube-dl-2012.09.27/youtube_dl/extractor/depositfiles.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/depositfiles.py 
2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,60 @@ +import re +import os +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_str, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + + ExtractorError, +) + + +class DepositFilesIE(InfoExtractor): + """Information extractor for depositfiles.com""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' + + def _real_extract(self, url): + file_id = url.split('/')[-1] + # Rebuild url in english locale + url = 'http://depositfiles.com/en/files/' + file_id + + # Retrieve file webpage with 'Free download' button pressed + free_download_indication = {'gateway_result' : '1'} + request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) + try: + self.report_download_webpage(file_id) + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err)) + + # Search for the real file URL + mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage) + if (mobj is None) or (mobj.group(1) is None): + # Try to figure out reason of the error. + mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) + if (mobj is not None) and (mobj.group(1) is not None): + restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() + raise ExtractorError(u'%s' % restriction_message) + else: + raise ExtractorError(u'Unable to extract download URL from: %s' % url) + + file_url = mobj.group(1) + file_extension = os.path.splitext(file_url)[1][1:] + + # Search for file title + file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title') + + return [{ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': None, + 'upload_date': None, + 'title': file_title, + 'ext': file_extension.decode('utf-8'), + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/discovery.py youtube-dl-2014.02.17/youtube_dl/extractor/discovery.py --- youtube-dl-2012.09.27/youtube_dl/extractor/discovery.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/discovery.py 2014-01-27 13:43:16.000000000 +0000 @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class DiscoveryIE(InfoExtractor): + _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' + _TEST = { + 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'file': '614784.mp4', + 'md5': 'e12614f9ee303a6ccef415cb0793eba2', + 'info_dict': { + 'title': 'MythBusters: Mission Impossible Outtakes', + 'description': ('Watch Jamie Hyneman and Adam Savage practice being' + ' each other -- to the point of confusing Jamie\'s dog -- and ' + 'don\'t miss Adam moon-walking as Jamie ... 
behind Jamie\'s' + ' back.'), + 'duration': 156, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_list_json = self._search_regex(r'var videoListJSON = ({.*?});', + webpage, 'video list', flags=re.DOTALL) + video_list = json.loads(video_list_json) + info = video_list['clips'][0] + formats = [] + for f in info['mp4']: + formats.append( + {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])}) + + return { + 'id': info['contentId'], + 'title': video_list['name'], + 'formats': formats, + 'description': info['videoCaption'], + 'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'), + 'duration': info['duration'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dotsub.py youtube-dl-2014.02.17/youtube_dl/extractor/dotsub.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dotsub.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dotsub.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor + + +class DotsubIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'md5': '0914d4d69605090f623b7ac329fea66e', + 'info_dict': { + 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'ext': 'flv', + 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', + 'uploader': '4v4l0n42', + 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', + 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', + 'upload_date': '20101213', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = "https://dotsub.com/api/media/%s/metadata" % video_id + info = self._download_json(info_url, video_id) + date = time.gmtime(info['dateCreated']/1000) # The timestamp is in milliseconds + + return { + 'id': video_id, + 'url': info['mediaURI'], + 'ext': 'flv', + 'title': info['title'], + 'thumbnail': info['screenshotURI'], + 'description': info['description'], + 'uploader': info['user'], + 'view_count': info['numberOfViews'], + 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dreisat.py youtube-dl-2014.02.17/youtube_dl/extractor/dreisat.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dreisat.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dreisat.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class DreiSatIE(InfoExtractor): + IE_NAME = '3sat' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _TEST = { + u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", + u'file': u'36983.mp4', + u'md5': u'9dcfe344732808dbfcc901537973c922', + u'info_dict': { +
u"title": u"Kaffeeland Schweiz", + u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", + u"uploader": u"3sat", + u"upload_date": u"20130622" + } + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id + details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details') + + thumbnail_els = details_doc.findall('.//teaserimage') + thumbnails = [{ + 'width': te.attrib['key'].partition('x')[0], + 'height': te.attrib['key'].partition('x')[2], + 'url': te.text, + } for te in thumbnail_els] + + information_el = details_doc.find('.//information') + video_title = information_el.find('./title').text + video_description = information_el.find('./detail').text + + details_el = details_doc.find('.//details') + video_uploader = details_el.find('./channel').text + upload_date = unified_strdate(details_el.find('./airtime').text) + + format_els = details_doc.findall('.//formitaet') + formats = [{ + 'format_id': fe.attrib['basetype'], + 'width': int(fe.find('./width').text), + 'height': int(fe.find('./height').text), + 'url': fe.find('./url').text, + 'filesize': int(fe.find('./filesize').text), + 'video_bitrate': int(fe.find('./videoBitrate').text), + } for fe in format_els + if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] + + self._sort_formats(formats) + + return { + '_type': 'video', + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'description': video_description, + 'thumbnails': thumbnails, + 'thumbnail': thumbnails[-1]['url'], + 'uploader': video_uploader, + 'upload_date': upload_date, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/dropbox.py youtube-dl-2014.02.17/youtube_dl/extractor/dropbox.py --- youtube-dl-2012.09.27/youtube_dl/extractor/dropbox.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/dropbox.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from .common import InfoExtractor + + +class DropboxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' + _TEST = { + 'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4', + 'md5': '8ae17c51172fb7f93bdd6a214cc8c896', + 'info_dict': { + 'id': '0qr9sai2veej4f8', + 'ext': 'mp4', + 'title': 'THE_DOCTOR_GAMES' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + title = os.path.splitext(mobj.group('title'))[0] + video_url = url + '?dl=1' + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ebaumsworld.py youtube-dl-2014.02.17/youtube_dl/extractor/ebaumsworld.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ebaumsworld.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ebaumsworld.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,35 @@ +import re + +from .common import InfoExtractor +from ..utils import determine_ext + + +class EbaumsWorldIE(InfoExtractor): + _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)' + + _TEST = { + u'url': 
u'http://www.ebaumsworld.com/video/watch/83367677/', + u'file': u'83367677.mp4', + u'info_dict': { + u'title': u'A Giant Python Opens The Door', + u'description': u'This is how nightmares start...', + u'uploader': u'jihadpizza', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + config = self._download_xml( + 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) + video_url = config.find('file').text + + return { + 'id': video_id, + 'title': config.find('title').text, + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': config.find('description').text, + 'thumbnail': config.find('image').text, + 'uploader': config.find('username').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ehow.py youtube-dl-2014.02.17/youtube_dl/extractor/ehow.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ehow.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ehow.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,46 @@ +import re + +from ..utils import ( + compat_urllib_parse, + determine_ext +) +from .common import InfoExtractor + + +class EHowIE(InfoExtractor): + IE_NAME = u'eHow' + _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)' + _TEST = { + u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', + u'file': u'12245069.flv', + u'md5': u'9809b4e3f115ae2088440bcb4efbf371', + u'info_dict': { + u"title": u"Hardwood Flooring Basics", + u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...", + u"uploader": u"Erick Nathan" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', + webpage, u'video URL') + final_url = compat_urllib_parse.unquote(video_url) + uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />', + webpage, u'uploader') + title = self._og_search_title(webpage).replace(' | eHow', '') + ext = determine_ext(final_url) + + return { + '_type': 'video', + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + } + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/eighttracks.py youtube-dl-2014.02.17/youtube_dl/extractor/eighttracks.py --- youtube-dl-2012.09.27/youtube_dl/extractor/eighttracks.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/eighttracks.py 2014-02-02 10:54:19.000000000 +0000 @@ -0,0 +1,119 @@ +import json +import random +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class EightTracksIE(InfoExtractor): + IE_NAME = '8tracks' + _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' + _TEST = { + u"name": u"EightTracks", + u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + u"playlist": [ + { + u"file": u"11885610.m4a", + u"md5": u"96ce57f24389fc8734ce47f4c1abcc55", + u"info_dict": { + u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885608.m4a", + u"md5": u"4ab26f05c1f7291ea460a3920be8021f", + 
u"info_dict": { + u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885679.m4a", + u"md5": u"d30b5b5f74217410f4689605c35d1fd7", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885680.m4a", + u"md5": u"4eb0a669317cd725f6bbd336a29f923a", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885682.m4a", + u"md5": u"1893e872e263a2705558d1d319ad19e8", + u"info_dict": { + u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885683.m4a", + u"md5": u"b673c46f47a216ab1741ae8836af5899", + u"info_dict": { + u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885684.m4a", + u"md5": u"1d74534e95df54986da7f5abf7d842b7", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885685.m4a", + u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + } + ] + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group('id') + + webpage = self._download_webpage(url, playlist_id) + + json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL) + data = json.loads(json_like) + + session = str(random.randint(0, 1000000000)) + mix_id = data['id'] + track_count = data['tracks_count'] + first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) + next_url = first_url + res = [] + for i in range(track_count): + api_json = self._download_webpage(next_url, playlist_id, + note=u'Downloading song information %s/%s' % (str(i+1), track_count), + errnote=u'Failed to download song information') + api_data = json.loads(api_json) + track_data = api_data[u'set']['track'] + info = { + 'id': track_data['id'], + 'url': track_data['track_file_stream_url'], + 'title': track_data['performer'] + u' - ' + track_data['name'], + 'raw_title': track_data['name'], + 'uploader_id': data['user']['login'], + 'ext': 'm4a', + } + res.append(info) + next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) + return res diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/eitb.py youtube-dl-2014.02.17/youtube_dl/extractor/eitb.py --- youtube-dl-2012.09.27/youtube_dl/extractor/eitb.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/eitb.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveIE +from ..utils import ExtractorError + + +class EitbIE(InfoExtractor): + IE_NAME = u'eitb.tv' + _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)' + + _TEST = { + u'add_ie': ['Brightcove'], + u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', + u'md5': u'edf4436247185adee3ea18ce64c47998', + u'info_dict': { + u'id': u'2743577154001', + 
u'ext': u'mp4', + u'title': u'60 minutos (Lasa y Zabala, 30 años)', + # All videos from eitb have this description in the brightcove info + u'description': u'.', + u'uploader': u'Euskal Telebista', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id = mobj.group('chapter_id') + webpage = self._download_webpage(url, chapter_id) + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is None: + raise ExtractorError(u'Could not extract the Brightcove URL') + # The BrightcoveExperience object doesn't contain the video id, so we set + # it manually + bc_url += '&%40videoPlayer={0}'.format(chapter_id) + return self.url_result(bc_url, BrightcoveIE.ie_key()) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/elpais.py youtube-dl-2014.02.17/youtube_dl/extractor/elpais.py --- youtube-dl-2012.09.27/youtube_dl/extractor/elpais.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/elpais.py 2014-02-06 21:42:15.000000000 +0000 @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class ElPaisIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' + IE_DESC = 'El País' + + _TEST = { + 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', + 'md5': '98406f301f19562170ec071b83433d55', + 'info_dict': { + 'id': 'tiempo-nuevo-recetas-viejas', + 'ext': 'mp4', + 'title': 'Tiempo nuevo, recetas viejas', + 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', + 'upload_date': '20140206', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + prefix = self._html_search_regex( + r'var url_cache = "([^"]+)";', webpage, 'URL prefix') + video_suffix = self._search_regex( + r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') + video_url = prefix + video_suffix + thumbnail_suffix = self._search_regex( + r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', + fatal=False) + thumbnail = ( + None if thumbnail_suffix is None + else prefix + thumbnail_suffix) + title = self._html_search_regex( + '<h2 class="entry-header entry-title.*?>(.*?)</h2>', + webpage, 'title') + date_str = self._search_regex( + r'<p class="date-header date-int updated"\s+title="([^"]+)">', + webpage, 'upload date', fatal=False) + upload_date = (None if date_str is None else unified_strdate(date_str)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/escapist.py youtube-dl-2014.02.17/youtube_dl/extractor/escapist.py --- youtube-dl-2012.09.27/youtube_dl/extractor/escapist.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/escapist.py 2014-02-13 15:32:34.000000000 +0000 @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + + ExtractorError, +) + + +class EscapistIE(InfoExtractor): + _VALID_URL = r'^https?://(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' + _TEST = { + 'url':
'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', + 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', + 'info_dict': { + 'id': '6618', + 'ext': 'mp4', + 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", + 'uploader': 'the-escapist-presents', + 'title': "Breaking Down Baldur's Gate", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + showName = mobj.group('showname') + video_id = mobj.group('id') + + self.report_extraction(video_id) + webpage = self._download_webpage(url, video_id) + + videoDesc = self._html_search_regex( + r'<meta name="description" content="([^"]*)"', + webpage, 'description', fatal=False) + + playerUrl = self._og_search_video_url(webpage, name=u'player URL') + + title = self._html_search_regex( + r'<meta name="title" content="([^"]*)"', + webpage, 'title').split(' : ')[-1] + + configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL') + configUrl = compat_urllib_parse.unquote(configUrl) + + formats = [] + + def _add_format(name, cfgurl, quality): + config = self._download_json( + cfgurl, video_id, + 'Downloading ' + name + ' configuration', + 'Unable to download ' + name + ' configuration', + transform_source=lambda s: s.replace("'", '"')) + + playlist = config['playlist'] + formats.append({ + 'url': playlist[1]['url'], + 'format_id': name, + 'quality': quality, + }) + + _add_format('normal', configUrl, quality=0) + # append the hq flag with whichever separator the config URL needs + hq_url = (configUrl + + ('&hq=1' if '?' in configUrl else '?hq=1')) + try: + _add_format('hq', hq_url, quality=1) + except ExtractorError: + pass # That's fine, we'll just use normal quality + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'uploader': showName, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': videoDesc, + 'player_url': playerUrl, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/everyonesmixtape.py youtube-dl-2014.02.17/youtube_dl/extractor/everyonesmixtape.py --- youtube-dl-2012.09.27/youtube_dl/extractor/everyonesmixtape.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/everyonesmixtape.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + ExtractorError, +) + + +class EveryonesMixtapeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$' + + _TEST = { + 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', + 'file': '5bfseWNmlds.mp4', + "info_dict": { + "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)", + "uploader": "FKR.TV", + "uploader_id": "frenchkissrecords", + "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", + "upload_date": "20081015" + }, + 'params': { + 'skip_download': True, # This is simply YouTube + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + + pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id + pllist_req =
compat_urllib_request.Request(pllist_url) + pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') + + playlist_list = self._download_json( + pllist_req, playlist_id, note='Downloading playlist metadata') + try: + playlist_no = next(playlist['id'] + for playlist in playlist_list + if playlist['code'] == playlist_id) + except StopIteration: + raise ExtractorError('Playlist id not found') + + pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no + pl_req = compat_urllib_request.Request(pl_url) + pl_req.add_header('X-Requested-With', 'XMLHttpRequest') + playlist = self._download_json( + pl_req, playlist_id, note='Downloading playlist info') + + entries = [{ + '_type': 'url', + 'url': t['url'], + 'title': t['title'], + } for t in playlist['tracks']] + + if mobj.group('songnr'): + songnr = int(mobj.group('songnr')) - 1 + return entries[songnr] + + playlist_title = playlist['mixData']['name'] + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/exfm.py youtube-dl-2014.02.17/youtube_dl/extractor/exfm.py --- youtube-dl-2012.09.27/youtube_dl/extractor/exfm.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/exfm.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ExfmIE(InfoExtractor): + IE_NAME = 'exfm' + IE_DESC = 'ex.fm' + _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' + _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' + _TESTS = [ + { + 'url': 'http://ex.fm/song/eh359', + 'md5': 'e45513df5631e6d760970b14cc0c11e7', + 'info_dict': { + 'id': '44216187', + 'ext': 'mp3', + 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', + 'uploader': 'deadjournalist', + 'upload_date': '20120424', + 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', + }, + 'note': 'Soundcloud song', + 'skip': 'The site is down too often', + }, + { + 'url': 'http://ex.fm/song/wddt8', + 'md5': '966bd70741ac5b8570d8e45bfaed3643', + 'info_dict': { + 'id': 'wddt8', + 'ext': 'mp3', + 'title': 'Safe and Sound', + 'uploader': 'Capital Cities', + }, + 'skip': 'The site is down too often', + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + info_url = "http://ex.fm/api/v3/song/%s" % song_id + info = self._download_json(info_url, song_id)['song'] + song_url = info['url'] + if re.match(self._SOUNDCLOUD_URL, song_url) is not None: + self.to_screen('Soundcloud song detected') + return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') + return { + 'id': song_id, + 'url': song_url, + 'ext': 'mp3', + 'title': info['title'], + 'thumbnail': info['image']['large'], + 'uploader': info['artist'], + 'view_count': info['loved_count'], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/extremetube.py youtube-dl-2014.02.17/youtube_dl/extractor/extremetube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/extremetube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/extremetube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,50 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): + 
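# A minimal sketch of what _VALID_URL below matches (doctest-style,
+ # illustration only; the URL is the one from the _TEST entry further down):
+ #   >>> m = re.match(ExtremeTubeIE._VALID_URL,
+ #   ...     'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431')
+ #   >>> m.group('videoid')
+ #   '652431' +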
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' + _TEST = { + u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + u'file': u'652431.mp4', + u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', + u'info_dict': { + u"title": u"Music Video 14 british euro brit european cumshots swallow", + u"uploader": u"unknown", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') + uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join(format) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': uploader, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': 18, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/facebook.py youtube-dl-2014.02.17/youtube_dl/extractor/facebook.py --- youtube-dl-2012.09.27/youtube_dl/extractor/facebook.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/facebook.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,137 @@ +import json +import re +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_str, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + + ExtractorError, +) + + +class FacebookIE(InfoExtractor): + """Information Extractor for Facebook""" + + _VALID_URL = r'''(?x) + (?:https?://)?(?:\w+\.)?facebook\.com/ + (?:[^#?]*\#!/)? + (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) 
+ (?:v|video_id)=(?P<id>[0-9]+) + (?:.*)''' + _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' + _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' + _NETRC_MACHINE = 'facebook' + IE_NAME = u'facebook' + _TEST = { + u'url': u'https://www.facebook.com/photo.php?v=120708114770723', + u'file': u'120708114770723.mp4', + u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', + u'info_dict': { + u"duration": 279, + u"title": u"PEOPLE ARE AWESOME 2013" + } + } + + def report_login(self): + """Report attempt to log in.""" + self.to_screen(u'Logging in') + + def _login(self): + (useremail, password) = self._get_login_info() + if useremail is None: + return + + login_page_req = compat_urllib_request.Request(self._LOGIN_URL) + login_page_req.add_header('Cookie', 'locale=en_US') + self.report_login() + login_page = self._download_webpage(login_page_req, None, note=False, + errnote=u'Unable to download login page') + lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') + lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') + + login_form = { + 'email': useremail, + 'pass': password, + 'lsd': lsd, + 'lgnrnd': lgnrnd, + 'next': 'http://facebook.com/home.php', + 'default_persistent': '0', + 'legacy_return': '1', + 'timezone': '-60', + 'trynum': '1', + } + request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + try: + login_results = compat_urllib_request.urlopen(request).read() + if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: + self._downloader.report_warning(u'unable to log in: bad username/password, or exceeded login rate limit (~3/min).
Check credentials or wait.') + return + + check_form = { + 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), + 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), + 'name_action_selected': 'dont_save', + 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), + } + check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) + check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + check_response = compat_urllib_request.urlopen(check_req).read() + if re.search(r'id="checkpointSubmitButton"', check_response) is not None: + self._downloader.report_warning(u'Unable to confirm login, you have to log in with your browser and authorize the login.') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) + return + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group('id') + + url = 'https://www.facebook.com/video/video.php?v=%s' % video_id + webpage = self._download_webpage(url, video_id) + + BEFORE = '{swf.addParam(param[0], param[1]);});\n' + AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' + m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + if not m: + m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) + if m_msg is not None: + raise ExtractorError( + u'The video is not available, Facebook said: "%s"' % m_msg.group(1), + expected=True) + else: + raise ExtractorError(u'Cannot parse data') + data = dict(json.loads(m.group(1))) + params_raw = compat_urllib_parse.unquote(data['params']) + params = json.loads(params_raw) + video_data = params['video_data'][0] + video_url = video_data.get('hd_src') + if not video_url: + video_url = video_data['sd_src'] + if not video_url: + raise ExtractorError(u'Cannot find video URL') + video_duration = int(video_data['video_duration']) + thumbnail = video_data['thumbnail_src'] + + video_title = self._html_search_regex( + r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') + + info = { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': 'mp4', + 'duration': video_duration, + 'thumbnail': thumbnail, + } + return [info] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/faz.py youtube-dl-2014.02.17/youtube_dl/extractor/faz.py --- youtube-dl-2012.09.27/youtube_dl/extractor/faz.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/faz.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class FazIE(InfoExtractor): + IE_NAME = u'faz.net' + _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html' + + _TEST = { + u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', + u'file': u'12610585.mp4', + u'info_dict': { + u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', + u'description': u'md5:1453fbf9a0d041d985a47306192ea253', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') +
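# The article page embeds a call of the form writeFLV('<config XML URL>', ...);
+ # the _search_regex below pulls that URL out. The shape sketched here is an
+ # assumption for illustration, not captured from a live page:
+ #   writeFLV('http://www.faz.net/.../video-config.xml', ...) +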
self.to_screen(video_id) + webpage = self._download_webpage(url, video_id) + config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, + u'config xml url') + config = self._download_xml(config_xml_url, video_id, + u'Downloading config xml') + + encodings = config.find('ENCODINGS') + formats = [] + for code in ['LOW', 'HIGH', 'HQ']: + encoding = encodings.find(code) + if encoding is None: + continue + encoding_url = encoding.find('FILENAME').text + formats.append({ + 'url': encoding_url, + 'ext': determine_ext(encoding_url), + 'format_id': code.lower(), + }) + + descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': descr, + 'thumbnail': config.find('STILL/STILL_BIG').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/firstpost.py youtube-dl-2014.02.17/youtube_dl/extractor/firstpost.py --- youtube-dl-2012.09.27/youtube_dl/extractor/firstpost.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/firstpost.py 2014-02-05 20:21:45.000000000 +0000 @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FirstpostIE(InfoExtractor): + IE_NAME = 'Firstpost.com' + _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' + + _TEST = { + 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', + 'md5': 'ee9114957692f01fb1263ed87039112a', + 'info_dict': { + 'id': '1025403', + 'ext': 'mp4', + 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', + 'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._html_search_regex( + r'<div.*?name="div_video".*?flashvars="([^"]+)">', + webpage, 'video URL') + + return { + 'id': video_id, + 'url': video_url, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/firsttv.py youtube-dl-2014.02.17/youtube_dl/extractor/firsttv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/firsttv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/firsttv.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,60 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class FirstTVIE(InfoExtractor): + IE_NAME = 'firsttv' + IE_DESC = 'Видеоархив - Первый канал' + _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.1tv.ru/videoarchive/73390', + 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', + 'info_dict': { + 'id': '73390', + 'ext': 'mp4', + 'title': 'Олимпийские канатные дороги', + 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', + 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', + 'duration': 149, + }, + 'skip': 'Only works from Russia', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = 
self._download_webpage(url, video_id, 'Downloading page') + + video_url = self._html_search_regex( + r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') + + title = self._html_search_regex( + r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') + description = self._html_search_regex( + r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) + + thumbnail = self._og_search_thumbnail(webpage) + duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) + + like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', + webpage, 'like count', fatal=False) + dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', + webpage, 'dislike count', fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'thumbnail': thumbnail, + 'title': title, + 'description': description, + 'duration': int_or_none(duration), + 'like_count': int_or_none(like_count), + 'dislike_count': int_or_none(dislike_count), + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/fktv.py youtube-dl-2014.02.17/youtube_dl/extractor/fktv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/fktv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/fktv.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,78 @@ +import re +import random +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + get_element_by_id, + clean_html, +) + + +class FKTVIE(InfoExtractor): + IE_NAME = u'fernsehkritik.tv' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' + + _TEST = { + u'url': u'http://fernsehkritik.tv/folge-1', + u'file': u'00011.flv', + u'info_dict': { + u'title': u'Folge 1 vom 10. 
April 2007', + u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2, 4) + video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode + start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, + episode) + playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, + u'playlist', flags=re.DOTALL) + files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) + # TODO: return a single multipart video + videos = [] + for i, _ in enumerate(files, 1): + video_id = '%04d%d' % (episode, i) + video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) + videos.append({ + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': clean_html(get_element_by_id('eptitle', start_webpage)), + 'description': clean_html(get_element_by_id('contentlist', start_webpage)), + 'thumbnail': video_thumbnail + }) + return videos + + +class FKTVPosteckeIE(InfoExtractor): + IE_NAME = u'fernsehkritik.tv:postecke' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' + _TEST = { + u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', + u'file': u'0120.flv', + u'md5': u'262f0adbac80317412f7e57b4808e5c4', + u'info_dict': { + u"title": u"Postecke 120" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2, 4) + video_id = '%04d' % episode + video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) + video_title = 'Postecke %d' % episode + return { + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': video_title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/flickr.py youtube-dl-2014.02.17/youtube_dl/extractor/flickr.py --- youtube-dl-2012.09.27/youtube_dl/extractor/flickr.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/flickr.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unescapeHTML, +) + + +class FlickrIE(InfoExtractor): + """Information Extractor for Flickr videos""" + _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' + _TEST = { + 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + 'file': '5645318632.mp4', + 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', + 'info_dict': { + "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. 
They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + "uploader_id": "forestwander-nature-pictures", + "title": "Dark Hollow Waterfalls" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + video_uploader_id = mobj.group('uploader_id') + webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret') + + first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' + first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') + + node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>', + first_xml, 'node_id') + + second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' + second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') + + self.report_extraction(video_id) + + mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml) + if mobj is None: + raise ExtractorError('Unable to extract video url') + video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader_id': video_uploader_id, + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/fourtube.py youtube-dl-2014.02.17/youtube_dl/extractor/fourtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/fourtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/fourtube.py 2014-02-17 10:22:30.000000000 +0000 @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + unified_strdate, + str_to_int, + parse_duration, + clean_html, +) + + +class FourTubeIE(InfoExtractor): + IE_NAME = '4tube' + _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '209733', + 'ext': 'mp4', + 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', + 'uploader': 'WCP Club', + 'uploader_id': 'wcp-club', + 'upload_date': '20131031', + 'duration': 583, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.4tube.com/videos/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + self.report_extraction(video_id) + + playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist') + media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id') + sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',') + title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title') + thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False) + + uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage,
'uploader', fatal=False) + mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str) + (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None) + + upload_date = None + view_count = None + duration = None + description = self._html_search_meta('description', webpage, 'description') + if description: + upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date', + fatal=False) + if upload_date: + upload_date = unified_strdate(upload_date) + view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False) + if view_count: + view_count = str_to_int(view_count) + duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False)) + + token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources)) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + b'Origin': b'http://www.4tube.com', + } + token_req = compat_urllib_request.Request(token_url, b'{}', headers) + tokens = self._download_json(token_req, video_id) + + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail_url, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'view_count': view_count, + 'duration': duration, + 'age_limit': 18, + 'webpage_url': webpage_url, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/franceinter.py youtube-dl-2014.02.17/youtube_dl/extractor/franceinter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/franceinter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/franceinter.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FranceInterIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})' + _TEST = { + 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'file': '793962.mp3', + 'md5': '4764932e466e6f6c79c317d2e74f6884', + "info_dict": { + "title": "L’Histoire dans les jeux vidéo", + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex( + r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title') + path = self._search_regex( + r'&urlAOD=(.*?)&startTime', webpage, 'video url') + video_url = 'http://www.franceinter.fr/' + path + + return { + 'id': video_id, + 'formats': [{ + 'url': video_url, + 'vcodec': 'none', + }], + 'title': title, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/francetv.py youtube-dl-2014.02.17/youtube_dl/extractor/francetv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/francetv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/francetv.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,223 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, +) + + +class FranceTVBaseInfoExtractor(InfoExtractor): + def 
_extract_video(self, video_id): + info = self._download_xml( + 'http://www.francetvinfo.fr/appftv/webservices/video/' + 'getInfosOeuvre.php?id-diffusion=' + + video_id, video_id, 'Downloading XML config') + + manifest_url = info.find('videos/video/url').text + video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') + video_url = video_url.replace('/z/', '/i/') + thumbnail_path = info.find('image').text + + return {'id': video_id, + 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4', + 'url': video_url, + 'title': info.find('titre').text, + 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), + 'description': info.find('synopsis').text, + } + + +class PluzzIE(FranceTVBaseInfoExtractor): + IE_NAME = 'pluzz.francetv.fr' + _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' + + # Can't use tests, videos expire in 7 days + + def _real_extract(self, url): + title = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, title) + video_id = self._search_regex( + r'data-diffusion="(\d+)"', webpage, 'ID') + return self._extract_video(video_id) + + +class FranceTvInfoIE(FranceTVBaseInfoExtractor): + IE_NAME = 'francetvinfo.fr' + _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html' + + _TEST = { + 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', + 'file': '84981923.mp4', + 'info_dict': { + 'title': 'Soir 3', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_title = mobj.group('title') + webpage = self._download_webpage(url, page_title) + video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id') + return self._extract_video(video_id) + + +class FranceTVIE(FranceTVBaseInfoExtractor): + IE_NAME = 'francetv' + IE_DESC = 'France 2, 3, 4, 5 and Ô' + _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/ + (?: + emissions/.*?/(videos|emissions)/(?P<id>[^/?]+) + | (emissions?|jt)/(?P<key>[^/?]+) + )''' + + _TESTS = [ + # france2 + { + 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', + 'file': '75540104.mp4', + 'info_dict': { + 'title': '13h15, le samedi...', + 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + # france3 + { + 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', + 'info_dict': { + 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', + 'ext': 'flv', + 'title': 'Le scandale du prix des médicaments', + 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + # france4 + { + 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + 'info_dict': { + 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + 'ext': 'flv', + 'title': 'Hero Corp Making of - Extrait 1', + 'description': 'md5:c87d54871b1790679aec1197e73d650a', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + # france5 + { + 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', + 'info_dict': { + 'id': '92837968', + 'ext': 'mp4', + 'title': 'C à dire ?!', + 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + # franceo + { + 'url': 
'http://www.franceo.fr/jt/info-afrique/04-12-2013', + 'info_dict': { + 'id': '92327925', + 'ext': 'mp4', + 'title': 'Infô-Afrique', + 'description': 'md5:ebf346da789428841bee0fd2a935ea55', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'The id changes frequently', + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj.group('key'): + webpage = self._download_webpage(url, mobj.group('key')) + id_res = [ + (r'''(?x)<div\s+class="video-player">\s* + <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ + class="francetv-video-player">'''), + (r'<a id="player_direct" href="http://info\.francetelevisions' + '\.fr/\?id-video=([^"/&]+)'), + (r'<a class="video" id="ftv_player_(.+?)"'), + ] + video_id = self._html_search_regex(id_res, webpage, 'video ID') + else: + video_id = mobj.group('id') + return self._extract_video(video_id) + + +class GenerationQuoiIE(InfoExtractor): + IE_NAME = 'france2.fr:generation-quoi' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' + + _TEST = { + 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', + 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', + 'info_dict': { + 'title': 'Génération Quoi - Garde à Vous', + 'uploader': 'Génération Quoi', + }, + 'params': { + # It uses Dailymotion + 'skip_download': True, + }, + 'skip': 'Only available from France', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) + info_json = self._download_webpage(info_url, name) + info = json.loads(info_json) + return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], + ie='Dailymotion') + + +class CultureboxIE(FranceTVBaseInfoExtractor): + IE_NAME = 'culturebox.francetvinfo.fr' + _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' + + _TEST = { + 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', + 'info_dict': { + 'id': 'EV_6785', + 'ext': 'mp4', + 'title': 'Einstein on the beach au Théâtre du Châtelet', + 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id') + return self._extract_video(video_id) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/freesound.py youtube-dl-2014.02.17/youtube_dl/extractor/freesound.py --- youtube-dl-2012.09.27/youtube_dl/extractor/freesound.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/freesound.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FreesoundIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', + 'md5': '12280ceb42c81f19a515c745eae07650', + 'info_dict': { + 'id': '194503', + 'ext': 'mp3', + 'title': 'gulls in the city.wav', + 'uploader': 'miklovan', + 'description': 'the sounds of seagulls in the city', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + music_id = mobj.group('id') + webpage = 
self._download_webpage(url, music_id) + title = self._html_search_regex( + r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>', + webpage, 'music title', flags=re.DOTALL) + description = self._html_search_regex( + r'<div id="sound_description">(.*?)</div>', webpage, 'description', + fatal=False, flags=re.DOTALL) + + return { + 'id': music_id, + 'title': title, + 'url': self._og_search_property('audio', webpage, 'music url'), + 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/freespeech.py youtube-dl-2014.02.17/youtube_dl/extractor/freespeech.py --- youtube-dl-2012.09.27/youtube_dl/extractor/freespeech.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/freespeech.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class FreespeechIE(InfoExtractor): + IE_NAME = 'freespeech.org' + _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)' + _TEST = { + 'add_ie': ['Youtube'], + 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'info_dict': { + 'id': 'poKsVCZ64uU', + 'ext': 'mp4', + 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'uploader': 'freespeechtv', + 'uploader_id': 'freespeechtv', + 'upload_date': '20121002', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info = json.loads(info_json) + + return { + '_type': 'url', + 'url': info['jw_player']['basic_video_node_player']['file'], + 'ie_key': 'Youtube', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/funnyordie.py youtube-dl-2014.02.17/youtube_dl/extractor/funnyordie.py --- youtube-dl-2012.09.27/youtube_dl/extractor/funnyordie.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/funnyordie.py 2014-01-28 20:45:24.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FunnyOrDieIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' + _TEST = { + 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', + 'file': '0732f586d7.mp4', + 'md5': 'f647e9e90064b53b6e046e75d0241fbd', + 'info_dict': { + 'description': ('Lyrics changed to match the video. Spoken cameo ' + 'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a ' + 'concept by Dustin McLean (DustFilms.com). Performed, edited, ' + 'and written by David A. 
Scott.'), + 'title': 'Heart-Shaped Box: Literal Video Version', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_url = self._search_regex( + [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], + webpage, 'video URL', flags=re.DOTALL) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gamekings.py youtube-dl-2014.02.17/youtube_dl/extractor/gamekings.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gamekings.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gamekings.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,38 @@ +import re + +from .common import InfoExtractor + + +class GamekingsIE(InfoExtractor): + _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' + _TEST = { + u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", + u'file': u'20130811.mp4', + # MD5 is flaky, seems to change regularly + #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', + u'info_dict': { + u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", + u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", + } + } + + def _real_extract(self, url): + + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + video_url = self._og_search_video_url(webpage) + + video = re.search(r'[0-9]+', video_url) + video_id = video.group(0) + + # Todo: add medium format + video_url = video_url.replace(video_id, 'large/' + video_id) + + return { + 'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gamespot.py youtube-dl-2014.02.17/youtube_dl/extractor/gamespot.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gamespot.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gamespot.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urlparse, + unescapeHTML, + get_meta_content, +) + + +class GameSpotIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' + _TEST = { + "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", + "file": "gs-2300-6410818.mp4", + "md5": "b2a30deaa8654fcccd43713a6b6a4825", + "info_dict": { + "title": "Arma 3 - Community Guide: SITREP I", + 'description': 'Check out this video where some of the basics of Arma 3 is explained.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_id = mobj.group('page_id') + webpage = self._download_webpage(url, page_id) + data_video_json = self._search_regex(r'data-video=["\'](.*?)["\']', webpage, 'data video') + data_video = json.loads(unescapeHTML(data_video_json)) + + # Transform the manifest url to a link to the mp4 files + # they are used in mobile devices. 
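# [Editor's note] A minimal, self-contained sketch of the manifest-to-MP4
# rewrite that the comment above describes. The f4m path below is a made-up
# example of the expected shape (an embedded ",400,800,1600," quality list);
# only the video.gamespotcdn.com host is taken from the code that follows.
import re

QUALITIES_RE = r'((,\d+)+,?)'
f4m_path = '/gamespot/2014/02/gs-2300-6410818_,400,800,1600,.mp4.csmil/manifest.f4m'
# The quality list is the comma-separated run of bitrates inside the path.
qualities = re.search(QUALITIES_RE, f4m_path).group(0).strip(',').split(',')
# Drop the leading CDN mount point, turn the quality run into a '%s' slot,
# and strip the f4m suffix to obtain one direct MP4 URL template.
http_path = f4m_path[1:].split('/', 1)[1]
http_template = re.sub(QUALITIES_RE, '%s', http_path)
http_template = http_template.replace('.csmil/manifest.f4m', '')
http_template = 'http://video.gamespotcdn.com/' + http_template
print([http_template % q for q in qualities])
# -> ['http://video.gamespotcdn.com/2014/02/gs-2300-6410818_400.mp4', ...]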
+ f4m_url = data_video['videoStreams']['f4m_stream'] + f4m_path = compat_urlparse.urlparse(f4m_url).path + QUALITIES_RE = r'((,\d+)+,?)' + qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') + http_path = f4m_path[1:].split('/', 1)[1] + http_template = re.sub(QUALITIES_RE, r'%s', http_path) + http_template = http_template.replace('.csmil/manifest.f4m', '') + http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template) + formats = [] + for q in qualities: + formats.append({ + 'url': http_template % q, + 'ext': 'mp4', + 'format_id': q, + }) + + return { + 'id': data_video['guid'], + 'title': compat_urllib_parse.unquote(data_video['title']), + 'formats': formats, + 'description': get_meta_content('description', webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/gametrailers.py youtube-dl-2014.02.17/youtube_dl/extractor/gametrailers.py --- youtube-dl-2012.09.27/youtube_dl/extractor/gametrailers.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/gametrailers.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,19 @@ +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor + + +class GametrailersIE(MTVServicesInfoExtractor): + _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' + _TEST = { + 'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', + 'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7', + 'info_dict': { + 'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d', + 'ext': 'mp4', + 'title': 'E3 2013: Debut Trailer', + 'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', + }, + } + + _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/generic.py youtube-dl-2014.02.17/youtube_dl/extractor/generic.py --- youtube-dl-2012.09.27/youtube_dl/extractor/generic.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/generic.py 2014-02-03 13:51:03.000000000 +0000 @@ -0,0 +1,403 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + + ExtractorError, + HEADRequest, + smuggle_url, + unescapeHTML, + unified_strdate, + url_basename, +) +from .brightcove import BrightcoveIE +from .ooyala import OoyalaIE + + +class GenericIE(InfoExtractor): + IE_DESC = 'Generic downloader that works on some sites' + _VALID_URL = r'.*' + IE_NAME = 'generic' + _TESTS = [ + { + 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', + 'file': '13601338388002.mp4', + 'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd', + 'info_dict': { + 'uploader': 'www.hodiho.fr', + 'title': 'R\u00e9gis plante sa Jeep', + } + }, + # bandcamp page with custom domain + { + 'add_ie': ['Bandcamp'], + 'url': 'http://bronyrock.com/track/the-pony-mash', + 'file': '3235767654.mp3', + 'info_dict': { + 'title': 'The Pony Mash', + 'uploader': 'M_Pallante', + }, + 'skip': 'There is a limit of 200 free downloads / month for the test song', + }, + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' in the + # http requests + { + 'add_ie': ['Brightcove'], + 'url': 
'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', + 'info_dict': { + 'id': '2765128793001', + 'ext': 'mp4', + 'title': 'Le cours de bourse : l’analyse technique', + 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', + 'uploader': 'BFM BUSINESS', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # https://github.com/rg3/youtube-dl/issues/2253 + 'url': 'http://bcove.me/i6nfkrc3', + 'file': '3101154703001.mp4', + 'md5': '0ba9446db037002366bab3b3eb30c88c', + 'info_dict': { + 'title': 'Still no power', + 'uploader': 'thestar.com', + 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', + }, + 'add_ie': ['Brightcove'], + }, + # Direct link to a video + { + 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', + 'file': 'trailer.mp4', + 'md5': '67d406c2bcb6af27fa886f31aa934bbe', + 'info_dict': { + 'id': 'trailer', + 'title': 'trailer', + 'upload_date': '20100513', + } + }, + # ooyala video + { + 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', + 'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4', + 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'info_dict': { + 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', + 'ext': 'mp4', + 'title': '2cc213299525360.mov', # that's what we get + }, + }, + ] + + def report_download_webpage(self, video_id): + """Report webpage download.""" + if not self._downloader.params.get('test', False): + self._downloader.report_warning('Falling back on generic information extractor.') + super(GenericIE, self).report_download_webpage(video_id) + + def report_following_redirect(self, new_url): + """Report information extraction.""" + self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) + + def _send_head(self, url): + """Check if it is a redirect, like url shorteners, in case return the new url.""" + + class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + """ + Subclass the HTTPRedirectHandler to make it use our + HEADRequest also on the redirected URL + """ + def redirect_request(self, req, fp, code, msg, headers, newurl): + if code in (301, 302, 303, 307): + newurl = newurl.replace(' ', '%20') + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type")) + return HEADRequest(newurl, + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + else: + raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) + + class HTTPMethodFallback(compat_urllib_request.BaseHandler): + """ + Fallback to GET if HEAD is not allowed (405 HTTP error) + """ + def http_error_405(self, req, fp, code, msg, headers): + fp.read() + fp.close() + + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type")) + return self.parent.open(compat_urllib_request.Request(req.get_full_url(), + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True)) + + # Build our opener + opener = compat_urllib_request.OpenerDirector() + for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler, + HTTPMethodFallback, HEADRedirectHandler, + compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: + opener.add_handler(handler()) + + response = 
opener.open(HEADRequest(url)) + if response is None: + raise ExtractorError('Invalid URL protocol') + return response + + def _real_extract(self, url): + parsed_url = compat_urlparse.urlparse(url) + if not parsed_url.scheme: + default_search = self._downloader.params.get('default_search') + if default_search is None: + default_search = 'auto' + + if default_search == 'auto': + if '/' in url: + self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') + return self.url_result('http://' + url) + else: + return self.url_result('ytsearch:' + url) + else: + assert ':' in default_search + return self.url_result(default_search + url) + video_id = os.path.splitext(url.split('/')[-1])[0] + + self.to_screen('%s: Requesting header' % video_id) + + try: + response = self._send_head(url) + + # Check for redirect + new_url = response.geturl() + if url != new_url: + self.report_following_redirect(new_url) + return self.url_result(new_url) + + # Check for direct link to a video + content_type = response.headers.get('Content-Type', '') + m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) + if m: + upload_date = response.headers.get('Last-Modified') + if upload_date: + upload_date = unified_strdate(upload_date) + return { + 'id': video_id, + 'title': os.path.splitext(url_basename(url))[0], + 'formats': [{ + 'format_id': m.group('format_id'), + 'url': url, + 'vcodec': 'none' if m.group('type') == 'audio' else None + }], + 'upload_date': upload_date, + } + + except compat_urllib_error.HTTPError: + # This may be a stupid server that doesn't like HEAD, our UA, or so + pass + + try: + webpage = self._download_webpage(url, video_id) + except ValueError: + # since this is the last-resort InfoExtractor, if + # this error is thrown, it'll be thrown here + raise ExtractorError('Failed to download URL: %s' % url) + + self.report_extraction(video_id) + + # it's tempting to parse this further, but you would + # have to take into account all the variations like + # Video Title - Site Name + # Site Name | Video Title + # Video Title - Tagline | Site Name + # and so on and so forth; it's just not practical + video_title = self._html_search_regex( + r'(?s)<title>(.*?)', webpage, 'video title', + default='video') + + # video uploader is domain name + video_uploader = self._search_regex( + r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + + # Look for BrightCove: + bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) + if bc_urls: + self.to_screen('Brightcove video detected.') + entries = [{ + '_type': 'url', + 'url': smuggle_url(bc_url, {'Referer': url}), + 'ie_key': 'Brightcove' + } for bc_url in bc_urls] + + return { + '_type': 'playlist', + 'title': video_title, + 'id': video_id, + 'entries': entries, + } + + # Look for embedded (iframe) Vimeo player + mobj = re.search( + r']+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage) + if mobj: + player_url = unescapeHTML(mobj.group(1)) + surl = smuggle_url(player_url, {'Referer': url}) + return self.url_result(surl, 'Vimeo') + + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) + if mobj: + return self.url_result(mobj.group(1), 'Vimeo') + + # Look for embedded YouTube player + matches = re.findall(r'''(?x) + (?:]+?src=|embedSWF\(\s*) + (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?:embed|v)/.+?) 
+ \1''', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') + for tuppl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + + # Look for embedded Dailymotion player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') + for tuppl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + + # Look for embedded Wistia player + match = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) + if match: + return { + '_type': 'url_transparent', + 'url': unescapeHTML(match.group('url')), + 'ie_key': 'Wistia', + 'uploader': video_uploader, + 'title': video_title, + 'id': video_id, + } + + # Look for embedded blip.tv player + mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) + if mobj: + return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage) + if mobj: + return self.url_result(mobj.group(1), 'BlipTV') + + # Look for Bandcamp pages with custom domain + mobj = re.search(r']*?content="(.*?bandcamp\.com.*?)"', webpage) + if mobj is not None: + burl = unescapeHTML(mobj.group(1)) + # Don't set the extractor because it can be a track url or an album + return self.url_result(burl) + + # Look for embedded Vevo player + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + + # Look for Ooyala videos + mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage) + if mobj is not None: + return OoyalaIE._build_url_result(mobj.group(1)) + + # Look for Aparat videos + mobj = re.search(r'', webpage) + if mobj: + embedded_url = mobj.group(1) + return self.url_result(embedded_url) + + video_title = self._html_search_regex(r'
<h1 [^>
    ]*>([^<]+)', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + if webpage.find('encrypted=true')!=-1: + password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') + video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[4].split('_')[:2] + format = "-".join(format) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/khanacademy.py youtube-dl-2014.02.17/youtube_dl/extractor/khanacademy.py --- youtube-dl-2012.09.27/youtube_dl/extractor/khanacademy.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/khanacademy.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class KhanAcademyIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P[^/]+)/(?:[^/]+/){,2}(?P[^?#/]+)(?:$|[?#])' + IE_NAME = 'KhanAcademy' + + _TEST = { + 'url': 'http://www.khanacademy.org/video/one-time-pad', + 'file': 'one-time-pad.mp4', + 'md5': '7021db7f2d47d4fff89b13177cb1e8f4', + 'info_dict': { + 'title': 'The one-time pad', + 'description': 'The perfect cipher', + 'duration': 176, + 'uploader': 'Brit Cruise', + 'upload_date': '20120411', + } + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + if m.group('key') == 'video': + data = self._download_json( + 'http://api.khanacademy.org/api/v1/videos/' + video_id, + video_id, 'Downloading video info') + + upload_date = unified_strdate(data['date_added']) + uploader = ', '.join(data['author_names']) + return { + '_type': 'url_transparent', + 'url': data['url'], + 'id': video_id, + 'title': data['title'], + 'thumbnail': data['image_url'], + 'duration': data['duration'], + 'description': data['description'], + 'uploader': uploader, + 'upload_date': upload_date, + } + else: + # topic + data = self._download_json( + 'http://api.khanacademy.org/api/v1/topic/' + video_id, + video_id, 'Downloading topic info') + + entries = [ + { + '_type': 'url', + 'url': c['url'], + 'id': c['id'], + 'title': c['title'], + } + for c in data['children'] if c['kind'] in ('Video', 'Topic')] + + return { + '_type': 'playlist', + 'id': video_id, + 'title': data['title'], + 'description': data['description'], + 'entries': entries, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/kickstarter.py youtube-dl-2014.02.17/youtube_dl/extractor/kickstarter.py --- youtube-dl-2012.09.27/youtube_dl/extractor/kickstarter.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/kickstarter.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class KickStarterIE(InfoExtractor): + _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P\d*)/.*' + _TEST = { + u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", + u"file": u"1404461844.mp4", + u"md5": u"c81addca81327ffa66c642b5d8b08cab", + u"info_dict": { + u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", 
+ }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + webpage_src = self._download_webpage(url, video_id) + + video_url = self._search_regex(r'data-video="(.*?)">', + webpage_src, u'video URL') + if 'mp4' in video_url: + ext = 'mp4' + else: + ext = 'flv' + video_title = self._html_search_regex(r"(.*?)", + webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() + + results = [{ + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'ext': ext, + }] + return results diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/kontrtube.py youtube-dl-2014.02.17/youtube_dl/extractor/kontrtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/kontrtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/kontrtube.py 2014-02-09 16:54:52.000000000 +0000 @@ -0,0 +1,66 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class KontrTubeIE(InfoExtractor): + IE_NAME = 'kontrtube' + IE_DESC = 'KontrTube.ru - Труба зовёт' + _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P\d+)/.+' + + _TEST = { + 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', + 'md5': '975a991a4926c9a85f383a736a2e6b80', + 'info_dict': { + 'id': '2678', + 'ext': 'mp4', + 'title': 'Над олимпийской деревней в Сочи поднят российский флаг', + 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', + 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg', + 'duration': 270, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id, 'Downloading page') + + video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') + thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) + title = self._html_search_regex(r'(.+?) - Труба зовёт - Интересный видеохостинг', webpage, + 'video title') + description = self._html_search_meta('description', webpage, 'video description') + + mobj = re.search(r'
Длительность: (?P<minutes>\d+)м:(?P<seconds>\d+)с', + webpage) + duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + + view_count = self._html_search_regex(r'Просмотров: (\d+)
    ', webpage, + 'view count', fatal=False) + view_count = int(view_count) if view_count is not None else None + + comment_count = None + comment_str = self._html_search_regex(r'Комментарии: ([^<]+)', webpage, 'comment count', + fatal=False) + if comment_str.startswith('комментариев нет'): + comment_count = 0 + else: + mobj = re.search(r'\d+ из (?P\d+) комментариев', comment_str) + if mobj: + comment_count = int(mobj.group('total')) + + return { + 'id': video_id, + 'url': video_url, + 'thumbnail': thumbnail, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/la7.py youtube-dl-2014.02.17/youtube_dl/extractor/la7.py --- youtube-dl-2012.09.27/youtube_dl/extractor/la7.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/la7.py 2014-01-27 22:47:05.000000000 +0000 @@ -0,0 +1,63 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, +) + + +class LA7IE(InfoExtractor): + IE_NAME = 'la7.tv' + _VALID_URL = r'''(?x) + https?://(?:www\.)?la7\.tv/ + (?: + richplayer/\?assetid=| + \?contentId= + ) + (?P[0-9]+)''' + + _TEST = { + 'url': 'http://www.la7.tv/richplayer/?assetid=50355319', + 'file': '50355319.mp4', + 'md5': 'ec7d1f0224d20ba293ab56cf2259651f', + 'info_dict': { + 'title': 'IL DIVO', + 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', + 'duration': 6254, + }, + 'skip': 'Blocked in the US', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id + doc = self._download_xml(xml_url, video_id) + + video_title = doc.find('title').text + description = doc.find('description').text + duration = parse_duration(doc.find('duration').text) + thumbnail = doc.find('img').text + view_count = int(doc.find('views').text) + + prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') + + formats = [{ + 'format': vnode.find('quality').text, + 'tbr': int(vnode.find('quality').text), + 'url': vnode.find('fms').text.strip().replace('mp4:', prefix), + } for vnode in doc.findall('.//videos/video')] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'view_count': view_count, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/lifenews.py youtube-dl-2014.02.17/youtube_dl/extractor/lifenews.py --- youtube-dl-2012.09.27/youtube_dl/extractor/lifenews.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/lifenews.py 2014-02-13 15:26:26.000000000 +0000 @@ -0,0 +1,69 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate +) + + +class LifeNewsIE(InfoExtractor): + IE_NAME = 'lifenews' + IE_DESC = 'LIFE | NEWS' + _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P\d+)' + + _TEST = { + 'url': 'http://lifenews.ru/news/126342', + 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a', + 'info_dict': { + 'id': '126342', + 'ext': 'mp4', + 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', + 'description': 'Камеры наблюдения гипермаркета зафиксировали троих 
мужчин, спрятавших оружейный арсенал в камере хранения.', + 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', + 'upload_date': '20140130', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') + + video_url = self._html_search_regex( + r'', webpage, 'video URL') + + thumbnail = self._html_search_regex( + r'', webpage, 'video thumbnail') + + title = self._og_search_title(webpage) + TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS' + if title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)] + + description = self._og_search_description(webpage) + + view_count = self._html_search_regex( + r'
(\d+)', webpage, 'view count', fatal=False) + comment_count = self._html_search_regex( + r'(\d+)', webpage, 'comment count', fatal=False) + + upload_date = self._html_search_regex( + r'

    ", + webpage, u'title') + + return [{ + 'id': video_id, + 'url': video_url, + 'tc_url': video_url, + 'uploader': None, + 'upload_date': None, + 'title': video_title, + 'ext': u'flv', + 'play_path': video_playpath, + 'video_file': video_file, + 'video_hls_playlist': video_hls_playlist, + 'player_url': video_swfobj, + }] + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/naver.py youtube-dl-2014.02.17/youtube_dl/extractor/naver.py --- youtube-dl-2012.09.27/youtube_dl/extractor/naver.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/naver.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,67 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + ExtractorError, +) + + +class NaverIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + + _TEST = { + u'url': u'http://tvcast.naver.com/v/81652', + u'file': u'81652.mp4', + u'info_dict': { + u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', + u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + u'upload_date': u'20130903', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + webpage) + if m_id is None: + raise ExtractorError(u'couldn\'t extract vid and key') + vid = m_id.group(1) + key = m_id.group(2) + query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) + query_urls = compat_urllib_parse.urlencode({ + 'masterVid': vid, + 'protocol': 'p2p', + 'inKey': key, + }) + info = self._download_xml( + 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, + video_id, u'Downloading video info') + urls = self._download_xml( + 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' 
+ query_urls, + video_id, u'Downloading video formats info') + + formats = [] + for format_el in urls.findall('EncodingOptions/EncodingOption'): + domain = format_el.find('Domain').text + if domain.startswith('rtmp'): + continue + formats.append({ + 'url': domain + format_el.find('uri').text, + 'ext': 'mp4', + 'width': int(format_el.find('width').text), + 'height': int(format_el.find('height').text), + }) + + return { + 'id': video_id, + 'title': info.find('Subject').text, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'upload_date': info.find('WriteDate').text.replace('.', ''), + 'view_count': int(info.find('PlayCount').text), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nba.py youtube-dl-2014.02.17/youtube_dl/extractor/nba.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nba.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nba.py 2014-01-29 20:16:25.000000000 +0000 @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NBAIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' + _TEST = { + 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', + 'file': u'0021200253-okc-bkn-recap.nba.mp4', + 'md5': u'c0edcfc37607344e2ff8f13c378c88a4', + 'info_dict': { + 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', + 'title': 'Thunder vs. Nets', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + webpage = self._download_webpage(url, video_id) + + video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' + + shortened_video_id = video_id.rpartition('/')[2] + title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '') + + description = self._html_search_regex(r'', webpage, 'description', fatal=False) + + return { + 'id': shortened_video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nbc.py youtube-dl-2014.02.17/youtube_dl/extractor/nbc.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nbc.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nbc.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,32 @@ +import re + +from .common import InfoExtractor +from ..utils import find_xpath_attr, compat_str + + +class NBCNewsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P\d+)' + + _TEST = { + u'url': u'http://www.nbcnews.com/video/nbc-news/52753292', + u'file': u'52753292.flv', + u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179', + u'info_dict': { + u'title': u'Crew emerges after four-month Mars food study', + u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) + info = all_info.find('video') + + return {'id': video_id, + 'title': info.find('headline').text, + 'ext': 'flv', + 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, + 'description': compat_str(info.find('caption').text), + 'thumbnail': find_xpath_attr(info, 'media', 'type', 
'thumbnail').text, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ndr.py youtube-dl-2014.02.17/youtube_dl/extractor/ndr.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ndr.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ndr.py 2014-02-15 14:34:17.000000000 +0000 @@ -0,0 +1,89 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class NDRIE(InfoExtractor): + IE_NAME = 'ndr' + IE_DESC = 'NDR.de - Mediathek' + _VALID_URL = r'https?://www\.ndr\.de/.+?(?P\d+)\.html' + + _TESTS = [ + { + 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', + 'md5': 'e7a6079ca39d3568f4996cb858dd6708', + 'note': 'Video file', + 'info_dict': { + 'id': '7959', + 'ext': 'mp4', + 'title': 'Markt - die ganze Sendung', + 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', + 'duration': 2655, + }, + }, + { + 'url': 'http://www.ndr.de/info/audio51535.html', + 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', + 'note': 'Audio file', + 'info_dict': { + 'id': '51535', + 'ext': 'mp3', + 'title': 'La Valette entgeht der Hinrichtung', + 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', + 'duration': 884, + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, 'Downloading page') + + title = self._og_search_title(page) + description = self._og_search_description(page) + + mobj = re.search( + r'
(?P<minutes>\d+):(?P<seconds>\d+)', + page) + duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + + formats = [] + + mp3_url = re.search(r'''{src:'(?P
    ([^<]+)', + page, 'director name', fatal=False) + + request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, + compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') + + config = self._download_xml(request, video_id, 'Downloading player config XML') + + title = None + description = None + thumbnail = None + duration = None + formats = [] + + def extract_thumbnail(media): + thumbnails = {} + for asset in media.findall('assets/asset'): + thumbnails[asset.get('quality')] = asset.find('default/url').text + if not thumbnails: + return None + if 'high' in thumbnails: + return thumbnails['high'] + return list(thumbnails.values())[0] + + for media in config.findall('./player/stream/media'): + if media.get('type') == 'posterImage': + thumbnail = extract_thumbnail(media) + elif media.get('type') == 'video': + duration = int(media.get('duration')) + title = media.find('title').text + description = media.find('description').text + # It seems assets always go from lower to better quality, so no need to sort + formats = [{ + 'url': x.find('default/streamerURI').text, + 'app': x.find('default/streamerURI').text.split('/', 3)[3], + 'play_path': x.find('default/url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': x.get('quality'), + } for x in media.findall('assets/asset')] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + } \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nhl.py youtube-dl-2014.02.17/youtube_dl/extractor/nhl.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nhl.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nhl.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,118 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_urllib_parse, + determine_ext, + unified_strdate, +) + + +class NHLBaseInfoExtractor(InfoExtractor): + @staticmethod + def _fix_json(json_string): + return json_string.replace('\\\'', '\'') + + def _extract_video(self, info): + video_id = info['id'] + self.report_extraction(video_id) + + initial_video_url = info['publishPoint'] + data = compat_urllib_parse.urlencode({ + 'type': 'fvod', + 'path': initial_video_url.replace('.mp4', '_sd.mp4'), + }) + path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' 
+ data + path_doc = self._download_xml(path_url, video_id, + u'Downloading final video url') + video_url = path_doc.find('path').text + + join = compat_urlparse.urljoin + return { + 'id': video_id, + 'title': info['name'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': info['description'], + 'duration': int(info['duration']), + 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), + 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), + } + + +class NHLIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com' + _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P\d+)' + + _TEST = { + u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', + u'file': u'453614.mp4', + u'info_dict': { + u'title': u'Quick clip: Weise 4-3 goal vs Flames', + u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', + u'duration': 18, + u'upload_date': u'20131006', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + info_json = self._download_webpage(json_url, video_id, + u'Downloading info json') + info_json = self._fix_json(info_json) + info = json.loads(info_json)[0] + return self._extract_video(info) + + +class NHLVideocenterIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com:videocenter' + IE_DESC = u'NHL videocenter category' + _VALID_URL = r'https?://video\.(?P[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P[^&]+))?' + + @classmethod + def suitable(cls, url): + if NHLIE.suitable(url): + return False + return super(NHLVideocenterIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + team = mobj.group('team') + webpage = self._download_webpage(url, team) + cat_id = self._search_regex( + [r'var defaultCatId = "(.+?)";', + r'{statusIndex:0,index:0,.*?id:(.*?),'], + webpage, u'category id') + playlist_title = self._html_search_regex( + r'tab0"[^>]*?>(.*?)', + webpage, u'playlist title', flags=re.DOTALL).lower().capitalize() + + data = compat_urllib_parse.urlencode({ + 'cid': cat_id, + # This is the default value + 'count': 12, + 'ptrs': 3, + 'format': 'json', + }) + path = '/videocenter/servlets/browse?' 
+ data + request_url = compat_urlparse.urljoin(url, path) + response = self._download_webpage(request_url, playlist_title) + response = self._fix_json(response) + if not response.strip(): + self._downloader.report_warning(u'Got an empty response, retrying with ' + u'the "newvideos" parameter') + response = self._download_webpage(request_url + '&newvideos=true', + playlist_title) + response = self._fix_json(response) + videos = json.loads(response) + + return { + '_type': 'playlist', + 'title': playlist_title, + 'id': cat_id, + 'entries': [self._extract_video(i) for i in videos], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/niconico.py youtube-dl-2014.02.17/youtube_dl/extractor/niconico.py --- youtube-dl-2012.09.27/youtube_dl/extractor/niconico.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/niconico.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,127 @@ +# encoding: utf-8 + +import re +import socket + +from .common import InfoExtractor +from ..utils import ( + compat_http_client, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + compat_str, + + ExtractorError, + unified_strdate, +) + + +class NiconicoIE(InfoExtractor): + IE_NAME = u'niconico' + IE_DESC = u'ニコニコ動画' + + _TEST = { + u'url': u'http://www.nicovideo.jp/watch/sm22312215', + u'file': u'sm22312215.mp4', + u'md5': u'd1a75c0823e2f629128c43e1212760f9', + u'info_dict': { + u'title': u'Big Buck Bunny', + u'uploader': u'takuya0301', + u'uploader_id': u'2698420', + u'upload_date': u'20131123', + u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + }, + u'params': { + u'username': u'ydl.niconico@gmail.com', + u'password': u'youtube-dl', + }, + } + + _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _NETRC_MACHINE = 'niconico' + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = True + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + # No authentication to be performed + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) + return False + + # Log in + login_form_strs = { + u'mail': username, + u'password': password, + } + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + u'https://secure.nicovideo.jp/secure/login', login_data) + login_results = self._download_webpage( + request, u'', note=u'Logging in', errnote=u'Unable to log in') + if re.search(r'(?i)
<h1 class="mb8p4">Log in error</h1>
    ', login_results) is not None: + self._downloader.report_warning(u'unable to log in: bad username or password') + return False + return True + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + # Get video webpage. We are not actually interested in it, but need + # the cookies in order to be able to download the info webpage + self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) + + video_info = self._download_xml( + 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, + note=u'Downloading video info page') + + # Get flv info + flv_info_webpage = self._download_webpage( + u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, u'Downloading flv info') + video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] + + # Start extracting information + video_title = video_info.find('.//title').text + video_extension = video_info.find('.//movie_type').text + video_format = video_extension.upper() + video_thumbnail = video_info.find('.//thumbnail_url').text + video_description = video_info.find('.//description').text + video_uploader_id = video_info.find('.//user_id').text + video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + video_view_count = video_info.find('.//view_counter').text + video_webpage_url = video_info.find('.//watch_url').text + + # uploader + video_uploader = video_uploader_id + url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id + try: + user_info = self._download_xml( + url, video_id, note=u'Downloading user information') + video_uploader = user_info.find('.//nickname').text + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err)) + + return { + 'id': video_id, + 'url': video_real_url, + 'title': video_title, + 'ext': video_extension, + 'format': video_format, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'uploader': video_uploader, + 'upload_date': video_upload_date, + 'uploader_id': video_uploader_id, + 'view_count': video_view_count, + 'webpage_url': video_webpage_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ninegag.py youtube-dl-2014.02.17/youtube_dl/extractor/ninegag.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ninegag.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ninegag.py 2014-01-28 20:45:24.000000000 +0000 @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor + + +class NineGagIE(InfoExtractor): + IE_NAME = '9gag' + _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P[0-9]+)' + + _TEST = { + "url": "http://9gag.tv/v/1912", + "file": "1912.mp4", + "info_dict": { + "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. 
(Thanks, Dino!)", + "title": "\"People Are Awesome 2013\" Is Absolutely Awesome" + }, + 'add_ie': ['Youtube'] + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + data_json = self._html_search_regex(r'''(?x) + [0-9a-z-]*)/?$' + _TEST = { + u'url': u'http://normalboots.com/video/home-alone-games-jontron/', + u'file': u'home-alone-games-jontron.mp4', + u'md5': u'8bf6de238915dd501105b44ef5f1e0f6', + u'info_dict': { + u'title': u'Home Alone Games - JonTron - NormalBoots', + u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/', + u'uploader': u'JonTron', + u'upload_date': u'20140125', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + video_id = mobj.group('videoid') + + info = { + 'id': video_id, + 'uploader': None, + 'upload_date': None, + } + + if url[:4] != 'http': + url = 'http://' + url + + webpage = self._download_webpage(url, video_id) + video_title = self._og_search_title(webpage) + video_description = self._og_search_description(webpage) + video_thumbnail = self._og_search_thumbnail(webpage) + video_uploader = self._html_search_regex(r'Posted\sby\s(?P[A-Za-z]*)\s
</a>', + webpage, 'uploader') + raw_upload_date = self._html_search_regex('[A-Za-z]+, (?P<date>.*)', + webpage, 'date') + video_upload_date = unified_strdate(raw_upload_date) + + player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') + player_page = self._download_webpage(player_url, video_id) + video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file') + + info['url'] = video_url + info['title'] = video_title + info['description'] = video_description + info['thumbnail'] = video_thumbnail + info['uploader'] = video_uploader + info['upload_date'] = video_upload_date + + return info diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/novamov.py youtube-dl-2014.02.17/youtube_dl/extractor/novamov.py --- youtube-dl-2012.09.27/youtube_dl/extractor/novamov.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/novamov.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,63 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urlparse +) + + +class NovamovIE(InfoExtractor): + _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})' + + _TEST = { + 'url': 'http://www.novamov.com/video/4rurhn9x446jj', + 'file': '4rurhn9x446jj.flv', + 'md5': '7205f346a52bbeba427603ba10d4b935', + 'info_dict': { + 'title': 'search engine optimization', + 'description': 'search engine optimization is used to rank the web page in the google search engine' + }, + 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + + page = self._download_webpage('http://www.novamov.com/video/%s' % video_id, + video_id, 'Downloading video page') + + if re.search(r'This file no longer exists on our servers!', page) is not None: + raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) + + filekey = self._search_regex( + r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey') + + title = self._html_search_regex( + r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>', + page, 'title', fatal=False) + + description = self._html_search_regex( + r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p></p><p>([^<]+)</p>
    ', + page, 'description', fatal=False) + + api_response = self._download_webpage( + 'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id), + video_id, 'Downloading video api response') + + response = compat_urlparse.parse_qs(api_response) + + if 'error_msg' in response: + raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True) + + video_url = response['url'][0] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nowness.py youtube-dl-2014.02.17/youtube_dl/extractor/nowness.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nowness.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nowness.py 2014-02-03 13:40:50.000000000 +0000 @@ -0,0 +1,49 @@ +from __future__ import unicode_literals + +import re + +from .brightcove import BrightcoveIE +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class NownessIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P[0-9]+)/(?P[^/]+?)(?:$|[?#])' + + _TEST = { + 'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', + 'file': '2520295746001.mp4', + 'md5': '0ece2f70a7bd252c7b00f3070182d418', + 'info_dict': { + 'description': 'Candor: The Art of Gesticulation', + 'uploader': 'Nowness', + 'title': 'Candor: The Art of Gesticulation', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('slug') + + webpage = self._download_webpage(url, video_id) + player_url = self._search_regex( + r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL') + real_id = self._search_regex( + r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID') + + player_code = self._download_webpage( + player_url, video_id, + note='Downloading player JavaScript', + errnote='Player download failed') + player_code = player_code.replace("'+d+'", real_id) + + bc_url = BrightcoveIE._extract_brightcove_url(player_code) + if bc_url is None: + raise ExtractorError('Could not find player definition') + return { + '_type': 'url', + 'url': bc_url, + 'ie_key': 'Brightcove', + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/nowvideo.py youtube-dl-2014.02.17/youtube_dl/extractor/nowvideo.py --- youtube-dl-2012.09.27/youtube_dl/extractor/nowvideo.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/nowvideo.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,46 @@ +import re + +from .common import InfoExtractor +from ..utils import compat_urlparse + + +class NowVideoIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P\w+)' + _TEST = { + u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', + u'file': u'0mw0yow7b6dxa.flv', + u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', + u'info_dict': { + u"title": u"youtubedl test video _BaW_jenozKc.mp4" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.nowvideo.ch/video/' + video_id + embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id + webpage = self._download_webpage(webpage_url, video_id) + embed_page = self._download_webpage(embed_url, video_id, + u'Downloading embed page') + + self.report_extraction(video_id) + + video_title = self._html_search_regex(r'
<h4>(.*)</h4>
    ', + webpage, u'video title') + + video_key = self._search_regex(r'var fkzd="(.*)";', + embed_page, u'video key') + + api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) + api_response = self._download_webpage(api_call, video_id, + u'Downloading API page') + video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ooyala.py youtube-dl-2014.02.17/youtube_dl/extractor/ooyala.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ooyala.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ooyala.py 2014-01-29 20:16:25.000000000 +0000 @@ -0,0 +1,63 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unescapeHTML + +class OoyalaIE(InfoExtractor): + _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P.+?)(&|$)' + + _TEST = { + # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video + u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', + u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', + u'info_dict': { + u'title': u'Explaining Data Recovery from Hard Drives and SSDs', + u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + }, + } + + @staticmethod + def _url_for_embed_code(embed_code): + return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code + + @classmethod + def _build_url_result(cls, embed_code): + return cls.url_result(cls._url_for_embed_code(embed_code), + ie=cls.ie_key()) + + def _extract_result(self, info, more_info): + return {'id': info['embedCode'], + 'ext': 'mp4', + 'title': unescapeHTML(info['title']), + 'url': info.get('ipad_url') or info['url'], + 'description': unescapeHTML(more_info['description']), + 'thumbnail': more_info['promo'], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + embedCode = mobj.group('id') + player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode + player = self._download_webpage(player_url, embedCode) + mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', + player, u'mobile player url') + mobile_player = self._download_webpage(mobile_url, embedCode) + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, u'info').replace('\\"','"') + videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') + videos_info = json.loads(videos_info) + videos_more_info =json.loads(videos_more_info) + + if videos_more_info.get('lineup'): + videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] + return {'_type': 'playlist', + 'id': embedCode, + 'title': unescapeHTML(videos_more_info['title']), + 'entries': videos, + } + else: + return self._extract_result(videos_info[0], videos_more_info) + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/orf.py youtube-dl-2014.02.17/youtube_dl/extractor/orf.py --- youtube-dl-2012.09.27/youtube_dl/extractor/orf.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/orf.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,98 @@ +# coding: utf-8 
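# [Editor's note] The Novamov and NowVideo extractors above talk to the same
# "player.api.php" backend: a file key scraped from the page is exchanged for
# a urlencoded response whose url= field carries the direct video link. A
# minimal sketch of parsing that exchange; the response string is fabricated
# for illustration, and the error check mirrors the Novamov code above.
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2, as reached via compat_urlparse
api_response = 'url=http://example.com/dl/video.flv&title=demo+video'
response = parse_qs(api_response)
if 'error_msg' in response:
    raise RuntimeError('API error: %s' % response['error_msg'][0])
video_url = response['url'][0]
print(video_url)  # -> http://example.com/dl/video.flv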
+from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + unified_strdate, +) + + +class ORFIE(InfoExtractor): + _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P\d+)' + + _TEST = { + 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', + 'file': '7319747.mp4', + 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', + 'info_dict': { + 'title': 'Was Sie schon immer über Klassik wissen wollten', + 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', + 'duration': 3508, + 'upload_date': '20140105', + }, + 'skip': 'Blocked outside of Austria', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + data_json = self._search_regex( + r'initializeAdworx\((.+?)\);\n', webpage, 'video info') + all_data = json.loads(data_json) + sdata = all_data[0]['values']['segments'] + + def quality_to_int(s): + m = re.search('([0-9]+)', s) + if m is None: + return -1 + return int(m.group(1)) + + entries = [] + for sd in sdata: + video_id = sd['id'] + formats = [{ + 'preference': -10 if fd['delivery'] == 'hls' else None, + 'format_id': '%s-%s-%s' % ( + fd['delivery'], fd['quality'], fd['quality_string']), + 'url': fd['src'], + 'protocol': fd['protocol'], + 'quality': quality_to_int(fd['quality']), + } for fd in sd['playlist_item_array']['sources']] + + # Check for geoblocking. + # There is a property is_geoprotection, but that's always false + geo_str = sd.get('geoprotection_string') + if geo_str: + try: + http_url = next( + f['url'] + for f in formats + if re.match(r'^https?://.*\.mp4$', f['url'])) + except StopIteration: + pass + else: + req = HEADRequest(http_url) + self._request_webpage( + req, video_id, + note='Testing for geoblocking', + errnote=(( + 'This video seems to be blocked outside of %s. ' + 'You may want to try the streaming-* formats.') + % geo_str), + fatal=False) + + self._sort_formats(formats) + + upload_date = unified_strdate(sd['created_date']) + entries.append({ + '_type': 'video', + 'id': video_id, + 'title': sd['header'], + 'formats': formats, + 'description': sd.get('description'), + 'duration': int(sd['duration_in_seconds']), + 'upload_date': upload_date, + 'thumbnail': sd.get('image_full_url'), + }) + + return { + '_type': 'playlist', + 'entries': entries, + 'id': playlist_id, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pbs.py youtube-dl-2014.02.17/youtube_dl/extractor/pbs.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pbs.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pbs.py 2014-02-08 17:34:59.000000000 +0000 @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class PBSIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?: + # Direct video URL + video\.pbs\.org/(?:viralplayer|video)/(?P[0-9]+)/? 
| + # Article with embedded player + (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+)/?(?:$|[?\#]) | + # Player + video\.pbs\.org/partnerplayer/(?P[^/]+)/ + ) + ''' + + _TEST = { + 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', + 'md5': 'ce1888486f0908d555a8093cac9a7362', + 'info_dict': { + 'id': '2365006249', + 'ext': 'mp4', + 'title': 'A More Perfect Union', + 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', + 'duration': 3190, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + presumptive_id = mobj.group('presumptive_id') + display_id = presumptive_id + if presumptive_id: + webpage = self._download_webpage(url, display_id) + url = self._search_regex( + r'', + webpage, 'player URL') + mobj = re.match(self._VALID_URL, url) + + player_id = mobj.group('player_id') + if not display_id: + display_id = player_id + if player_id: + player_page = self._download_webpage( + url, display_id, note='Downloading player page', + errnote='Could not download player page') + video_id = self._search_regex( + r'.*)\.(?P(flv)|(mp4))' + IE_NAME = u'photobucket' + _TEST = { + u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + u'file': u'zpsc0c3b9fa.mp4', + u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', + u'info_dict': { + u"upload_date": u"20130504", + u"uploader": u"rachaneronas", + u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" + } + } + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + + video_id = mobj.group('id') + + video_extension = mobj.group('ext') + + # Retrieve video webpage to extract further information + webpage = self._download_webpage(url, video_id) + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + # We try first by looking the javascript code: + mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P.*?)\);', webpage) + if mobj is not None: + info = json.loads(mobj.group('json')) + return [{ + 'id': video_id, + 'url': info[u'downloadUrl'], + 'uploader': info[u'username'], + 'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'), + 'title': info[u'title'], + 'ext': video_extension, + 'thumbnail': info[u'thumbUrl'], + }] + + # We try looking in other parts of the webpage + video_url = self._search_regex(r'', + webpage, u'video URL') + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract title') + video_title = mobj.group(1).decode('utf-8') + video_uploader = mobj.group(2).decode('utf-8') + + return [{ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'upload_date': None, + 'title': video_title, + 'ext': video_extension.decode('utf-8'), + }] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/podomatic.py youtube-dl-2014.02.17/youtube_dl/extractor/podomatic.py --- youtube-dl-2012.09.27/youtube_dl/extractor/podomatic.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/podomatic.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,49 @@ +import json +import re + +from .common import InfoExtractor + + +class PodomaticIE(InfoExtractor): + IE_NAME = 'podomatic' + _VALID_URL = 
r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' + + _TEST = { + u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", + u"file": u"2009-01-02T16_03_35-08_00.mp3", + u"md5": u"84bb855fcf3429e6bf72460e1eed782d", + u"info_dict": { + u"uploader": u"Science Teaching Tips", + u"uploader_id": u"scienceteachingtips", + u"title": u"64. When the Moon Hits Your Eye", + u"duration": 446, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + channel = mobj.group('channel') + + json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + + '?permalink=true&rtmp=0') % + (mobj.group('proto'), channel, video_id)) + data_json = self._download_webpage( + json_url, video_id, note=u'Downloading video info') + data = json.loads(data_json) + + video_url = data['downloadLink'] + uploader = data['podcast'] + title = data['title'] + thumbnail = data['imageLocation'] + duration = int(data['length'] / 1000.0) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'uploader': uploader, + 'uploader_id': channel, + 'thumbnail': thumbnail, + 'duration': duration, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornhd.py youtube-dl-2014.02.17/youtube_dl/extractor/pornhd.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornhd.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornhd.py 2014-01-28 02:52:52.000000000 +0000 @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import compat_urllib_parse + + +class PornHdIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' + _TEST = { + 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', + 'file': '1962.flv', + 'md5': '35272469887dca97abd30abecc6cdf75', + 'info_dict': { + "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", + "age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('video_id') + video_title = mobj.group('video_title') + + webpage = self._download_webpage(url, video_id) + + next_url = self._html_search_regex( + r'&hd=(http.+?)&', webpage, 'video URL') + next_url = compat_urllib_parse.unquote(next_url) + + video_url = self._download_webpage( + next_url, video_id, note='Retrieving video URL', + errnote='Could not retrieve video URL') + age_limit = 18 + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + 'age_limit': age_limit, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornhub.py youtube-dl-2014.02.17/youtube_dl/extractor/pornhub.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornhub.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornhub.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) +from ..aes import ( + aes_decrypt_text +) + + +class PornHubIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))' + _TEST = { + 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', + 'file': '648719015.mp4', + 'md5': '882f488fa1f0026f023f33576004a2ed', + 'info_dict': { + "uploader":
"BABES-COM", + "title": "Seductive Indian beauty strips down and fingers her pink pussy", + "age_limit": 18 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'
<h1 [^>
    ]+>([^<]+)', webpage, 'title') + video_uploader = self._html_search_regex(r'From: (?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False) + thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = compat_urllib_parse.unquote(thumbnail) + + video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + if webpage.find('"encrypted":true') != -1: + password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ') + video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) + + formats = [] + for video_url in video_urls: + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join(format) + + m = re.match(r'^(?P[0-9]+)P-(?P[0-9]+)K$', format) + if m is None: + height = None + tbr = None + else: + height = int(m.group('height')) + tbr = int(m.group('tbr')) + + formats.append({ + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'tbr': tbr, + 'height': height, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'thumbnail': thumbnail, + 'formats': formats, + 'age_limit': 18, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pornotube.py youtube-dl-2014.02.17/youtube_dl/extractor/pornotube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pornotube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pornotube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + + unified_strdate, +) + + +class PornotubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P[0-9]+))?(/m/(?P[0-9]+))(/(?P.+))$' + _TEST = { + u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + u'file': u'1689755.flv', + u'md5': u'374dd6dcedd24234453b295209aa69b6', + u'info_dict': { + u"upload_date": u"20090708", + u"title": u"Marilyn-Monroe-Bathing", + u"age_limit": 18 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('videoid') + video_title = mobj.group('title') + + # Get webpage content + webpage = self._download_webpage(url, video_id) + + # Get the video URL + VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",' + video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url') + video_url = compat_urllib_parse.unquote(video_url) + + #Get the uploaded date + VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' + upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) + if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) + + info = {'id': video_id, + 'url': video_url, + 'uploader': None, + 'upload_date': upload_date, + 'title': video_title, + 'ext': 'flv', + 'format': 'flv', + 'age_limit': age_limit} + + return [info] diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/pyvideo.py youtube-dl-2014.02.17/youtube_dl/extractor/pyvideo.py --- youtube-dl-2012.09.27/youtube_dl/extractor/pyvideo.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/pyvideo.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,51 @@ +import re +import os + +from 
.common import InfoExtractor + + +class PyvideoIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' + _TESTS = [{ + u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', + u'file': u'24_4WWkSmNo.mp4', + u'md5': u'de317418c8bc76b1fd8633e4f32acbc6', + u'info_dict': { + u"title": u"Become a logging expert in 30 minutes", + u"description": u"md5:9665350d466c67fb5b1598de379021f7", + u"upload_date": u"20130320", + u"uploader": u"NextDayVideo", + u"uploader_id": u"NextDayVideo", + }, + u'add_ie': ['Youtube'], + }, + { + u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', + u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12', + u'info_dict': { + u'id': u'2542', + u'ext': u'm4v', + u'title': u'Gloriajw-SpotifyWithErikBernhardsson182', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) + + if m_youtube is not None: + return self.url_result(m_youtube.group(1), 'Youtube') + + title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>', + webpage, u'title', flags=re.DOTALL) + video_url = self._search_regex([r'<source src="(.*?)"', + r'<dt>Download</dt>.*?<a href="(.+?)"'], + webpage, u'video url', flags=re.DOTALL) + return { + 'id': video_id, + 'title': os.path.splitext(title)[0], + 'url': video_url, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/radiofrance.py youtube-dl-2014.02.17/youtube_dl/extractor/radiofrance.py --- youtube-dl-2012.09.27/youtube_dl/extractor/radiofrance.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/radiofrance.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,55 @@ +# coding: utf-8 +import re + +from .common import InfoExtractor + + +class RadioFranceIE(InfoExtractor): + _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' + IE_NAME = u'radiofrance' + + _TEST = { + u'url': u'http://maison.radiofrance.fr/radiovisions/one-one', + u'file': u'one-one.ogg', + u'md5': u'bdbb28ace95ed0e04faab32ba3160daf', + u'info_dict': { + u"title": u"One to one", + u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", + u"uploader": u"Thomas Hercouët", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title') + description = self._html_search_regex( + r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', + webpage, u'description', fatal=False) + uploader = self._html_search_regex( + r'<div class="credit">  © (.*?)</div>', + webpage, u'uploader', fatal=False) + + formats_str = self._html_search_regex( + r'class="jp-jplayer[^"]*" data-source="([^"]+)">', + webpage, u'audio URLs') + formats = [ + { + 'format_id': fm[0], + 'url': fm[1], + 'vcodec': 'none', + } + for fm in + re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str) + ] + # No sorting, we don't know any more about these formats + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + } diff -Nru 
youtube-dl-2012.09.27/youtube_dl/extractor/rbmaradio.py youtube-dl-2014.02.17/youtube_dl/extractor/rbmaradio.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rbmaradio.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rbmaradio.py 2014-02-02 11:03:18.000000000 +0000 @@ -0,0 +1,55 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + + +class RBMARadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' + _TEST = { + 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', + 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', + 'info_dict': { + 'id': 'ford-lopatin-live-at-primavera-sound-2011', + 'ext': 'mp3', + "uploader_id": "ford-lopatin", + "location": "Spain", + "description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", + "uploader": "Ford & Lopatin", + "title": "Live at Primavera Sound 2011", + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('videoID') + + webpage = self._download_webpage(url, video_id) + + json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', + webpage, 'json data', flags=re.MULTILINE) + + try: + data = json.loads(json_data) + except ValueError as e: + raise ExtractorError('Invalid JSON: ' + str(e)) + + video_url = data['akamai_url'] + '&cbr=256' + + return { + 'id': video_id, + 'url': video_url, + 'title': data['title'], + 'description': data.get('teaser_text'), + 'location': data.get('country_of_origin'), + 'uploader': data.get('host', {}).get('name'), + 'uploader_id': data.get('host', {}).get('slug'), + 'thumbnail': data.get('image', {}).get('large_url_2x'), + 'duration': data.get('duration'), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/redtube.py youtube-dl-2014.02.17/youtube_dl/extractor/redtube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/redtube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/redtube.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RedTubeIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.redtube.com/66418', + 'file': '66418.mp4', + # md5 varies from time to time, as in + # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295 + #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', + 'info_dict': { + "title": "Sucked on a toilet", + "age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + video_extension = 'mp4' + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + + video_url = self._html_search_regex( + r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') + + video_title = self._html_search_regex( + r'<h1 class="videoTitle[^"]*">(.+?)</h1>', + webpage, u'title') + + video_thumbnail = self._html_search_regex( + r'playerInnerHTML.+?<img\s+src="(.+?)"', + webpage, u'thumbnail', fatal=False) + + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + + return { + 'id': video_id, + 'url': video_url, + 'ext': video_extension, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'age_limit': age_limit, 
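+ # 'url' and 'title' above come from the two page regexes earlier in + # _real_extract; 'ext' is the static 'mp4' assigned to video_extension + # at the top of the method.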
+ } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ringtv.py youtube-dl-2014.02.17/youtube_dl/extractor/ringtv.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ringtv.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ringtv.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RingTVIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' + _TEST = { + "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30", + "file": "857645.mp4", + "md5": "d25945f5df41cdca2d2587165ac28720", + "info_dict": { + "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV', + "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id').split('-')[0] + webpage = self._download_webpage(url, video_id) + + if mobj.group('type') == 'news': + video_id = self._search_regex( + r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/ + embed_iframe/[0-9]+/video/([0-9]+)/''', + webpage, 'real video ID') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'addthis:description="([^"]+)"', + webpage, 'description', fatal=False) + final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % video_id + thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % video_id + + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'thumbnail': thumbnail_url, + 'description': description, + } + diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/ro220.py youtube-dl-2014.02.17/youtube_dl/extractor/ro220.py --- youtube-dl-2012.09.27/youtube_dl/extractor/ro220.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/ro220.py 2014-02-02 10:54:05.000000000 +0000 @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + compat_parse_qs, +) + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)' + _TEST = { + "url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", + 'file': 'LYV6doKo7f.mp4', + 'md5': '03af18b73a07b4088753930db7a34add', + 'info_dict': { + "title": "Luati-le Banii sez 4 ep 1", + "description": "Iata-ne reveniti dupa o binemeritata vacanta. 
Va astept si pe Facebook cu pareri si comentarii.", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + flashVars_str = self._search_regex( + r'<param name="flashVars" value="([^"]+)"', + webpage, 'flashVars') + flashVars = compat_parse_qs(flashVars_str) + + return { + '_type': 'video', + 'id': video_id, + 'ext': 'mp4', + 'url': flashVars['videoURL'][0], + 'title': flashVars['title'][0], + 'description': clean_html(flashVars['desc'][0]), + 'thumbnail': flashVars['preview'][0], + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rottentomatoes.py youtube-dl-2014.02.17/youtube_dl/extractor/rottentomatoes.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rottentomatoes.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rottentomatoes.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,18 @@ +from __future__ import unicode_literals + +from .videodetective import VideoDetectiveIE + + +# It just uses the same method as videodetective.com, +# the internetvideoarchive.com is extracted from the og:video property +class RottenTomatoesIE(VideoDetectiveIE): + _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + 'file': '613340.mp4', + 'info_dict': { + 'title': 'TOY STORY 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', + }, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/roxwel.py youtube-dl-2014.02.17/youtube_dl/extractor/roxwel.py --- youtube-dl-2012.09.27/youtube_dl/extractor/roxwel.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/roxwel.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,49 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unified_strdate, determine_ext + + +class RoxwelIE(InfoExtractor): + _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + + _TEST = { + u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html', + u'file': u'passionpittakeawalklive.flv', + u'md5': u'd9dea8360a1e7d485d2206db7fe13035', + u'info_dict': { + u'title': u'Take A Walk (live)', + u'uploader': u'Passion Pit', + u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. 
', + }, + u'skip': u'Requires rtmpdump', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + filename = mobj.group('filename') + info_url = 'http://www.roxwel.com/api/videos/%s' % filename + info_page = self._download_webpage(info_url, filename, + u'Downloading video info') + + self.report_extraction(filename) + info = json.loads(info_page) + rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) + best_rate = rtmp_rates[-1] + url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) + rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url') + ext = determine_ext(rtmp_url) + if ext == 'f4v': + rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) + + return {'id': filename, + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': info['description'], + 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), + 'uploader': info['artist'], + 'uploader_id': info['artistname'], + 'upload_date': unified_strdate(info['dbdate']), + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rtlnow.py youtube-dl-2014.02.17/youtube_dl/extractor/rtlnow.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rtlnow.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rtlnow.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,148 @@ +# encoding: utf-8 + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, +) + + +class RTLnowIE(InfoExtractor): + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + _TESTS = [{ + 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + 'file': '90419.flv', + 'info_dict': { + 'upload_date': '20070416', + 'title': 'Ahornallee - Folge 1 - Der Einzug', + 'description': 'Folge 1 - Der Einzug', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }, + { + 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + 'file': '69756.flv', + 'info_dict': { + 'upload_date': '20120519', + 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + 'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }, + { + 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + 'file': '13883.flv', + 'info_dict': { + 'upload_date': '20090627', + 'title': 'Voxtours - Südafrika-Reporter II', + 'description': 'Südafrika-Reporter II', + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', + 'file': '99205.flv', + 'info_dict': { + 'upload_date': '20080928', + 'title': 'Medicopter 117 - Angst!', + 'description': 'Angst!', + 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' 
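+ # Only the RTL2 and Super RTL test entries assert a thumbnail; both + # point at the static autoimg.static-fra.de image host.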
+ }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', + 'file': '124903.flv', + 'info_dict': { + 'upload_date': '20130101', + 'title': 'Top Gear vom 01.01.2013', + 'description': 'Episode 1', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = 'http://' + mobj.group('url') + video_page_url = 'http://' + mobj.group('domain') + '/' + video_id = mobj.group('video_id') + + webpage = self._download_webpage(webpage_url, video_id) + + note_m = re.search(r'''(?sx) + <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?) + <div[ ]id="playerteaser">''', webpage) + if note_m: + msg = clean_html(note_m.group(1)) + raise ExtractorError(msg) + + video_title = self._html_search_regex( + r'<title>(?P<title>[^<]+?)( \| [^<]*)?', + webpage, 'title') + playerdata_url = self._html_search_regex( + r'\'playerdata\': \'(?P[^\']+)\'', + webpage, 'playerdata_url') + + playerdata = self._download_webpage(playerdata_url, video_id) + mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]>', playerdata) + if mobj: + video_description = mobj.group('description') + if mobj.group('upload_date_Y'): + video_upload_date = mobj.group('upload_date_Y') + elif mobj.group('upload_date_y'): + video_upload_date = '20' + mobj.group('upload_date_y') + else: + video_upload_date = None + if video_upload_date: + video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') + else: + video_description = None + video_upload_date = None + self._downloader.report_warning('Unable to extract description and upload date') + + # Thumbnail: not every video has an thumbnail + mobj = re.search(r'', webpage) + if mobj: + video_thumbnail = mobj.group('thumbnail') + else: + video_thumbnail = None + + mobj = re.search(r']+>rtmpe://(?:[^/]+/){2})(?P[^\]]+)\]\]>', playerdata) + if mobj is None: + raise ExtractorError('Unable to extract media URL') + video_url = mobj.group('url') + video_play_path = 'mp4:' + mobj.group('play_path') + video_player_url = video_page_url + 'includes/vodplayer.swf' + + return { + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, + 'description': video_description, + 'upload_date': video_upload_date, + 'thumbnail': video_thumbnail, + } diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/rutube.py youtube-dl-2014.02.17/youtube_dl/extractor/rutube.py --- youtube-dl-2012.09.27/youtube_dl/extractor/rutube.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/rutube.py 2014-01-28 02:32:22.000000000 +0000 @@ -0,0 +1,124 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json +import itertools + +from .common import InfoExtractor +from ..utils import ( + compat_str, + unified_strdate, + ExtractorError, +) + + +class RutubeIE(InfoExtractor): + IE_NAME = 'rutube' + IE_DESC = 'Rutube videos' + _VALID_URL = r'https?://rutube\.ru/video/(?P[\da-z]{32})' + + _TEST = { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', + 'info_dict': { + 'title': 'Раненный 
кенгуру забежал в аптеку', + 'description': 'http://www.ntdtv.ru ', + 'duration': 80, + 'uploader': 'NTDRussian', + 'uploader_id': '29790', + 'upload_date': '20131016', + }, + 'params': { + # It requires ffmpeg (m3u8 download) + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, + video_id, 'Downloading video JSON') + video = json.loads(api_response) + + api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, + video_id, 'Downloading trackinfo JSON') + trackinfo = json.loads(api_response) + + # Some videos don't have the author field + author = trackinfo.get('author') or {} + m3u8_url = trackinfo['video_balancer'].get('m3u8') + if m3u8_url is None: + raise ExtractorError('Couldn\'t find m3u8 manifest url') + + return { + 'id': video['id'], + 'title': video['title'], + 'description': video['description'], + 'duration': video['duration'], + 'view_count': video['hits'], + 'url': m3u8_url, + 'ext': 'mp4', + 'thumbnail': video['thumbnail_url'], + 'uploader': author.get('name'), + 'uploader_id': compat_str(author['id']) if author else None, + 'upload_date': unified_strdate(video['created_ts']), + 'age_limit': 18 if video['is_adult'] else 0, + } + + +class RutubeChannelIE(InfoExtractor): + IE_NAME = 'rutube:channel' + IE_DESC = 'Rutube channels' + _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + + _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + + def _extract_videos(self, channel_id, channel_title=None): + entries = [] + for pagenum in itertools.count(1): + api_response = self._download_webpage( + self._PAGE_TEMPLATE % (channel_id, pagenum), + channel_id, 'Downloading page %s' % pagenum) + page = json.loads(api_response) + results = page['results'] + if not results: + break + entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) + if not page['has_next']: + break + return self.playlist_result(entries, channel_id, channel_title) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + return self._extract_videos(channel_id) + + +class RutubeMovieIE(RutubeChannelIE): + IE_NAME = 'rutube:movie' + IE_DESC = 'Rutube movies' + _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + + _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie_id = mobj.group('id') + api_response = self._download_webpage( + self._MOVIE_TEMPLATE % movie_id, movie_id, + 'Downloading movie JSON') + movie = json.loads(api_response) + movie_name = movie['name'] + return self._extract_videos(movie_id, movie_name) + + +class RutubePersonIE(RutubeChannelIE): + IE_NAME = 'rutube:person' + IE_DESC = 'Rutube person videos' + _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' + + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/servingsys.py youtube-dl-2014.02.17/youtube_dl/extractor/servingsys.py --- youtube-dl-2012.09.27/youtube_dl/extractor/servingsys.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/servingsys.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + + +class ServingSysIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', + 'playlist': [{ + 'file': '29955898.flv', + 'md5': 'baed851342df6846eb8677a60a011a0f', + 'info_dict': { + 'title': 'AdAPPter_Hyundai_demo (1)', + 'duration': 74, + 'tbr': 1378, + 'width': 640, + 'height': 400, + }, + }, { + 'file': '29907998.flv', + 'md5': '979b4da2655c4bc2d81aeb915a8c5014', + 'info_dict': { + 'title': 'AdAPPter_Hyundai_demo (2)', + 'duration': 34, + 'width': 854, + 'height': 480, + 'tbr': 516, + }, + }], + 'params': { + 'playlistend': 2, + }, + 'skip': 'Blocked in the US [sic]', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + pl_id = mobj.group('id') + + vast_doc = self._download_xml(url, pl_id) + title = vast_doc.find('.//AdTitle').text + media = vast_doc.find('.//MediaFile').text + info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') + + doc = self._download_xml(info_url, pl_id, 'Downloading video info') + entries = [{ + '_type': 'video', + 'id': a.attrib['id'], + 'title': '%s (%s)' % (title, a.attrib['assetID']), + 'url': a.attrib['URL'], + 'duration': int_or_none(a.attrib.get('length')), + 'tbr': int_or_none(a.attrib.get('bitrate')), + 'height': int_or_none(a.attrib.get('height')), + 'width': int_or_none(a.attrib.get('width')), + } for a in doc.findall('.//AdditionalAssets/asset')] + + return { + '_type': 'playlist', + 'id': pl_id, + 'title': title, + 'entries': entries, + } + + \ No newline at end of file diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/sina.py youtube-dl-2014.02.17/youtube_dl/extractor/sina.py --- youtube-dl-2012.09.27/youtube_dl/extractor/sina.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/sina.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + compat_urllib_parse, +) + + +class SinaIE(InfoExtractor): + _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ + ( + (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) + | + # This is used by external sites like Weibo + (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf) + ) + ''' + + _TESTS = [ + { + 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', + 'file': '110028898.flv', + 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', + 'info_dict': { + 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + } + }, + { + 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html', + 'info_dict': { + 'id': '101314253', + 'ext': 'flv', + 'title': '军方提高对朝情报监视级别', + }, + }, + ] + + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None + + def _extract_video(self, video_id): + data = compat_urllib_parse.urlencode({'vid': video_id}) + url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, + video_id, 'Downloading video url') + image_page = self._download_webpage( + 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, + video_id, 'Downloading thumbnail info') + + return {'id': video_id, + 'url': url_doc.find('./durl/url').text, + 'ext': 'flv', + 'title':
url_doc.find('./vname').text, + 'thumbnail': image_page.split('=')[1], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + video_id = mobj.group('id') + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = compat_urllib_request.Request(url) + request.get_method = lambda: 'HEAD' + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + elif video_id is None: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id') + + return self._extract_video(video_id) diff -Nru youtube-dl-2012.09.27/youtube_dl/extractor/slashdot.py youtube-dl-2014.02.17/youtube_dl/extractor/slashdot.py --- youtube-dl-2012.09.27/youtube_dl/extractor/slashdot.py 1970-01-01 00:00:00.000000000 +0000 +++ youtube-dl-2014.02.17/youtube_dl/extractor/slashdot.py 2014-01-27 02:06:44.000000000 +0000 @@ -0,0 +1,24 @@ +import re + +from .common import InfoExtractor + + +class SlashdotIE(InfoExtractor): + _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P.*?)(&|$)' + + _TEST = { + u'add_ie': ['Ooyala'], + u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', + u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', + u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', + u'info_dict': { + u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + ooyala_url = self._search_regex(r'', + webpage, 'json data') + info = json.loads(json_data) + desc = self._html_search_regex(r'
    .*?(.*?)

    ', + webpage, 'description', flags = re.DOTALL) + + thumbnail = self._search_regex(r'[\s.]*
    [\s.]*)(.*?)(?:)', webpage, 'subtitles_language_select', flags=re.DOTALL) + languages = re.findall(r'(?: