diff -Nru python-webvtt-0.4.3/debian/changelog python-webvtt-0.4.5/debian/changelog --- python-webvtt-0.4.3/debian/changelog 2019-11-27 19:59:31.000000000 +0000 +++ python-webvtt-0.4.5/debian/changelog 2020-06-30 13:54:01.000000000 +0000 @@ -1,3 +1,11 @@ +python-webvtt (0.4.5-1) unstable; urgency=low + + * New upstream release. + * Bump debhelper version to 13. + * Bump Standards-Version to 4.5.0. + + -- Michael Fladischer Tue, 30 Jun 2020 15:54:01 +0200 + python-webvtt (0.4.3-1) unstable; urgency=low [ Ondřej Nový ] diff -Nru python-webvtt-0.4.3/debian/control python-webvtt-0.4.5/debian/control --- python-webvtt-0.4.3/debian/control 2019-11-27 19:59:31.000000000 +0000 +++ python-webvtt-0.4.5/debian/control 2020-06-30 13:54:01.000000000 +0000 @@ -6,7 +6,7 @@ Testsuite: autopkgtest-pkg-python Priority: optional Build-Depends: - debhelper-compat (= 12), + debhelper-compat (= 13), dh-python, docbook-to-man, python3-all, @@ -14,7 +14,7 @@ python3-setuptools, python3-sphinx, python3-sphinx-rtd-theme, -Standards-Version: 4.4.1 +Standards-Version: 4.5.0 Vcs-Browser: https://salsa.debian.org/python-team/modules/python-webvtt Vcs-Git: https://salsa.debian.org/python-team/modules/python-webvtt.git Homepage: https://github.com/glut23/webvtt-py/ diff -Nru python-webvtt-0.4.3/docs/history.rst python-webvtt-0.4.5/docs/history.rst --- python-webvtt-0.4.3/docs/history.rst 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/docs/history.rst 2020-04-09 09:10:58.000000000 +0000 @@ -1,15 +1,30 @@ History ======= -0.4.3 (22-11-2019) Few improvements ------------------------------------ +0.4.5 (09-04-2020) +------------------ + +* Fix issue reading buffer + +0.4.4 (27-03-2020) +------------------ + +* Allow parsing empty SBV captions, thanks to `@ishunyu `_ (#26) +* Fix invalid time cues, thanks to `@sontek `_ (#19) +* Enable pytest as test runner, thanks to `@sontek `_ (#20) +* Packaging improvements +* Added Python 3.8 support +* Improve parsing empty lines + +0.4.3 (22-11-2019) +------------------ * Parsing improvements, thanks to `@sontek `_ (#18) * Add support for reading content from a file-like object, thanks to `@omerholz `_ (#23) * Documentation fixes thanks to `@sontek `_ (#22) and `@netcmcc `_ (#24) -0.4.2 (08-06-2018) Rename of modules and usability improvements ---------------------------------------------------------------- +0.4.2 (08-06-2018) +------------------ * Renamed and reorganized few of the modules * Parsing methods are now class methods: read, from_srt and from_sbv @@ -20,13 +35,13 @@ webvtt.read('captions.vtt') # this will return a WebVTT instance -0.4.1 (24-12-2017) Hot fix on cue identifiers ---------------------------------------------- +0.4.1 (24-12-2017) +------------------ * Support for saving cue identifiers -0.4.0 (18-09-2017) Refactor and parse compatibility ---------------------------------------------------- +0.4.0 (18-09-2017) +------------------ The main goal of this release is a refactor of the WebVTT parser to be able to parse easier and give support to new features of the format. @@ -45,8 +60,8 @@ * Refactored WebVTT parser -0.3.3 (23-08-2017) Hot fix on cue tags --------------------------------------- +0.3.3 (23-08-2017) +------------------ The text for the caption is now returned clean (tags removed). The cue text could contain tags like: * timestamp tags: *<00:19.000>* @@ -56,20 +71,20 @@ Also a new attribute is available on captions to retrieve the text without cleaning tags: **raw_text** -0.3.2 (11-08-2017) Hot fix for compatibility --------------------------------------------- +0.3.2 (11-08-2017) +------------------ The goal of this release if to allow the WebVTT parser to be able to read caption files that contain metadata headers that extend to more than one line. -0.3.1 (08-08-2017) Compatibility updates ----------------------------------------- +0.3.1 (08-08-2017) +------------------ * Made hours in WebVTT parser optional as per specs. * Added support to parse WebVTT files that contain metadata headers. -0.3.0 (02-06-2016) YouTube SBV ------------------------------- +0.3.0 (02-06-2016) +------------------ New features: @@ -83,14 +98,14 @@ * Added an exception for invalid timestamps in captions. * Added an exception when saving without a filename. -0.2.0 (23-05-2016) Module refactor ----------------------------------- +0.2.0 (23-05-2016) +------------------ * Refactor of the main module and parsers. -0.1.0 (20-05-2016) First release --------------------------------- +0.1.0 (20-05-2016) +------------------ This module is released with the following initial features: diff -Nru python-webvtt-0.4.3/MANIFEST.in python-webvtt-0.4.5/MANIFEST.in --- python-webvtt-0.4.3/MANIFEST.in 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/MANIFEST.in 2020-04-09 09:10:58.000000000 +0000 @@ -1,6 +1,7 @@ include LICENSE -include MANIFEST.in -include *.rst +include README.rst + +graft tests + global-exclude __pycache__ global-exclude *.py[co] -recursive-exclude tests * \ No newline at end of file diff -Nru python-webvtt-0.4.3/requirements.txt python-webvtt-0.4.5/requirements.txt --- python-webvtt-0.4.3/requirements.txt 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/requirements.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -docopt==0.6.2 diff -Nru python-webvtt-0.4.3/setup.cfg python-webvtt-0.4.5/setup.cfg --- python-webvtt-0.4.3/setup.cfg 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/setup.cfg 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,2 @@ +[metadata] +description-file = README.rst diff -Nru python-webvtt-0.4.3/setup.py python-webvtt-0.4.5/setup.py --- python-webvtt-0.4.3/setup.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/setup.py 2020-04-09 09:10:58.000000000 +0000 @@ -1,44 +1,47 @@ import io -from setuptools import setup +import re +from setuptools import setup, find_packages -from webvtt import __version__ +with io.open('README.rst', 'r', encoding='utf-8') as f: + readme = f.read() +with io.open('webvtt/__init__.py', 'rt', encoding='utf-8') as f: + version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) -def readme(): - with io.open('README.rst', 'r', encoding='utf-8') as f: - return f.read() setup( name='webvtt-py', - version=__version__, + version=version, description='WebVTT reader, writer and segmenter', - long_description=readme(), + long_description=readme, author='Alejandro Mendez', author_email='amendez23@gmail.com', url='https://github.com/glut23/webvtt-py', - packages=[ - 'webvtt', - ], + packages=find_packages('.', exclude=['tests']), include_package_data=True, - install_requires=['docopt'], + install_requires=[ + 'docopt' + ], entry_points={ 'console_scripts': [ 'webvtt=webvtt.cli:main' ] }, license='MIT', + python_requires='>=3.4', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Operating System :: OS Independent', 'Topic :: Software Development :: Libraries :: Python Modules', ], keywords='webvtt captions', - test_suite='tests' -) \ No newline at end of file +) diff -Nru python-webvtt-0.4.3/tests/sbv_parser.py python-webvtt-0.4.5/tests/sbv_parser.py --- python-webvtt-0.4.3/tests/sbv_parser.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/sbv_parser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -import webvtt - -from .generic import GenericParserTestCase - - -class SBVParserTestCase(GenericParserTestCase): - - def test_sbv_parse_empty_file(self): - self.assertRaises( - webvtt.errors.MalformedFileError, - webvtt.from_sbv, - self._get_file('empty.vtt') # We reuse this file as it is empty and serves the purpose. - ) - - def test_sbv_invalid_format(self): - self.assertRaises( - webvtt.errors.MalformedFileError, - webvtt.from_sbv, - self._get_file('invalid_format.sbv') - ) - - def test_sbv_total_length(self): - self.assertEqual( - webvtt.from_sbv(self._get_file('sample.sbv')).total_length, - 16 - ) - - def test_sbv_parse_captions(self): - self.assertEqual( - len(webvtt.from_srt(self._get_file('sample.srt')).captions), - 5 - ) - - def test_sbv_missing_timeframe_line(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - webvtt.from_sbv, - self._get_file('missing_timeframe.sbv') - ) - - def test_sbv_missing_caption_text(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - webvtt.from_sbv, - self._get_file('missing_caption_text.sbv') - ) - - def test_sbv_invalid_timestamp(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - webvtt.from_sbv, - self._get_file('invalid_timeframe.sbv') - ) - - def test_sbv_timestamps_format(self): - vtt = webvtt.from_sbv(self._get_file('sample.sbv')) - self.assertEqual(vtt.captions[1].start, '00:00:11.378') - self.assertEqual(vtt.captions[1].end, '00:00:12.305') - - def test_sbv_timestamps_in_seconds(self): - vtt = webvtt.from_sbv(self._get_file('sample.sbv')) - self.assertEqual(vtt.captions[1].start_in_seconds, 11.378) - self.assertEqual(vtt.captions[1].end_in_seconds, 12.305) - - def test_sbv_get_caption_text(self): - vtt = webvtt.from_sbv(self._get_file('sample.sbv')) - self.assertEqual(vtt.captions[1].text, 'Caption text #2') - - def test_sbv_get_caption_text_multiline(self): - vtt = webvtt.from_sbv(self._get_file('sample.sbv')) - self.assertEqual( - vtt.captions[2].text, - 'Caption text #3 (line 1)\nCaption text #3 (line 2)' - ) - self.assertListEqual( - vtt.captions[2].lines, - ['Caption text #3 (line 1)', 'Caption text #3 (line 2)'] - ) diff -Nru python-webvtt-0.4.3/tests/segmenter.py python-webvtt-0.4.5/tests/segmenter.py --- python-webvtt-0.4.3/tests/segmenter.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/segmenter.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,180 +0,0 @@ -import os -import unittest -from shutil import rmtree - -from webvtt import WebVTTSegmenter, Caption -from webvtt.errors import InvalidCaptionsError -from webvtt import WebVTT - -BASE_DIR = os.path.dirname(__file__) -SUBTITLES_DIR = os.path.join(BASE_DIR, 'subtitles') -OUTPUT_DIR = os.path.join(BASE_DIR, 'output') - - -class WebVTTSegmenterTestCase(unittest.TestCase): - - def setUp(self): - self.segmenter = WebVTTSegmenter() - - def tearDown(self): - if os.path.exists(OUTPUT_DIR): - rmtree(OUTPUT_DIR) - - def _parse_captions(self, filename): - self.webvtt = WebVTT().read(os.path.join(SUBTITLES_DIR, filename)) - - def test_invalid_captions(self): - self.assertRaises( - FileNotFoundError, - self.segmenter.segment, - 'text' - ) - - self.assertRaises( - InvalidCaptionsError, - self.segmenter.segment, - 10 - ) - - def test_single_invalid_caption(self): - self.assertRaises( - InvalidCaptionsError, - self.segmenter.segment, - [Caption(), Caption(), 'text', Caption()] - ) - - def test_total_segments(self): - # segment with default 10 seconds - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR) - self.assertEqual(self.segmenter.total_segments, 7) - - # segment with custom 30 seconds - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR, 30) - self.assertEqual(self.segmenter.total_segments, 3) - - def test_output_folder_is_created(self): - self.assertFalse(os.path.exists(OUTPUT_DIR)) - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR) - self.assertTrue(os.path.exists(OUTPUT_DIR)) - - def test_segmentation_files_exist(self): - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR) - for i in range(7): - self.assertTrue( - os.path.exists(os.path.join(OUTPUT_DIR, 'fileSequence{}.webvtt'.format(i))) - ) - self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'prog_index.m3u8'))) - - def test_segmentation(self): - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR) - - # segment 1 should have caption 1 and 2 - self.assertEqual(len(self.segmenter.segments[0]), 2) - self.assertIn(self.webvtt.captions[0], self.segmenter.segments[0]) - self.assertIn(self.webvtt.captions[1], self.segmenter.segments[0]) - # segment 2 should have caption 2 again (overlap), 3 and 4 - self.assertEqual(len(self.segmenter.segments[1]), 3) - self.assertIn(self.webvtt.captions[2], self.segmenter.segments[1]) - self.assertIn(self.webvtt.captions[3], self.segmenter.segments[1]) - # segment 3 should have caption 4 again (overlap), 5, 6 and 7 - self.assertEqual(len(self.segmenter.segments[2]), 4) - self.assertIn(self.webvtt.captions[3], self.segmenter.segments[2]) - self.assertIn(self.webvtt.captions[4], self.segmenter.segments[2]) - self.assertIn(self.webvtt.captions[5], self.segmenter.segments[2]) - self.assertIn(self.webvtt.captions[6], self.segmenter.segments[2]) - # segment 4 should have caption 7 again (overlap), 8, 9 and 10 - self.assertEqual(len(self.segmenter.segments[3]), 4) - self.assertIn(self.webvtt.captions[6], self.segmenter.segments[3]) - self.assertIn(self.webvtt.captions[7], self.segmenter.segments[3]) - self.assertIn(self.webvtt.captions[8], self.segmenter.segments[3]) - self.assertIn(self.webvtt.captions[9], self.segmenter.segments[3]) - # segment 5 should have caption 10 again (overlap), 11 and 12 - self.assertEqual(len(self.segmenter.segments[4]), 3) - self.assertIn(self.webvtt.captions[9], self.segmenter.segments[4]) - self.assertIn(self.webvtt.captions[10], self.segmenter.segments[4]) - self.assertIn(self.webvtt.captions[11], self.segmenter.segments[4]) - # segment 6 should have caption 12 again (overlap), 13, 14 and 15 - self.assertEqual(len(self.segmenter.segments[5]), 4) - self.assertIn(self.webvtt.captions[11], self.segmenter.segments[5]) - self.assertIn(self.webvtt.captions[12], self.segmenter.segments[5]) - self.assertIn(self.webvtt.captions[13], self.segmenter.segments[5]) - self.assertIn(self.webvtt.captions[14], self.segmenter.segments[5]) - # segment 7 should have caption 15 again (overlap) and 16 - self.assertEqual(len(self.segmenter.segments[6]), 2) - self.assertIn(self.webvtt.captions[14], self.segmenter.segments[6]) - self.assertIn(self.webvtt.captions[15], self.segmenter.segments[6]) - - def test_segment_content(self): - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR, 10) - - with open(os.path.join(OUTPUT_DIR, 'fileSequence0.webvtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - 'X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000', - '', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - '', - '00:00:07.000 --> 00:00:11.890', - 'Caption text #2' - ] - - self.assertListEqual(lines, expected_lines) - - def test_manifest_content(self): - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR, 10) - - with open(os.path.join(OUTPUT_DIR, 'prog_index.m3u8'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - '#EXTM3U', - '#EXT-X-TARGETDURATION:{}'.format(self.segmenter.seconds), - '#EXT-X-VERSION:3', - '#EXT-X-PLAYLIST-TYPE:VOD', - ] - - for i in range(7): - expected_lines.extend([ - '#EXTINF:30.00000', - 'fileSequence{}.webvtt'.format(i) - ]) - - expected_lines.append('#EXT-X-ENDLIST') - - for index, line in enumerate(expected_lines): - self.assertEqual(lines[index], line) - - def test_customize_mpegts(self): - self._parse_captions('sample.vtt') - self.segmenter.segment(self.webvtt, OUTPUT_DIR, mpegts=800000) - - with open(os.path.join(OUTPUT_DIR, 'fileSequence0.webvtt'), 'r', encoding='utf-8') as f: - lines = f.readlines() - self.assertIn('MPEGTS:800000', lines[1]) - - def test_segment_from_file(self): - self.segmenter.segment(os.path.join(SUBTITLES_DIR, 'sample.vtt'), OUTPUT_DIR), - self.assertEqual(self.segmenter.total_segments, 7) - - def test_segment_with_no_captions(self): - self.segmenter.segment(os.path.join(SUBTITLES_DIR, 'no_captions.vtt'), OUTPUT_DIR), - self.assertEqual(self.segmenter.total_segments, 0) - - def test_total_segments_readonly(self): - self.assertRaises( - AttributeError, - setattr, - WebVTTSegmenter(), - 'total_segments', - 5 - ) diff -Nru python-webvtt-0.4.3/tests/srt_parser.py python-webvtt-0.4.5/tests/srt_parser.py --- python-webvtt-0.4.3/tests/srt_parser.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/srt_parser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -import webvtt - -from .generic import GenericParserTestCase - - -class SRTParserTestCase(GenericParserTestCase): - - def test_srt_parse_empty_file(self): - self.assertRaises( - webvtt.errors.MalformedFileError, - webvtt.from_srt, - self._get_file('empty.vtt') # We reuse this file as it is empty and serves the purpose. - ) - - def test_srt_invalid_format(self): - for i in range(1, 5): - self.assertRaises( - webvtt.errors.MalformedFileError, - webvtt.from_srt, - self._get_file('invalid_format{}.srt'.format(i)) - ) - - def test_srt_total_length(self): - self.assertEqual( - webvtt.from_srt(self._get_file('sample.srt')).total_length, - 23 - ) - - def test_srt_parse_captions(self): - self.assertTrue(webvtt.from_srt(self._get_file('sample.srt')).captions) - - def test_srt_missing_timeframe_line(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - webvtt.from_srt, - self._get_file('missing_timeframe.srt') - ) - - def test_srt_empty_caption_text(self): - self.assertTrue(webvtt.from_srt(self._get_file('missing_caption_text.srt')).captions) - - def test_srt_empty_gets_removed(self): - captions = webvtt.from_srt(self._get_file('missing_caption_text.srt')).captions - self.assertEqual(len(captions), 4) - - def test_srt_invalid_timestamp(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - webvtt.from_srt, - self._get_file('invalid_timeframe.srt') - ) - - def test_srt_timestamps_format(self): - vtt = webvtt.from_srt(self._get_file('sample.srt')) - self.assertEqual(vtt.captions[2].start, '00:00:11.890') - self.assertEqual(vtt.captions[2].end, '00:00:16.320') - - def test_srt_parse_get_caption_data(self): - vtt = webvtt.from_srt(self._get_file('one_caption.srt')) - self.assertEqual(vtt.captions[0].start_in_seconds, 0.5) - self.assertEqual(vtt.captions[0].start, '00:00:00.500') - self.assertEqual(vtt.captions[0].end_in_seconds, 7) - self.assertEqual(vtt.captions[0].end, '00:00:07.000') - self.assertEqual(vtt.captions[0].lines[0], 'Caption text #1') - self.assertEqual(len(vtt.captions[0].lines), 1) diff -Nru python-webvtt-0.4.3/tests/srt.py python-webvtt-0.4.5/tests/srt.py --- python-webvtt-0.4.3/tests/srt.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/srt.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -import os -import unittest -from shutil import rmtree, copy - -import webvtt - -from .generic import GenericParserTestCase - - -BASE_DIR = os.path.dirname(__file__) -OUTPUT_DIR = os.path.join(BASE_DIR, 'output') - - -class SRTCaptionsTestCase(GenericParserTestCase): - - def setUp(self): - os.makedirs(OUTPUT_DIR) - - def tearDown(self): - if os.path.exists(OUTPUT_DIR): - rmtree(OUTPUT_DIR) - - def test_convert_from_srt_to_vtt_and_back_gives_same_file(self): - copy(self._get_file('sample.srt'), OUTPUT_DIR) - - vtt = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'sample.srt')) - vtt.save_as_srt(os.path.join(OUTPUT_DIR, 'sample_converted.srt')) - - with open(os.path.join(OUTPUT_DIR, 'sample.srt'), 'r', encoding='utf-8') as f: - original = f.read() - - with open(os.path.join(OUTPUT_DIR, 'sample_converted.srt'), 'r', encoding='utf-8') as f: - converted = f.read() - - self.assertEqual(original.strip(), converted.strip()) diff -Nru python-webvtt-0.4.3/tests/subtitles/youtube_dl.vtt python-webvtt-0.4.5/tests/subtitles/youtube_dl.vtt --- python-webvtt-0.4.3/tests/subtitles/youtube_dl.vtt 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/subtitles/youtube_dl.vtt 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,24 @@ +WEBVTT +Kind: captions +Language: en +Style: +::cue(c.colorCCCCCC) { color: rgb(204,204,204); + } +::cue(c.colorE5E5E5) { color: rgb(229,229,229); + } +## + + +00:04:46.070 --> 00:04:46.470 align:start position:0% +yeah + + +00:04:46.470 --> 00:05:04.080 align:start position:0% +yeah +what +00:05:04.080 --> 00:05:05.069 align:start position:0% +this<00:05:04.199> will<00:05:04.379> happen<00:05:04.620> is<00:05:04.860> I'm<00:05:05.069> telling + +00:05:05.069 --> 00:05:05.400 align:start position:0% +this will happen is I'm telling + diff -Nru python-webvtt-0.4.3/tests/test_sbv_parser.py python-webvtt-0.4.5/tests/test_sbv_parser.py --- python-webvtt-0.4.3/tests/test_sbv_parser.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_sbv_parser.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,74 @@ +import webvtt + +from .generic import GenericParserTestCase + + +class SBVParserTestCase(GenericParserTestCase): + + def test_sbv_parse_empty_file(self): + self.assertRaises( + webvtt.errors.MalformedFileError, + webvtt.from_sbv, + self._get_file('empty.vtt') # We reuse this file as it is empty and serves the purpose. + ) + + def test_sbv_invalid_format(self): + self.assertRaises( + webvtt.errors.MalformedFileError, + webvtt.from_sbv, + self._get_file('invalid_format.sbv') + ) + + def test_sbv_total_length(self): + self.assertEqual( + webvtt.from_sbv(self._get_file('sample.sbv')).total_length, + 16 + ) + + def test_sbv_parse_captions(self): + self.assertEqual( + len(webvtt.from_srt(self._get_file('sample.srt')).captions), + 5 + ) + + def test_sbv_missing_timeframe_line(self): + self.assertRaises( + webvtt.errors.MalformedCaptionError, + webvtt.from_sbv, + self._get_file('missing_timeframe.sbv') + ) + + def test_sbv_missing_caption_text(self): + self.assertTrue(webvtt.from_sbv(self._get_file('missing_caption_text.sbv')).captions) + + def test_sbv_invalid_timestamp(self): + self.assertRaises( + webvtt.errors.MalformedCaptionError, + webvtt.from_sbv, + self._get_file('invalid_timeframe.sbv') + ) + + def test_sbv_timestamps_format(self): + vtt = webvtt.from_sbv(self._get_file('sample.sbv')) + self.assertEqual(vtt.captions[1].start, '00:00:11.378') + self.assertEqual(vtt.captions[1].end, '00:00:12.305') + + def test_sbv_timestamps_in_seconds(self): + vtt = webvtt.from_sbv(self._get_file('sample.sbv')) + self.assertEqual(vtt.captions[1].start_in_seconds, 11.378) + self.assertEqual(vtt.captions[1].end_in_seconds, 12.305) + + def test_sbv_get_caption_text(self): + vtt = webvtt.from_sbv(self._get_file('sample.sbv')) + self.assertEqual(vtt.captions[1].text, 'Caption text #2') + + def test_sbv_get_caption_text_multiline(self): + vtt = webvtt.from_sbv(self._get_file('sample.sbv')) + self.assertEqual( + vtt.captions[2].text, + 'Caption text #3 (line 1)\nCaption text #3 (line 2)' + ) + self.assertListEqual( + vtt.captions[2].lines, + ['Caption text #3 (line 1)', 'Caption text #3 (line 2)'] + ) diff -Nru python-webvtt-0.4.3/tests/test_segmenter.py python-webvtt-0.4.5/tests/test_segmenter.py --- python-webvtt-0.4.3/tests/test_segmenter.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_segmenter.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,180 @@ +import os +import unittest +from shutil import rmtree + +from webvtt import WebVTTSegmenter, Caption +from webvtt.errors import InvalidCaptionsError +from webvtt import WebVTT + +BASE_DIR = os.path.dirname(__file__) +SUBTITLES_DIR = os.path.join(BASE_DIR, 'subtitles') +OUTPUT_DIR = os.path.join(BASE_DIR, 'output') + + +class WebVTTSegmenterTestCase(unittest.TestCase): + + def setUp(self): + self.segmenter = WebVTTSegmenter() + + def tearDown(self): + if os.path.exists(OUTPUT_DIR): + rmtree(OUTPUT_DIR) + + def _parse_captions(self, filename): + self.webvtt = WebVTT().read(os.path.join(SUBTITLES_DIR, filename)) + + def test_invalid_captions(self): + self.assertRaises( + FileNotFoundError, + self.segmenter.segment, + 'text' + ) + + self.assertRaises( + InvalidCaptionsError, + self.segmenter.segment, + 10 + ) + + def test_single_invalid_caption(self): + self.assertRaises( + InvalidCaptionsError, + self.segmenter.segment, + [Caption(), Caption(), 'text', Caption()] + ) + + def test_total_segments(self): + # segment with default 10 seconds + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR) + self.assertEqual(self.segmenter.total_segments, 7) + + # segment with custom 30 seconds + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR, 30) + self.assertEqual(self.segmenter.total_segments, 3) + + def test_output_folder_is_created(self): + self.assertFalse(os.path.exists(OUTPUT_DIR)) + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR) + self.assertTrue(os.path.exists(OUTPUT_DIR)) + + def test_segmentation_files_exist(self): + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR) + for i in range(7): + self.assertTrue( + os.path.exists(os.path.join(OUTPUT_DIR, 'fileSequence{}.webvtt'.format(i))) + ) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'prog_index.m3u8'))) + + def test_segmentation(self): + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR) + + # segment 1 should have caption 1 and 2 + self.assertEqual(len(self.segmenter.segments[0]), 2) + self.assertIn(self.webvtt.captions[0], self.segmenter.segments[0]) + self.assertIn(self.webvtt.captions[1], self.segmenter.segments[0]) + # segment 2 should have caption 2 again (overlap), 3 and 4 + self.assertEqual(len(self.segmenter.segments[1]), 3) + self.assertIn(self.webvtt.captions[2], self.segmenter.segments[1]) + self.assertIn(self.webvtt.captions[3], self.segmenter.segments[1]) + # segment 3 should have caption 4 again (overlap), 5, 6 and 7 + self.assertEqual(len(self.segmenter.segments[2]), 4) + self.assertIn(self.webvtt.captions[3], self.segmenter.segments[2]) + self.assertIn(self.webvtt.captions[4], self.segmenter.segments[2]) + self.assertIn(self.webvtt.captions[5], self.segmenter.segments[2]) + self.assertIn(self.webvtt.captions[6], self.segmenter.segments[2]) + # segment 4 should have caption 7 again (overlap), 8, 9 and 10 + self.assertEqual(len(self.segmenter.segments[3]), 4) + self.assertIn(self.webvtt.captions[6], self.segmenter.segments[3]) + self.assertIn(self.webvtt.captions[7], self.segmenter.segments[3]) + self.assertIn(self.webvtt.captions[8], self.segmenter.segments[3]) + self.assertIn(self.webvtt.captions[9], self.segmenter.segments[3]) + # segment 5 should have caption 10 again (overlap), 11 and 12 + self.assertEqual(len(self.segmenter.segments[4]), 3) + self.assertIn(self.webvtt.captions[9], self.segmenter.segments[4]) + self.assertIn(self.webvtt.captions[10], self.segmenter.segments[4]) + self.assertIn(self.webvtt.captions[11], self.segmenter.segments[4]) + # segment 6 should have caption 12 again (overlap), 13, 14 and 15 + self.assertEqual(len(self.segmenter.segments[5]), 4) + self.assertIn(self.webvtt.captions[11], self.segmenter.segments[5]) + self.assertIn(self.webvtt.captions[12], self.segmenter.segments[5]) + self.assertIn(self.webvtt.captions[13], self.segmenter.segments[5]) + self.assertIn(self.webvtt.captions[14], self.segmenter.segments[5]) + # segment 7 should have caption 15 again (overlap) and 16 + self.assertEqual(len(self.segmenter.segments[6]), 2) + self.assertIn(self.webvtt.captions[14], self.segmenter.segments[6]) + self.assertIn(self.webvtt.captions[15], self.segmenter.segments[6]) + + def test_segment_content(self): + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR, 10) + + with open(os.path.join(OUTPUT_DIR, 'fileSequence0.webvtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + 'X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000', + '', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + '', + '00:00:07.000 --> 00:00:11.890', + 'Caption text #2' + ] + + self.assertListEqual(lines, expected_lines) + + def test_manifest_content(self): + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR, 10) + + with open(os.path.join(OUTPUT_DIR, 'prog_index.m3u8'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + '#EXTM3U', + '#EXT-X-TARGETDURATION:{}'.format(self.segmenter.seconds), + '#EXT-X-VERSION:3', + '#EXT-X-PLAYLIST-TYPE:VOD', + ] + + for i in range(7): + expected_lines.extend([ + '#EXTINF:30.00000', + 'fileSequence{}.webvtt'.format(i) + ]) + + expected_lines.append('#EXT-X-ENDLIST') + + for index, line in enumerate(expected_lines): + self.assertEqual(lines[index], line) + + def test_customize_mpegts(self): + self._parse_captions('sample.vtt') + self.segmenter.segment(self.webvtt, OUTPUT_DIR, mpegts=800000) + + with open(os.path.join(OUTPUT_DIR, 'fileSequence0.webvtt'), 'r', encoding='utf-8') as f: + lines = f.readlines() + self.assertIn('MPEGTS:800000', lines[1]) + + def test_segment_from_file(self): + self.segmenter.segment(os.path.join(SUBTITLES_DIR, 'sample.vtt'), OUTPUT_DIR), + self.assertEqual(self.segmenter.total_segments, 7) + + def test_segment_with_no_captions(self): + self.segmenter.segment(os.path.join(SUBTITLES_DIR, 'no_captions.vtt'), OUTPUT_DIR), + self.assertEqual(self.segmenter.total_segments, 0) + + def test_total_segments_readonly(self): + self.assertRaises( + AttributeError, + setattr, + WebVTTSegmenter(), + 'total_segments', + 5 + ) diff -Nru python-webvtt-0.4.3/tests/test_srt_parser.py python-webvtt-0.4.5/tests/test_srt_parser.py --- python-webvtt-0.4.3/tests/test_srt_parser.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_srt_parser.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,65 @@ +import webvtt + +from .generic import GenericParserTestCase + + +class SRTParserTestCase(GenericParserTestCase): + + def test_srt_parse_empty_file(self): + self.assertRaises( + webvtt.errors.MalformedFileError, + webvtt.from_srt, + self._get_file('empty.vtt') # We reuse this file as it is empty and serves the purpose. + ) + + def test_srt_invalid_format(self): + for i in range(1, 5): + self.assertRaises( + webvtt.errors.MalformedFileError, + webvtt.from_srt, + self._get_file('invalid_format{}.srt'.format(i)) + ) + + def test_srt_total_length(self): + self.assertEqual( + webvtt.from_srt(self._get_file('sample.srt')).total_length, + 23 + ) + + def test_srt_parse_captions(self): + self.assertTrue(webvtt.from_srt(self._get_file('sample.srt')).captions) + + def test_srt_missing_timeframe_line(self): + self.assertRaises( + webvtt.errors.MalformedCaptionError, + webvtt.from_srt, + self._get_file('missing_timeframe.srt') + ) + + def test_srt_empty_caption_text(self): + self.assertTrue(webvtt.from_srt(self._get_file('missing_caption_text.srt')).captions) + + def test_srt_empty_gets_removed(self): + captions = webvtt.from_srt(self._get_file('missing_caption_text.srt')).captions + self.assertEqual(len(captions), 4) + + def test_srt_invalid_timestamp(self): + self.assertRaises( + webvtt.errors.MalformedCaptionError, + webvtt.from_srt, + self._get_file('invalid_timeframe.srt') + ) + + def test_srt_timestamps_format(self): + vtt = webvtt.from_srt(self._get_file('sample.srt')) + self.assertEqual(vtt.captions[2].start, '00:00:11.890') + self.assertEqual(vtt.captions[2].end, '00:00:16.320') + + def test_srt_parse_get_caption_data(self): + vtt = webvtt.from_srt(self._get_file('one_caption.srt')) + self.assertEqual(vtt.captions[0].start_in_seconds, 0.5) + self.assertEqual(vtt.captions[0].start, '00:00:00.500') + self.assertEqual(vtt.captions[0].end_in_seconds, 7) + self.assertEqual(vtt.captions[0].end, '00:00:07.000') + self.assertEqual(vtt.captions[0].lines[0], 'Caption text #1') + self.assertEqual(len(vtt.captions[0].lines), 1) diff -Nru python-webvtt-0.4.3/tests/test_srt.py python-webvtt-0.4.5/tests/test_srt.py --- python-webvtt-0.4.3/tests/test_srt.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_srt.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,35 @@ +import os +import unittest +from shutil import rmtree, copy + +import webvtt + +from .generic import GenericParserTestCase + + +BASE_DIR = os.path.dirname(__file__) +OUTPUT_DIR = os.path.join(BASE_DIR, 'output') + + +class SRTCaptionsTestCase(GenericParserTestCase): + + def setUp(self): + os.makedirs(OUTPUT_DIR) + + def tearDown(self): + if os.path.exists(OUTPUT_DIR): + rmtree(OUTPUT_DIR) + + def test_convert_from_srt_to_vtt_and_back_gives_same_file(self): + copy(self._get_file('sample.srt'), OUTPUT_DIR) + + vtt = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'sample.srt')) + vtt.save_as_srt(os.path.join(OUTPUT_DIR, 'sample_converted.srt')) + + with open(os.path.join(OUTPUT_DIR, 'sample.srt'), 'r', encoding='utf-8') as f: + original = f.read() + + with open(os.path.join(OUTPUT_DIR, 'sample_converted.srt'), 'r', encoding='utf-8') as f: + converted = f.read() + + self.assertEqual(original.strip(), converted.strip()) diff -Nru python-webvtt-0.4.3/tests/test_webvtt_parser.py python-webvtt-0.4.5/tests/test_webvtt_parser.py --- python-webvtt-0.4.3/tests/test_webvtt_parser.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_webvtt_parser.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from .generic import GenericParserTestCase + +import webvtt +from webvtt.parsers import WebVTTParser +from webvtt.structures import Caption +from webvtt.errors import MalformedFileError, MalformedCaptionError + + +class WebVTTParserTestCase(GenericParserTestCase): + + def test_webvtt_parse_invalid_file(self): + self.assertRaises( + MalformedFileError, + webvtt.read, + self._get_file('invalid.vtt') + ) + + def test_webvtt_captions_not_found(self): + self.assertRaises( + FileNotFoundError, + webvtt.read, + 'some_file' + ) + + def test_webvtt_total_length(self): + self.assertEqual( + webvtt.read(self._get_file('sample.vtt')).total_length, + 64 + ) + + def test_webvtt_total_length_no_parser(self): + self.assertEqual( + webvtt.WebVTT().total_length, + 0 + ) + + def test_webvtt__parse_captions(self): + self.assertTrue(webvtt.read(self._get_file('sample.vtt')).captions) + + def test_webvtt_parse_empty_file(self): + self.assertRaises( + MalformedFileError, + webvtt.read, + self._get_file('empty.vtt') + ) + + def test_webvtt_parse_get_captions(self): + self.assertEqual( + len(webvtt.read(self._get_file('sample.vtt')).captions), + 16 + ) + + def test_webvtt_parse_invalid_timeframe_line(self): + self.assertRaises( + MalformedCaptionError, + webvtt.read, + self._get_file('invalid_timeframe.vtt') + ) + + def test_webvtt_parse_invalid_timeframe_in_cue_text(self): + vtt = webvtt.read(self._get_file('invalid_timeframe_in_cue_text.vtt')) + self.assertEqual(4, len(vtt.captions)) + self.assertEqual('', vtt.captions[1].text) + + def test_webvtt_parse_get_caption_data(self): + vtt = webvtt.read(self._get_file('one_caption.vtt')) + self.assertEqual(vtt.captions[0].start_in_seconds, 0.5) + self.assertEqual(vtt.captions[0].start, '00:00:00.500') + self.assertEqual(vtt.captions[0].end_in_seconds, 7) + self.assertEqual(vtt.captions[0].end, '00:00:07.000') + self.assertEqual(vtt.captions[0].lines[0], 'Caption text #1') + self.assertEqual(len(vtt.captions[0].lines), 1) + + def test_webvtt_caption_without_timeframe(self): + self.assertRaises( + MalformedCaptionError, + webvtt.read, + self._get_file('missing_timeframe.vtt') + ) + + def test_webvtt_caption_without_cue_text(self): + vtt = webvtt.read(self._get_file('missing_caption_text.vtt')) + self.assertEqual(len(vtt.captions), 5) + + def test_webvtt_timestamps_format(self): + vtt = webvtt.read(self._get_file('sample.vtt')) + self.assertEqual(vtt.captions[2].start, '00:00:11.890') + self.assertEqual(vtt.captions[2].end, '00:00:16.320') + + def test_parse_timestamp(self): + caption = Caption(start='02:03:11.890') + self.assertEqual( + caption.start_in_seconds, + 7391.89 + ) + + def test_captions_attribute(self): + self.assertListEqual([], webvtt.WebVTT().captions) + + def test_webvtt_timestamp_format(self): + self.assertTrue(WebVTTParser()._validate_timeframe_line('00:00:00.000 --> 00:00:00.000')) + self.assertTrue(WebVTTParser()._validate_timeframe_line('00:00.000 --> 00:00.000')) + + def test_metadata_headers(self): + vtt = webvtt.read(self._get_file('metadata_headers.vtt')) + self.assertEqual(len(vtt.captions), 2) + + def test_metadata_headers_multiline(self): + vtt = webvtt.read(self._get_file('metadata_headers_multiline.vtt')) + self.assertEqual(len(vtt.captions), 2) + + def test_parse_identifiers(self): + vtt = webvtt.read(self._get_file('using_identifiers.vtt')) + self.assertEqual(len(vtt.captions), 6) + + self.assertEqual(vtt.captions[1].identifier, 'second caption') + self.assertEqual(vtt.captions[2].identifier, None) + self.assertEqual(vtt.captions[3].identifier, '4') + + def test_parse_with_comments(self): + vtt = webvtt.read(self._get_file('comments.vtt')) + self.assertEqual(len(vtt.captions), 3) + self.assertListEqual( + vtt.captions[0].lines, + ['- Ta en kopp varmt te.', + '- Det är inte varmt.'] + ) + self.assertEqual( + vtt.captions[2].text, + '- Ta en kopp' + ) + + def test_parse_styles(self): + vtt = webvtt.read(self._get_file('styles.vtt')) + self.assertEqual(len(vtt.captions), 1) + self.assertEqual( + vtt.styles[0].text, + '::cue {background-image: linear-gradient(to bottom, dimgray, lightgray);color: papayawhip;}' + ) + + def test_clean_cue_tags(self): + vtt = webvtt.read(self._get_file('cue_tags.vtt')) + self.assertEqual( + vtt.captions[1].text, + 'Like a big-a pizza pie' + ) + self.assertEqual( + vtt.captions[2].text, + 'That\'s amore' + ) + + def test_parse_captions_with_bom(self): + vtt = webvtt.read(self._get_file('captions_with_bom.vtt')) + self.assertEqual(len(vtt.captions), 4) + + def test_empty_lines_are_not_included_in_result(self): + vtt = webvtt.read(self._get_file('netflix_chicas_del_cable.vtt')) + self.assertEqual(vtt.captions[0].text, "[Alba] En 1928,") + self.assertEqual( + vtt.captions[-2].text, + "Diez años no son suficientes\npara olvidarte..." + ) + + def test_can_parse_youtube_dl_files(self): + vtt = webvtt.read(self._get_file('youtube_dl.vtt')) + self.assertEqual( + "this will happen is I'm telling", + vtt.captions[2].text + ) diff -Nru python-webvtt-0.4.3/tests/test_webvtt.py python-webvtt-0.4.5/tests/test_webvtt.py --- python-webvtt-0.4.3/tests/test_webvtt.py 1970-01-01 00:00:00.000000000 +0000 +++ python-webvtt-0.4.5/tests/test_webvtt.py 2020-04-09 09:10:58.000000000 +0000 @@ -0,0 +1,395 @@ +import os +import io +import textwrap +from shutil import rmtree, copy + +import webvtt +from webvtt.structures import Caption, Style +from .generic import GenericParserTestCase +from webvtt.errors import MalformedFileError + + +BASE_DIR = os.path.dirname(__file__) +OUTPUT_DIR = os.path.join(BASE_DIR, 'output') + + +class WebVTTTestCase(GenericParserTestCase): + + def tearDown(self): + if os.path.exists(OUTPUT_DIR): + rmtree(OUTPUT_DIR) + + def test_create_caption(self): + caption = Caption('00:00:00.500', '00:00:07.000', ['Caption test line 1', 'Caption test line 2']) + self.assertEqual(caption.start, '00:00:00.500') + self.assertEqual(caption.start_in_seconds, 0.5) + self.assertEqual(caption.end, '00:00:07.000') + self.assertEqual(caption.end_in_seconds, 7) + self.assertEqual(caption.lines, ['Caption test line 1', 'Caption test line 2']) + + def test_write_captions(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('one_caption.vtt'), OUTPUT_DIR) + + out = io.StringIO() + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'one_caption.vtt')) + new_caption = Caption('00:00:07.000', '00:00:11.890', ['New caption text line1', 'New caption text line2']) + vtt.captions.append(new_caption) + vtt.write(out) + + out.seek(0) + lines = [line.rstrip() for line in out.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + '', + '00:00:07.000 --> 00:00:11.890', + 'New caption text line1', + 'New caption text line2' + ] + + self.assertListEqual(lines, expected_lines) + + def test_save_captions(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('one_caption.vtt'), OUTPUT_DIR) + + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'one_caption.vtt')) + new_caption = Caption('00:00:07.000', '00:00:11.890', ['New caption text line1', 'New caption text line2']) + vtt.captions.append(new_caption) + vtt.save() + + with open(os.path.join(OUTPUT_DIR, 'one_caption.vtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + '', + '00:00:07.000 --> 00:00:11.890', + 'New caption text line1', + 'New caption text line2' + ] + + self.assertListEqual(lines, expected_lines) + + def test_srt_conversion(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('one_caption.srt'), OUTPUT_DIR) + + vtt = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'one_caption.srt')) + vtt.save() + + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'one_caption.vtt'))) + + with open(os.path.join(OUTPUT_DIR, 'one_caption.vtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + ] + + self.assertListEqual(lines, expected_lines) + + def test_sbv_conversion(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('two_captions.sbv'), OUTPUT_DIR) + + vtt = webvtt.from_sbv(os.path.join(OUTPUT_DIR, 'two_captions.sbv')) + vtt.save() + + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'two_captions.vtt'))) + + with open(os.path.join(OUTPUT_DIR, 'two_captions.vtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + '00:00:00.378 --> 00:00:11.378', + 'Caption text #1', + '', + '00:00:11.378 --> 00:00:12.305', + 'Caption text #2 (line 1)', + 'Caption text #2 (line 2)', + ] + + self.assertListEqual(lines, expected_lines) + + def test_save_to_other_location(self): + target_path = os.path.join(OUTPUT_DIR, 'test_folder') + os.makedirs(target_path) + + webvtt.read(self._get_file('one_caption.vtt')).save(target_path) + self.assertTrue(os.path.exists(os.path.join(target_path, 'one_caption.vtt'))) + + def test_save_specific_filename(self): + target_path = os.path.join(OUTPUT_DIR, 'test_folder') + os.makedirs(target_path) + output_file = os.path.join(target_path, 'custom_name.vtt') + + webvtt.read(self._get_file('one_caption.vtt')).save(output_file) + self.assertTrue(os.path.exists(output_file)) + + def test_save_specific_filename_no_extension(self): + target_path = os.path.join(OUTPUT_DIR, 'test_folder') + os.makedirs(target_path) + output_file = os.path.join(target_path, 'custom_name') + + webvtt.read(self._get_file('one_caption.vtt')).save(output_file) + self.assertTrue(os.path.exists(os.path.join(target_path, 'custom_name.vtt'))) + + def test_caption_timestamp_update(self): + c = Caption('00:00:00.500', '00:00:07.000') + c.start = '00:00:01.750' + c.end = '00:00:08.250' + + self.assertEqual(c.start, '00:00:01.750') + self.assertEqual(c.end, '00:00:08.250') + + def test_caption_timestamp_format(self): + c = Caption('01:02:03.400', '02:03:04.500') + self.assertEqual(c.start, '01:02:03.400') + self.assertEqual(c.end, '02:03:04.500') + + c = Caption('02:03.400', '03:04.500') + self.assertEqual(c.start, '00:02:03.400') + self.assertEqual(c.end, '00:03:04.500') + + def test_caption_text(self): + c = Caption(text=['Caption line #1', 'Caption line #2']) + self.assertEqual( + c.text, + 'Caption line #1\nCaption line #2' + ) + + def test_caption_receive_text(self): + c = Caption(text='Caption line #1\nCaption line #2') + + self.assertEqual( + len(c.lines), + 2 + ) + self.assertEqual( + c.text, + 'Caption line #1\nCaption line #2' + ) + + def test_update_text(self): + c = Caption(text='Caption line #1') + c.text = 'Caption line #1 updated' + self.assertEqual( + c.text, + 'Caption line #1 updated' + ) + + def test_update_text_multiline(self): + c = Caption(text='Caption line #1') + c.text = 'Caption line #1\nCaption line #2' + + self.assertEqual( + len(c.lines), + 2 + ) + + self.assertEqual( + c.text, + 'Caption line #1\nCaption line #2' + ) + + def test_update_text_wrong_type(self): + c = Caption(text='Caption line #1') + + self.assertRaises( + AttributeError, + setattr, + c, + 'text', + 123 + ) + + def test_manipulate_lines(self): + c = Caption(text=['Caption line #1', 'Caption line #2']) + c.lines[0] = 'Caption line #1 updated' + self.assertEqual( + c.lines[0], + 'Caption line #1 updated' + ) + + def test_read_file_buffer(self): + with open(self._get_file('sample.vtt'), 'r', encoding='utf-8') as f: + vtt = webvtt.read_buffer(f) + self.assertIsInstance(vtt.captions, list) + + def test_read_memory_buffer(self): + payload = '' + with open(self._get_file('sample.vtt'), 'r', encoding='utf-8') as f: + payload = f.read() + + buffer = io.StringIO(payload) + vtt = webvtt.read_buffer(buffer) + self.assertIsInstance(vtt.captions, list) + + def test_read_memory_buffer_carriage_return(self): + """https://github.com/glut23/webvtt-py/issues/29""" + buffer = io.StringIO(textwrap.dedent('''\ + WEBVTT\r + \r + 00:00:00.500 --> 00:00:07.000\r + Caption text #1\r + \r + 00:00:07.000 --> 00:00:11.890\r + Caption text #2\r + \r + 00:00:11.890 --> 00:00:16.320\r + Caption text #3\r + ''')) + vtt = webvtt.read_buffer(buffer) + self.assertEqual(len(vtt.captions), 3) + + def test_read_malformed_buffer(self): + malformed_payloads = ['', 'MOCK MELFORMED CONTENT'] + for payload in malformed_payloads: + buffer = io.StringIO(payload) + with self.assertRaises(MalformedFileError): + webvtt.read_buffer(buffer) + + + def test_captions(self): + vtt = webvtt.read(self._get_file('sample.vtt')) + self.assertIsInstance(vtt.captions, list) + + def test_captions_prevent_write(self): + vtt = webvtt.read(self._get_file('sample.vtt')) + self.assertRaises( + AttributeError, + setattr, + vtt, + 'captions', + [] + ) + + def test_sequence_iteration(self): + vtt = webvtt.read(self._get_file('sample.vtt')) + self.assertIsInstance(vtt[0], Caption) + self.assertEqual(len(vtt), len(vtt.captions)) + + def test_save_no_filename(self): + vtt = webvtt.WebVTT() + self.assertRaises( + webvtt.errors.MissingFilenameError, + vtt.save + ) + + def test_malformed_start_timestamp(self): + self.assertRaises( + webvtt.errors.MalformedCaptionError, + Caption, + '01:00' + ) + + def test_set_styles_from_text(self): + style = Style() + style.text = '::cue(b) {\n color: peachpuff;\n}' + self.assertListEqual( + style.lines, + ['::cue(b) {', ' color: peachpuff;', '}'] + ) + + def test_get_styles_as_text(self): + style = Style() + style.lines = ['::cue(b) {', ' color: peachpuff;', '}'] + self.assertEqual( + style.text, + '::cue(b) {color: peachpuff;}' + ) + + def test_save_identifiers(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('using_identifiers.vtt'), OUTPUT_DIR) + + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'using_identifiers.vtt')) + vtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')) + + with open(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + '', + 'second caption', + '00:00:07.000 --> 00:00:11.890', + 'Caption text #2', + '', + '00:00:11.890 --> 00:00:16.320', + 'Caption text #3', + '', + '4', + '00:00:16.320 --> 00:00:21.580', + 'Caption text #4', + '', + '00:00:21.580 --> 00:00:23.880', + 'Caption text #5', + '', + '00:00:23.880 --> 00:00:27.280', + 'Caption text #6' + ] + + self.assertListEqual(lines, expected_lines) + + def test_save_updated_identifiers(self): + os.makedirs(OUTPUT_DIR) + copy(self._get_file('using_identifiers.vtt'), OUTPUT_DIR) + + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'using_identifiers.vtt')) + vtt.captions[0].identifier = 'first caption' + vtt.captions[1].identifier = None + vtt.captions[3].identifier = '44' + last_caption = Caption('00:00:27.280', '00:00:29.200', 'Caption text #7') + last_caption.identifier = 'last caption' + vtt.captions.append(last_caption) + vtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')) + + with open(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'), 'r', encoding='utf-8') as f: + lines = [line.rstrip() for line in f.readlines()] + + expected_lines = [ + 'WEBVTT', + '', + 'first caption', + '00:00:00.500 --> 00:00:07.000', + 'Caption text #1', + '', + '00:00:07.000 --> 00:00:11.890', + 'Caption text #2', + '', + '00:00:11.890 --> 00:00:16.320', + 'Caption text #3', + '', + '44', + '00:00:16.320 --> 00:00:21.580', + 'Caption text #4', + '', + '00:00:21.580 --> 00:00:23.880', + 'Caption text #5', + '', + '00:00:23.880 --> 00:00:27.280', + 'Caption text #6', + '', + 'last caption', + '00:00:27.280 --> 00:00:29.200', + 'Caption text #7' + ] + + self.assertListEqual(lines, expected_lines) diff -Nru python-webvtt-0.4.3/tests/webvtt_parser.py python-webvtt-0.4.5/tests/webvtt_parser.py --- python-webvtt-0.4.3/tests/webvtt_parser.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/webvtt_parser.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,164 +0,0 @@ -from .generic import GenericParserTestCase - -import webvtt -from webvtt.parsers import WebVTTParser -from webvtt.structures import Caption -from webvtt.errors import MalformedFileError, MalformedCaptionError - - -class WebVTTParserTestCase(GenericParserTestCase): - - def test_webvtt_parse_invalid_file(self): - self.assertRaises( - MalformedFileError, - webvtt.read, - self._get_file('invalid.vtt') - ) - - def test_webvtt_captions_not_found(self): - self.assertRaises( - FileNotFoundError, - webvtt.read, - 'some_file' - ) - - def test_webvtt_total_length(self): - self.assertEqual( - webvtt.read(self._get_file('sample.vtt')).total_length, - 64 - ) - - def test_webvtt_total_length_no_parser(self): - self.assertEqual( - webvtt.WebVTT().total_length, - 0 - ) - - def test_webvtt__parse_captions(self): - self.assertTrue(webvtt.read(self._get_file('sample.vtt')).captions) - - def test_webvtt_parse_empty_file(self): - self.assertRaises( - MalformedFileError, - webvtt.read, - self._get_file('empty.vtt') - ) - - def test_webvtt_parse_get_captions(self): - self.assertEqual( - len(webvtt.read(self._get_file('sample.vtt')).captions), - 16 - ) - - def test_webvtt_parse_invalid_timeframe_line(self): - self.assertRaises( - MalformedCaptionError, - webvtt.read, - self._get_file('invalid_timeframe.vtt') - ) - - def test_webvtt_parse_invalid_timeframe_in_cue_text(self): - self.assertRaises( - MalformedCaptionError, - webvtt.read, - self._get_file('invalid_timeframe_in_cue_text.vtt') - ) - - def test_webvtt_parse_get_caption_data(self): - vtt = webvtt.read(self._get_file('one_caption.vtt')) - self.assertEqual(vtt.captions[0].start_in_seconds, 0.5) - self.assertEqual(vtt.captions[0].start, '00:00:00.500') - self.assertEqual(vtt.captions[0].end_in_seconds, 7) - self.assertEqual(vtt.captions[0].end, '00:00:07.000') - self.assertEqual(vtt.captions[0].lines[0], 'Caption text #1') - self.assertEqual(len(vtt.captions[0].lines), 1) - - def test_webvtt_caption_without_timeframe(self): - self.assertRaises( - MalformedCaptionError, - webvtt.read, - self._get_file('missing_timeframe.vtt') - ) - - def test_webvtt_caption_without_cue_text(self): - vtt = webvtt.read(self._get_file('missing_caption_text.vtt')) - self.assertEqual(len(vtt.captions), 5) - - def test_webvtt_timestamps_format(self): - vtt = webvtt.read(self._get_file('sample.vtt')) - self.assertEqual(vtt.captions[2].start, '00:00:11.890') - self.assertEqual(vtt.captions[2].end, '00:00:16.320') - - def test_parse_timestamp(self): - caption = Caption(start='02:03:11.890') - self.assertEqual( - caption.start_in_seconds, - 7391.89 - ) - - def test_captions_attribute(self): - self.assertListEqual([], webvtt.WebVTT().captions) - - def test_webvtt_timestamp_format(self): - self.assertTrue(WebVTTParser()._validate_timeframe_line('00:00:00.000 --> 00:00:00.000')) - self.assertTrue(WebVTTParser()._validate_timeframe_line('00:00.000 --> 00:00.000')) - - def test_metadata_headers(self): - vtt = webvtt.read(self._get_file('metadata_headers.vtt')) - self.assertEqual(len(vtt.captions), 2) - - def test_metadata_headers_multiline(self): - vtt = webvtt.read(self._get_file('metadata_headers_multiline.vtt')) - self.assertEqual(len(vtt.captions), 2) - - def test_parse_identifiers(self): - vtt = webvtt.read(self._get_file('using_identifiers.vtt')) - self.assertEqual(len(vtt.captions), 6) - - self.assertEqual(vtt.captions[1].identifier, 'second caption') - self.assertEqual(vtt.captions[2].identifier, None) - self.assertEqual(vtt.captions[3].identifier, '4') - - def test_parse_with_comments(self): - vtt = webvtt.read(self._get_file('comments.vtt')) - self.assertEqual(len(vtt.captions), 3) - self.assertListEqual( - vtt.captions[0].lines, - ['- Ta en kopp varmt te.', - '- Det är inte varmt.'] - ) - self.assertEqual( - vtt.captions[2].text, - '- Ta en kopp' - ) - - def test_parse_styles(self): - vtt = webvtt.read(self._get_file('styles.vtt')) - self.assertEqual(len(vtt.captions), 1) - self.assertEqual( - vtt.styles[0].text, - '::cue {background-image: linear-gradient(to bottom, dimgray, lightgray);color: papayawhip;}' - ) - - def test_clean_cue_tags(self): - vtt = webvtt.read(self._get_file('cue_tags.vtt')) - self.assertEqual( - vtt.captions[1].text, - 'Like a big-a pizza pie' - ) - self.assertEqual( - vtt.captions[2].text, - 'That\'s amore' - ) - - def test_parse_captions_with_bom(self): - vtt = webvtt.read(self._get_file('captions_with_bom.vtt')) - self.assertEqual(len(vtt.captions), 4) - - def test_empty_lines_are_not_included_in_result(self): - vtt = webvtt.read(self._get_file('netflix_chicas_del_cable.vtt')) - self.assertEqual(vtt.captions[0].text, "[Alba] En 1928,") - self.assertEqual( - vtt.captions[-2].text, - "Diez años no son suficientes\npara olvidarte..." - ) diff -Nru python-webvtt-0.4.3/tests/webvtt.py python-webvtt-0.4.5/tests/webvtt.py --- python-webvtt-0.4.3/tests/webvtt.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tests/webvtt.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,377 +0,0 @@ -import os -import io -from shutil import rmtree, copy - -import webvtt -from webvtt.structures import Caption, Style -from .generic import GenericParserTestCase -from webvtt.errors import MalformedFileError - - -BASE_DIR = os.path.dirname(__file__) -OUTPUT_DIR = os.path.join(BASE_DIR, 'output') - - -class WebVTTTestCase(GenericParserTestCase): - - def tearDown(self): - if os.path.exists(OUTPUT_DIR): - rmtree(OUTPUT_DIR) - - def test_create_caption(self): - caption = Caption('00:00:00.500', '00:00:07.000', ['Caption test line 1', 'Caption test line 2']) - self.assertEqual(caption.start, '00:00:00.500') - self.assertEqual(caption.start_in_seconds, 0.5) - self.assertEqual(caption.end, '00:00:07.000') - self.assertEqual(caption.end_in_seconds, 7) - self.assertEqual(caption.lines, ['Caption test line 1', 'Caption test line 2']) - - def test_write_captions(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('one_caption.vtt'), OUTPUT_DIR) - - out = io.StringIO() - vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'one_caption.vtt')) - new_caption = Caption('00:00:07.000', '00:00:11.890', ['New caption text line1', 'New caption text line2']) - vtt.captions.append(new_caption) - vtt.write(out) - - out.seek(0) - lines = [line.rstrip() for line in out.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - '', - '00:00:07.000 --> 00:00:11.890', - 'New caption text line1', - 'New caption text line2' - ] - - self.assertListEqual(lines, expected_lines) - - def test_save_captions(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('one_caption.vtt'), OUTPUT_DIR) - - vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'one_caption.vtt')) - new_caption = Caption('00:00:07.000', '00:00:11.890', ['New caption text line1', 'New caption text line2']) - vtt.captions.append(new_caption) - vtt.save() - - with open(os.path.join(OUTPUT_DIR, 'one_caption.vtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - '', - '00:00:07.000 --> 00:00:11.890', - 'New caption text line1', - 'New caption text line2' - ] - - self.assertListEqual(lines, expected_lines) - - def test_srt_conversion(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('one_caption.srt'), OUTPUT_DIR) - - vtt = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'one_caption.srt')) - vtt.save() - - self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'one_caption.vtt'))) - - with open(os.path.join(OUTPUT_DIR, 'one_caption.vtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - ] - - self.assertListEqual(lines, expected_lines) - - def test_sbv_conversion(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('two_captions.sbv'), OUTPUT_DIR) - - vtt = webvtt.from_sbv(os.path.join(OUTPUT_DIR, 'two_captions.sbv')) - vtt.save() - - self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'two_captions.vtt'))) - - with open(os.path.join(OUTPUT_DIR, 'two_captions.vtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - '00:00:00.378 --> 00:00:11.378', - 'Caption text #1', - '', - '00:00:11.378 --> 00:00:12.305', - 'Caption text #2 (line 1)', - 'Caption text #2 (line 2)', - ] - - self.assertListEqual(lines, expected_lines) - - def test_save_to_other_location(self): - target_path = os.path.join(OUTPUT_DIR, 'test_folder') - os.makedirs(target_path) - - webvtt.read(self._get_file('one_caption.vtt')).save(target_path) - self.assertTrue(os.path.exists(os.path.join(target_path, 'one_caption.vtt'))) - - def test_save_specific_filename(self): - target_path = os.path.join(OUTPUT_DIR, 'test_folder') - os.makedirs(target_path) - output_file = os.path.join(target_path, 'custom_name.vtt') - - webvtt.read(self._get_file('one_caption.vtt')).save(output_file) - self.assertTrue(os.path.exists(output_file)) - - def test_save_specific_filename_no_extension(self): - target_path = os.path.join(OUTPUT_DIR, 'test_folder') - os.makedirs(target_path) - output_file = os.path.join(target_path, 'custom_name') - - webvtt.read(self._get_file('one_caption.vtt')).save(output_file) - self.assertTrue(os.path.exists(os.path.join(target_path, 'custom_name.vtt'))) - - def test_caption_timestamp_update(self): - c = Caption('00:00:00.500', '00:00:07.000') - c.start = '00:00:01.750' - c.end = '00:00:08.250' - - self.assertEqual(c.start, '00:00:01.750') - self.assertEqual(c.end, '00:00:08.250') - - def test_caption_timestamp_format(self): - c = Caption('01:02:03.400', '02:03:04.500') - self.assertEqual(c.start, '01:02:03.400') - self.assertEqual(c.end, '02:03:04.500') - - c = Caption('02:03.400', '03:04.500') - self.assertEqual(c.start, '00:02:03.400') - self.assertEqual(c.end, '00:03:04.500') - - def test_caption_text(self): - c = Caption(text=['Caption line #1', 'Caption line #2']) - self.assertEqual( - c.text, - 'Caption line #1\nCaption line #2' - ) - - def test_caption_receive_text(self): - c = Caption(text='Caption line #1\nCaption line #2') - - self.assertEqual( - len(c.lines), - 2 - ) - self.assertEqual( - c.text, - 'Caption line #1\nCaption line #2' - ) - - def test_update_text(self): - c = Caption(text='Caption line #1') - c.text = 'Caption line #1 updated' - self.assertEqual( - c.text, - 'Caption line #1 updated' - ) - - def test_update_text_multiline(self): - c = Caption(text='Caption line #1') - c.text = 'Caption line #1\nCaption line #2' - - self.assertEqual( - len(c.lines), - 2 - ) - - self.assertEqual( - c.text, - 'Caption line #1\nCaption line #2' - ) - - def test_update_text_wrong_type(self): - c = Caption(text='Caption line #1') - - self.assertRaises( - AttributeError, - setattr, - c, - 'text', - 123 - ) - - def test_manipulate_lines(self): - c = Caption(text=['Caption line #1', 'Caption line #2']) - c.lines[0] = 'Caption line #1 updated' - self.assertEqual( - c.lines[0], - 'Caption line #1 updated' - ) - - def test_read_file_buffer(self): - with open(self._get_file('sample.vtt'), 'r', encoding='utf-8') as f: - vtt = webvtt.read_buffer(f) - self.assertIsInstance(vtt.captions, list) - - def test_read_memory_buffer(self): - payload = '' - with open(self._get_file('sample.vtt'), 'r', encoding='utf-8') as f: - payload = f.read() - - buffer = io.StringIO(payload) - vtt = webvtt.read_buffer(buffer) - self.assertIsInstance(vtt.captions, list) - - def test_read_malformed_buffer(self): - malformed_payloads = ['', 'MOCK MELFORMED CONTENT'] - for payload in malformed_payloads: - buffer = io.StringIO(payload) - with self.assertRaises(MalformedFileError): - webvtt.read_buffer(buffer) - - - def test_captions(self): - vtt = webvtt.read(self._get_file('sample.vtt')) - self.assertIsInstance(vtt.captions, list) - - def test_captions_prevent_write(self): - vtt = webvtt.read(self._get_file('sample.vtt')) - self.assertRaises( - AttributeError, - setattr, - vtt, - 'captions', - [] - ) - - def test_sequence_iteration(self): - vtt = webvtt.read(self._get_file('sample.vtt')) - self.assertIsInstance(vtt[0], Caption) - self.assertEqual(len(vtt), len(vtt.captions)) - - def test_save_no_filename(self): - vtt = webvtt.WebVTT() - self.assertRaises( - webvtt.errors.MissingFilenameError, - vtt.save - ) - - def test_malformed_start_timestamp(self): - self.assertRaises( - webvtt.errors.MalformedCaptionError, - Caption, - '01:00' - ) - - def test_set_styles_from_text(self): - style = Style() - style.text = '::cue(b) {\n color: peachpuff;\n}' - self.assertListEqual( - style.lines, - ['::cue(b) {', ' color: peachpuff;', '}'] - ) - - def test_get_styles_as_text(self): - style = Style() - style.lines = ['::cue(b) {', ' color: peachpuff;', '}'] - self.assertEqual( - style.text, - '::cue(b) {color: peachpuff;}' - ) - - def test_save_identifiers(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('using_identifiers.vtt'), OUTPUT_DIR) - - vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'using_identifiers.vtt')) - vtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')) - - with open(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - '', - 'second caption', - '00:00:07.000 --> 00:00:11.890', - 'Caption text #2', - '', - '00:00:11.890 --> 00:00:16.320', - 'Caption text #3', - '', - '4', - '00:00:16.320 --> 00:00:21.580', - 'Caption text #4', - '', - '00:00:21.580 --> 00:00:23.880', - 'Caption text #5', - '', - '00:00:23.880 --> 00:00:27.280', - 'Caption text #6' - ] - - self.assertListEqual(lines, expected_lines) - - def test_save_updated_identifiers(self): - os.makedirs(OUTPUT_DIR) - copy(self._get_file('using_identifiers.vtt'), OUTPUT_DIR) - - vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'using_identifiers.vtt')) - vtt.captions[0].identifier = 'first caption' - vtt.captions[1].identifier = None - vtt.captions[3].identifier = '44' - last_caption = Caption('00:00:27.280', '00:00:29.200', 'Caption text #7') - last_caption.identifier = 'last caption' - vtt.captions.append(last_caption) - vtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')) - - with open(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'), 'r', encoding='utf-8') as f: - lines = [line.rstrip() for line in f.readlines()] - - expected_lines = [ - 'WEBVTT', - '', - 'first caption', - '00:00:00.500 --> 00:00:07.000', - 'Caption text #1', - '', - '00:00:07.000 --> 00:00:11.890', - 'Caption text #2', - '', - '00:00:11.890 --> 00:00:16.320', - 'Caption text #3', - '', - '44', - '00:00:16.320 --> 00:00:21.580', - 'Caption text #4', - '', - '00:00:21.580 --> 00:00:23.880', - 'Caption text #5', - '', - '00:00:23.880 --> 00:00:27.280', - 'Caption text #6', - '', - 'last caption', - '00:00:27.280 --> 00:00:29.200', - 'Caption text #7' - ] - - self.assertListEqual(lines, expected_lines) diff -Nru python-webvtt-0.4.3/tox.ini python-webvtt-0.4.5/tox.ini --- python-webvtt-0.4.3/tox.ini 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/tox.ini 2020-04-09 09:10:58.000000000 +0000 @@ -1,9 +1,17 @@ [tox] -envlist = - py34, - py35 - py36 +envlist = py34, py35, py36, py37, py38 + +[travis] +python = + 3.8: py38 + 3.7: py37 + 3.6: py36 + 3.5: py35 + 3.4: py34 [testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = pytest commands = - python setup.py test + pytest diff -Nru python-webvtt-0.4.3/.travis.yml python-webvtt-0.4.5/.travis.yml --- python-webvtt-0.4.3/.travis.yml 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/.travis.yml 2020-04-09 09:10:58.000000000 +0000 @@ -1,19 +1,11 @@ language: python +python: + - 3.8 + - 3.7 + - 3.6 + - 3.5 + - 3.4 -matrix: - include: - - python: 3.4 - env: TOX_ENV=py34 - - python: 3.5 - env: TOX_ENV=py35 - - python: 3.6 - env: TOX_ENV=py36 +install: pip install -U tox-travis -script: tox -e $TOX_ENV - -install: - - pip install -r requirements.txt - - pip install tox - -notifications: - email: false \ No newline at end of file +script: tox diff -Nru python-webvtt-0.4.3/webvtt/__init__.py python-webvtt-0.4.5/webvtt/__init__.py --- python-webvtt-0.4.3/webvtt/__init__.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/webvtt/__init__.py 2020-04-09 09:10:58.000000000 +0000 @@ -1,4 +1,4 @@ -__version__ = '0.4.3' +__version__ = '0.4.5' from .webvtt import * from .segmenter import * diff -Nru python-webvtt-0.4.3/webvtt/parsers.py python-webvtt-0.4.5/webvtt/parsers.py --- python-webvtt-0.4.3/webvtt/parsers.py 2019-11-22 17:32:21.000000000 +0000 +++ python-webvtt-0.4.5/webvtt/parsers.py 2020-04-09 09:10:58.000000000 +0000 @@ -51,7 +51,7 @@ def _read_content_lines(self, file_obj): - lines = [line.rstrip('\n') for line in file_obj.readlines()] + lines = [line.rstrip('\n\r') for line in file_obj.readlines()] if not lines: raise MalformedFileError('The file is empty.') @@ -133,7 +133,7 @@ SRT parser. """ - TIMEFRAME_LINE_PATTERN = re.compile('\s*(\d+:\d{2}:\d{2},\d{3})\s*-->\s*(\d+:\d{2}:\d{2},\d{3})') + TIMEFRAME_LINE_PATTERN = re.compile(r'\s*(\d+:\d{2}:\d{2},\d{3})\s*-->\s*(\d+:\d{2}:\d{2},\d{3})') PARSER_OPTIONS = { 'ignore_empty_captions': True @@ -155,9 +155,9 @@ WebVTT parser. """ - TIMEFRAME_LINE_PATTERN = re.compile('\s*((?:\d+:)?\d{2}:\d{2}.\d{3})\s*-->\s*((?:\d+:)?\d{2}:\d{2}.\d{3})') - COMMENT_PATTERN = re.compile('NOTE(?:\s.+|$)') - STYLE_PATTERN = re.compile('STYLE[ \t]*$') + TIMEFRAME_LINE_PATTERN = re.compile(r'\s*((?:\d+:)?\d{2}:\d{2}.\d{3})\s*-->\s*((?:\d+:)?\d{2}:\d{2}.\d{3})') + COMMENT_PATTERN = re.compile(r'NOTE(?:\s.+|$)') + STYLE_PATTERN = re.compile(r'STYLE[ \t]*$') def __init__(self): super().__init__() @@ -167,25 +167,24 @@ blocks = [] for index, line in enumerate(lines, start=1): - # clean up exstraneous whitespace - final_line = line.strip() - - # Remove empty lines - if final_line: + if line: if not blocks: blocks.append(Block(index)) if not blocks[-1].lines: + if not line.strip(): + continue blocks[-1].line_number = index - blocks[-1].lines.append(final_line) + blocks[-1].lines.append(line) else: blocks.append(Block(index)) # filter out empty blocks and skip signature - self.blocks = list(filter(lambda x: x.lines, blocks))[1:] + return list(filter(lambda x: x.lines, blocks))[1:] def _parse_cue_block(self, block): caption = Caption() cue_timings = None + additional_blocks = None for line_number, line in enumerate(block.lines): if self._is_cue_timings_line(line): @@ -196,8 +195,10 @@ raise MalformedCaptionError( '{} in line {}'.format(e, block.line_number + line_number)) else: - raise MalformedCaptionError( - '--> found in line {}'.format(block.line_number + line_number)) + additional_blocks = self._compute_blocks( + ['WEBVTT', ''] + block.lines[line_number:] + ) + break elif line_number == 0: caption.identifier = line else: @@ -205,16 +206,35 @@ caption.start = cue_timings[0] caption.end = cue_timings[1] - return caption + return caption, additional_blocks def _parse(self, lines): self.captions = [] - self._compute_blocks(lines) + blocks = self._compute_blocks(lines) + self._parse_blocks(blocks) + + def _is_empty(self, block): + is_empty = True + + for line in block.lines: + if line.strip() != "": + is_empty = False + + return is_empty + + def _parse_blocks(self, blocks): + for block in blocks: + # skip empty blocks + if self._is_empty(block): + continue - for block in self.blocks: if self._is_cue_block(block): - caption = self._parse_cue_block(block) + caption, additional_blocks = self._parse_cue_block(block) self.captions.append(caption) + + if additional_blocks: + self._parse_blocks(additional_blocks) + elif self._is_comment_block(block): continue elif self._is_style_block(block): @@ -259,7 +279,11 @@ YouTube SBV parser. """ - TIMEFRAME_LINE_PATTERN = re.compile('\s*(\d+:\d{2}:\d{2}.\d{3}),(\d+:\d{2}:\d{2}.\d{3})') + TIMEFRAME_LINE_PATTERN = re.compile(r'\s*(\d+:\d{2}:\d{2}.\d{3}),(\d+:\d{2}:\d{2}.\d{3})') + + PARSER_OPTIONS = { + 'ignore_empty_captions': True + } def _validate(self, lines): if not self._validate_timeframe_line(lines[0]):