diff -Nru guessit-0.8/debian/changelog guessit-0.11.0/debian/changelog --- guessit-0.8/debian/changelog 2014-07-21 20:37:19.000000000 +0000 +++ guessit-0.11.0/debian/changelog 2016-03-08 22:40:57.000000000 +0000 @@ -1,3 +1,30 @@ +guessit (0.11.0-1) unstable; urgency=medium + + * New upstream release (Closes: #796404) + * Remove XS-Testsuite field + * Fix spelling error in description + * Add a dependency on python3-all for autopkgtest (Closes: #806961) + * Use a https URI for Vcs-Git. + + -- Etienne Millon Tue, 25 Aug 2015 21:50:01 +0200 + +guessit (0.10.3-1) unstable; urgency=medium + + * New upstream release + * debian/watch: use pypi redirector + + -- Etienne Millon Tue, 28 Apr 2015 08:05:30 +0200 + +guessit (0.9.3-1) unstable; urgency=medium + + * New upstream release (Closes: #762957) + - add python{3,}-dateutil to Build-Depends + * Run everything under LC_ALL=C.UTF-8 so that clean works + * Add a short name for documentation theme license + * Bump Standards-Version to 3.9.6 (no changes) + + -- Etienne Millon Sun, 28 Sep 2014 19:59:29 +0200 + guessit (0.8-1) unstable; urgency=medium * New upstream release diff -Nru guessit-0.8/debian/control guessit-0.11.0/debian/control --- guessit-0.8/debian/control 2014-07-21 18:54:59.000000000 +0000 +++ guessit-0.11.0/debian/control 2016-03-08 22:40:37.000000000 +0000 @@ -10,6 +10,7 @@ dh-python, python-all (>= 2.6.6-6~), python-babelfish, + python-dateutil (>= 2.1), python-nose, python-pkg-resources, python-requests, @@ -18,45 +19,47 @@ python-yaml, python3-all, python3-babelfish, + python3-dateutil (>= 2.1), python3-nose, python3-pkg-resources, python3-requests, python3-setuptools, python3-stevedore, python3-yaml, -Standards-Version: 3.9.5 +Standards-Version: 3.9.6 Homepage: http://guessit.readthedocs.org/ -Vcs-Svn: svn://anonscm.debian.org/python-modules/packages/guessit/trunk/ -Vcs-Browser: http://anonscm.debian.org/viewvc/python-modules/packages/guessit/trunk/ +Vcs-Git: 
https://anonscm.debian.org/git/python-modules/packages/guessit.git +Vcs-Browser: https://anonscm.debian.org/cgit/python-modules/packages/guessit.git X-Python-Version: >= 2.6 X-Python3-Version: >= 3.3 -XS-Testsuite: autopkgtest Package: python-guessit Architecture: all Depends: + python-dateutil (>= 2.1), python-pkg-resources, ${misc:Depends}, ${python:Depends}, Description: library for guessing information from video filenames (Python 2) GuessIt is a Python library that tries to extract as much information as possible from a video file. It has a very powerful filename matcher that allows - to guess a lot of metadata from a video using only its filename. This matcher - works with both movies and TV show episodes. + one to guess a lot of metadata from a video using only its filename. This + matcher works with both movies and TV show episodes. . This package contains the Python 2 module. Package: python3-guessit Architecture: all Depends: + python3-dateutil (>= 2.1), python3-pkg-resources, ${misc:Depends}, ${python3:Depends}, Description: library for guessing information from video filenames (Python 3) GuessIt is a Python library that tries to extract as much information as possible from a video file. It has a very powerful filename matcher that allows - to guess a lot of metadata from a video using only its filename. This matcher - works with both movies and TV show episodes. + one to guess a lot of metadata from a video using only its filename. This + matcher works with both movies and TV show episodes. . This package contains the Python 3 module. . diff -Nru guessit-0.8/debian/copyright guessit-0.11.0/debian/copyright --- guessit-0.8/debian/copyright 2014-03-25 21:45:49.000000000 +0000 +++ guessit-0.11.0/debian/copyright 2015-10-21 17:28:30.000000000 +0000 @@ -20,7 +20,7 @@ 2010 Armin Ronacher 2011 Kenneth Reitz 2012 Nicolas Wack -License: +License: Flask-sphinx-themes-license Some rights reserved. . 
Redistribution and use in source and binary forms of the theme, with or diff -Nru guessit-0.8/debian/.git-dpm guessit-0.11.0/debian/.git-dpm --- guessit-0.8/debian/.git-dpm 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/debian/.git-dpm 2016-03-08 19:39:48.000000000 +0000 @@ -0,0 +1,11 @@ +# see git-dpm(1) from git-dpm package +dce2eb4b966def3b5a8e8ea50c82cad7334b171f +dce2eb4b966def3b5a8e8ea50c82cad7334b171f +5533c0e4beff88d470afc9dd6f15a54a97c2b98d +5533c0e4beff88d470afc9dd6f15a54a97c2b98d +guessit_0.11.0.orig.tar.gz +de8e46cab4a9faa33c91244e8c200fecca7c1845 +148248 +debianTag="debian/%e%v" +patchedTag="patched/%e%v" +upstreamTag="upstream/%e%u" diff -Nru guessit-0.8/debian/patches/test_hash.patch guessit-0.11.0/debian/patches/test_hash.patch --- guessit-0.8/debian/patches/test_hash.patch 2014-03-09 16:02:46.000000000 +0000 +++ guessit-0.11.0/debian/patches/test_hash.patch 2016-03-08 19:39:48.000000000 +0000 @@ -1,4 +1,6 @@ +From dce2eb4b966def3b5a8e8ea50c82cad7334b171f Mon Sep 17 00:00:00 2001 From: Etienne Millon +Date: Thu, 8 Oct 2015 09:15:27 -0700 Subject: Skip tests that need the '1MB' file In the git repository, there is a file named '1MB' (of the same size) on which @@ -10,9 +12,16 @@ Forwarded: not-needed Last-Update: 2014-03-04 +Patch-Name: test_hash.patch +--- + guessit/test/test_hashes.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/guessit/test/test_hashes.py b/guessit/test/test_hashes.py +index c6a5c35..3af7454 100644 --- a/guessit/test/test_hashes.py +++ b/guessit/test/test_hashes.py -@@ -26,10 +26,10 @@ +@@ -26,10 +26,10 @@ from guessit.test.guessittest import * class TestHashes(TestGuessit): def test_hashes(self): hashes = ( diff -Nru guessit-0.8/debian/rules guessit-0.11.0/debian/rules --- guessit-0.8/debian/rules 2014-03-28 19:01:15.000000000 +0000 +++ guessit-0.11.0/debian/rules 2015-10-21 17:28:30.000000000 +0000 @@ -1,15 +1,14 @@ #!/usr/bin/make -f export PYBUILD_NAME=guessit +export LC_ALL=C.UTF-8 %: 
dh $@ --with=python2,python3 --buildsystem=pybuild override_dh_install: dh_install + dh_lintian rm -r debian/python-guessit/usr/bin/ override_dh_installchangelogs: dh_installchangelogs HISTORY.rst - -override_dh_auto_test: - LC_ALL=C.UTF-8 dh_auto_test diff -Nru guessit-0.8/debian/source/lintian-overrides guessit-0.11.0/debian/source/lintian-overrides --- guessit-0.8/debian/source/lintian-overrides 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/debian/source/lintian-overrides 2015-10-21 17:28:30.000000000 +0000 @@ -0,0 +1,2 @@ +# False positive (#796924) +guessit source: source-is-missing docs/_themes/__pycache__/flask_theme_support.cpython-34.pyc diff -Nru guessit-0.8/debian/tests/control guessit-0.11.0/debian/tests/control --- guessit-0.8/debian/tests/control 2014-03-09 16:02:46.000000000 +0000 +++ guessit-0.11.0/debian/tests/control 2016-03-08 22:40:37.000000000 +0000 @@ -3,5 +3,5 @@ Restrictions: allow-stderr Tests: unittests3 -Depends: @, python3-yaml, python3-nose +Depends: @, python3-all, python3-yaml, python3-nose Restrictions: allow-stderr diff -Nru guessit-0.8/debian/watch guessit-0.11.0/debian/watch --- guessit-0.8/debian/watch 2013-11-25 20:26:41.000000000 +0000 +++ guessit-0.11.0/debian/watch 2015-10-21 17:28:30.000000000 +0000 @@ -1,2 +1,3 @@ version=3 -https://pypi.python.org/packages/source/g/guessit/guessit-(.*).tar.gz +opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +http://pypi.debian.net/guessit/guessit-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) diff -Nru guessit-0.8/docs/_build/html/api/guess.html guessit-0.11.0/docs/_build/html/api/guess.html --- guessit-0.8/docs/_build/html/api/guess.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/api/guess.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,225 +0,0 @@ - - - - - - - - Guess — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Guess

-
-
-class guessit.guess.Guess(*args, **kwargs)
-

A Guess is a dictionary which has an associated confidence for each of -its values.

-

As it is a subclass of dict, you can use it everywhere you expect a -simple dict.

-
-
-metadata(prop=None)
-

Return the metadata associated with the given property name

-

If no property name is given, get the global_metadata

-
- -
-
-nice_string(advanced=False)
-

Return a string with the property names and their values, -that also displays the associated confidence to each property.

-

FIXME: doc with param

-
- -
-
-to_dict(advanced=False)
-

Return the guess as a dict containing only base types, ie: -where dates, languages, countries, etc. are converted to strings.

-

if advanced is True, return the data as a json string containing -also the raw information of the properties.

-
- -
-
-update_highest_confidence(other)
-

Update this guess with the values from the given one. In case -there is property present in both, only the one with the highest one -is kept.

-
- -
- -
-
-guessit.guess.choose_int(g1, g2)
-

Function used by merge_similar_guesses to choose between 2 possible -properties when they are integers.

-
- -
-
-guessit.guess.choose_string(g1, g2)
-

Function used by merge_similar_guesses to choose between 2 possible -properties when they are strings.

-

If the 2 strings are similar, or one is contained in the other, the latter is returned -with an increased confidence.

-

If the 2 strings are dissimilar, the one with the higher confidence is returned, with -a weaker confidence.

-

Note that here, ‘similar’ means that 2 strings are either equal, or that they -differ very little, such as one string being the other one with the ‘the’ word -prepended to it.

-
>>> s(choose_string(('Hello', 0.75), ('World', 0.5)))
-('Hello', 0.25)
-
-
-
>>> s(choose_string(('Hello', 0.5), ('hello', 0.5)))
-('Hello', 0.75)
-
-
-
>>> s(choose_string(('Hello', 0.4), ('Hello World', 0.4)))
-('Hello', 0.64)
-
-
-
>>> s(choose_string(('simpsons', 0.5), ('The Simpsons', 0.5)))
-('The Simpsons', 0.75)
-
-
-
- -
-
-guessit.guess.merge_similar_guesses(guesses, prop, choose)
-

Take a list of guesses and merge those which have the same properties, -increasing or decreasing the confidence depending on whether their values -are similar.

-
- -
-
-guessit.guess.merge_all(guesses, append=None)
-

Merge all the guesses in a single result, remove very unlikely values, -and return it. -You can specify a list of properties that should be appended into a list -instead of being merged.

-
>>> s(merge_all([ Guess({'season': 2}, confidence=0.6),
-...               Guess({'episodeNumber': 13}, confidence=0.8) ])
-... ) == {'season': 2, 'episodeNumber': 13}
-True
-
-
-
>>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02),
-...               Guess({'season': 1}, confidence=0.2) ])
-... ) == {'season': 1}
-True
-
-
-
>>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8),
-...               Guess({'releaseGroup': '2HD'}, confidence=0.8) ],
-...             append=['other'])
-... ) == {'releaseGroup': '2HD', 'other': ['PROPER']}
-True
-
-
-
- -
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/api/matcher.html guessit-0.11.0/docs/_build/html/api/matcher.html --- guessit-0.8/docs/_build/html/api/matcher.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/api/matcher.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,142 +0,0 @@ - - - - - - - - Matchers — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Matchers

-
-
-class guessit.matcher.IterativeMatcher(filename, options=None, **kwargs)
-

An iterative matcher tries to match different patterns that appear -in the filename.

-

The filetype argument indicates which type of file you want to match. -If it is undefined, the matcher will try to see whether it can guess -that the file corresponds to an episode, or otherwise will assume it is -a movie.

-

The recognized filetype values are: -['subtitle', 'info', 'movie', 'moviesubtitle', 'movieinfo', 'episode', -'episodesubtitle', 'episodeinfo']

-

options is a dict of options values to be passed to the transformations used -by the matcher.

-

The IterativeMatcher works mainly in 2 steps:

-

First, it splits the filename into a match_tree, which is a tree of groups -which have a semantic meaning, such as episode number, movie title, -etc...

-

The match_tree created looks like the following:

-
0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
-0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
-0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
-__________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
-xxxxxttttttttttttt               ffffff  vvvv    xxxxxx  ll lll     xx xxx         ccc
-[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv
-
-
-

The first 3 lines indicates the group index in which a char in the -filename is located. So for instance, x264 (in the middle) is the group (0, 4, 1), and -it corresponds to a video codec, denoted by the letter v in the 4th line. -(for more info, see guess.matchtree.to_string)

-

Second, it tries to merge all this information into a single object -containing all the found properties, and does some (basic) conflict -resolution when they arise.

-
- -
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/api/matchtree.html guessit-0.11.0/docs/_build/html/api/matchtree.html --- guessit-0.8/docs/_build/html/api/matchtree.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/api/matchtree.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,341 +0,0 @@ - - - - - - - - MatchTree — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

MatchTree

-
-
-class guessit.matchtree.BaseMatchTree(string='', span=None, parent=None)
-

A BaseMatchTree is a tree covering the filename, where each -node represents a substring in the filename and can have a Guess -associated with it that contains the information that has been guessed -in this node. Nodes can be further split into subnodes until a proper -split has been found.

-
-
Each node has the following attributes:
-
    -
  • string = the original string of which this node represents a region
  • -
  • span = a pair of (begin, end) indices delimiting the substring
  • -
  • parent = parent node
  • -
  • children = list of children nodes
  • -
  • guess = Guess()
  • -
-
-
-

BaseMatchTrees are displayed in the following way:

-
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
->>> print(guessit.IterativeMatcher(path).match_tree)
-000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-                 011112           011112000011111222222222222222222 000
-                                                 011112222222222222
-                                                      0000011112222
-                                                      01112    0111
-Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
-       tttttttttt yyyy             yyyy     fffff ssss aaa vvvv rrr ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

The last line contains the filename, which you can use a reference. -The previous line contains the type of property that has been found. -The line before that contains the filename, where all the found groups -have been blanked. Basically, what is left on this line are the leftover -groups which could not be identified.

-

The lines before that indicate the indices of the groups in the tree.

-

For instance, the part of the filename ‘BDRip’ is the leaf with index -(2, 2, 1) (read from top to bottom), and its meaning is ‘format’ -(as shown by the f‘s on the last-but-one line).

-
-
-add_child(span)
-

Add a new child node to this node with the given span.

-
- -
-
-clean_value
-

Return a cleaned value of the matched substring, with better -presentation formatting (punctuation marks removed, duplicate -spaces, ...)

-
- -
-
-depth
-

Return the depth of this node.

-
- -
-
-get_partition_spans(indices)
-

Return the list of absolute spans for the regions of the original -string defined by splitting this node at the given indices (relative -to this node)

-
- -
-
-info
-

Return a dict containing all the info guessed by this node, -subnodes included.

-
- -
-
-is_leaf()
-

Return whether this node is a leaf or not.

-
- -
-
-leaves()
-

Return a generator over all the nodes that are leaves.

-
- -
-
-next_leaf(leaf)
-

Return next leaf for this node

-
- -
-
-next_leaves(leaf)
-

Return next leaves for this node

-
- -
-
-node_at(idx)
-

Return the node at the given index in the subtree rooted at -this node.

-
- -
-
-node_idx
-

Return this node’s index in the tree, as a tuple. -If this node is the root of the tree, then return ().

-
- -
-
-nodes()
-

Return all the nodes and subnodes in this tree.

-
- -
-
-nodes_at_depth(depth)
-

Return all the nodes at a given depth in the tree

-
- -
-
-partition(indices)
-

Partition this node by splitting it at the given indices, -relative to this node.

-
- -
-
-previous_leaf(leaf)
-

Return previous leaf for this node

-
- -
-
-previous_leaves(leaf)
-

Return previous leaves for this node

-
- -
-
-root
-

Return the root node of the tree.

-
- -
-
-to_string()
-

Return a readable string representation of this tree.

-
-
The result is a multi-line string, where the lines are:
-
    -
  • line 1 -> N-2: each line contains the nodes at the given depth in the tree
  • -
  • line N-2: original string where all the found groups have been blanked
  • -
  • line N-1: type of property that has been found
  • -
  • line N: the original string, which you can use a reference.
  • -
-
-
-
- -
-
-value
-

Return the substring that this node matches.

-
- -
- -
-
-class guessit.matchtree.MatchTree(string='', span=None, parent=None)
-

The MatchTree contains a few “utility” methods which are not necessary -for the BaseMatchTree, but add a lot of convenience for writing -higher-level rules.

-
-
-first_leaf_containing(property_name)
-

Return the first leaf containing the given property.

-
- -
-
-is_explicit()
-

Return whether the group was explicitly enclosed by -parentheses/square brackets/etc.

-
- -
-
-leaves_containing(property_name)
-

Return a generator of leaves that guessed the given property.

-
- -
-
-matched()
-

Return a single guess that contains all the info found in the -nodes of this tree, trying to merge properties as good as possible.

-
- -
-
-previous_leaves_containing(node, property_name)
-

Return a generator of leaves containing the given property that are -before the given node (in the string).

-
- -
-
-previous_unidentified_leaves(node)
-

Return a generator of non-empty leaves that are before the given -node (in the string).

-
- -
-
-unidentified_leaves(valid=<function MatchTree.<lambda> at 0x110744268>)
-

Return a generator of leaves that are not empty.

-
- -
- -
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/dev/internals.html guessit-0.11.0/docs/_build/html/dev/internals.html --- guessit-0.8/docs/_build/html/dev/internals.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/dev/internals.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,264 +0,0 @@ - - - - - - - - Understanding the MatchTree — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Understanding the MatchTree

-

The basic structure that the filename detection component uses is the -MatchTree. A MatchTree is a tree covering the filename, where each -node represent a substring in the filename and can have a Guess -associated with it that contains the information that has been guessed -in this node. Nodes can be further split into subnodes until a proper -split has been found.

-

This makes it so that all the leaves concatenated will give you back -the original filename. But enough theory, let’s look at an example:

-
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
->>> print guessit.IterativeMatcher(path).match_tree
-000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-                 011112           011112000000000000000000000000111
-                                        000000000000000000011112
-                                        0000000000111122222
-                                        0000111112    01112
-Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
-       tttttttttt yyyy             yyyy     fffff ssss aaa vvvv rrr ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

The last line contains the filename, which you can use a reference. -The previous line contains the type of property that has been found. -The line before that contains the filename, where all the found groups -have been blanked. Basically, what is left on this line are the leftover -groups which could not be identified.

-

The lines before that indicate the indices of the groups in the tree.

-

For instance, the part of the filename ‘BDRip’ is the leaf with index -(2, 2, 0, 0, 0, 1) (read from top to bottom), and its meaning is ‘format’ -(as shown by the f‘s on the last-but-one line).

-
-
-

What does the IterativeMatcher do?

-

The goal of the api/matcher is to take a MatchTree which -contains no information (yet!) at the beginning, and apply a succession of -rules to try to guess parts of the filename. These rules are called -transformations and work in-place on the tree, splitting into new leaves -and updating the nodes’s guesses when it finds some information.

-

Let’s look at what happens when matching the previous filename.

-
-

Splitting into path components

-

First, we split the filename into folders + basename + extension -This gives us the following tree, which has 4 leaves (from 0 to 3):

-
000000 1111111111111111 2222222222222222222222222222222222222222222 333
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-
-
-

Splitting into explicit groups

-

Then, we want to split each of those groups into “explicit” groups, i.e.: -groups which are enclosed in parentheses, square brackets, curly braces, etc.:

-
000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.___
-                                                                    ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

As you can see, the containing folder has been split into 2 sub-groups, -and the basename into 3 groups (separated by the year information).

-

Note that we also got the information from the extension, as you can see -above.

-
-
-

Finding interesting patterns

-

Now that this first split has been made, we can start finding some known -patterns which we can identify in the filename. -That is the main objective of the IterativeMatcher, which will run -a series of transformations which can identify groups in the filename and -will annotate the corresponding nodes.

-

For instance, the year:

-
000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-                 011112           011112
-Movies/Dark City (____)/Dark.City.(____).DC.BDRip.720p.DTS.X264-CHD.___
-                  yyyy             yyyy                             ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

Then, known properties usually found in video filenames:

-
000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-                 011112           011112000000000000000000000000111
-                                        000000000000000000011112
-                                        0000000000111122222
-                                        0000111112    01112
-Movies/Dark City (____)/Dark.City.(____).DC._____.____.___.____-___.___
-                  yyyy             yyyy     fffff ssss aaa vvvv rrr ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

As you can see, this starts to branch pretty quickly, as each found group -splits a leaf into further leaves. In this case, that gives us the -year (1998), the format (BDRip), the screen size (720p), the video codec -(x264) and the release group (CHD).

-
-
-

Using positional rules to find the ‘title’ property

-

Now that we found all the known patterns that we could, it is time to try -to guess what is the title of the movie. This is done by looking at which -groups in the filename are still unidentified, and trying to guess which -one corresponds to the title by looking at their position:

-
000000 1111111111111111 2222222222222222222222222222222222222222222 333
-000000 0000000000111111 0000000000111111222222222222222222222222222 000
-                 011112           011112000000000000000000000000111
-                                        000000000000000000011112
-                                        0000000000111122222
-                                        0000111112    01112
-Movies/__________(____)/Dark.City.(____).DC._____.____.___.____-___.___
-       tttttttttt yyyy             yyyy     fffff ssss aaa vvvv rrr ccc
-Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-
-
-

In this case, as the containing folder is composed of 2 groups, the second -of which is the year, we can (usually) safely assume that the first one -corresponds to the movie title.

-
-
-
-

Merging all the results in a MatchTree to give a final Guess

-

Once that we have matched as many groups as we could, the job is not done yet. -Indeed, every leaf of the tree that we could identify contains the found property -in its guess, but what we want at the end is to have a single Guess containing -all the information.

-

There are some simple strategies implemented to try to deal with conflicts -and/or duplicate properties. In our example, ‘year’ appears twice, but -as it has the same value, so it will be merged into a single ‘year’ property, -but with a confidence that represents the combined confidence of both guesses. -If the properties were conflicting, we would take the one with the highest -confidence and lower it accordingly.

-

Here:

-
>>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
->>> print guessit.guess_movie_info(path)
-{'videoCodec': 'h264', 'container': 'mkv', 'format': 'BluRay',
-'title': 'Dark City', 'releaseGroup': 'CHD', 'screenSize': '720p',
-'year': 1998, 'type': 'movie', 'audioCodec': 'DTS'}
-
-
-

And that gives you your final guess!

-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

-

Table Of Contents

- -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/genindex.html guessit-0.11.0/docs/_build/html/genindex.html --- guessit-0.8/docs/_build/html/genindex.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/genindex.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,392 +0,0 @@ - - - - - - - - - Index — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - -
-
-
-
- - -

Index

- -
- A - | B - | C - | D - | F - | G - | I - | L - | M - | N - | P - | R - | T - | U - | V - -
-

A

- - -
- -
add_child() (guessit.matchtree.BaseMatchTree method) -
- -
- -

B

- - -
- -
BaseMatchTree (class in guessit.matchtree) -
- -
- -

C

- - - -
- -
choose_int() (in module guessit.guess) -
- - -
choose_string() (in module guessit.guess) -
- -
- -
clean_value (guessit.matchtree.BaseMatchTree attribute) -
- -
- -

D

- - -
- -
depth (guessit.matchtree.BaseMatchTree attribute) -
- -
- -

F

- - -
- -
first_leaf_containing() (guessit.matchtree.MatchTree method) -
- -
- -

G

- - - -
- -
get_partition_spans() (guessit.matchtree.BaseMatchTree method) -
- - -
Guess (class in guessit.guess) -
- - -
guessit.guess (module) -
- -
- -
guessit.matcher (module) -
- - -
guessit.matchtree (module) -
- -
- -

I

- - - -
- -
info (guessit.matchtree.BaseMatchTree attribute) -
- - -
is_explicit() (guessit.matchtree.MatchTree method) -
- -
- -
is_leaf() (guessit.matchtree.BaseMatchTree method) -
- - -
IterativeMatcher (class in guessit.matcher) -
- -
- -

L

- - - -
- -
leaves() (guessit.matchtree.BaseMatchTree method) -
- -
- -
leaves_containing() (guessit.matchtree.MatchTree method) -
- -
- -

M

- - - -
- -
matched() (guessit.matchtree.MatchTree method) -
- - -
MatchTree (class in guessit.matchtree) -
- - -
merge_all() (in module guessit.guess) -
- -
- -
merge_similar_guesses() (in module guessit.guess) -
- - -
metadata() (guessit.guess.Guess method) -
- -
- -

N

- - - -
- -
next_leaf() (guessit.matchtree.BaseMatchTree method) -
- - -
next_leaves() (guessit.matchtree.BaseMatchTree method) -
- - -
nice_string() (guessit.guess.Guess method) -
- - -
node_at() (guessit.matchtree.BaseMatchTree method) -
- -
- -
node_idx (guessit.matchtree.BaseMatchTree attribute) -
- - -
nodes() (guessit.matchtree.BaseMatchTree method) -
- - -
nodes_at_depth() (guessit.matchtree.BaseMatchTree method) -
- -
- -

P

- - - -
- -
partition() (guessit.matchtree.BaseMatchTree method) -
- - -
previous_leaf() (guessit.matchtree.BaseMatchTree method) -
- - -
previous_leaves() (guessit.matchtree.BaseMatchTree method) -
- -
- -
previous_leaves_containing() (guessit.matchtree.MatchTree method) -
- - -
previous_unidentified_leaves() (guessit.matchtree.MatchTree method) -
- -
- -

R

- - -
- -
root (guessit.matchtree.BaseMatchTree attribute) -
- -
- -

T

- - - -
- -
to_dict() (guessit.guess.Guess method) -
- -
- -
to_string() (guessit.matchtree.BaseMatchTree method) -
- -
- -

U

- - - -
- -
unidentified_leaves() (guessit.matchtree.MatchTree method) -
- -
- -
update_highest_confidence() (guessit.guess.Guess method) -
- -
- -

V

- - -
- -
value (guessit.matchtree.BaseMatchTree attribute) -
- -
- - - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/index.html guessit-0.11.0/docs/_build/html/index.html --- guessit-0.8/docs/_build/html/index.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/index.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,306 +0,0 @@ - - - - - - - - GuessIt — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

GuessIt

-

Release v0.7.2.dev0 (Installation)

-

GuessIt is a python library that tries to extract as much information as -possible from a video file.

-

It has a powerful filename matcher that allows to guess a lot of -metadata from a video using only its filename. This matcher works with -both movies and tv shows episodes.

-

For example, GuessIt can do the following:

-
$ guessit "Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi"
-For: Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi
-GuessIt found: {
-    [1.00] "mimetype": "video/x-msvideo",
-    [0.80] "episodeNumber": 3,
-    [0.80] "videoCodec": "XviD",
-    [1.00] "container": "avi",
-    [1.00] "format": "HDTV",
-    [0.70] "series": "Treme",
-    [0.50] "title": "Right Place, Wrong Time",
-    [0.80] "releaseGroup": "NoTV",
-    [0.80] "season": 1,
-    [1.00] "type": "episode"
-}
-
-
-
-

Features

-

At the moment, the filename matcher is able to recognize the following -property types:

-
[ title,                             # for movies and episodes
-  series, season,                    # for episodes only
-  episodeNumber, episodeDetails,     # for episodes only
-  date, year,                        # 'date' instance of datetime.date
-  language, subtitleLanguage,        # instances of babelfish.Language
-  country,                           # instances of babelfish.Country
-  fileSize, duration,                # when detecting video file metadata
-  container, format,
-  videoCodec, audioCodec,
-  videoProfile, audioProfile,
-  audioChannels, screenSize,
-  releaseGroup, website,
-  cdNumber, cdNumberTotal,
-  filmNumber, filmSeries,
-  bonusNumber, edition,
-  idNumber,                          # tries to identify a hash or a serial number
-  other
-  ]
-
-
-

Guessit also allows you to compute a whole lot of hashes from a file, -namely all the ones you can find in the hashlib python module (md5, -sha1, ...), but also the Media Player Classic hash that is used (amongst -others) by OpenSubtitles and SMPlayer, as well as the ed2k hash.

-

If you have the ‘guess-language’ python package installed, GuessIt can also -analyze a subtitle file’s contents and detect which language it is written in.

-

If you have the ‘enzyme’ python package installed, GuessIt can also detect the -properties from the actual video file metadata.

-
-
-

User Guide

-

This part of the documentation, which is mostly prose, shows how to use -Guessit both from the command-line and as a python module which you can -use in your own projects.

- -
-
-

Web Service API

-

The guessit.io server also provides a free webservice that allows you to perform -filename detection, even if you don’t have python installed (eg: you need to use it -from an Android app, or NodeJS, etc.).

-

You can look at the documentation web API here: http://api.guessit.io

-
-
-

Developer Guide

-

If you want to contribute to the project, this part of the documentation is for -you.

- -

You may also want to familiarize yourself with the following classes:

- -
-
-

Support

-

The project website for GuessIt is hosted at ReadTheDocs. -There you will also find the User guide and Developer documentation.

-

This project is hosted on GitHub: https://github.com/wackou/guessit

-

Please report issues and/or feature requests via the bug tracker.

-

You can also report issues using the command-line tool:

-
$ guessit --bug "filename.that.fails.avi"
-
-
-
-
-

Contribute

-

GuessIt is under active development, and contributions are more than welcome!

-
    -
  1. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. -There is a Contributor Friendly tag for issues that should be ideal for people who are not very -familiar with the codebase yet.
  2. -
  3. Fork the repository on Github to start making your changes to the master -branch (or branch off of it).
  4. -
  5. Write a test which shows that the bug was fixed or that the feature works as expected.
  6. -
  7. Send a pull request and bug the maintainer until it gets merged and published. :)
  8. -
-
-
-

License

-

GuessIt is licensed under the LGPLV3 license.

-
-
- - -
-
-
-
-
- - -

- -

- -

- - Build status - - - - License - -

- -

- - Travis-CI - - - - Code coverage - -

- -

- GuessIt is a python library that tries to extract and/or guess - as much information as possible from a file. -

- - -

Useful Links

- - -

Donate

-

-If you like GuessIt, please consider making a donation in -bitcoins or in -litecoins. -

- - - - - - - -

- - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/presentation.html guessit-0.11.0/docs/_build/html/presentation.html --- guessit-0.8/docs/_build/html/presentation.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/presentation.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ - - - - - - - - Features — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - -
-
-
-
- -

GuessIt is a python library that tries to extract as much information as -possible from a video file.

-

It has a powerful filename matcher that allows one to guess a lot of -metadata from a video using only its filename. This matcher works with -both movies and TV show episodes.

-

For example, GuessIt can do the following:

-
$ guessit "Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi"
-For: Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi
-GuessIt found: {
-    [1.00] "mimetype": "video/x-msvideo",
-    [0.80] "episodeNumber": 3,
-    [0.80] "videoCodec": "XviD",
-    [1.00] "container": "avi",
-    [1.00] "format": "HDTV",
-    [0.70] "series": "Treme",
-    [0.50] "title": "Right Place, Wrong Time",
-    [0.80] "releaseGroup": "NoTV",
-    [0.80] "season": 1,
-    [1.00] "type": "episode"
-}
-
-
-
-

Features

-

At the moment, the filename matcher is able to recognize the following -property types:

-
[ title,                             # for movies and episodes
-  series, season,                    # for episodes only
-  episodeNumber, episodeDetails,     # for episodes only
-  date, year,                        # 'date' instance of datetime.date
-  language, subtitleLanguage,        # instances of babelfish.Language
-  country,                           # instances of babelfish.Country
-  fileSize, duration,                # when detecting video file metadata
-  container, format,
-  videoCodec, audioCodec,
-  videoProfile, audioProfile,
-  audioChannels, screenSize,
-  releaseGroup, website,
-  cdNumber, cdNumberTotal,
-  filmNumber, filmSeries,
-  bonusNumber, edition,
-  idNumber,                          # tries to identify a hash or a serial number
-  other
-  ]
-
-
-

Guessit also allows you to compute a whole lot of hashes from a file, -namely all the ones you can find in the hashlib python module (md5, -sha1, ...), but also the Media Player Classic hash that is used (amongst -others) by OpenSubtitles and SMPlayer, as well as the ed2k hash.

-

If you have the ‘guess-language’ python package installed, GuessIt can also -analyze a subtitle file’s contents and detect which language it is written in.

-

If you have the ‘enzyme’ python package installed, GuessIt can also detect the -properties from the actual video file metadata.

-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/projectinfo.html guessit-0.11.0/docs/_build/html/projectinfo.html --- guessit-0.8/docs/_build/html/projectinfo.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/projectinfo.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,133 +0,0 @@ - - - - - - - - Support — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Support

-

The project website for GuessIt is hosted at ReadTheDocs. -There you will also find the User guide and Developer documentation.

-

This project is hosted on GitHub: https://github.com/wackou/guessit

-

Please report issues and/or feature requests via the bug tracker.

-

You can also report issues using the command-line tool:

-
$ guessit --bug "filename.that.fails.avi"
-
-
-
-
-

Contribute

-

GuessIt is under active development, and contributions are more than welcome!

-
    -
  1. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. -There is a Contributor Friendly tag for issues that should be ideal for people who are not very -familiar with the codebase yet.
  2. -
  3. Fork the repository on Github to start making your changes to the master -branch (or branch off of it).
  4. -
  5. Write a test which shows that the bug was fixed or that the feature works as expected.
  6. -
  7. Send a pull request and bug the maintainer until it gets merged and published. :)
  8. -
-
-
-

License

-

GuessIt is licensed under the LGPLV3 license.

-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

-

Table Of Contents

- -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/py-modindex.html guessit-0.11.0/docs/_build/html/py-modindex.html --- guessit-0.8/docs/_build/html/py-modindex.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/py-modindex.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,134 +0,0 @@ - - - - - - - - Python Module Index — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- - -

Python Module Index

- -
- g -
- - - - - - - - - - - - - - - - -
 
- g
- guessit -
    - guessit.guess -
    - guessit.matcher -
    - guessit.matchtree -
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/search.html guessit-0.11.0/docs/_build/html/search.html --- guessit-0.8/docs/_build/html/search.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/search.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,115 +0,0 @@ - - - - - - - - Search — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Search

-
- -

- Please activate JavaScript to enable the search - functionality. -

-
-

- From here you can search these documents. Enter your search - words into the box below and click "search". Note that the search - function will automatically search for all of the words. Pages - containing fewer words won't appear in the result list. -

-
- - - -
- -
- -
- -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- -
-
-
-
- - - \ No newline at end of file Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/coinwidget/icon_bitcoin.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/coinwidget/icon_bitcoin.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/coinwidget/icon_litecoin.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/coinwidget/icon_litecoin.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/coinwidget/icon_qrcode.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/coinwidget/icon_qrcode.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/coinwidget/icon_wallet.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/coinwidget/icon_wallet.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/comment-bright.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/comment-bright.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/comment-close.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/comment-close.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/comment.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/comment.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/down.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/down.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/down-pressed.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/down-pressed.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/file.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/file.png differ Binary files 
/tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/guessit-logo.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/guessit-logo.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/lgplv3-88x31.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/lgplv3-88x31.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/minus.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/minus.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/plus.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/plus.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/up.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/up.png differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_build/html/_static/up-pressed.png and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_build/html/_static/up-pressed.png differ diff -Nru guessit-0.8/docs/_build/html/_themes/README.html guessit-0.11.0/docs/_build/html/_themes/README.html --- guessit-0.8/docs/_build/html/_themes/README.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/_themes/README.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,124 +0,0 @@ - - - - - - - - krTheme Sphinx Style — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

krTheme Sphinx Style

-

This repository contains sphinx styles Kenneth Reitz uses in most of -his projects. It is a derivative of Mitsuhiko’s themes for Flask and Flask related -projects. To use this style in your Sphinx documentation, follow -this guide:

-
    -
  1. put this folder as _themes into your docs folder. Alternatively -you can also use git submodules to check out the contents there.

    -
  2. -
  3. add this to your conf.py:

    -
    sys.path.append(os.path.abspath('_themes'))
    -html_theme_path = ['_themes']
    -html_theme = 'flask'
    -
    -
    -
  4. -
-

The following themes exist:

-
-
kr
-
the standard flask documentation theme for large projects
-
kr_small
-
small one-page theme. Intended to be used by very small addon libraries.
-
-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/user/commandline.html guessit-0.11.0/docs/_build/html/user/commandline.html --- guessit-0.8/docs/_build/html/user/commandline.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/user/commandline.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ - - - - - - - - Command-line usage — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Command-line usage

-

To have GuessIt try to guess some information from a filename, just run it as a command:

-
$ guessit "Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv"
-For: Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv
-GuessIt found: {
-    [1.00] "videoCodec": "h264",
-    [1.00] "container": "mkv",
-    [1.00] "format": "BluRay",
-    [0.60] "title": "Dark City",
-    [1.00] "releaseGroup": "CHD",
-    [1.00] "screenSize": "720p",
-    [1.00] "year": 1998,
-    [1.00] "type": "movie",
-    [1.00] "audioCodec": "DTS"
-}
-
-
-

The numbers between square brackets indicate the confidence in the -value, so for instance in the previous example, GuessIt is sure that -the videoCodec is h264, but only 60% confident that the title is -‘Dark City’.

-

You can use the -v or --verbose flag to have it display debug information.

-

You can use the -p or -l flags to display the property names or the -multiple values they can take.

-

You can also run a --demo mode which will run a few tests and -display the results.

-

By default, GuessIt will try to autodetect the type of file you are asking it to -guess, movie or episode. If you want to force one of those, use the -t movie or --t episode flags.

-

If the input file is a remote file or a release name with no folder and extension, -you should use the -n or --name-only flag. It will disable folder and extension -parsing, and any concrete file related analysis.

-

Guessit also allows you to specify the type of information you want -using the -i or --info flag:

-
$ guessit -i hash_md5,hash_sha1,hash_ed2k tests/dummy.srt
-For: tests/dummy.srt
-GuessIt found: {
-    [1.00] "hash_ed2k": "ed2k://|file|dummy.srt|44|1CA0B9DED3473B926AA93A0A546138BB|/",
-    [1.00] "hash_md5": "e781de9b94ba2753a8e2945b2c0a123d",
-    [1.00] "hash_sha1": "bfd18e2f4e5d59775c2bc14d80f56971891ed620"
-}
-
-
-

You can see the list of options that guessit.py accepts like that:

-
$ guessit -h
-Usage: guessit.py [options] file1 [file2...]
-
-Options:
-  -h, --help            show this help message and exit
-  -v, --verbose         Display debug output
-  -p, --properties      Display properties that can be guessed.
-  -l, --values          Display property values that can be guessed.
-  -s, --transformers    Display transformers that can be used.
-  -i INFO, --info=INFO  The desired information type: filename, video,
-                        hash_mpc or a hash from python's hashlib module, such
-                        as hash_md5, hash_sha1, ...; or a list of any of them,
-                        comma-separated
-  -n, --name-only       Parse files as name only. Disable folder parsing,
-                        extension parsing, and file content analysis.
-  -t TYPE, --type=TYPE  The suggested file type: movie, episode. If undefined,
-                        type will be guessed.
-  -a, --advanced        Display advanced information for filename guesses, as
-                        json output
-  -y, --yaml            Display information for filename guesses as yaml
-                        output (like unit-test)
-  -d, --demo            Run a few builtin tests instead of analyzing a file
-  -b, --bug             Submit a wrong detection to the guessit.io service
-
-
-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/user/install.html guessit-0.11.0/docs/_build/html/user/install.html --- guessit-0.8/docs/_build/html/user/install.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/user/install.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,156 +0,0 @@ - - - - - - - - Installation — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Installation

-

This part of the documentation covers the installation of GuessIt. -The first step to using any software package is getting it properly installed.

-
-

Distribute & Pip

-

Installing GuessIt is simple with pip:

-
$ pip install guessit
-
-
-

or, with easy_install:

-
$ easy_install guessit
-
-
-

But, you really shouldn’t do that.

-
-
-

Get the Code

-

GuessIt is actively developed on GitHub, where the code is -always available.

-

You can either clone the public repository:

-
git clone git://github.com/wackou/guessit.git
-
-
-

Download the tarball:

-
$ curl -L https://github.com/wackou/guessit/tarball/master -o guessit.tar.gz
-
-
-

Or, download the zipball:

-
$ curl -L https://github.com/wackou/guessit/zipball/master -o guessit.zip
-
-
-

Once you have a copy of the source, you can embed it in your Python package, -or install it into your site-packages easily:

-
$ python setup.py install
-
-
-
-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

-

Table Of Contents

- -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/_build/html/user/python.html guessit-0.11.0/docs/_build/html/user/python.html --- guessit-0.8/docs/_build/html/user/python.html 2014-06-03 22:06:34.000000000 +0000 +++ guessit-0.11.0/docs/_build/html/user/python.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ - - - - - - - - Python module usage — GuessIt 0.7.2.dev0 documentation - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Python module usage

-

The main entry points to the python module are the guess_video_info, -guess_movie_info and guess_episode_info.

-

The guess_video_info function will try to autodetect the type of the -file, either movie, moviesubtitle, movieinfo, episode, episodesubtitle or -episodeinfo.

-

Pass them the filename and the desired information type:

-
>>> import guessit
->>> path = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv'
->>> guess = guessit.guess_movie_info(path, info = ['filename'])
-
-
-
>>> print type(guess)
-<class 'guessit.guess.Guess'>
-
-
-
>>> print guess
-{'videoCodec': 'h264', 'container': 'mkv', 'format': 'BluRay',
-'title': 'Dark City', 'releaseGroup': 'CHD', 'screenSize': '720p',
-'year': 1998, 'type': 'movie', 'audioCodec': 'DTS'}
-
-
-
>>> print guess.nice_string()
-{
-    [1.00] "videoCodec": "h264",
-    [1.00] "container": "mkv",
-    [1.00] "format": "BluRay",
-    [0.60] "title": "Dark City",
-    [1.00] "releaseGroup": "CHD",
-    [1.00] "screenSize": "720p",
-    [1.00] "year": 1998,
-    [1.00] "type": "movie",
-    [1.00] "audioCodec": "DTS"
-}
-
-
-

A Guess instance is a dictionary which has an associated confidence -for each of the properties it has.

-

A Guess instance is also a python dict instance, so you can use it -wherever you would use a normal python dict.

-

If you have the Enzyme python package installed, then the -guess_video_metadata function is also available, which will return a guess -with the properties from the video file metadata.

-
- - -
-
-
-
-
-

- -

- -

- GuessIt is a python library that tries to extract as much information - as possible from a file. -

Related Topics

- - - -
-
-
-
- - - \ No newline at end of file diff -Nru guessit-0.8/docs/presentation.rst guessit-0.11.0/docs/presentation.rst --- guessit-0.8/docs/presentation.rst 2014-05-31 10:03:20.000000000 +0000 +++ guessit-0.11.0/docs/presentation.rst 2015-06-06 12:39:15.000000000 +0000 @@ -1,9 +1,9 @@ -GuessIt is a python library that tries to extract as much information as +GuessIt is a python library that extracts as much information as possible from a video file. -It has a powerful filename matcher that allows to guess a lot of -metadata from a video using only its filename. This matcher works with +It has a very powerful filename matcher that allows to guess a lot of +metadata from a video using its filename only. This matcher works with both movies and tv shows episodes. For example, GuessIt can do the following:: @@ -25,33 +25,26 @@ -Features --------- +Filename matcher +---------------- -At the moment, the filename matcher is able to recognize the following -property types:: +The filename matcher is based on pattern matching and is able to recognize many properties from the filename, +like ``title``, ``year``, ``series``, ``episodeNumber``, ``seasonNumber``, +``videoCodec``, ``screenSize``, ``language``. Guessed values are cleaned up and given in a readable format +which may not match exactly the raw filename. 
- [ title, # for movies and episodes - series, season, # for episodes only - episodeNumber, episodeDetails, # for episodes only - date, year, # 'date' instance of datetime.date - language, subtitleLanguage, # instances of babelfish.Language - country, # instances of babelfish.Country - fileSize, duration, # when detecting video file metadata - container, format, - videoCodec, audioCodec, - videoProfile, audioProfile, - audioChannels, screenSize, - releaseGroup, website, - cdNumber, cdNumberTotal, - filmNumber, filmSeries, - bonusNumber, edition, - idNumber, # tries to identify a hash or a serial number - other - ] +The full list of available properties can be seen here: +.. toctree:: + :maxdepth: 2 -Guessit also allows you to compute a whole lof of hashes from a file, + user/properties + + +Other features +-------------- + +GuessIt also allows you to compute a whole lof of hashes from a file, namely all the ones you can find in the hashlib python module (md5, sha1, ...), but also the Media Player Classic hash that is used (amongst others) by OpenSubtitles and SMPlayer, as well as the ed2k hash. @@ -61,3 +54,87 @@ If you have the 'enzyme' python package installed, GuessIt can also detect the properties from the actual video file metadata. + +Usage +----- + +GuessIt can be used from the command line:: + + $ guessit + usage: guessit [-h] [-t TYPE] [-n] [-c] [-X DISABLED_TRANSFORMERS] [-v] + [-P SHOW_PROPERTY] [-u] [-a] [-y] [-f INPUT_FILE] [-d] [-p] + [-V] [-s] [--version] [-b] [-i INFO] [-S EXPECTED_SERIES] + [-T EXPECTED_TITLE] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-E] + [-C ALLOWED_COUNTRIES] [-G EXPECTED_GROUP] + [filename [filename ...]] + + positional arguments: + filename Filename or release name to guess + + optional arguments: + -h, --help show this help message and exit + + Naming: + -t TYPE, --type TYPE The suggested file type: movie, episode. If undefined, + type will be guessed. + -n, --name-only Parse files as name only. 
Disable folder parsing, + extension parsing, and file content analysis. + -c, --split-camel Split camel case part of filename. + -X DISABLED_TRANSFORMERS, --disabled-transformer DISABLED_TRANSFORMERS + Transformer to disable (can be used multiple time) + -S EXPECTED_SERIES, --expected-series EXPECTED_SERIES + Expected series to parse (can be used multiple times) + -T EXPECTED_TITLE, --expected-title EXPECTED_TITLE + Expected title (can be used multiple times) + -Y, --date-year-first + If short date is found, consider the first digits as + the year. + -D, --date-day-first If short date is found, consider the second digits as + the day. + -L ALLOWED_LANGUAGES, --allowed-languages ALLOWED_LANGUAGES + Allowed language (can be used multiple times) + -E, --episode-prefer-number + Guess "serie.213.avi" as the episodeNumber 213. + Without this option, it will be guessed as season 2, + episodeNumber 13 + -C ALLOWED_COUNTRIES, --allowed-country ALLOWED_COUNTRIES + Allowed country (can be used multiple times) + -G EXPECTED_GROUP, --expected-group EXPECTED_GROUP + Expected release group (can be used multiple times) + + Output: + -v, --verbose Display debug output + -P SHOW_PROPERTY, --show-property SHOW_PROPERTY + Display the value of a single property (title, series, + videoCodec, year, type ...) + -u, --unidentified Display the unidentified parts. + -a, --advanced Display advanced information for filename guesses, as + json output + -y, --yaml Display information for filename guesses as yaml + output (like unit-test) + -f INPUT_FILE, --input-file INPUT_FILE + Read filenames from an input file. + -d, --demo Run a few builtin tests instead of analyzing a file + + Information: + -p, --properties Display properties that can be guessed. + -V, --values Display property values that can be guessed. + -s, --transformers Display transformers that can be used. + --version Display the guessit version. 
+ + guessit.io: + -b, --bug Submit a wrong detection to the guessit.io service + + Other features: + -i INFO, --info INFO The desired information type: filename, video, + hash_mpc or a hash from python's hashlib module, such + as hash_md5, hash_sha1, ...; or a list of any of them, + comma-separated + +It can also be used as a python module:: + + >>> from guessit import guess_file_info + >>> guess_file_info('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi') + {u'mimetype': 'video/x-msvideo', u'episodeNumber': 3, u'videoCodec': u'XviD', u'container': u'avi', u'format': u'HDTV', u'series': u'Treme', u'title': u'Right Place, Wrong Time', u'releaseGroup': u'NoTV', u'season': 1, u'type': u'episode'} + + diff -Nru guessit-0.8/docs/projectinfo.rst guessit-0.11.0/docs/projectinfo.rst --- guessit-0.8/docs/projectinfo.rst 2014-05-16 22:51:19.000000000 +0000 +++ guessit-0.11.0/docs/projectinfo.rst 2014-09-22 14:11:44.000000000 +0000 @@ -32,4 +32,4 @@ License ------- -GuessIt is licensed under the `LGPLV3 license `_. +GuessIt is licensed under the `LGPLv3 license `_. Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_themes/flask_theme_support.pyc and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_themes/flask_theme_support.pyc differ Binary files /tmp/tmp40IUWV/l79OTXdPku/guessit-0.8/docs/_themes/__pycache__/flask_theme_support.cpython-34.pyc and /tmp/tmp40IUWV/JAo2jrXNJo/guessit-0.11.0/docs/_themes/__pycache__/flask_theme_support.cpython-34.pyc differ diff -Nru guessit-0.8/docs/user/commandline.rst guessit-0.11.0/docs/user/commandline.rst --- guessit-0.8/docs/user/commandline.rst 2014-06-01 22:42:15.000000000 +0000 +++ guessit-0.11.0/docs/user/commandline.rst 2015-01-03 11:01:51.000000000 +0000 @@ -27,7 +27,7 @@ You can use the ``-v`` or ``--verbose`` flag to have it display debug information. 
-You can use the ``-p`` or ``-l`` flags to display the properties names or the +You can use the ``-p`` or ``-V`` flags to display the properties names or the multiple values they can take. You can also run a ``--demo`` mode which will run a few tests and @@ -55,26 +55,74 @@ You can see the list of options that guessit.py accepts like that:: - $ guessit -h - Usage: guessit.py [options] file1 [file2...] + $ guessit --help + usage: guessit [-h] [-t TYPE] [-n] [-c] [-X DISABLED_TRANSFORMERS] [-v] + [-P SHOW_PROPERTY] [-u] [-a] [-y] [-f INPUT_FILE] [-d] [-p] + [-V] [-s] [--version] [-b] [-i INFO] [-S EXPECTED_SERIES] + [-T EXPECTED_TITLE] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-E] + [-C ALLOWED_COUNTRIES] [-G EXPECTED_GROUP] + [filename [filename ...]] - Options: + positional arguments: + filename Filename or release name to guess + + optional arguments: -h, --help show this help message and exit - -v, --verbose Display debug output - -p, --properties Display properties that can be guessed. - -l, --values Display property values that can be guessed. - -s, --transformers Display transformers that can be used. - -i INFO, --info=INFO The desired information type: filename, video, - hash_mpc or a hash from python's hashlib module, such - as hash_md5, hash_sha1, ...; or a list of any of them, - comma-separated + + Naming: + -t TYPE, --type TYPE The suggested file type: movie, episode. If undefined, + type will be guessed. -n, --name-only Parse files as name only. Disable folder parsing, extension parsing, and file content analysis. - -t TYPE, --type=TYPE The suggested file type: movie, episode. If undefined, - type will be guessed. + -c, --split-camel Split camel case part of filename. 
+ -X DISABLED_TRANSFORMERS, --disabled-transformer DISABLED_TRANSFORMERS + Transformer to disable (can be used multiple time) + -S EXPECTED_SERIES, --expected-series EXPECTED_SERIES + Expected series to parse (can be used multiple times) + -T EXPECTED_TITLE, --expected-title EXPECTED_TITLE + Expected title (can be used multiple times) + -Y, --date-year-first + If short date is found, consider the first digits as + the year. + -D, --date-day-first If short date is found, consider the second digits as + the day. + -L ALLOWED_LANGUAGES, --allowed-languages ALLOWED_LANGUAGES + Allowed language (can be used multiple times) + -E, --episode-prefer-number + Guess "serie.213.avi" as the episodeNumber 213. + Without this option, it will be guessed as season 2, + episodeNumber 13 + -C ALLOWED_COUNTRIES, --allowed-country ALLOWED_COUNTRIES + Allowed country (can be used multiple times) + -G EXPECTED_GROUP, --expected-group EXPECTED_GROUP + Expected release group (can be used multiple times) + + Output: + -v, --verbose Display debug output + -P SHOW_PROPERTY, --show-property SHOW_PROPERTY + Display the value of a single property (title, series, + videoCodec, year, type ...) + -u, --unidentified Display the unidentified parts. -a, --advanced Display advanced information for filename guesses, as json output -y, --yaml Display information for filename guesses as yaml output (like unit-test) + -f INPUT_FILE, --input-file INPUT_FILE + Read filenames from an input file. -d, --demo Run a few builtin tests instead of analyzing a file + + Information: + -p, --properties Display properties that can be guessed. + -V, --values Display property values that can be guessed. + -s, --transformers Display transformers that can be used. + --version Display the guessit version. 
+ + guessit.io: -b, --bug Submit a wrong detection to the guessit.io service + + Other features: + -i INFO, --info INFO The desired information type: filename, video, + hash_mpc or a hash from python's hashlib module, such + as hash_md5, hash_sha1, ...; or a list of any of them, + comma-separated + diff -Nru guessit-0.8/docs/user/install.rst guessit-0.11.0/docs/user/install.rst --- guessit-0.8/docs/user/install.rst 2012-04-28 18:42:49.000000000 +0000 +++ guessit-0.11.0/docs/user/install.rst 2015-06-06 12:39:15.000000000 +0000 @@ -7,41 +7,38 @@ The first step to using any software package is getting it properly installed. -Distribute & Pip ----------------- +Installing with Pip +------------------- Installing GuessIt is simple with `pip `_:: $ pip install guessit -or, with `easy_install `_:: - $ easy_install guessit - -But, you really `shouldn't do that `_. - - - -Get the Code ------------- +Getting the source code +----------------------- GuessIt is actively developed on GitHub, where the code is `always available `_. You can either clone the public repository:: - git clone git://github.com/wackou/guessit.git + $ git clone git://github.com/wackou/guessit.git Download the `tarball `_:: $ curl -L https://github.com/wackou/guessit/tarball/master -o guessit.tar.gz -Or, download the `zipball `_:: +Or download the `zipball `_:: $ curl -L https://github.com/wackou/guessit/zipball/master -o guessit.zip Once you have a copy of the source, you can embed it in your Python package, -or install it into your site-packages easily:: +install it into your site-packages folder like that:: $ python setup.py install + +or use it directly from the source folder for development:: + + $ python setup.py develop diff -Nru guessit-0.8/docs/user/properties.rst guessit-0.11.0/docs/user/properties.rst --- guessit-0.8/docs/user/properties.rst 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/docs/user/properties.rst 2015-08-31 16:57:28.000000000 +0000 @@ -0,0 +1,263 @@ +.. 
_properties: + +Properties +~~~~~~~~~~ + +Guessed values are cleaned up and given in a readable format +which may not match exactly the raw filename. + +So, for instance, + +- ``DVDSCR`` will be guessed as ``format`` = ``DVD`` + ``other`` = ``Screener`` +- ``1920x1080`` will be guessed as ``screenSize`` = ``1080p`` +- ``DD5.1`` will be guessed as ``audioCodec`` = ``DolbyDigital`` + ``audioChannel`` = ``5.1`` + + +Main properties +~~~~~~~~~~~~~~~ + +- **type** + + Type of the file. + + - ``unknown``, ``movie``, ``episode``, ``moviesubtitle``, ``episodesubtitle`` + + +- **title** + + Title of movie or episode. + +- **container** + + Container of the file. + + - ``3g2``, ``wmv``, ``webm``, ``mp4``, ``avi``, ``mp4a``, ``mpeg``, ``sub``, ``mka``, ``m4v``, ``ts``, ``mkv``, ``ra``, ``rm``, ``wma``, ``ass``, ``mpg``, ``ram``, ``3gp``, ``ogv``, ``mov``, ``ogm``, ``asf``, ``divx``, ``ogg``, ``ssa``, ``qt``, ``idx``, ``nfo``, ``wav``, ``flv``, ``3gp2``, ``iso``, ``mk2``, ``srt`` + + +- **date** + + Date found in filename. + + +- **year** + + Year of movie (or episode). + + +- **releaseGroup** + + Name of (non)scene group that released the file. + + +- **website** + + Name of website contained in the filename. + + +Episode properties +~~~~~~~~~~~~~~~~~~ + +- **series** + + Name of series. + + +- **season** + + Season number. + + +- **episodeNumber** + + Episode number. + + +- **episodeList** + + List of episode numbers if several were found. + + - note: If several are found, ``episodeNumber`` is the first item of this list. + + +- **seasonList** + + List of season numbers if several were found. + + - note: If several are found, ``seasonNumber`` is the first item of this list. + + +- **episodeCount** + + Total number of episodes. + + +- **seasonCount** + + Total number of seasons. + + +- **episodeDetails** + + Some details about the episode. 
+ + - ``Bonus`` ``Oav`` ``Ova`` ``Omake`` ``Extras`` ``Unaired`` ``Special`` ``Pilot`` + + +- **episodeFormat** + + Episode format of the series. + + - ``Minisode`` + +- **part** + + Part number of the video. + +- **partList** + +List of part numbers if several were found. + +- note: If several are found, ``part`` is the first item of this list. + +- **version** + + Version of the episode. + + - In anime fansub scene, new versions are released with tag ``v[0-9]``. + + +Video properties +~~~~~~~~~~~~~~~~ + +- **format** + + Format of the initial source + + - ``HDTV`` ``WEB-DL`` ``TV`` ``VOD`` ``BluRay`` ``DVD`` ``WEBRip`` ``Workprint`` ``Telecine`` ``VHS`` ``DVB`` ``Telesync`` ``HD-DVD`` ``PPV`` ``Cam`` + + +- **screenSize** + + Resolution of video. + - ``720p`` ``1080p`` ``1080i`` ``x`` ``4K`` ``360p`` ``368p`` ``480p`` ``576p`` ``900p`` + + +- **videoCodec** + Codec used for video. + + - ``h264`` ``h265`` ``DivX`` ``XviD`` ``Real`` ``Mpeg2`` + + +- **videoProfile** + Codec profile used for video. + + - ``8bit`` ``10bit`` ``HP`` ``BP`` ``MP`` ``XP`` ``Hi422P`` ``Hi444PP`` + + +- **videoApi** + API used for the video. + + - ``DXVA`` + + +Audio properties +~~~~~~~~~~~~~~~~ + +- **audioChannels** + + Number of channels for audio. + + - ``1.0`` ``2.0`` ``5.1`` ``7.1`` + + +- **audioCodec** + Codec used for audio. + + - ``DTS`` ``TrueHD`` ``DolbyDigital`` ``AAC`` ``AC3`` ``MP3`` ``Flac`` + + +- **audioProfile** + The codec profile used for audio. + + - ``LC`` ``HQ`` ``HD`` ``HE`` ``HDMA`` + + +Localization properties +~~~~~~~~~~~~~~~~~~~~~~~ + +- **Country** + + Country(ies) of content. Often found in series, ``Shameless (US)`` for instance. + + - ``[]`` (This class equals name and iso code) + + +- **Language** + + Language(s) of the audio soundtrack. + + - ``[]`` (This class equals name and iso code) + + +- **subtitleLanguage** + + Language(s) of the subtitles. 
+ + - ``[]`` (This class equals name and iso code) + + +Other properties +~~~~~~~~~~~~~~~~ + +- **bonusNumber** + + Bonus number. + + +- **bonusTitle** + + Bonus title. + + +- **cdNumber** + + CD number. + + +- **cdNumberTotal** + + Total number of CD. + + +- **crc32** + + CRC32 of the file. + + +- **idNumber** + + Volume identifier (UUID). + + +- **edition** + + Edition of the movie. + + - ``Special Edition``, ``Collector Edition``, ``Director's cut``, ``Criterion Edition``, ``Deluxe Edition`` + + +- **filmNumber** + + Film number of this movie. + + +- **filmSeries** + + Film series of this movie. + +- **other** + + Other property will appear under this property. + + - ``Fansub``, ``HR``, ``HQ``, ``Netflix``, ``Screener``, ``Unrated``, ``HD``, ``3D``, ``SyncFix``, ``Bonus``, ``WideScreen``, ``Fastsub``, ``R5``, ``AudioFix``, ``DDC``, ``Trailer``, ``Complete``, ``Limited``, ``Classic``, ``Proper``, ``DualAudio``, ``LiNE``, ``CC``, ``LD``, ``MD`` + diff -Nru guessit-0.8/guessit/containers.py guessit-0.11.0/guessit/containers.py --- guessit-0.8/guessit/containers.py 2014-06-12 09:32:13.000000000 +0000 +++ guessit-0.11.0/guessit/containers.py 2015-08-31 16:57:28.000000000 +0000 @@ -21,10 +21,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import types + from .patterns import compile_pattern, sep from . 
import base_text_type from .guess import Guess -import types def _get_span(prop, match): @@ -38,11 +39,28 @@ start = span[0] if end is None or span[1] > end: end = span[1] - return (start, end) + return start, end else: return match.span() - start = span[0] - end = span[1] + + +def _trim_span(span, value, blanks = sep): + start, end = span + + for i in range(0, len(value)): + if value[i] in blanks: + start += 1 + else: + break + + for i in reversed(range(0, len(value))): + if value[i] in blanks: + end -= 1 + else: + break + if end <= start: + return -1, -1 + return start, end def _get_groups(compiled_re): @@ -64,7 +82,40 @@ class NoValidator(object): - def validate(self, prop, string, node, match, entry_start, entry_end): + @staticmethod + def validate(prop, string, node, match, entry_start, entry_end): + return True + + +class LeftValidator(object): + """Make sure our match is starting by separator, or by another entry""" + + @staticmethod + def validate(prop, string, node, match, entry_start, entry_end): + span = _get_span(prop, match) + span = _trim_span(span, string[span[0]:span[1]]) + start, end = span + + sep_start = start <= 0 or string[start - 1] in sep + start_by_other = start in entry_end + if not sep_start and not start_by_other: + return False + return True + + +class RightValidator(object): + """Make sure our match is ended by separator, or by another entry""" + + @staticmethod + def validate(prop, string, node, match, entry_start, entry_end): + span = _get_span(prop, match) + span = _trim_span(span, string[span[0]:span[1]]) + start, end = span + + sep_end = end >= len(string) or string[end] in sep + end_by_other = end in entry_start + if not sep_end and not end_by_other: + return False return True @@ -79,52 +130,136 @@ return True +class SameKeyValidator(object): + def __init__(self, validator_function): + self.validator_function = validator_function + + def validate(self, prop, string, node, match, entry_start, entry_end): + path_nodes = [path_node 
for path_node in node.ancestors if path_node.category == 'path'] + if path_nodes: + path_node = path_nodes[0] + else: + path_node = node.root + + for key in prop.keys: + for same_value_leaf in path_node.leaves_containing(key): + ret = self.validator_function(same_value_leaf, key, prop, string, node, match, entry_start, entry_end) + if ret is not None: + return ret + return True + + +class OnlyOneValidator(SameKeyValidator): + """ + Check that there's only one occurence of key for current directory + """ + def __init__(self): + super(OnlyOneValidator, self).__init__(lambda same_value_leaf, key, prop, string, node, match, entry_start, entry_end: False) + + class DefaultValidator(object): """Make sure our match is surrounded by separators, or by another entry""" def validate(self, prop, string, node, match, entry_start, entry_end): - start, end = _get_span(prop, match) + span = _get_span(prop, match) + span = _trim_span(span, string[span[0]:span[1]]) + return DefaultValidator.validate_string(string, span, entry_start, entry_end) + + @staticmethod + def validate_string(string, span, entry_start=None, entry_end=None): + start, end = span sep_start = start <= 0 or string[start - 1] in sep sep_end = end >= len(string) or string[end] in sep - start_by_other = start in entry_end - end_by_other = end in entry_start + start_by_other = start in entry_end if entry_end else False + end_by_other = end in entry_start if entry_start else False if (sep_start or start_by_other) and (sep_end or end_by_other): return True return False +class FunctionValidator(object): + def __init__(self, function): + self.function = function + + def validate(self, prop, string, node, match, entry_start, entry_end): + return self.function(prop, string, node, match, entry_start, entry_end) + + +class FormatterValidator(object): + def __init__(self, group_name=None, formatted_validator=None): + self.group_name = group_name + self.formatted_validator = formatted_validator + + def validate(self, prop, 
string, node, match, entry_start, entry_end): + if self.group_name: + formatted = prop.format(match.group(self.group_name), self.group_name) + else: + formatted = prop.format(match.group()) + if self.formatted_validator: + return self.formatted_validator(formatted) + else: + return formatted + + +def _get_positions(prop, string, node, match, entry_start, entry_end): + span = match.span() + start = span[0] + end = span[1] + + at_start = True + at_end = True + + while start > 0: + start -= 1 + if string[start] not in sep: + at_start = False + break + while end < len(string) - 1: + end += 1 + if string[end] not in sep: + at_end = False + break + return at_start, at_end + + class WeakValidator(DefaultValidator): """Make sure our match is surrounded by separators and is the first or last element in the string""" def validate(self, prop, string, node, match, entry_start, entry_end): if super(WeakValidator, self).validate(prop, string, node, match, entry_start, entry_end): - span = match.span() - start = span[0] - end = span[1] - - at_start = True - at_end = True - - while start > 0: - start = start - 1 - if string[start] not in sep: - at_start = False - break - if at_start: + at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end) + return at_start or at_end + return False + + +class NeighborValidator(DefaultValidator): + """Make sure the node is next another one""" + def validate(self, prop, string, node, match, entry_start, entry_end): + at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end) + + if at_start: + previous_leaf = node.root.previous_leaf(node) + if previous_leaf is not None: return True - while end < len(string) - 1: - end = end + 1 - if string[end] not in sep: - at_end = False - break - if at_end: + + if at_end: + next_leaf = node.root.next_leaf(node) + if next_leaf is not None: return True + return False +class FullMatchValidator(DefaultValidator): + """Make sure the node match fully""" + def 
validate(self, prop, string, node, match, entry_start, entry_end): + at_start, at_end = _get_positions(prop, string, node, match, entry_start, entry_end) + + return at_start and at_end + class LeavesValidator(DefaultValidator): def __init__(self, lambdas=None, previous_lambdas=None, next_lambdas=None, both_side=False, default_=True): - self.previous_lambdas = previous_lambdas if not previous_lambdas is None else [] - self.next_lambdas = next_lambdas if not next_lambdas is None else [] + self.previous_lambdas = previous_lambdas if previous_lambdas is not None else [] + self.next_lambdas = next_lambdas if next_lambdas is not None else [] if lambdas: self.previous_lambdas.extend(lambdas) self.next_lambdas.extend(lambdas) @@ -141,13 +276,10 @@ return False previous_ = self._validate_previous(prop, string, node, match, entry_start, entry_end) - if previous_ and self.both_side: - return previous_ next_ = self._validate_next(prop, string, node, match, entry_start, entry_end) if previous_ is None and next_ is None: return super_ret - if self.both_side: return previous_ and next_ else: @@ -158,7 +290,7 @@ for leaf in node.root.previous_leaves(node): for lambda_ in self.previous_lambdas: ret = self._check_rule(lambda_, leaf) - if not ret is None: + if ret is not None: return ret return False @@ -167,17 +299,18 @@ for leaf in node.root.next_leaves(node): for lambda_ in self.next_lambdas: ret = self._check_rule(lambda_, leaf) - if not ret is None: + if ret is not None: return ret return False - def _check_rule(self, lambda_, previous_leaf): + @staticmethod + def _check_rule(lambda_, previous_leaf): return lambda_(previous_leaf) class _Property: """Represents a property configuration.""" - def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None): + def __init__(self, keys=None, pattern=None, canonical_form=None, canonical_from_pattern=True, 
confidence=1.0, enhance=True, global_span=False, validator=DefaultValidator(), formatter=None, disabler=None, confidence_lambda=None, remove_duplicates=False): """ :param keys: Keys of the property (format, screenSize, ...) :type keys: string @@ -196,6 +329,8 @@ :type validator: :class:`DefaultValidator` :param formatter: Formater to use :type formatter: function + :param remove_duplicates: Keep only the last match if multiple values are found + :type remove_duplicates: bool """ if isinstance(keys, list): self.keys = keys @@ -204,7 +339,7 @@ else: self.keys = [] self.canonical_form = canonical_form - if not pattern is None: + if pattern is not None: self.pattern = pattern else: self.pattern = canonical_form @@ -217,22 +352,30 @@ if not self.keys: raise ValueError("No property key is defined") self.confidence = confidence + self.confidence_lambda = confidence_lambda self.global_span = global_span self.validator = validator self.formatter = formatter + self.disabler = disabler + self.remove_duplicates = remove_duplicates + + def disabled(self, options): + if self.disabler: + return self.disabler(options) + return False def format(self, value, group_name=None): """Retrieves the final value from re group match value""" formatter = None if isinstance(self.formatter, dict): formatter = self.formatter.get(group_name) - if formatter is None and not group_name is None: + if formatter is None and group_name is not None: formatter = self.formatter.get(None) else: formatter = self.formatter if isinstance(formatter, types.FunctionType): return formatter(value) - elif not formatter is None: + elif formatter is not None: return formatter.format(value) return value @@ -297,7 +440,7 @@ """Unregister all defined properties""" self._properties.clear() - def find_properties(self, string, node, name=None, validate=True, re_match=False, sort=True, multiple=False): + def find_properties(self, string, node, options, name=None, validate=True, re_match=False, sort=True, multiple=False): 
"""Find all distinct properties for given string If no capturing group is defined in the property, value will be grabbed from the entire match. @@ -342,6 +485,7 @@ entry_end = {} entries = [] + duplicate_matches = {} ret = [] @@ -350,21 +494,28 @@ # search all properties for prop in self.get_properties(name): - valid_match = None - if re_match: - match = prop.compiled.match(string) - if match: - valid_match = match - else: - matches = prop.compiled.finditer(string) - for match in matches: - # Keeping the last match, maybe it should be optional ... - # Needed for the.100.109.hdtv-lol.mp4 - valid_match = match - - if valid_match: - entry = prop, valid_match - entries.append(entry) + if not prop.disabled(options): + valid_match = None + if re_match: + match = prop.compiled.match(string) + if match: + entries.append((prop, match)) + else: + matches = list(prop.compiled.finditer(string)) + if prop.remove_duplicates: + duplicate_matches[prop] = matches + for match in matches: + entries.append((prop, match)) + + for prop, match in entries: + # compute confidence + if prop.confidence_lambda: + computed_confidence = prop.confidence_lambda(match) + if computed_confidence is not None: + prop.confidence = computed_confidence + + entries.sort(key=lambda entry: -entry[0].confidence) + # sort entries, from most confident to less confident if validate: # compute entries start and ends @@ -393,6 +544,9 @@ for entry in invalid_entries: prop, match = entry entries.remove(entry) + prop_duplicate_matches = duplicate_matches.get(prop) + if prop_duplicate_matches: + prop_duplicate_matches.remove(match) invalid_span = _get_span(prop, match) start = invalid_span[0] end = invalid_span[1] @@ -403,6 +557,12 @@ if not entry_end.get(end): del entry_end[end] + for prop, prop_duplicate_matches in duplicate_matches.items(): + # Keeping the last valid match only. 
+ # Needed for the.100.109.hdtv-lol.mp4 + for duplicate_match in prop_duplicate_matches[:-1]: + entries.remove((prop, duplicate_match)) + if multiple: ret = entries else: @@ -410,26 +570,26 @@ entries_dict = {} for entry in entries: for key in prop.keys: - if not key in entries_dict: + if key not in entries_dict: entries_dict[key] = [] entries_dict[key].append(entry) - for entries in entries_dict.values(): + for key_entries in entries_dict.values(): if multiple: - for entry in entries: + for entry in key_entries: ret.append(entry) else: best_ret = {} best_prop, best_match = None, None - if len(entries) == 1: - best_prop, best_match = entries[0] + if len(key_entries) == 1: + best_prop, best_match = key_entries[0] else: - for prop, match in entries: + for prop, match in key_entries: start, end = _get_span(prop, match) if not best_prop or \ - best_prop.confidence < best_prop.confidence or \ - best_prop.confidence == best_prop.confidence and \ + best_prop.confidence < prop.confidence or \ + best_prop.confidence == prop.confidence and \ best_match.span()[1] - best_match.span()[0] < match.span()[1] - match.span()[0]: best_prop, best_match = prop, match @@ -442,18 +602,17 @@ def _sorting(x): _, x_match = x x_start, x_end = x_match.span() - return (x_start - x_end) + return x_start - x_end ret.sort(key=_sorting) return ret - def as_guess(self, found_properties, input=None, filter=None, sep_replacement=None, multiple=False, *args, **kwargs): - if filter is None: - filter = lambda property, *args, **kwargs: True + def as_guess(self, found_properties, input=None, filter_=None, sep_replacement=None, multiple=False, *args, **kwargs): + if filter_ is None: + filter_ = lambda property, *args, **kwargs: True guesses = [] if multiple else None - for property in found_properties: - prop, match = property + for prop, match in found_properties: first_key = None for key in prop.keys: # First property key will be used as base for effective name @@ -478,23 +637,29 @@ k = name guess[k] = 
v else: - guess[name] = value + if name in guess: + if not isinstance(guess[name], list): + guess[name] = [guess[name]] + guess[name].append(value) + else: + guess[name] = value if group_name: guess.metadata(prop).span = match.span(group_name) - if filter(guess): + if filter_(guess): if multiple: guesses.append(guess) else: return guess return guesses - def _effective_prop_value(self, prop, group_name, input=None, span=None, sep_replacement=None): + @staticmethod + def _effective_prop_value(prop, group_name, input=None, span=None, sep_replacement=None): if prop.canonical_form: return prop.canonical_form if input is None: return None value = input - if not span is None: + if span is not None: value = value[span[0]:span[1]] value = input[span[0]:span[1]] if input else None if sep_replacement: @@ -564,7 +729,7 @@ del self._qualities[name] else: property_qualities = self._qualities.get(name) - if not property_qualities is None: + if property_qualities is not None: for property_canonical_form in canonical_forms: if property_canonical_form in property_qualities: del property_qualities[property_canonical_form] @@ -593,7 +758,7 @@ for prop in props: prop_value = guess.get(prop) prop_qualities = self._qualities.get(prop) - if not prop_value is None and not prop_qualities is None: + if prop_value is not None and prop_qualities is not None: rate += prop_qualities.get(prop_value, 0) return rate diff -Nru guessit-0.8/guessit/date.py guessit-0.11.0/guessit/date.py --- guessit-0.8/guessit/date.py 2014-02-24 10:56:25.000000000 +0000 +++ guessit-0.11.0/guessit/date.py 2015-06-06 12:39:15.000000000 +0000 @@ -22,39 +22,21 @@ import datetime import re -import math + +from dateutil import parser _dsep = r'[-/ \.]' -_date_rexps = [re.compile( - # 20010823 - r'[^0-9]' + - r'(?P[0-9]{4})' + - r'(?P[0-9]{2})' + - r'(?P[0-9]{2})' + - r'[^0-9]'), - - # 2001-08-23 - re.compile(r'[^0-9]' + - r'(?P[0-9]{4})' + _dsep + - r'(?P[0-9]{2})' + _dsep + - r'(?P[0-9]{2})' + - r'[^0-9]'), - - # 
23-08-2001 - re.compile(r'[^0-9]' + - r'(?P[0-9]{2})' + _dsep + - r'(?P[0-9]{2})' + _dsep + - r'(?P[0-9]{4})' + - r'[^0-9]'), - - # 23-08-01 - re.compile(r'[^0-9]' + - r'(?P[0-9]{2})' + _dsep + - r'(?P[0-9]{2})' + _dsep + - r'(?P[0-9]{2})' + - r'[^0-9]'), - ] +_dsep_bis = r'[-/ \.x]' + +date_regexps = [ + re.compile('%s(\d{8})%s' % (_dsep, _dsep), re.IGNORECASE), + re.compile('%s(\d{6})%s' % (_dsep, _dsep), re.IGNORECASE), + re.compile('[^\d](\d{2})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep, _dsep), re.IGNORECASE), + re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{2})[^\d]' % (_dsep, _dsep), re.IGNORECASE), + re.compile('[^\d](\d{4})%s(\d{1,2})%s(\d{1,2})[^\d]' % (_dsep_bis, _dsep), re.IGNORECASE), + re.compile('[^\d](\d{1,2})%s(\d{1,2})%s(\d{4})[^\d]' % (_dsep, _dsep_bis), re.IGNORECASE), + re.compile('[^\d](\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4})[^\d]' % (_dsep, _dsep), re.IGNORECASE)] def valid_year(year, today=None): @@ -84,12 +66,12 @@ if match: year = int(match.group(1)) if valid_year(year): - return (year, match.span(1)) + return year, match.span(1) - return (None, None) + return None, None -def search_date(string): +def search_date(string, year_first=None, day_first=True): """Looks for date patterns, and if found return the date and group span. 
Assumes there are sentinels at the beginning and end of the string that @@ -107,40 +89,40 @@ >>> search_date(' no date in here ') (None, None) """ + start, end = None, None + match = None + for date_re in date_regexps: + s = date_re.search(string) + if s and (match is None or s.end() - s.start() > len(match)): + start, end = s.start(), s.end() + if date_re.groups: + match = '-'.join(s.groups()) + else: + match = s.group() + + if match is None: + return None, None today = datetime.date.today() - for drexp in _date_rexps: - match = re.search(drexp, string) - if match: - d = match.groupdict() - year, month, day = int(d['year']), int(d['month']), int(d['day']) - # years specified as 2 digits should be adjusted here - if year < 100: - if year > (today.year % 100) + 5: - year = 1900 + year - else: - year = 2000 + year + # If day_first/year_first is undefined, parse is made using both possible values. + yearfirst_opts = [False, True] + if year_first is not None: + yearfirst_opts = [year_first] + + dayfirst_opts = [True, False] + if day_first is not None: + dayfirst_opts = [day_first] + + kwargs_list = ({'dayfirst': d, 'yearfirst': y} for d in dayfirst_opts for y in yearfirst_opts) + for kwargs in kwargs_list: + try: + date = parser.parse(match, **kwargs) + except (ValueError, TypeError) as e: #see https://bugs.launchpad.net/dateutil/+bug/1247643 date = None - try: - date = datetime.date(year, month, day) - except ValueError: - try: - date = datetime.date(year, day, month) - except ValueError: - pass - - if date is None: - continue - - # check date plausibility - if not valid_year(date.year, today=today): - continue - - # looks like we have a valid date - # note: span is [+1,-1] because we don't want to include the - # non-digit char - start, end = match.span() - return (date, (start + 1, end - 1)) + pass + # check date plausibility + if date and valid_year(date.year, today=today): + return date.date(), (start+1, end-1) #compensate for sentinels return None, None diff -Nru 
guessit-0.8/guessit/fileutils.py guessit-0.11.0/guessit/fileutils.py --- guessit-0.8/guessit/fileutils.py 2014-05-08 20:12:49.000000000 +0000 +++ guessit-0.11.0/guessit/fileutils.py 2015-06-06 12:39:15.000000000 +0000 @@ -20,11 +20,12 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit import s, u import os.path import zipfile import io +from guessit import s, u + def split_path(path): r"""Splits the given path into the list of folders and the filename (or the diff -Nru guessit-0.8/guessit/guess.py guessit-0.11.0/guessit/guess.py --- guessit-0.8/guessit/guess.py 2014-06-21 23:03:47.000000000 +0000 +++ guessit-0.11.0/guessit/guess.py 2015-08-31 17:02:15.000000000 +0000 @@ -20,12 +20,15 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit import UnicodeMixin, s, u, base_text_type -from babelfish import Language, Country import json import datetime import logging +from guessit import UnicodeMixin, s, u, base_text_type +from babelfish import Language, Country +from guessit.textutils import common_words + + log = logging.getLogger(__name__) @@ -62,7 +65,7 @@ :rtype: int :return: confidence value """ - return self._confidence if not self._confidence is None else self.parent.confidence if self.parent else None + return self._confidence if self._confidence is not None else self.parent.confidence if self.parent else None @confidence.setter def confidence(self, confidence): @@ -75,7 +78,15 @@ :rtype: string :return: String used to find this guess value """ - return self._input if not self._input is None else self.parent.input if self.parent else None + return self._input if self._input is not None else self.parent.input if self.parent else None + + @input.setter + def input(self, input): + """The input + + :rtype: string + """ + self._input = input @property def span(self): @@ -84,7 +95,7 @@ :rtype: tuple (int, int) :return: span of input string used to find this guess value """ - 
return self._span if not self._span is None else self.parent.span if self.parent else None + return self._span if self._span is not None else self.parent.span if self.parent else None @span.setter def span(self, span): @@ -102,7 +113,7 @@ :rtype: :class:`_Property` :return: The property """ - return self._prop if not self._prop is None else self.parent.prop if self.parent else None + return self._prop if self._prop is not None else self.parent.prop if self.parent else None @property def raw(self): @@ -142,6 +153,18 @@ for prop in self: self._metadata[prop] = GuessMetadata(parent=self._global_metadata) + def rename(self, old_name, new_name): + if old_name in self._metadata: + metadata = self._metadata[old_name] + del self._metadata[old_name] + self._metadata[new_name] = metadata + if old_name in self: + value = self[old_name] + del self[old_name] + self[new_name] = value + return True + return False + def to_dict(self, advanced=False): """Return the guess as a dict containing only base types, ie: where dates, languages, countries, etc. are converted to strings. 
@@ -176,11 +199,11 @@ FIXME: doc with param""" if advanced: data = self.to_dict(advanced) - return json.dumps(data, indent=4) + return json.dumps(data, indent=4, ensure_ascii=False) else: data = self.to_dict() - parts = json.dumps(data, indent=4).split('\n') + parts = json.dumps(data, indent=4, ensure_ascii=False).split('\n') for i, p in enumerate(parts): if p[:5] != ' "': continue @@ -200,7 +223,7 @@ """ if prop is None: return self._global_metadata - if not prop in self._metadata: + if prop not in self._metadata: self._metadata[prop] = GuessMetadata(parent=self._global_metadata) return self._metadata[prop] @@ -238,7 +261,7 @@ self._metadata[prop] = other._metadata[prop] except KeyError: pass - if not confidence is None: + if confidence is not None: for prop in other: self.set_confidence(prop, confidence) @@ -261,21 +284,21 @@ properties when they are integers.""" v1, c1 = g1 # value, confidence v2, c2 = g2 - if (v1 == v2): - return (v1, 1 - (1 - c1) * (1 - c2)) + if v1 == v2: + return v1, 1 - (1 - c1) * (1 - c2) else: - if c1 > c2: - return (v1, c1 - c2) + if c1 >= c2: + return v1, c1 - c2 / 2 else: - return (v2, c2 - c1) + return v2, c2 - c1 / 2 def choose_string(g1, g2): """Function used by merge_similar_guesses to choose between 2 possible properties when they are strings. - If the 2 strings are similar, or one is contained in the other, the latter is returned - with an increased confidence. + If the 2 strings are similar or have common words longer than 3 letters, + the one with highest confidence is returned with an increased confidence. If the 2 strings are dissimilar, the one with the higher confidence is returned, with a weaker confidence. @@ -285,7 +308,7 @@ prepended to it. 
>>> s(choose_string(('Hello', 0.75), ('World', 0.5))) - ('Hello', 0.25) + ('Hello', 0.5) >>> s(choose_string(('Hello', 0.5), ('hello', 0.5))) ('Hello', 0.75) @@ -311,26 +334,30 @@ combined_prob = 1 - (1 - c1) * (1 - c2) if v1l == v2l: - return (v1, combined_prob) + return v1, combined_prob # check for common patterns elif v1l == 'the ' + v2l: - return (v1, combined_prob) + return v1, combined_prob elif v2l == 'the ' + v1l: - return (v2, combined_prob) + return v2, combined_prob - # if one string is contained in the other, return the shortest one - elif v2l in v1l: - return (v2, combined_prob) - elif v1l in v2l: - return (v1, combined_prob) + # If the 2 strings have common words longer than 3 letters, + # return the one with highest confidence. + commons = common_words(v1l, v2l) + for common_word in commons: + if len(common_word) > 3: + if c1 >= c2: + return v1, combined_prob + else: + return v2, combined_prob # in case of conflict, return the one with highest confidence else: - if c1 > c2: - return (v1, c1 - c2) + if c1 >= c2: + return v1, c1 - c2 / 2 else: - return (v2, c2 - c1) + return v2, c2 - c1 / 2 def _merge_similar_guesses_nocheck(guesses, prop, choose): @@ -447,8 +474,8 @@ # delete very unlikely values for p in list(result.keys()): - if result.confidence(p) < 0.05: - del result[p] + if result.confidence(p) < 0.05: + del result[p] # make sure our appendable properties contain unique values for prop in append: @@ -482,13 +509,13 @@ for string_part in ('title', 'series', 'container', 'format', 'releaseGroup', 'website', 'audioCodec', 'videoCodec', 'screenSize', 'episodeFormat', - 'audioChannels', 'idNumber'): + 'audioChannels', 'idNumber', 'container'): merge_similar_guesses(guesses, string_part, choose_string) # 2- merge the rest, potentially discarding information not properly # merged before result = merge_all(guesses, append=['language', 'subtitleLanguage', 'other', - 'episodeDetails']) + 'episodeDetails', 'unidentified']) return result diff -Nru 
guessit-0.8/guessit/hash_ed2k.py guessit-0.11.0/guessit/hash_ed2k.py --- guessit-0.8/guessit/hash_ed2k.py 2014-02-03 20:46:28.000000000 +0000 +++ guessit-0.11.0/guessit/hash_ed2k.py 2015-06-06 12:39:15.000000000 +0000 @@ -20,9 +20,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit import s, to_hex import hashlib import os.path +from functools import reduce + +from guessit import s, to_hex def hash_file(filename): diff -Nru guessit-0.8/guessit/hash_mpc.py guessit-0.11.0/guessit/hash_mpc.py --- guessit-0.8/guessit/hash_mpc.py 2014-02-07 09:50:27.000000000 +0000 +++ guessit-0.11.0/guessit/hash_mpc.py 2015-01-03 12:02:01.000000000 +0000 @@ -44,14 +44,14 @@ buf = f.read(bytesize) (l_value,) = struct.unpack(longlongformat, buf) hash_value += l_value - hash_value = hash_value & 0xFFFFFFFFFFFFFFFF # to remain as 64bit number + hash_value &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number f.seek(max(0, filesize - 65536), 0) for x in range(int(65536 / bytesize)): buf = f.read(bytesize) (l_value,) = struct.unpack(longlongformat, buf) hash_value += l_value - hash_value = hash_value & 0xFFFFFFFFFFFFFFFF + hash_value &= 0xFFFFFFFFFFFFFFFF f.close() diff -Nru guessit-0.8/guessit/__init__.py guessit-0.11.0/guessit/__init__.py --- guessit-0.8/guessit/__init__.py 2014-05-28 17:39:30.000000000 +0000 +++ guessit-0.11.0/guessit/__init__.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,12 +20,12 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import pkg_resources from .__version__ import __version__ __all__ = ['Guess', 'Language', 'guess_file_info', 'guess_video_info', - 'guess_movie_info', 'guess_episode_info'] + 'guess_movie_info', 'guess_episode_info', + 'default_options'] # Do python3 detection before importing any other module, to be sure that @@ -88,11 +88,15 @@ from guessit.guess import Guess, smart_merge from guessit.language import Language from guessit.matcher import IterativeMatcher -from 
guessit.textutils import clean_string, is_camel, from_camel +from guessit.textutils import clean_default, is_camel, from_camel +from copy import deepcopy import babelfish import os.path import logging -import json +from guessit.options import get_opts +import shlex +# Needed for guessit.plugins.transformers.reload() to be called. +from guessit.plugins import transformers log = logging.getLogger(__name__) @@ -108,7 +112,8 @@ def _guess_filename(filename, options=None, **kwargs): mtree = _build_filename_mtree(filename, options=options, **kwargs) - _add_camel_properties(mtree, options=options) + if options.get('split_camel'): + _add_camel_properties(mtree, options=options) return mtree.matched() @@ -116,7 +121,7 @@ mtree = IterativeMatcher(filename, options=options, **kwargs) second_pass_options = mtree.second_pass_options if second_pass_options: - log.info("Running 2nd pass") + log.debug('Running 2nd pass with options: %s' % second_pass_options) merged_options = dict(options) merged_options.update(second_pass_options) mtree = IterativeMatcher(filename, options=merged_options, **kwargs) @@ -135,15 +140,20 @@ def _guess_camel_string(mtree, string, options=None, skip_title=False, **kwargs): if string and is_camel(string): - log.info('"%s" is camel cased. Try to detect more properties.' % (string,)) + log.debug('"%s" is camel cased. Try to detect more properties.' % (string,)) uncameled_value = from_camel(string) - camel_tree = _build_filename_mtree(uncameled_value, options=options, name_only=True, skip_title=skip_title, **kwargs) + merged_options = dict(options) + if 'type' in mtree.match_tree.info: + current_type = mtree.match_tree.info.get('type') + if current_type and current_type != 'unknown': + merged_options['type'] = current_type + camel_tree = _build_filename_mtree(uncameled_value, options=merged_options, name_only=True, skip_title=skip_title, **kwargs) if len(camel_tree.matched()) > 0: - # Title has changed. 
mtree.matched().update(camel_tree.matched()) return True return False + def guess_video_metadata(filename): """Gets the video metadata properties out of a given file. The file needs to exist on the filesystem to be able to be analyzed. An empty guess is @@ -251,6 +261,8 @@ log.error('Error: %s' % e) return result +default_options = {} + def guess_file_info(filename, info=None, options=None, **kwargs): """info can contain the names of the various plugins, such as 'filename' to @@ -264,6 +276,18 @@ info = info or 'filename' options = options or {} + if isinstance(options, base_text_type): + args = shlex.split(options) + options = vars(get_opts().parse_args(args)) + if default_options: + if isinstance(default_options, base_text_type): + default_args = shlex.split(default_options) + merged_options = vars(get_opts().parse_args(default_args)) + else: + merged_options = deepcopy(default_options) + merged_options.update(options) + options = merged_options + result = [] hashers = [] diff -Nru guessit-0.8/guessit/language.py guessit-0.11.0/guessit/language.py --- guessit-0.8/guessit/language.py 2014-06-21 23:07:44.000000000 +0000 +++ guessit-0.11.0/guessit/language.py 2015-09-04 20:32:59.000000000 +0000 @@ -20,14 +20,17 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit import UnicodeMixin, base_text_type, u +import re +import logging + +from guessit import u from guessit.textutils import find_words + from babelfish import Language, Country import babelfish -import re -import logging from guessit.guess import Guess + __all__ = ['Language', 'UNDETERMINED', 'search_language', 'guess_language'] @@ -71,7 +74,8 @@ babelfish.country_converters['name'].codes | frozenset(self.guessit_exceptions.keys())) - def convert(self, alpha3, country=None, script=None): + @staticmethod + def convert(alpha3, country=None, script=None): return str(babelfish.Language(alpha3, country, script)) def reverse(self, name): @@ -82,7 +86,7 @@ if 
with_country: lang = Language.fromguessit(with_country.group(1).strip()) lang.country = babelfish.Country.fromguessit(with_country.group(2).strip()) - return (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None) + return lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script or None # exceptions come first, as they need to override a potential match # with any of the other guessers @@ -130,7 +134,8 @@ frozenset(babelfish.COUNTRIES.values()) | frozenset(self.guessit_exceptions.keys())) - def convert(self, alpha2): + @staticmethod + def convert(alpha2): if alpha2 == 'GB': return 'UK' return str(Country(alpha2)) @@ -168,17 +173,20 @@ 'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to', 'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan', 'fry', 'cop', 'zen', 'gay', 'fat', 'one', 'cherokee', 'got', 'an', 'as', - 'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', 'bt', - 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt' + 'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', 'bb', + 'bt', 'tv', 'aw', 'by', 'md', 'mp', 'cd', 'lt', 'gt', 'in', 'ad', 'ice', + 'ay', 'at', 'star', 'so', # french words 'bas', 'de', 'le', 'son', 'ne', 'ca', 'ce', 'et', 'que', 'mal', 'est', 'vol', 'or', 'mon', 'se', 'je', 'tu', 'me', - 'ne', 'ma', + 'ne', 'ma', 'va', 'au', + # japanese words, + 'wa', 'ga', 'ao', # spanish words - 'la', 'el', 'del', 'por', 'mar', + 'la', 'el', 'del', 'por', 'mar', 'al', # other 'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii', - 'vi', 'ben', 'da', 'lt', 'ch', + 'vi', 'ben', 'da', 'lt', 'ch', 'sr', 'ps', 'cx', # new from babelfish 'mkv', 'avi', 'dmd', 'the', 'dis', 'cut', 'stv', 'des', 'dia', 'and', 'cab', 'sub', 'mia', 'rim', 'las', 'une', 'par', 'srt', 'ano', 'toy', @@ -188,21 +196,35 @@ 'brazil', # release groups 'bs', # Bosnian + 'kz', # countries - 'gt', 'lt' + 'gt', 'lt', 'im', + # part/pt + 'pt' ]) +LNG_COMMON_WORDS_STRICT = 
frozenset(['brazil']) + subtitle_prefixes = ['sub', 'subs', 'st', 'vost', 'subforced', 'fansub', 'hardsub'] -subtitle_suffixes = ['subforced', 'fansub', 'hardsub'] +subtitle_suffixes = ['subforced', 'fansub', 'hardsub', 'sub', 'subs'] lang_prefixes = ['true'] +all_lang_prefixes_suffixes = subtitle_prefixes + subtitle_suffixes + lang_prefixes + -def find_possible_languages(string): +def find_possible_languages(string, allowed_languages=None): """Find possible languages in the string :return: list of tuple (property, Language, lang_word, word) """ + + common_words = None + if allowed_languages: + common_words = LNG_COMMON_WORDS_STRICT + else: + common_words = LNG_COMMON_WORDS + words = find_words(string) valid_words = [] @@ -220,19 +242,22 @@ for prefix in lang_prefixes: if lang_word.startswith(prefix): lang_word = lang_word[len(prefix):] - if not lang_word in LNG_COMMON_WORDS: + if lang_word not in common_words and word.lower() not in common_words: try: lang = Language.fromguessit(lang_word) + if allowed_languages: + if lang.name.lower() in allowed_languages or lang.alpha2.lower() in allowed_languages or lang.alpha3.lower() in allowed_languages: + valid_words.append((key, lang, lang_word, word)) # Keep language with alpha2 equivalent. Others are probably # uncommon languages. - if lang == 'mul' or hasattr(lang, 'alpha2'): + elif lang == 'mul' or hasattr(lang, 'alpha2'): valid_words.append((key, lang, lang_word, word)) except babelfish.Error: pass return valid_words -def search_language(string, lang_filter=None): +def search_language(string, allowed_languages=None): """Looks for language patterns, and if found return the language object, its group span and an associated confidence. 
@@ -242,26 +267,23 @@ >>> search_language('movie [en].avi')['language'] - >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es']) + >>> search_language('the zen fat cat and the gay mad men got a new fan', allowed_languages = ['en', 'fr', 'es']) """ - if lang_filter: - lang_filter = set(Language.fromguessit(lang) for lang in lang_filter) + if allowed_languages: + allowed_languages = set(Language.fromguessit(lang) for lang in allowed_languages) confidence = 1.0 # for all of them - for prop, language, lang, word in find_possible_languages(string): + for prop, language, lang, word in find_possible_languages(string, allowed_languages): pos = string.find(word) end = pos + len(word) - if lang_filter and language not in lang_filter: - continue - # only allow those languages that have a 2-letter code, those that # don't are too esoteric and probably false matches - #if language.lang not in lng3_to_lng2: - # continue + # if language.lang not in lng3_to_lng2: + # continue # confidence depends on alpha2, alpha3, english name, ... 
if len(lang) == 2: diff -Nru guessit-0.8/guessit/__main__.py guessit-0.11.0/guessit/__main__.py --- guessit-0.8/guessit/__main__.py 2014-06-01 22:42:15.000000000 +0000 +++ guessit-0.11.0/guessit/__main__.py 2015-08-31 16:57:28.000000000 +0000 @@ -25,42 +25,52 @@ import os from guessit import PY2, u, guess_file_info -from guessit.options import option_parser +from guessit.options import get_opts +from guessit.__version__ import __version__ def guess_file(filename, info='filename', options=None, **kwargs): options = options or {} filename = u(filename) - print('For:', filename) + if not options.get('yaml') and not options.get('show_property'): + print('For:', filename) guess = guess_file_info(filename, info, options, **kwargs) - if options.get('yaml'): + + if not options.get('unidentified'): try: - import yaml - for k, v in guess.items(): - if isinstance(v, list) and len(v) == 1: - guess[k] = v[0] - ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False) - i = 0 - for yline in ystr.splitlines(): - if i == 0: - print("? " + yline[:-1]) - elif i == 1: - print(":" + yline[1:]) - else: - print(yline) - i = i + 1 - return - except ImportError: # pragma: no cover - print('PyYAML not found. Using default output.') + del guess['unidentified'] + except KeyError: + pass + + if options.get('show_property'): + print(guess.get(options.get('show_property'), '')) + return + + if options.get('yaml'): + import yaml + for k, v in guess.items(): + if isinstance(v, list) and len(v) == 1: + guess[k] = v[0] + ystr = yaml.safe_dump({filename: dict(guess)}, default_flow_style=False, allow_unicode=True) + i = 0 + for yline in ystr.splitlines(): + if i == 0: + print("? 
" + yline[:-1]) + elif i == 1: + print(":" + yline[1:]) + else: + print(yline) + i += 1 + return print('GuessIt found:', guess.nice_string(options.get('advanced'))) def _supported_properties(): - from guessit.plugins import transformers - all_properties = defaultdict(list) transformers_properties = [] + + from guessit.plugins import transformers for transformer in transformers.all_transformers(): supported_properties = transformer.supported_properties() transformers_properties.append((transformer, supported_properties)) @@ -72,7 +82,7 @@ for property_name in supported_properties: all_properties[property_name] # just make sure it exists - return (all_properties, transformers_properties) + return all_properties, transformers_properties def display_transformers(): @@ -130,8 +140,7 @@ 'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg', 'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi', 'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi', - 'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi' - ] + 'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi'] for f in testeps: print('-' * 80) @@ -143,8 +152,8 @@ 'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi', 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv', 'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv', - 'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten - '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten + 'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', + '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', 'Battle Royale 
(2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi', 'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt', 'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv', @@ -163,26 +172,31 @@ print('-' * 80) guess_file(f, options=options, type='movie') -def submit_bug(filename): - import requests # only import when needed + +def submit_bug(filename, options): + import requests # only import when needed from requests.exceptions import RequestException try: - r = requests.post('http://guessit.io/bugs', {'filename': filename}) + opts = dict((k, v) for k, v in options.__dict__.items() + if v and k != 'submit_bug') + + r = requests.post('http://guessit.io/bugs', {'filename': filename, + 'version': __version__, + 'options': str(opts)}) if r.status_code == 200: print('Successfully submitted file: %s' % r.text) else: - print('Could not submit bug at the moment, please try again later.') + print('Could not submit bug at the moment, please try again later: %s %s' % (r.status_code, r.reason)) except RequestException as e: - print('Could not submit bug at the moment, please try again later.') - + print('Could not submit bug at the moment, please try again later: %s' % e) def main(args=None, setup_logging=True): if setup_logging: from guessit import slogging - slogging.setupLogging() + slogging.setup_logging() if PY2: # pragma: no cover import codecs @@ -199,10 +213,13 @@ # Wrap sys.stdout into a StreamWriter to allow writing unicode. sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout) + # Needed for guessit.plugins.transformers.reload() to be called. 
+ from guessit.plugins import transformers + if args: - options, args = option_parser.parse_args(args) + options = get_opts().parse_args(args) else: # pragma: no cover - options, args = option_parser.parse_args() + options = get_opts().parse_args() if options.verbose: logging.getLogger().setLevel(logging.DEBUG) @@ -217,20 +234,52 @@ if options.demo: run_demo(episodes=True, movies=True, options=vars(options)) help_required = False - elif options.submit_bug: - for filename in args: - help_required = False - submit_bug(filename) - else: - if args: - help_required = False - for filename in args: + + if options.version: + print('+-------------------------------------------------------+') + print('+ GuessIt ' + __version__ + (28-len(__version__)) * ' ' + '+') + print('+-------------------------------------------------------+') + print('| Please report any bug or feature request at |') + print('| https://github.com/wackou/guessit/issues. |') + print('+-------------------------------------------------------+') + help_required = False + + if options.yaml: + try: + import yaml, babelfish + def default_representer(dumper, data): + return dumper.represent_str(str(data)) + yaml.SafeDumper.add_representer(babelfish.Language, default_representer) + yaml.SafeDumper.add_representer(babelfish.Country, default_representer) + except ImportError: # pragma: no cover + print('PyYAML not found. 
Using default output.') + + filenames = [] + if options.filename: + filenames.extend(options.filename) + if options.input_file: + input_file = open(options.input_file, 'r') + try: + filenames.extend([line.strip() for line in input_file.readlines()]) + finally: + input_file.close() + + filenames = filter(lambda f: f, filenames) + + if filenames: + if options.submit_bug: + for filename in filenames: + help_required = False + submit_bug(filename, options) + else: + for filename in filenames: + help_required = False guess_file(filename, info=options.info.split(','), options=vars(options)) if help_required: # pragma: no cover - option_parser.print_help() + get_opts().print_help() if __name__ == '__main__': main() diff -Nru guessit-0.8/guessit/matcher.py guessit-0.11.0/guessit/matcher.py --- guessit-0.8/guessit/matcher.py 2014-06-21 22:55:04.000000000 +0000 +++ guessit-0.11.0/guessit/matcher.py 2015-09-03 20:28:56.000000000 +0000 @@ -23,13 +23,13 @@ unicode_literals import logging +import inspect from guessit import PY3, u from guessit.transfo import TransformerException from guessit.matchtree import MatchTree -from guessit.textutils import normalize_unicode, clean_string +from guessit.textutils import normalize_unicode, clean_default from guessit.guess import Guess -import inspect log = logging.getLogger(__name__) @@ -85,12 +85,25 @@ filename = filename.decode('utf-8') filename = normalize_unicode(filename) - self.match_tree = MatchTree(filename) + if options and options.get('clean_function'): + clean_function = options.get('clean_function') + if not hasattr(clean_function, '__call__'): + module, function = clean_function.rsplit('.') + if not module: + module = 'guessit.textutils' + clean_function = getattr(__import__(module), function) + if not clean_function: + log.error('Can\'t find clean function %s. Default will be used.' 
% options.get('clean_function')) + clean_function = clean_default + else: + clean_function = clean_default + + self.match_tree = MatchTree(filename, clean_function=clean_function) self.options = options self._transfo_calls = [] # sanity check: make sure we don't process a (mostly) empty string - if clean_string(filename) == '': + if clean_function(filename).strip() == '': return from guessit.plugins import transformers @@ -102,17 +115,22 @@ # Process for transformer in transformers.all_transformers(): - self._process(transformer, False) + disabled = options.get('disabled_transformers') + if not disabled or transformer.name not in disabled: + self._process(transformer, False) # Post-process for transformer in transformers.all_transformers(): - self._process(transformer, True) + disabled = options.get('disabled_transformers') + if not disabled or transformer.name not in disabled: + self._process(transformer, True) log.debug('Found match tree:\n%s' % u(mtree)) except TransformerException as e: log.debug('An error has occurred in Transformer %s: %s' % (e.transformer, e)) def _process(self, transformer, post=False): + if not hasattr(transformer, 'should_process') or transformer.should_process(self.match_tree, self.options): if post: transformer.post_process(self.match_tree, self.options) @@ -131,25 +149,50 @@ return second_pass_options - def _validate_options(self, options): + @staticmethod + def _validate_options(options): valid_filetypes = ('subtitle', 'info', 'video', - 'movie', 'moviesubtitle', 'movieinfo', - 'episode', 'episodesubtitle', 'episodeinfo') + 'movie', 'moviesubtitle', 'movieinfo', + 'episode', 'episodesubtitle', 'episodeinfo') - type = options.get('type') - if type and type not in valid_filetypes: + type_ = options.get('type') + if type_ and type_ not in valid_filetypes: raise ValueError("filetype needs to be one of %s" % (valid_filetypes,)) def matched(self): return self.match_tree.matched() +def build_guess(node, name, value=None, confidence=1.0): + 
guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence) + guess.metadata().input = node.value if value is None else value + if value is None: + left_offset = 0 + right_offset = 0 + + clean_value = node.clean_value + + if clean_value: + for i in range(0, len(node.value)): + if clean_value[0] == node.value[i]: + break + left_offset += 1 + + for i in reversed(range(0, len(node.value))): + if clean_value[-1] == node.value[i]: + break + right_offset += 1 + + guess.metadata().span = (node.span[0] - node.offset + left_offset, node.span[1] - node.offset - right_offset) + return guess + + def found_property(node, name, value=None, confidence=1.0, update_guess=True, logger=None): # automatically retrieve the log object from the caller frame if not logger: caller_frame = inspect.stack()[1][0] logger = caller_frame.f_locals['self'].log - guess = Guess({name: node.clean_value if value is None else value}, confidence=confidence) + guess = build_guess(node, name, value, confidence) return found_guess(node, guess, update_guess=update_guess, logger=logger) @@ -177,76 +220,95 @@ self.guess_func = guess_func self.confidence = confidence self.logger = logger or log - self.options = options + self.options = options or {} def process_nodes(self, nodes): for node in nodes: self.process_node(node) - def process_node(self, node, iterative=True, partial_span=None): - value = None + def process_node(self, node, iterative=True, partial_span=None, skip_nodes=True): + if skip_nodes and not isinstance(skip_nodes, list): + skip_nodes = self.options.get('skip_nodes') + elif not isinstance(skip_nodes, list): + skip_nodes = [] + if partial_span: value = node.value[partial_span[0]:partial_span[1]] else: value = node.value string = ' %s ' % value # add sentinels - if not self.options: - matcher_result = self.guess_func(string, node) + matcher_result = self.guess_func(string, node, self.options) + if not matcher_result: + return + + if not isinstance(matcher_result, 
Guess): + result, span = matcher_result else: - matcher_result = self.guess_func(string, node, self.options) + result, span = matcher_result, matcher_result.metadata().span + #log.error('span2 %s' % (span,)) - if matcher_result: - if not isinstance(matcher_result, Guess): - result, span = matcher_result - else: - result, span = matcher_result, matcher_result.metadata().span + if not result: + return + + if span[1] == len(string): + # somehow, the sentinel got included in the span. Remove it + span = (span[0], span[1] - 1) + + # readjust span to compensate for sentinels + span = (span[0] - 1, span[1] - 1) + + # readjust span to compensate for partial_span + if partial_span: + span = (span[0] + partial_span[0], span[1] + partial_span[0]) + + if skip_nodes: + skip_nodes = [skip_node for skip_node in self.options.get('skip_nodes') if skip_node.parent.span[0] == node.span[0] or skip_node.parent.span[1] == node.span[1]] + # if we guessed a node that we need to skip, recurse down the tree and ignore that node + indices = set() + skip_nodes_spans = [] + next_skip_nodes = [] + for skip_node in skip_nodes: + skip_for_next = False + skip_nodes_spans.append(skip_node.span) + if node.offset <= skip_node.span[0] <= node.span[1]: + indices.add(skip_node.span[0] - node.offset) + skip_for_next = True + if node.offset <= skip_node.span[1] <= node.span[1]: + indices.add(skip_node.span[1] - node.offset) + skip_for_next = True + if not skip_for_next: + next_skip_nodes.append(skip_node) + if indices: + partition_spans = [s for s in node.get_partition_spans(indices) if s not in skip_nodes_spans] + for partition_span in partition_spans: + relative_span = (partition_span[0] - node.offset, partition_span[1] - node.offset) + self.process_node(node, partial_span=relative_span, skip_nodes=next_skip_nodes) + return + + # restore sentinels compensation + if isinstance(result, Guess): + guess = result + else: + no_sentinel_string =string[1:-1] + guess = Guess(result, confidence=self.confidence, 
input=no_sentinel_string, span=span) + + if not iterative: + found_guess(node, guess, logger=self.logger) + else: + absolute_span = (span[0] + node.offset, span[1] + node.offset) + node.partition(span) + found_child = None + + for child in node.children: + if child.span == absolute_span: + # if we have a match on one of our children, mark it as such... + found_guess(child, guess, logger=self.logger) + found_child = child + break + + # ...and only then recurse on the other children + for child in node.children: + if child is not found_child: + self.process_node(child) - if result: - # readjust span to compensate for sentinels - span = (span[0] - 1, span[1] - 1) - - # readjust span to compensate for partial_span - if partial_span: - span = (span[0] + partial_span[0], span[1] + partial_span[0]) - - partition_spans = None - if self.options and 'skip_nodes' in self.options: - skip_nodes = self.options.get('skip_nodes') - for skip_node in skip_nodes: - if skip_node.parent.node_idx == node.node_idx[:len(skip_node.parent.node_idx)] and\ - skip_node.span == span or\ - skip_node.span == (span[0] + skip_node.offset, span[1] + skip_node.offset): - partition_spans = node.get_partition_spans(skip_node.span) - for to_remove_span in partition_spans: - if to_remove_span[0] == skip_node.span[0] and to_remove_span[1] in [skip_node.span[1], skip_node.span[1] + 1]: - partition_spans.remove(to_remove_span) - break - #break - - if not partition_spans: - # restore sentinels compensation - - guess = None - if isinstance(result, Guess): - guess = result - else: - guess = Guess(result, confidence=self.confidence, input=string, span=span) - - if not iterative: - node.guess.update(guess) - else: - absolute_span = (span[0] + node.offset, span[1] + node.offset) - node.partition(span) - found_child = None - for child in node.children: - if child.span == absolute_span: - found_guess(child, guess, self.logger) - found_child = child - break - for child in node.children: - if not child is 
found_child: - self.process_node(child) - else: - for partition_span in partition_spans: - self.process_node(node, partial_span=partition_span) diff -Nru guessit-0.8/guessit/matchtree.py guessit-0.11.0/guessit/matchtree.py --- guessit-0.8/guessit/matchtree.py 2014-05-30 22:08:44.000000000 +0000 +++ guessit-0.11.0/guessit/matchtree.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,15 +20,14 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import copy +import logging + import guessit # @UnusedImport needed for doctests from guessit import UnicodeMixin, base_text_type -from guessit.textutils import clean_string, str_fill +from guessit.textutils import clean_default, str_fill from guessit.patterns import group_delimiters -from guessit.guess import (merge_similar_guesses, smart_merge, - choose_int, choose_string, Guess) -from itertools import takewhile -import copy -import logging +from guessit.guess import smart_merge, Guess log = logging.getLogger(__name__) @@ -74,12 +73,15 @@ (as shown by the ``f``'s on the last-but-one line). 
""" - def __init__(self, string='', span=None, parent=None): + def __init__(self, string='', span=None, parent=None, clean_function=None, category=None): self.string = string self.span = span or (0, len(string)) self.parent = parent self.children = [] self.guess = Guess() + self._clean_value = None + self._clean_function = clean_function or clean_default + self.category = category @property def value(self): @@ -91,7 +93,12 @@ """Return a cleaned value of the matched substring, with better presentation formatting (punctuation marks removed, duplicate spaces, ...)""" - return clean_string(self.value) + if self._clean_value is None: + self._clean_value = self.clean_string(self.value) + return self._clean_value + + def clean_string(self, string): + return self._clean_function(string) @property def offset(self): @@ -109,6 +116,32 @@ return result @property + def raw(self): + result = {} + for guess in self.guesses: + for k in guess.keys(): + result[k] = guess.raw(k) + return result + + @property + def guesses(self): + """ + List all guesses, including children ones. + + :return: list of guesses objects + """ + + result = [] + + if self.guess: + result.append(self.guess) + + for c in self.children: + result.extend(c.guesses) + + return result + + @property def root(self): """Return the root node of the tree.""" if not self.parent: @@ -117,6 +150,23 @@ return self.parent.root @property + def ancestors(self): + """ + Retrieve all ancestors, from this node to root node. 
+ + :return: a list of MatchTree objects + """ + ret = [self] + + if not self.parent: + return ret + + parent_ancestors = self.parent.ancestors + ret.extend(parent_ancestors) + + return ret + + @property def depth(self): """Return the depth of this node.""" if self.is_leaf(): @@ -128,17 +178,30 @@ """Return whether this node is a leaf or not.""" return self.children == [] - def add_child(self, span): - """Add a new child node to this node with the given span.""" - child = MatchTree(self.string, span=span, parent=self) + def add_child(self, span, category=None): + """Add a new child node to this node with the given span. + + :param span: span of the new MatchTree + :param category: category of the new MatchTree + :return: A new MatchTree instance having self as a parent + """ + child = MatchTree(self.string, span=span, parent=self, clean_function=self._clean_function, category=category) self.children.append(child) return child def get_partition_spans(self, indices): """Return the list of absolute spans for the regions of the original string defined by splitting this node at the given indices (relative - to this node)""" + to this node) + + :param indices: indices of the partition spans + :return: a list of tuple of the spans + """ indices = sorted(indices) + if indices[-1] > len(self.value): + log.error('Filename: {}'.format(self.string)) + log.error('Invalid call to get_partitions_spans, indices are too high: {}, len({}) == {:d}' + .format(indices, self.value, len(self.value))) if indices[0] != 0: indices.insert(0, 0) if indices[-1] != len(self.value): @@ -147,23 +210,33 @@ spans = [] for start, end in zip(indices[:-1], indices[1:]): spans.append((self.offset + start, - self.offset + end)) + self.offset + end)) + return spans - def partition(self, indices): + def partition(self, indices, category=None): """Partition this node by splitting it at the given indices, - relative to this node.""" + relative to this node. 
+ + :param indices: indices of the partition spans + :param category: category of the new MatchTree + :return: a list of created MatchTree instances + """ + created = [] for partition_span in self.get_partition_spans(indices): - self.add_child(span=partition_span) + created.append(self.add_child(span=partition_span, category=category)) + return created - def split_on_components(self, components): + def split_on_components(self, components, category=None): offset = 0 + created = [] for c in components: start = self.value.find(c, offset) end = start + len(c) - self.add_child(span=(self.offset + start, - self.offset + end)) + created.append(self.add_child(span=(self.offset + start, + self.offset + end), category=category)) offset = end + return created def nodes_at_depth(self, depth): """Return all the nodes at a given depth in the tree""" @@ -180,7 +253,13 @@ If this node is the root of the tree, then return ().""" if self.parent is None: return () - return self.parent.node_idx + (self.parent.children.index(self),) + return self.parent.node_idx + (self.node_last_idx,) + + @property + def node_last_idx(self): + if self.parent is None: + return None + return self.parent.children.index(self) def node_at(self, idx): """Return the node at the given index in the subtree rooted at @@ -194,7 +273,7 @@ raise ValueError('Non-existent node index: %s' % (idx,)) def nodes(self): - """Return all the nodes and subnodes in this tree.""" + """Return a generator of all nodes and subnodes in this tree.""" yield self for child in self.children: for node in child.nodes(): @@ -206,7 +285,6 @@ yield self else: for child in self.children: - # pylint: disable=W0212 for leaf in child.leaves(): yield leaf @@ -242,7 +320,7 @@ def _other_leaf(self, leaf, offset): leaves = list(self.leaves()) index = leaves.index(leaf) + offset - if index > 0 and index < len(leaves): + if 0 < index < len(leaves): return leaves[index] return None @@ -250,7 +328,7 @@ """Return previous leaves for this node""" 
leaves = list(self.leaves()) index = leaves.index(leaf) - if index > 0 and index < len(leaves): + if 0 < index < len(leaves): previous_leaves = leaves[:index] previous_leaves.reverse() return previous_leaves @@ -260,7 +338,7 @@ """Return next leaves for this node""" leaves = list(self.leaves()) index = leaves.index(leaf) - if index > 0 and index < len(leaves): + if 0 < index < len(leaves): return leaves[index + 1:len(leaves)] return [] @@ -346,7 +424,7 @@ """ def unidentified_leaves(self, - valid=lambda leaf: len(leaf.clean_value) >= 2): + valid=lambda leaf: len(leaf.clean_value) > 0): """Return a generator of leaves that are not empty.""" for leaf in self.leaves(): if not leaf.guess and valid(leaf): @@ -405,4 +483,9 @@ log.debug('Final result: ' + result.nice_string()) self._matched_result = result + for leaf in self.unidentified_leaves(): + if 'unidentified' not in self._matched_result: + self._matched_result['unidentified'] = [] + self._matched_result['unidentified'].append(leaf.clean_value) + return self._matched_result diff -Nru guessit-0.8/guessit/options.py guessit-0.11.0/guessit/options.py --- guessit-0.8/guessit/options.py 2014-06-01 22:42:28.000000000 +0000 +++ guessit-0.11.0/guessit/options.py 2015-01-03 12:02:01.000000000 +0000 @@ -1,27 +1,69 @@ -from optparse import OptionParser - -option_parser = OptionParser(usage='usage: %prog [options] file1 [file2...]') -option_parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='Display debug output') -option_parser.add_option('-p', '--properties', dest='properties', action='store_true', default=False, - help='Display properties that can be guessed.') -option_parser.add_option('-l', '--values', dest='values', action='store_true', default=False, - help='Display property values that can be guessed.') -option_parser.add_option('-s', '--transformers', dest='transformers', action='store_true', default=False, - help='Display transformers that can be used.') 
-option_parser.add_option('-i', '--info', dest='info', default='filename', - help='The desired information type: filename, video, hash_mpc or a hash from python\'s ' - 'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of ' - 'them, comma-separated') -option_parser.add_option('-n', '--name-only', dest='name_only', action='store_true', default=False, - help='Parse files as name only. Disable folder parsing, extension parsing, and file content analysis.') -option_parser.add_option('-t', '--type', dest='type', default=None, - help='The suggested file type: movie, episode. If undefined, type will be guessed.') -option_parser.add_option('-a', '--advanced', dest='advanced', action='store_true', default=False, - help='Display advanced information for filename guesses, as json output') -option_parser.add_option('-y', '--yaml', dest='yaml', action='store_true', default=False, - help='Display information for filename guesses as yaml output (like unit-test)') -option_parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False, - help='Run a few builtin tests instead of analyzing a file') -option_parser.add_option('-b', '--bug', action='store_true', dest='submit_bug', default=False, - help='Submit a wrong detection to the guessit.io service') +from argparse import ArgumentParser + + +def build_opts(transformers=None): + opts = ArgumentParser() + opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*') + + naming_opts = opts.add_argument_group("Naming") + naming_opts.add_argument('-t', '--type', dest='type', default=None, + help='The suggested file type: movie, episode. If undefined, type will be guessed.') + naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False, + help='Parse files as name only. 
Disable folder parsing, extension parsing, and file content analysis.') + naming_opts.add_argument('-c', '--split-camel', dest='split_camel', action='store_true', default=False, + help='Split camel case part of filename.') + + naming_opts.add_argument('-X', '--disabled-transformer', action='append', dest='disabled_transformers', + help='Transformer to disable (can be used multiple time)') + + output_opts = opts.add_argument_group("Output") + output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='Display debug output') + output_opts.add_argument('-P', '--show-property', dest='show_property', default=None, + help='Display the value of a single property (title, series, videoCodec, year, type ...)'), + output_opts.add_argument('-u', '--unidentified', dest='unidentified', action='store_true', default=False, + help='Display the unidentified parts.'), + output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=False, + help='Display advanced information for filename guesses, as json output') + output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=False, + help='Display information for filename guesses as yaml output (like unit-test)') + output_opts.add_argument('-f', '--input-file', dest='input_file', default=False, + help='Read filenames from an input file.') + output_opts.add_argument('-d', '--demo', action='store_true', dest='demo', default=False, + help='Run a few builtin tests instead of analyzing a file') + + information_opts = opts.add_argument_group("Information") + information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False, + help='Display properties that can be guessed.') + information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=False, + help='Display property values that can be guessed.') + information_opts.add_argument('-s', '--transformers', dest='transformers', 
action='store_true', default=False, + help='Display transformers that can be used.') + information_opts.add_argument('--version', dest='version', action='store_true', default=False, + help='Display the guessit version.') + + webservice_opts = opts.add_argument_group("guessit.io") + webservice_opts.add_argument('-b', '--bug', action='store_true', dest='submit_bug', default=False, + help='Submit a wrong detection to the guessit.io service') + + other_opts = opts.add_argument_group("Other features") + other_opts.add_argument('-i', '--info', dest='info', default='filename', + help='The desired information type: filename, video, hash_mpc or a hash from python\'s ' + 'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of ' + 'them, comma-separated') + + if transformers: + for transformer in transformers: + transformer.register_arguments(opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts) + + return opts, naming_opts, output_opts, information_opts, webservice_opts, other_opts +_opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = None, None, None, None, None, None + + +def reload(transformers=None): + global _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts + _opts, _naming_opts, _output_opts, _information_opts, _webservice_opts, _other_opts = build_opts(transformers) + + +def get_opts(): + return _opts diff -Nru guessit-0.8/guessit/patterns/extension.py guessit-0.11.0/guessit/patterns/extension.py --- guessit-0.8/guessit/patterns/extension.py 2014-02-05 16:20:50.000000000 +0000 +++ guessit-0.11.0/guessit/patterns/extension.py 2015-07-21 14:06:59.000000000 +0000 @@ -22,11 +22,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals -subtitle_exts = ['srt', 'idx', 'sub', 'ssa'] +subtitle_exts = ['srt', 'idx', 'sub', 'ssa', 'ass'] info_exts = ['nfo'] video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka', 
'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv', - 'iso'] + 'iso', 'vob'] diff -Nru guessit-0.8/guessit/patterns/__init__.py guessit-0.11.0/guessit/patterns/__init__.py --- guessit-0.8/guessit/patterns/__init__.py 2014-06-22 12:05:20.000000000 +0000 +++ guessit-0.11.0/guessit/patterns/__init__.py 2015-06-06 12:39:15.000000000 +0000 @@ -23,18 +23,16 @@ import re -from guessit import base_text_type - group_delimiters = ['()', '[]', '{}'] # separator character regexp -sep = r'[][,)(}:{+ /\._-]' # regexp art, hehe :D +sep = r'[][,)(}:{+ /~/\._-]' # regexp art, hehe :D _dash = '-' _psep = '[\W_]?' -def build_or_pattern(patterns): +def build_or_pattern(patterns, escape=False): """Build a or pattern string from a list of possible patterns """ or_pattern = [] @@ -43,7 +41,7 @@ or_pattern.append('(?:') else: or_pattern.append('|') - or_pattern.append('(?:%s)' % pattern) + or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern) or_pattern.append(')') return ''.join(or_pattern) diff -Nru guessit-0.8/guessit/patterns/list.py guessit-0.11.0/guessit/patterns/list.py --- guessit-0.8/guessit/patterns/list.py 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/guessit/patterns/list.py 2015-08-31 16:57:28.000000000 +0000 @@ -0,0 +1,80 @@ +import re +from guessit.patterns import sep, build_or_pattern +from guessit.patterns.numeral import parse_numeral + +range_separators = ['-', 'to', 'a'] +discrete_separators = ['&', 'and', 'et'] +excluded_separators = ['.'] # Dot cannot serve as a discrete_separator + +discrete_sep = sep +for range_separator in range_separators: + discrete_sep = discrete_sep.replace(range_separator, '') +for excluded_separator in excluded_separators: + discrete_sep = discrete_sep.replace(excluded_separator, '') +discrete_separators.append(discrete_sep) +all_separators = list(range_separators) +all_separators.extend(discrete_separators) + +range_separators_re = 
re.compile(build_or_pattern(range_separators), re.IGNORECASE) +discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE) +all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE) + + +def list_parser(value, property_list_name, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False): + discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value)) + discrete_elements = [x.strip() for x in discrete_elements] + + proper_discrete_elements = [] + i = 0 + while i < len(discrete_elements): + if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i+1]): + proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i+1] + discrete_elements[i+2]) + i += 3 + else: + match = range_separators_re.search(discrete_elements[i]) + if match and match.start() == 0: + proper_discrete_elements[i - 1] += discrete_elements[i] + elif match and match.end() == len(discrete_elements[i]): + proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1]) + else: + proper_discrete_elements.append(discrete_elements[i]) + i += 1 + + discrete_elements = proper_discrete_elements + + ret = [] + + for discrete_element in discrete_elements: + range_values = filter(lambda x: x != '', range_separators_re.split(discrete_element)) + range_values = [x.strip() for x in range_values] + if len(range_values) > 1: + for x in range(0, len(range_values) - 1): + start_range_ep = parse_numeral(range_values[x]) + end_range_ep = parse_numeral(range_values[x+1]) + for range_ep in range(start_range_ep, end_range_ep + 1): + if range_ep not in ret: + ret.append(range_ep) + else: + discrete_value = parse_numeral(discrete_element) + if discrete_value not in ret: + ret.append(discrete_value) + + if len(ret) > 1: + if not allow_discrete: + valid_ret = list() + # replace discrete elements by ranges + valid_ret.append(ret[0]) + 
for i in range(0, len(ret) - 1): + previous = valid_ret[len(valid_ret) - 1] + if ret[i+1] < previous: + pass + else: + valid_ret.append(ret[i+1]) + ret = valid_ret + if fill_gaps: + ret = list(range(min(ret), max(ret) + 1)) + if len(ret) > 1: + return {None: ret[0], property_list_name: ret} + if len(ret) > 0: + return ret[0] + return None \ No newline at end of file diff -Nru guessit-0.8/guessit/patterns/numeral.py guessit-0.11.0/guessit/patterns/numeral.py --- guessit-0.8/guessit/patterns/numeral.py 2014-05-08 20:12:50.000000000 +0000 +++ guessit-0.11.0/guessit/patterns/numeral.py 2015-01-03 12:02:01.000000000 +0000 @@ -43,16 +43,16 @@ def __build_word_numeral(*args, **kwargs): - re = None + re_ = None for word_list in args: for word in word_list: - if not re: - re = '(?:(?=\w+)' + if not re_: + re_ = '(?:(?=\w+)' else: - re += '|' - re += word - re += ')' - return re + re_ += '|' + re_ += word + re_ += ')' + return re_ word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list) @@ -84,10 +84,10 @@ result = 0 index = 0 - for numeral, integer in __romanNumeralMap: - while value[index:index + len(numeral)] == numeral: + for num, integer in __romanNumeralMap: + while value[index:index + len(num)] == num: result += integer - index += len(numeral) + index += len(num) return result @@ -95,7 +95,7 @@ """Convert Word numeral to integer""" for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]: try: - return word_list.index(value) + return word_list.index(value.lower()) except ValueError: pass raise ValueError @@ -130,7 +130,7 @@ if clean: for word in value.split(): try: - return __parse_roman(word) + return __parse_roman(word.upper()) except ValueError: pass return __parse_roman(value) diff -Nru guessit-0.8/guessit/plugins/transformers.py guessit-0.11.0/guessit/plugins/transformers.py --- guessit-0.8/guessit/plugins/transformers.py 2014-05-30 20:38:31.000000000 +0000 
+++ guessit-0.11.0/guessit/plugins/transformers.py 2015-06-06 12:39:15.000000000 +0000 @@ -19,12 +19,13 @@ # from __future__ import absolute_import, division, print_function, unicode_literals +from logging import getLogger -from stevedore import ExtensionManager from pkg_resources import EntryPoint +from guessit.options import reload as reload_options +from stevedore import ExtensionManager from stevedore.extension import Extension -from logging import getLogger log = getLogger(__name__) @@ -53,23 +54,27 @@ def post_process(self, mtree, options=None): pass + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + pass + def rate_quality(self, guess, *props): return 0 class CustomTransformerExtensionManager(ExtensionManager): def __init__(self, namespace='guessit.transformer', invoke_on_load=True, - invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None, + invoke_args=(), invoke_kwds={}, propagate_map_exceptions=True, on_load_failure_callback=None, verify_requirements=False): super(CustomTransformerExtensionManager, self).__init__(namespace=namespace, - invoke_on_load=invoke_on_load, - invoke_args=invoke_args, - invoke_kwds=invoke_kwds, - propagate_map_exceptions=propagate_map_exceptions, - on_load_failure_callback=on_load_failure_callback, - verify_requirements=verify_requirements) + invoke_on_load=invoke_on_load, + invoke_args=invoke_args, + invoke_kwds=invoke_kwds, + propagate_map_exceptions=propagate_map_exceptions, + on_load_failure_callback=on_load_failure_callback, + verify_requirements=verify_requirements) - def order_extensions(self, extensions): + @staticmethod + def order_extensions(extensions): """Order the loaded transformers It should follow those rules @@ -81,11 +86,18 @@ extensions.sort(key=lambda ext: -ext.obj.priority) return extensions - def _load_one_plugin(self, ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements): + @staticmethod + def 
_load_one_plugin(ep, invoke_on_load, invoke_args, invoke_kwds, verify_requirements=True): if not ep.dist: - plugin = ep.load(require=False) + # `require` argument of ep.load() is deprecated in newer versions of setuptools + if hasattr(ep, 'resolve'): + plugin = ep.resolve() + elif hasattr(ep, '_load'): + plugin = ep._load() + else: + plugin = ep.load(require=False) else: - plugin = ep.load(require=verify_requirements) + plugin = ep.load() if invoke_on_load: obj = plugin(*invoke_args, **invoke_kwds) else: @@ -98,7 +110,8 @@ def objects(self): return self.map(self._get_obj) - def _get_obj(self, ext): + @staticmethod + def _get_obj(ext): return ext.obj def object(self, name): @@ -107,8 +120,11 @@ except KeyError: return None - def register_module(self, name, module_name): - ep = EntryPoint(name, module_name) + def register_module(self, name=None, module_name=None, attrs=(), entry_point=None): + if entry_point: + ep = EntryPoint.parse(entry_point) + else: + ep = EntryPoint(name, module_name, attrs) loaded = self._load_one_plugin(ep, invoke_on_load=True, invoke_args=(), invoke_kwds={}) if loaded: self.extensions.append(loaded) @@ -137,7 +153,9 @@ 'split_on_dash = guessit.transfo.split_on_dash:SplitOnDash', 'guess_episode_info_from_position = guessit.transfo.guess_episode_info_from_position:GuessEpisodeInfoFromPosition', 'guess_movie_title_from_position = guessit.transfo.guess_movie_title_from_position:GuessMovieTitleFromPosition', - 'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails'] + 'guess_episode_details = guessit.transfo.guess_episode_details:GuessEpisodeDetails', + 'expected_series = guessit.transfo.expected_series:ExpectedSeries', + 'expected_title = guessit.transfo.expected_title:ExpectedTitle',] def _find_entry_points(self, namespace): entry_points = {} @@ -165,8 +183,25 @@ return _extensions.object(name) -def add_transformer(name, module_name): - _extensions.register_module(name, module_name) +def add_transformer(name, 
module_name, class_name): + """ + Add a transformer + + :param name: the name of the transformer. ie: 'guess_regexp_id' + :param name: the module name. ie: 'flexget.utils.parsers.transformers.guess_regexp_id' + :param class_name: the class name. ie: 'GuessRegexpId' + """ + + _extensions.register_module(name, module_name, (class_name,)) + + +def add_transformer(entry_point): + """ + Add a transformer + + :param entry_point: entry point spec format. ie: 'guess_regexp_id = flexget.utils.parsers.transformers.guess_regexp_id:GuessRegexpId' + """ + _extensions.register_module(entry_point = entry_point) def reload(custom=False): @@ -182,5 +217,6 @@ _extensions = CustomTransformerExtensionManager() else: _extensions = DefaultTransformerExtensionManager() + reload_options(all_transformers()) reload() diff -Nru guessit-0.8/guessit/slogging.py guessit-0.11.0/guessit/slogging.py --- guessit-0.8/guessit/slogging.py 2014-02-24 10:56:25.000000000 +0000 +++ guessit-0.11.0/guessit/slogging.py 2015-08-31 16:57:28.000000000 +0000 @@ -19,11 +19,14 @@ # from __future__ import absolute_import, division, print_function, unicode_literals +from functools import wraps import logging import sys import os +log = logging.getLogger(__name__) + GREEN_FONT = "\x1B[0;32m" YELLOW_FONT = "\x1B[0;33m" BLUE_FONT = "\x1B[0;34m" @@ -31,7 +34,7 @@ RESET_FONT = "\x1B[0m" -def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover +def setup_logging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): # pragma: no cover """Set up a nice colored logger as the main application logger.""" class SimpleFormatter(logging.Formatter): @@ -87,3 +90,27 @@ ch.setFormatter(SimpleFormatter(with_time, with_thread)) logging.getLogger().addHandler(ch) + + +def trace_func_call(f): + @wraps(f) + def wrapper(*args, **kwargs): + is_method = (f.__name__ != f.__qualname__) # method is still not bound, we need to get around it + if 
is_method: + no_self_args = args[1:] + else: + no_self_args = args + + args_str = ', '.join(repr(arg) for arg in no_self_args) + kwargs_str = ', '.join('{}={}'.format(k, v) for k, v in kwargs.items()) + if not args_str: + args_str = kwargs_str + elif not kwargs_str: + args_str = args_str + else: + args_str = '{}, {}'.format(args_str, kwargs_str) + + log.debug('Calling {}({})'.format(f.__name__, args_str)) + return f(*args, **kwargs) + + return wrapper diff -Nru guessit-0.8/guessit/test/autodetect.yaml guessit-0.11.0/guessit/test/autodetect.yaml --- guessit-0.8/guessit/test/autodetect.yaml 2014-05-30 19:54:11.000000000 +0000 +++ guessit-0.11.0/guessit/test/autodetect.yaml 2015-08-31 16:57:28.000000000 +0000 @@ -171,6 +171,18 @@ releaseGroup: PublicHD audioChannels: "5.1" +? White.House.Down.2013.1080p.BluRay.DTSHD.MA.5.1.x264-PublicHD.mkv +: type: movie + title: White House Down + year: 2013 + screenSize: 1080p + format: BluRay + audioCodec: DTS + audioProfile: HDMA + videoCodec: h264 + releaseGroup: PublicHD + audioChannels: "5.1" + ? Hostages.S01E01.Pilot.for.Air.720p.WEB-DL.DD5.1.H.264-NTb.nfo : type: episodeinfo series: Hostages @@ -215,6 +227,7 @@ title: La petite bande videoCodec: h264 year: 1983 + other: PAL ? Retour de Flammes (Gregor Schnitzler 2003) FULL DVD.iso : type: movie @@ -306,3 +319,235 @@ ? star.trek.9.mkv : type: movie title: star trek 9 + +? FlexGet.S01E02.TheName.HDTV.xvid +: options: -n + episodeNumber: 2 + format: HDTV + season: 1 + series: FlexGet + title: TheName + type: episode + videoCodec: XviD + +? FlexGet.S01E02.TheName.HDTV.xvid +: options: -n + episodeNumber: 2 + format: HDTV + season: 1 + series: FlexGet + title: TheName + type: episode + videoCodec: XviD + +? some.series.S03E14.Title.Here.720p +: options: -n + episodeNumber: 14 + screenSize: 720p + season: 3 + series: some series + title: Title Here + type: episode + +? 
'[the.group] Some.Series.S03E15.Title.Two.720p' +: options: -n + episodeNumber: 15 + releaseGroup: the.group + screenSize: 720p + season: 3 + series: Some Series + title: Title Two + type: episode + +? 'HD 720p: Some series.S03E16.Title.Three' +: options: -n + episodeNumber: 16 + other: HD + screenSize: 720p + season: 3 + series: Some series + title: Title Three + type: episode + +? Something.Season.2.1of4.Ep.Title.HDTV.torrent +: episodeCount: 4 + episodeNumber: 1 + format: HDTV + season: 2 + series: Something + title: Title + type: episode + extension: torrent + +? Show-A (US) - Episode Title S02E09 hdtv +: options: -n + country: US + episodeNumber: 9 + format: HDTV + season: 2 + series: Show-A (US) + type: episode + +? Jack's.Show.S03E01.blah.1080p +: options: -n + episodeNumber: 1 + screenSize: 1080p + season: 3 + series: Jack's Show + title: blah + type: episode + +? FlexGet.epic +: options: -n + title: FlexGet epic + type: movie + +? FlexGet.Apt.1 +: options: -n + title: FlexGet Apt 1 + type: movie + +? FlexGet.aptitude +: options: -n + title: FlexGet aptitude + type: movie + +? FlexGet.Step1 +: options: -n + title: FlexGet Step1 + type: movie + +? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720 * 432].avi +: format: DVD + screenSize: 720x432 + title: El Bosque Animado + videoCodec: XviD + year: 1987 + type: movie + +? Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi +: format: DVD + screenSize: 720x432 + title: El Bosque Animado + videoCodec: XviD + year: 1987 + type: movie + +? 2009.shoot.fruit.chan.multi.dvd9.pal +: options: -n + format: DVD + language: mul + other: PAL + title: shoot fruit chan + type: movie + year: 2009 + +? 2009.shoot.fruit.chan.multi.dvd5.pal +: options: -n + format: DVD + language: mul + other: PAL + title: shoot fruit chan + type: movie + year: 2009 + +? 
The.Flash.2014.S01E01.PREAIR.WEBRip.XviD-EVO.avi +: episodeNumber: 1 + format: WEBRip + other: Preair + releaseGroup: EVO + season: 1 + series: The Flash + type: episode + videoCodec: XviD + year: 2014 + +? Ice.Lake.Rebels.S01E06.Ice.Lake.Games.720p.HDTV.x264-DHD +: options: -n + episodeNumber: 6 + format: HDTV + releaseGroup: DHD + screenSize: 720p + season: 1 + series: Ice Lake Rebels + title: Ice Lake Games + type: episode + videoCodec: h264 + +? The League - S06E10 - Epi Sexy.mkv +: episodeNumber: 10 + season: 6 + series: The League + title: Epi Sexy + type: episode + +? Stay (2005) [1080p]/Stay.2005.1080p.BluRay.x264.YIFY.mp4 +: format: BluRay + releaseGroup: YIFY + screenSize: 1080p + title: Stay + type: movie + videoCodec: h264 + year: 2005 + +? /media/live/A/Anger.Management.S02E82.720p.HDTV.X264-DIMENSION.mkv +: format: HDTV + releaseGroup: DIMENSION + screenSize: 720p + series: Anger Management + type: episode + season: 2 + episodeNumber: 82 + videoCodec: h264 + +? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv" +: type: episode + releaseGroup: Figmentos + series: Monster + episodeNumber: 34 + title: At the End of Darkness + crc32: 781219F1 + +? Game.of.Thrones.S05E07.720p.HDTV-KILLERS.mkv +: type: episode + episodeNumber: 7 + format: HDTV + releaseGroup: KILLERS + screenSize: 720p + season: 5 + series: Game of Thrones + +? Game.of.Thrones.S05E07.HDTV.720p-KILLERS.mkv +: type: episode + episodeNumber: 7 + format: HDTV + releaseGroup: KILLERS + screenSize: 720p + season: 5 + series: Game of Thrones + +? Parks and Recreation - [04x12] - Ad Campaign.avi +: type: episode + series: Parks and Recreation + season: 4 + episodeNumber: 12 + title: Ad Campaign + +? Star Trek Into Darkness (2013)/star.trek.into.darkness.2013.720p.web-dl.h264-publichd.mkv +: type: movie + title: Star Trek Into Darkness + year: 2013 + screenSize: 720p + format: WEB-DL + videoCodec: h264 + releaseGroup: PublicHD + +? 
/var/medias/series/The Originals/Season 02/The.Originals.S02E15.720p.HDTV.X264-DIMENSION.mkv +: type: episode + series: The Originals + season: 2 + episodeNumber: 15 + screenSize: 720p + format: HDTV + videoCodec: h264 + releaseGroup: DIMENSION diff -Nru guessit-0.8/guessit/test/episodes.yaml guessit-0.11.0/guessit/test/episodes.yaml --- guessit-0.8/guessit/test/episodes.yaml 2014-06-21 19:30:15.000000000 +0000 +++ guessit-0.11.0/guessit/test/episodes.yaml 2015-09-04 20:40:32.000000000 +0000 @@ -194,6 +194,13 @@ episodeDetails: Extras videoCodec: XviD +? series/Freaks And Geeks/Season 1/Episode 4 - Kim Kelly Is My Friend-eng(1).srt +: series: Freaks And Geeks + season: 1 + episodeNumber: 4 + title: Kim Kelly Is My Friend + language: English + ? /mnt/series/The Big Bang Theory/S01/The.Big.Bang.Theory.S01E01.mkv : series: The Big Bang Theory season: 1 @@ -264,6 +271,9 @@ ? Series/Baccano!/Baccano!_-_T1_-_Trailer_-_[Ayu](dae8173e).mkv : series: Baccano! other: Trailer + releaseGroup: Ayu + title: T1 + crc32: dae8173e ? Series/Doctor Who (2005)/Season 06/Doctor Who (2005) - S06E01 - The Impossible Astronaut (1).avi : series: Doctor Who @@ -272,6 +282,12 @@ episodeNumber: 1 title: The Impossible Astronaut +? The Sopranos - [05x07] - In Camelot.mp4 +: series: The Sopranos + season: 5 + episodeNumber: 7 + title: In Camelot + ? The.Office.(US).1x03.Health.Care.HDTV.XviD-LOL.avi : series: The Office (US) country: US @@ -285,8 +301,9 @@ ? /Volumes/data-1/Series/Futurama/Season 3/Futurama_-_S03_DVD_Bonus_-_Deleted_Scenes_Part_3.ogm : series: Futurama season: 3 + part: 3 other: Bonus - title: Deleted Scenes Part 3 + title: Deleted Scenes format: DVD ? Ben.and.Kate.S01E02.720p.HDTV.X264-DIMENSION.mkv @@ -345,13 +362,15 @@ : series: Merlin season: 5 episodeNumber: 2 - title: Arthurs bane part two + part: 2 + title: Arthurs bane screenSize: 720p format: HDTV videoCodec: h264 releaseGroup: Fov year: 2008 other: Proper + properCount: 1 ? 
"Da Vinci's Demons - 1x04 - The Magician.mkv" : series: "Da Vinci's Demons" @@ -511,6 +530,7 @@ season: 1 series: Falling Skies videoCodec: h264 + other: HDLight ? Sleepy.Hollow.S01E09.720p.WEB-DL.DD5.1.H.264-BP.mkv : episodeNumber: 9 @@ -565,6 +585,7 @@ videoCodec: XviD releaseGroup: GIGGITY other: proper + properCount: 1 episodeDetails: [Unaired, Pilot] title: Unaired Pilot @@ -597,6 +618,7 @@ other: Netflix format: Webrip audioChannels: "5.1" + audioCodec: DolbyDigital videoCodec: h264 releaseGroup: NTb @@ -607,7 +629,7 @@ format: HDTV releaseGroup: lol -? 03-Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi +? Criminal.Minds.5x03.Reckoner.ENG.-.sub.FR.HDTV.XviD-STi.[tvu.org.ru].avi : series: Criminal Minds language: English subtitleLanguage: French @@ -622,3 +644,1220 @@ ? 03-Criminal.Minds.avi : series: Criminal Minds episodeNumber: 3 + +? '[Evil-Saizen]_Laughing_Salesman_14_[DVD][1C98686A].mkv' +: crc32: 1C98686A + episodeNumber: 14 + format: DVD + releaseGroup: Evil-Saizen + series: Laughing Salesman + +? '[Kaylith] Zankyou no Terror - 04 [480p][B4D4514E].mp4' +: crc32: B4D4514E + episodeNumber: 4 + releaseGroup: Kaylith + screenSize: 480p + series: Zankyou no Terror + +? '[PuyaSubs!] Seirei Tsukai no Blade Dance - 05 [720p][32DD560E].mkv' +: crc32: 32DD560E + episodeNumber: 5 + releaseGroup: PuyaSubs! + screenSize: 720p + series: Seirei Tsukai no Blade Dance + +? '[Doremi].Happiness.Charge.Precure.27.[1280x720].[DC91581A].mkv' +: crc32: DC91581A + episodeNumber: 27 + releaseGroup: Doremi + screenSize: 720p + series: Happiness Charge Precure + +? "[Daisei] Free!:Iwatobi Swim Club - 01 ~ (BD 720p 10-bit AAC) [99E8E009].mkv" +: audioCodec: AAC + crc32: 99E8E009 + episodeNumber: 1 + format: BluRay + releaseGroup: Daisei + screenSize: 720p + series: Free!:Iwatobi Swim Club + videoProfile: 10bit + +? 
'[Tsundere] Boku wa Tomodachi ga Sukunai - 03 [BDRip h264 1920x1080 10bit FLAC][AF0C22CC].mkv' +: audioCodec: Flac + crc32: AF0C22CC + episodeNumber: 3 + format: BluRay + releaseGroup: Tsundere + screenSize: 1080p + series: Boku wa Tomodachi ga Sukunai + videoCodec: h264 + videoProfile: 10bit + +? '[t.3.3.d]_Mikakunin_de_Shinkoukei_-_12_[720p][5DDC1352].mkv' +: crc32: 5DDC1352 + episodeNumber: 12 + screenSize: 720p + series: Mikakunin de Shinkoukei + releaseGroup: t.3.3.d + +? '[Anime-Koi] Sabagebu! - 06 [h264-720p][ABB3728A].mkv' +: crc32: ABB3728A + episodeNumber: 6 + releaseGroup: Anime-Koi + screenSize: 720p + series: Sabagebu! + videoCodec: h264 + +? '[aprm-Diogo4D] [BD][1080p] Nagi no Asukara 08 [4D102B7C].mkv' +: crc32: 4D102B7C + episodeNumber: 8 + format: BluRay + releaseGroup: aprm-Diogo4D + screenSize: 1080p + series: Nagi no Asukara + +? '[Akindo-SSK] Zankyou no Terror - 05 [720P][Sub_ITA][F5CCE87C].mkv' +: crc32: F5CCE87C + episodeNumber: 5 + releaseGroup: Akindo-SSK + screenSize: 720p + series: Zankyou no Terror + subtitleLanguage: it + +? Naruto Shippuden Episode 366 VOSTFR.avi +: episodeNumber: 366 + series: Naruto Shippuden + subtitleLanguage: fr + +? Naruto Shippuden Episode 366v2 VOSTFR.avi +: episodeNumber: 366 + version: 2 + series: Naruto Shippuden + subtitleLanguage: fr + +? '[HorribleSubs] Ao Haru Ride - 06 [480p].mkv' +: episodeNumber: 6 + releaseGroup: HorribleSubs + screenSize: 480p + series: Ao Haru Ride + +? '[DeadFish] Tari Tari - 01 [BD][720p][AAC].mp4' +: audioCodec: AAC + episodeNumber: 1 + format: BluRay + releaseGroup: DeadFish + screenSize: 720p + series: Tari Tari + +? '[NoobSubs] Sword Art Online II 06 (720p 8bit AAC).mp4' +: audioCodec: AAC + episodeNumber: 6 + releaseGroup: NoobSubs + screenSize: 720p + series: Sword Art Online II + videoProfile: 8bit + +? '[DeadFish] 01 - Tari Tari [BD][720p][AAC].mp4' +: audioCodec: AAC + episodeNumber: 1 + format: BluRay + releaseGroup: DeadFish + screenSize: 720p + series: Tari Tari + +? 
'[NoobSubs] 06 Sword Art Online II (720p 8bit AAC).mp4' +: audioCodec: AAC + episodeNumber: 6 + releaseGroup: NoobSubs + screenSize: 720p + series: Sword Art Online II + videoProfile: 8bit + +? '[DeadFish] 12 - Tari Tari [BD][720p][AAC].mp4' +: audioCodec: AAC + episodeNumber: 12 + format: BluRay + releaseGroup: DeadFish + screenSize: 720p + series: Tari Tari + +? Something.Season.2.1of4.Ep.Title.HDTV.torrent +: episodeCount: 4 + episodeNumber: 1 + format: HDTV + season: 2 + series: Something + title: Title + extension: torrent + +? Something.Season.2of5.3of9.Ep.Title.HDTV.torrent +: episodeCount: 9 + episodeNumber: 3 + format: HDTV + season: 2 + seasonCount: 5 + series: Something + title: Title + extension: torrent + +? Something.Other.Season.3of5.Complete.HDTV.torrent +: format: HDTV + other: Complete + season: 3 + seasonCount: 5 + series: Something Other + extension: torrent + +? Something.Other.Season.1-3.avi +: season: 1 + seasonList: + - 1 + - 2 + - 3 + series: Something Other + +? Something.Other.Season.1&3.avi +: season: 1 + seasonList: + - 1 + - 3 + series: Something Other + +? Something.Other.Season.1&3-1to12ep.avi +: season: 1 + seasonList: + - 1 + - 3 + series: Something Other + +? Something.Other.saison 1 2 & 4 a 7.avi +: season: 1 + seasonList: + - 1 + - 2 + - 4 + - 5 + - 6 + - 7 + series: Something Other + +? W2Test.123.HDTV.XViD-FlexGet +: options: -n + episodeNumber: 23 + season: 1 + format: HDTV + releaseGroup: FlexGet + series: W2Test + videoCodec: XviD + +? W2Test.123.HDTV.XViD-FlexGet +: options: -n --episode-prefer-number + episodeNumber: 123 + format: HDTV + releaseGroup: FlexGet + series: W2Test + videoCodec: XviD + +? FooBar.0307.PDTV-FlexGet +: options: -n --episode-prefer-number + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + season: 3 + series: FooBar + +? FooBar.307.PDTV-FlexGet +: options: -n --episode-prefer-number + episodeNumber: 307 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? 
FooBar.07.PDTV-FlexGet +: options: -n --episode-prefer-number + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? FooBar.7.PDTV-FlexGet +: options: -n -t episode --episode-prefer-number + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? FooBar.0307.PDTV-FlexGet +: options: -n + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + season: 3 + series: FooBar + +? FooBar.307.PDTV-FlexGet +: options: -n + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + season: 3 + series: FooBar + +? FooBar.07.PDTV-FlexGet +: options: -n + episodeNumber: 7 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? FooBar.07v4.PDTV-FlexGet +: options: -n + episodeNumber: 7 + version: 4 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? FooBar.7.PDTV-FlexGet +: options: -n -t episode + format: DVB + releaseGroup: FlexGet + series: FooBar 7 + +? FooBar.7v3.PDTV-FlexGet +: options: -n -t episode + episodeNumber: 7 + version: 3 + format: DVB + releaseGroup: FlexGet + series: FooBar + +? Test.S02E01.hdtv.real.proper +: options: -n + episodeNumber: 1 + format: HDTV + other: Proper + properCount: 2 + season: 2 + series: Test + +? Real.Test.S02E01.hdtv.proper +: options: -n + episodeNumber: 1 + format: HDTV + other: Proper + properCount: 1 + season: 2 + series: Real Test + +? Test.Real.S02E01.hdtv.proper +: options: -n + episodeNumber: 1 + format: HDTV + other: Proper + properCount: 1 + season: 2 + series: Test Real + +? Test.S02E01.hdtv.proper +: options: -n + episodeNumber: 1 + format: HDTV + other: Proper + properCount: 1 + season: 2 + series: Test + +? Test.S02E01.hdtv.real.repack.proper +: options: -n + episodeNumber: 1 + format: HDTV + other: Proper + properCount: 3 + season: 2 + series: Test + +? Date.Show.03-29-2012.HDTV.XViD-FlexGet +: options: -n + date: 2012-03-29 + format: HDTV + releaseGroup: FlexGet + series: Date Show + videoCodec: XviD + +? 
Something.1x5.Season.Complete-FlexGet +: options: -n + episodeNumber: 5 + other: Complete + season: 1 + series: Something + releaseGroup: FlexGet + +? Something Seasons 1 & 2 - Complete +: options: -n + other: Complete + season: 1 + seasonList: + - 1 + - 2 + series: Something + +? Something Seasons 4 Complete +: options: -n + other: Complete + season: 4 + series: Something + +? Something.1xAll.Season.Complete-FlexGet +: options: -n + other: Complete + season: 1 + series: Something + releaseGroup: FlexGet + +? Something.1xAll-FlexGet +: options: -n + other: Complete + season: 1 + series: Something + releaseGroup: FlexGet + +? FlexGet.US.S2013E14.Title.Here.720p.HDTV.AAC5.1.x264-NOGRP +: options: -n + audioChannels: '5.1' + audioCodec: AAC + country: US + episodeNumber: 14 + format: HDTV + releaseGroup: NOGRP + screenSize: 720p + season: 2013 + series: FlexGet (US) + title: Title Here + videoCodec: h264 + year: 2013 + +? FlexGet.14.of.21.Title.Here.720p.HDTV.AAC5.1.x264-NOGRP +: options: -n + audioChannels: '5.1' + audioCodec: AAC + episodeCount: 21 + episodeNumber: 14 + format: HDTV + releaseGroup: NOGRP + screenSize: 720p + series: FlexGet + title: Title Here + videoCodec: h264 + +? FlexGet.Series.2013.14.of.21.Title.Here.720p.HDTV.AAC5.1.x264-NOGRP +: options: -n + audioChannels: '5.1' + audioCodec: AAC + episodeCount: 21 + episodeNumber: 14 + format: HDTV + releaseGroup: NOGRP + screenSize: 720p + season: 2013 + series: FlexGet + title: Title Here + videoCodec: h264 + year: 2013 + +? Something.S04E05E09 +: options: -n + episodeList: + - 5 + - 6 + - 7 + - 8 + - 9 + episodeNumber: 5 + season: 4 + series: Something + +? FooBar 360 1080i +: options: -n -t episode --episode-prefer-number + episodeNumber: 360 + screenSize: 1080i + series: FooBar + +? FooBar 360 1080i +: options: -n -t episode + episodeNumber: 60 + season: 3 + screenSize: 1080i + series: FooBar + +? FooBar 360 +: options: -n -t episode + screenSize: 360p + series: FooBar + +? 
BarFood christmas special HDTV +: options: -n -t episode --expected-series BarFood + format: HDTV + series: BarFood + title: christmas special + episodeDetails: Special + +? Something.2008x12.13-FlexGet +: options: -n -t episode + series: Something + date: 2008-12-13 + title: FlexGet + +? '[Ignored] Test 12' +: options: -n + episodeNumber: 12 + releaseGroup: Ignored + series: Test + +? '[FlexGet] Test 12' +: options: -n + episodeNumber: 12 + releaseGroup: FlexGet + series: Test + +? Test.13.HDTV-Ignored +: options: -n + episodeNumber: 13 + format: HDTV + releaseGroup: Ignored + series: Test + +? Test.13.HDTV-Ignored +: options: -n --expected-series test + episodeNumber: 13 + format: HDTV + releaseGroup: Ignored + series: Test + +? Test.13.HDTV-Ignored +: series: Test + episodeNumber: 13 + format: HDTV + releaseGroup: Ignored + +? Test.13.HDTV-Ignored +: options: -n --expected-group "Name;FlexGet" + episodeNumber: 13 + format: HDTV + releaseGroup: Ignored + series: Test + +? Test.13.HDTV-FlexGet +: options: -n + episodeNumber: 13 + format: HDTV + releaseGroup: FlexGet + series: Test + +? Test.14.HDTV-Name +: options: -n + episodeNumber: 14 + format: HDTV + releaseGroup: Name + series: Test + +? Real.Time.With.Bill.Maher.2014.10.31.HDTV.XviD-AFG.avi +: date: 2014-10-31 + format: HDTV + releaseGroup: AFG + series: Real Time With Bill Maher + videoCodec: XviD + +? Arrow.S03E21.Al.Sah-Him.1080p.WEB-DL.DD5.1.H.264-BS.mkv +: series: Arrow + season: 3 + episodeNumber: 21 + title: Al Sah-Him + screenSize: 1080p + audioCodec: DolbyDigital + audioChannels: "5.1" + videoCodec: h264 + releaseGroup: BS + format: WEB-DL + +? How to Make It in America - S02E06 - I'm Sorry, Who's Yosi?.mkv +: series: How to Make It in America + season: 2 + episodeNumber: 6 + title: I'm Sorry, Who's Yosi? + +? 24.S05E07.FRENCH.DVDRip.XviD-FiXi0N.avi +: episodeNumber: 7 + format: DVD + language: fr + season: 5 + series: '24' + videoCodec: XviD + releaseGroup: FiXi0N + +? 
12.Monkeys.S01E12.FRENCH.BDRip.x264-VENUE.mkv +: episodeNumber: 12 + format: BluRay + language: fr + releaseGroup: VENUE + season: 1 + series: 12 Monkeys + videoCodec: h264 + +? The.Daily.Show.2015.07.01.Kirsten.Gillibrand.Extended.720p.CC.WEBRip.AAC2.0.x264-BTW.mkv +: audioChannels: '2.0' + audioCodec: AAC + date: 2015-07-01 + format: WEBRip + other: CC + releaseGroup: BTW + screenSize: 720p + series: The Daily Show + title: Kirsten Gillibrand Extended + videoCodec: h264 + +? The.Daily.Show.2015.07.02.Sarah.Vowell.CC.WEBRip.AAC2.0.x264-BTW.mkv +: audioChannels: '2.0' + audioCodec: AAC + date: 2015-07-02 + format: WEBRip + other: CC + releaseGroup: BTW + series: The Daily Show + title: Sarah Vowell + videoCodec: h264 + +? 90.Day.Fiance.S02E07.I.Have.To.Tell.You.Something.720p.HDTV.x264-W4F +: options: -n + episodeNumber: 7 + format: HDTV + screenSize: 720p + season: 2 + series: 90 Day Fiance + title: I Have To Tell You Something + +? Doctor.Who.2005.S04E06.FRENCH.LD.DVDRip.XviD-TRACKS.avi +: episodeNumber: 6 + format: DVD + language: fr + releaseGroup: TRACKS + season: 4 + series: Doctor Who + other: LD + videoCodec: XviD + year: 2005 + +? Astro.Le.Petit.Robot.S01E01+02.FRENCH.DVDRiP.X264.INT-BOOLZ.mkv +: episodeNumber: 1 + episodeList: [1, 2] + format: DVD + language: fr + releaseGroup: INT-BOOLZ + season: 1 + series: Astro Le Petit Robot + videoCodec: h264 + +? Annika.Bengtzon.2012.E01.Le.Testament.De.Nobel.FRENCH.DVDRiP.XViD-STVFRV.avi +: episodeNumber: 1 + format: DVD + language: fr + releaseGroup: STVFRV + series: Annika Bengtzon + title: Le Testament De Nobel + videoCodec: XviD + year: 2012 + +? Dead.Set.02.FRENCH.LD.DVDRip.XviD-EPZ.avi +: episodeNumber: 2 + format: DVD + language: fr + other: LD + releaseGroup: EPZ + series: Dead Set + videoCodec: XviD + +? Phineas and Ferb S01E00 & S01E01 & S01E02 +: options: -n + episodeList: + - 0 + - 1 + - 2 + episodeNumber: 0 + season: 1 + series: Phineas and Ferb + +? 
Show.Name.S01E02.S01E03.HDTV.XViD.Etc-Group +: options: -n + episodeList: + - 2 + - 3 + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - S01E02 - S01E03 - S01E04 - Ep Name +: options: -n + episodeList: + - 2 + - 3 + - 4 + episodeNumber: 2 + season: 1 + series: Show Name + title: Ep Name + +? Show.Name.1x02.1x03.HDTV.XViD.Etc-Group +: options: -n + episodeList: + - 2 + - 3 + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - 1x02 - 1x03 - 1x04 - Ep Name +: options: -n + episodeList: + - 2 + - 3 + - 4 + episodeNumber: 2 + season: 1 + series: Show Name + title: Ep Name + +? Show.Name.S01E02.HDTV.XViD.Etc-Group +: options: -n + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - S01E02 - My Ep Name +: options: -n + episodeNumber: 2 + season: 1 + series: Show Name + title: My Ep Name + +? Show Name - S01.E03 - My Ep Name +: options: -n + episodeNumber: 3 + season: 1 + series: Show Name + title: My Ep Name + +? Show.Name.S01E02E03.HDTV.XViD.Etc-Group +: options: -n + episodeList: + - 2 + - 3 + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - S01E02-03 - My Ep Name +: options: -n + episodeList: + - 2 + - 3 + episodeNumber: 2 + season: 1 + series: Show Name + title: My Ep Name + +? Show.Name.S01.E02.E03 +: options: -n + episodeList: + - 2 + - 3 + episodeNumber: 2 + season: 1 + series: Show Name + +? Show_Name.1x02.HDTV_XViD_Etc-Group +: options: -n + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - 1x02 - My Ep Name +: options: -n + episodeNumber: 2 + season: 1 + series: Show Name + title: My Ep Name + +? 
Show_Name.1x02x03x04.HDTV_XViD_Etc-Group +: options: -n + episodeList: + - 2 + - 3 + - 4 + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show Name - 1x02-03-04 - My Ep Name +: options: -n + episodeList: + - 2 + - 3 + - 4 + episodeNumber: 2 + season: 1 + series: Show Name + title: My Ep Name + +? Show.Name.100.Event.2010.11.23.HDTV.XViD.Etc-Group +: options: -n + date: 2010-11-23 + episodeNumber: 100 + format: HDTV + releaseGroup: Etc-Group + series: Show Name + title: Event + videoCodec: XviD + +? Show.Name.2010.11.23.HDTV.XViD.Etc-Group +: options: -n + date: 2010-11-23 + format: HDTV + releaseGroup: Etc-Group + series: Show Name + +? Show Name - 2010-11-23 - Ep Name +: options: -n + date: 2010-11-23 + series: Show Name + title: Ep Name + +? Show Name Season 1 Episode 2 Ep Name +: options: -n + episodeNumber: 2 + season: 1 + series: Show Name + title: Ep Name + +? Show.Name.S01.HDTV.XViD.Etc-Group +: options: -n + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? Show.Name.E02-03 +: options: -n + episodeNumber: 2 + episodeList: + - 2 + - 3 + series: Show Name + +? Show.Name.E02.2010 +: options: -n + episodeNumber: 2 + year: 2010 + series: Show Name + +? Show.Name.E23.Test +: options: -n + episodeNumber: 23 + series: Show Name + title: Test + +? Show.Name.Part.3.HDTV.XViD.Etc-Group +: options: -n -t episode + part: 3 + series: Show Name + format: HDTV + videoCodec: XviD + releaseGroup: Etc-Group + +? Show.Name.Part.1.and.Part.2.Blah-Group +: options: -n -t episode + part: 1 + partList: + - 1 + - 2 + series: Show Name + +? Show Name - 01 - Ep Name +: options: -n + episodeNumber: 1 + series: Show Name + title: Ep Name + +? 01 - Ep Name +: options: -n + episodeNumber: 1 + series: Ep Name + +? Show.Name.102.HDTV.XViD.Etc-Group +: options: -n + episodeNumber: 2 + format: HDTV + releaseGroup: Etc-Group + season: 1 + series: Show Name + videoCodec: XviD + +? 
'[HorribleSubs] Maria the Virgin Witch - 01 [720p].mkv' +: episodeNumber: 1 + releaseGroup: HorribleSubs + screenSize: 720p + series: Maria the Virgin Witch + +? '[ISLAND]One_Piece_679_[VOSTFR]_[V1]_[8bit]_[720p]_[EB7838FC].mp4' +: options: -E + crc32: EB7838FC + episodeNumber: 679 + releaseGroup: ISLAND + screenSize: 720p + series: One Piece + subtitleLanguage: fr + videoProfile: 8bit + version: 1 + + +? '[ISLAND]One_Piece_679_[VOSTFR]_[8bit]_[720p]_[EB7838FC].mp4' +: options: -E + crc32: EB7838FC + episodeNumber: 679 + releaseGroup: ISLAND + screenSize: 720p + series: One Piece + subtitleLanguage: fr + videoProfile: 8bit + +? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][HD_1280x720].mp4' +: options: -E + episodeNumber: 679 + other: HD + releaseGroup: Kaerizaki-Fansub + screenSize: 720p + series: One Piece + subtitleLanguage: fr + +? '[Kaerizaki-Fansub]_One_Piece_679_[VOSTFR][FANSUB][HD_1280x720].mp4' +: options: -E + episodeNumber: 679 + other: + - Fansub + - HD + releaseGroup: Kaerizaki-Fansub + screenSize: 720p + series: One Piece + subtitleLanguage: fr + +? '[Kaerizaki-Fansub]_One_Piece_681_[VOSTFR][HD_1280x720]_V2.mp4' +: options: -E + episodeNumber: 681 + other: HD + releaseGroup: Kaerizaki-Fansub + screenSize: 720p + series: One Piece + subtitleLanguage: fr + version: 2 + +? '[Kaerizaki-Fansub] High School DxD New 04 VOSTFR HD (1280x720) V2.mp4' +: options: -E + episodeNumber: 4 + other: HD + releaseGroup: Kaerizaki-Fansub + screenSize: 720p + series: High School DxD New + subtitleLanguage: fr + version: 2 + +? '[Kaerizaki-Fansub] One Piece 603 VOSTFR PS VITA (960x544) V2.mp4' +: options: -E + episodeNumber: 603 + releaseGroup: Kaerizaki-Fansub + screenSize: 960x544 + series: One Piece + subtitleLanguage: fr + version: 2 + +? '[Group Name] Show Name.13' +: options: -n + episodeNumber: 13 + releaseGroup: Group Name + series: Show Name + +? '[Group Name] Show Name - 13' +: options: -n + episodeNumber: 13 + releaseGroup: Group Name + series: Show Name + +? 
'[Group Name] Show Name 13' +: options: -n + episodeNumber: 13 + releaseGroup: Group Name + series: Show Name + +# [Group Name] Show Name.13-14 +# [Group Name] Show Name - 13-14 +# Show Name 13-14 + +? '[Stratos-Subs]_Infinite_Stratos_-_12_(1280x720_H.264_AAC)_[379759DB]' +: options: -n + audioCodec: AAC + crc32: 379759DB + episodeNumber: 12 + releaseGroup: Stratos-Subs + screenSize: 720p + series: Infinite Stratos + videoCodec: h264 + +# [ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC) + +? '[SGKK] Bleach 312v1 [720p/MKV]' +: options: -n + episodeNumber: 312 + releaseGroup: SGKK + screenSize: 720p + series: Bleach + version: 1 + +? '[Ayako]_Infinite_Stratos_-_IS_-_07_[H264][720p][EB7838FC]' +: options: -n + crc32: EB7838FC + episodeNumber: 7 + releaseGroup: Ayako + screenSize: 720p + series: Infinite Stratos + videoCodec: h264 + +? '[Ayako] Infinite Stratos - IS - 07v2 [H264][720p][44419534]' +: options: -n + crc32: '44419534' + episodeNumber: 7 + releaseGroup: Ayako + screenSize: 720p + series: Infinite Stratos + videoCodec: h264 + version: 2 + +? '[Ayako-Shikkaku] Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne - 10 [LQ][h264][720p] [8853B21C]' +: options: -n + crc32: 8853B21C + episodeNumber: 10 + releaseGroup: Ayako-Shikkaku + screenSize: 720p + series: Oniichan no Koto Nanka Zenzen Suki Janain Dakara ne + videoCodec: h264 + +# Add support for absolute episodes +? Bleach - s16e03-04 - 313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + +? Bleach.s16e03-04.313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + +? Bleach.s16e03-04.313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + +? Bleach - 313-314 +: options: -En + episodeList: + - 313 + - 314 + episodeNumber: 313 + series: Bleach + +? 
Bleach - s16e03-04 - 313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + +? Bleach.s16e03-04.313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + + +? Bleach s16e03e04 313-314 +: options: -n + episodeList: + - 3 + - 4 + episodeNumber: 3 + season: 16 + series: Bleach + +? '[ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)' +: audioCodec: AAC + episodeList: + - 2 + - 3 + episodeNumber: 2 + releaseGroup: ShinBunBu-Subs + screenSize: 720p + series: Bleach + videoCodec: h264 + +? 003. Show Name - Ep Name.ext +: episodeNumber: 3 + series: Show Name + title: Ep Name + +? 003-004. Show Name - Ep Name.ext +: episodeList: + - 3 + - 4 + episodeNumber: 3 + series: Show Name + title: Ep Name + +? One Piece - 102 +: options: -n -t episode + episodeNumber: 2 + season: 1 + series: One Piece + +? "[ACX]_Wolf's_Spirit_001.mkv" +: episodeNumber: 1 + releaseGroup: ACX + series: "Wolf's Spirit" + +? Project.Runway.S14E00.and.S14E01.(Eng.Subs).SDTV.x264-[2Maverick].mp4 +: episodeList: + - 0 + - 1 + episodeNumber: 0 + format: TV + releaseGroup: 2Maverick + season: 14 + series: Project Runway + subtitleLanguage: en + videoCodec: h264 + +? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_[720p][10bit].torrent' +: episodeList: + - 16 + - 17 + - 18 + - 19 + - 20 + episodeNumber: 16 + releaseGroup: Hatsuyuki-Kaitou + screenSize: 720p + series: Fairy Tail 2 + videoProfile: 10bit + +? '[Hatsuyuki-Kaitou]_Fairy_Tail_2_-_16-20_(191-195)_[720p][10bit].torrent' +: options: -E + episodeList: + - 16 + - 17 + - 18 + - 19 + - 20 + episodeNumber: 16 + releaseGroup: Hatsuyuki-Kaitou + screenSize: 720p + series: Fairy Tail 2 + +? "Looney Tunes 1940x01 Porky's Last Stand.mkv" +: episodeNumber: 1 + season: 1940 + series: Looney Tunes + title: Porky's Last Stand + year: 1940 + +? 
The.Good.Wife.S06E01.E10.720p.WEB-DL.DD5.1.H.264-CtrlHD/The.Good.Wife.S06E09.Trust.Issues.720p.WEB-DL.DD5.1.H.264-CtrlHD.mkv +: audioChannels: '5.1' + audioCodec: DolbyDigital + episodeList: + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + episodeNumber: 9 + format: WEB-DL + releaseGroup: CtrlHD + screenSize: 720p + season: 6 + series: The Good Wife + title: Trust Issues + videoCodec: h264 + +? Fear the Walking Dead - 01x02 - So Close, Yet So Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv +: episodeNumber: 2 + language: fr + other: Proper + properCount: 1 + season: 1 + series: Fear the Walking Dead + title: So Close, Yet So Far + +? Fear the Walking Dead - 01x02 - En Close, Yet En Far.REPACK-KILLERS.French.C.updated.Addic7ed.com.mkv +: episodeNumber: 2 + language: fr + other: Proper + properCount: 1 + season: 1 + series: Fear the Walking Dead + title: En Close, Yet En Far + +? /av/unsorted/The.Daily.Show.2015.07.22.Jake.Gyllenhaal.720p.HDTV.x264-BATV.mkv +: date: 2015-07-22 + format: HDTV + releaseGroup: BATV + screenSize: 720p + series: The Daily Show + title: Jake Gyllenhaal + videoCodec: h264 diff -Nru guessit-0.8/guessit/test/guessittest.py guessit-0.11.0/guessit/test/guessittest.py --- guessit-0.8/guessit/test/guessittest.py 2014-06-21 23:05:32.000000000 +0000 +++ guessit-0.11.0/guessit/test/guessittest.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,34 +20,36 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit import base_text_type, u from collections import defaultdict -from unittest import TestCase, TestLoader, TextTestRunner -import shlex -import babelfish -import yaml, logging, sys, os +from unittest import TestCase, TestLoader +import logging +import os +import sys from os.path import * +import babelfish +import yaml + def currentPath(): - '''Returns the path in which the calling file is located.''' + """Returns the path in which the calling file is located.""" return 
dirname(join(os.getcwd(), sys._getframe(1).f_globals['__file__'])) def addImportPath(path): - '''Function that adds the specified path to the import path. The path can be - absolute or relative to the calling file.''' + """Function that adds the specified path to the import path. The path can be + absolute or relative to the calling file.""" importPath = abspath(join(currentPath(), path)) sys.path = [importPath] + sys.path log = logging.getLogger(__name__) -from guessit.plugins import transformers -import guessit -from guessit.options import option_parser +from guessit.options import get_opts +from guessit import base_text_type from guessit import * from guessit.matcher import * from guessit.fileutils import * +import guessit def allTests(testClass): @@ -83,13 +85,14 @@ options = required_fields.pop('options') if 'options' in required_fields else None - if options: - args = shlex.split(options) - options, _ = option_parser.parse_args(args) - options = vars(options) - found = guess_func(filename, options) + try: + found = guess_func(filename, options) + except Exception as e: + fails[filename].append("An exception has occured in %s: %s" % (filename, e)) + log.exception("An exception has occured in %s: %s" % (filename, e)) + continue - total = total + 1 + total += 1 # no need for these in the unittests if remove_type: @@ -97,12 +100,12 @@ del found['type'] except: pass - for prop in ('container', 'mimetype'): + for prop in ('container', 'mimetype', 'unidentified'): if prop in found: del found[prop] # props which are list of just 1 elem should be opened for easier writing of the tests - for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails'): + for prop in ('language', 'subtitleLanguage', 'other', 'episodeDetails', 'unidentified'): value = found.get(prop, None) if isinstance(value, list) and len(value) == 1: found[prop] = value[0] @@ -174,9 +177,8 @@ log.error("FAILED: " + failed_property) for additional_entry, additional_properties in 
additionals.items(): - log.warn('---- ' + additional_entry + ' ----') + log.warning('---- ' + additional_entry + ' ----') for additional_property in additional_properties: - log.warn("ADDITIONAL: " + additional_property) + log.warning("ADDITIONAL: " + additional_property) - self.assertTrue(correct == total, - msg='Correct: %d < Total: %d' % (correct, total)) + assert correct == total, 'Correct: %d < Total: %d' % (correct, total) diff -Nru guessit-0.8/guessit/test/__init__.py guessit-0.11.0/guessit/test/__init__.py --- guessit-0.8/guessit/test/__init__.py 2014-06-21 21:45:12.000000000 +0000 +++ guessit-0.11.0/guessit/test/__init__.py 2015-06-06 12:39:15.000000000 +0000 @@ -21,6 +21,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals import logging -from guessit.slogging import setupLogging -setupLogging() + +from guessit.slogging import setup_logging + +setup_logging() logging.disable(logging.INFO) diff -Nru guessit-0.8/guessit/test/__main__.py guessit-0.11.0/guessit/test/__main__.py --- guessit-0.8/guessit/test/__main__.py 2014-02-21 20:04:02.000000000 +0000 +++ guessit-0.11.0/guessit/test/__main__.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# GuessIt - A library for guessing information from filenames -# Copyright (c) 2013 Nicolas Wack -# -# GuessIt is free software; you can redistribute it and/or modify it under -# the terms of the Lesser GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# GuessIt is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# Lesser GNU General Public License for more details. -# -# You should have received a copy of the Lesser GNU General Public License -# along with this program. If not, see . 
-# - -from __future__ import absolute_import, division, print_function, unicode_literals -from guessit.test import (test_api, test_autodetect, test_autodetect_all, test_doctests, - test_episode, test_hashes, test_language, test_main, - test_matchtree, test_movie, test_quality, test_utils) -from unittest import TextTestRunner - - -import logging - -def main(): - for suite in [test_api.suite, test_autodetect.suite, - test_autodetect_all.suite, test_doctests.suite, - test_episode.suite, test_hashes.suite, test_language.suite, - test_main.suite, test_matchtree.suite, test_movie.suite, - test_quality.suite, test_utils.suite]: - TextTestRunner(verbosity=2).run(suite) - - -if __name__ == '__main__': - main() diff -Nru guessit-0.8/guessit/test/movies.yaml guessit-0.11.0/guessit/test/movies.yaml --- guessit-0.8/guessit/test/movies.yaml 2014-05-10 16:02:25.000000000 +0000 +++ guessit-0.11.0/guessit/test/movies.yaml 2015-08-31 16:57:28.000000000 +0000 @@ -14,6 +14,7 @@ format: DVD language: spanish videoCodec: DivX + releaseGroup: Artik[SEDG] ? Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv : title: Dark City @@ -38,6 +39,7 @@ : title: Borat year: 2006 other: PROPER + properCount: 2 format: DVD other: [ R5, Proper ] videoCodec: XviD @@ -53,6 +55,7 @@ audioProfile: HE language: [ french, english ] subtitleLanguage: [ french, english ] + releaseGroup: XCT ? Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi : title: Battle Royale @@ -77,6 +80,7 @@ audioCodec: AAC language: [ French, English ] subtitleLanguage: [ French, English ] + releaseGroup: XCT ? Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv : title: Toy Story @@ -143,6 +147,7 @@ videoCodec: XviD releaseGroup: ARROW other: ['Proper', 'Limited'] + properCount: 2 ? 
Movies/Fr - Paris 2054, Renaissance (2005) - De Christian Volckman - (Film Divx Science Fiction Fantastique Thriller Policier N&B).avi : title: Paris 2054, Renaissance @@ -220,6 +225,19 @@ language: english website: sharethefiles.com +? Movies/The Doors (1991)/08.03.09.The.Doors.(1991).BDRip.720p.AC3.X264-HiS@SiLUHD-English.[sharethefiles.com].mkv +: options: --date-year-first + title: The Doors + year: 1991 + date: 2008-03-09 + format: BluRay + screenSize: 720p + audioCodec: AC3 + videoCodec: h264 + releaseGroup: HiS@SiLUHD + language: english + website: sharethefiles.com + ? Movies/Ratatouille/video_ts-ratatouille.srt : title: Ratatouille format: DVD @@ -298,10 +316,12 @@ : title: OSS 117--Cairo, Nest of Spies ? The Godfather Part III.mkv -: title: The Godfather Part III +: title: The Godfather + part: 3 ? Foobar Part VI.mkv -: title: Foobar Part VI +: title: Foobar + part: 6 ? The_Insider-(1999)-x02-60_Minutes_Interview-1996.mp4 : title: The Insider @@ -380,7 +400,7 @@ format: BluRay audioCodec: DTS videoCodec: h264 - releaseGroup: D-Z0N3 + releaseGroup: D Z0N3 ? Street.Kings.2008.BluRay.1080p.DTS.x264.dxva EuReKA.mkv : title: Street Kings @@ -435,20 +455,28 @@ language: - French - English + format: DVD + other: NTSC ? Immersion.French.2011.STV.READNFO.QC.FRENCH.NTSC.DVDR.nfo : title: Immersion French year: 2011 language: French + format: DVD + other: NTSC ? Immersion.French.2011.STV.READNFO.QC.NTSC.DVDR.nfo : title: Immersion French year: 2011 + format: DVD + other: NTSC ? French.Immersion.2011.STV.READNFO.QC.ENGLISH.NTSC.DVDR.nfo : title: French Immersion year: 2011 language: ENGLISH + format: DVD + other: NTSC ? Howl's_Moving_Castle_(2004)_[720p,HDTV,x264,DTS]-FlexGet.avi : videoCodec: h264 @@ -542,13 +570,13 @@ audioCodec: AC3 audioProfile: HQ releaseGroup: Hive-CM8 - + ? "Star Wars: Episode IV - A New Hope (2004) Special Edition.MKV" : title: Star Wars Episode IV year: 2004 edition: Special Edition - + ? 
Dr.LiNE.The.Lorax.2012.DVDRip.LiNE.XviD.AC3.HQ.Hive-CM8.mp4 : videoCodec: XviD title: Dr LiNE The Lorax @@ -566,7 +594,7 @@ language: French format: TV year: 2007 - + ? entre.ciel.et.terre.(1994).dvdrip.h264.aac-psypeon.avi : audioCodec: AAC format: DVD @@ -574,16 +602,18 @@ title: entre ciel et terre videoCodec: h264 year: 1994 - + ? Yves.Saint.Laurent.2013.FRENCH.DVDSCR.MD.XviD-ViVARiUM.avi : format: DVD language: French - other: Screener + other: + - MD + - Screener releaseGroup: ViVARiUM title: Yves Saint Laurent videoCodec: XviD year: 2013 - + ? Echec et Mort - Hard to Kill - Steven Seagal Multi 1080p BluRay x264 CCATS.avi : format: BluRay language: Multiple languages @@ -597,7 +627,7 @@ title: Paparazzi screenSize: 1080p format: HDTV - + ? some.movie.720p.bluray.x264-mind : options: -n title: some movie @@ -605,7 +635,7 @@ videoCodec: h264 releaseGroup: mind format: BluRay - + ? Dr LiNE The Lorax 720p h264 BluRay : options: -n title: Dr LiNE The Lorax @@ -614,14 +644,136 @@ format: BluRay ? BeatdownFrenchDVDRip.mkv -: title: Beatdown +: options: -c + title: Beatdown language: French format: DVD ? YvesSaintLaurent2013FrenchDVDScrXvid.avi -: format: DVD +: options: -c + format: DVD language: French other: Screener title: Yves saint laurent videoCodec: XviD - year: 2013 \ No newline at end of file + year: 2013 + +? Elle.s.en.va.720p.mkv +: screenSize: 720p + title: Elle s en va + +? FooBar.7.PDTV-FlexGet +: options: -n + format: DVB + releaseGroup: FlexGet + title: FooBar 7 + +? h265 - HEVC Riddick Unrated Director Cut French 1080p DTS.mkv +: audioCodec: DTS + edition: Director's cut + language: fr + screenSize: 1080p + title: Riddick Unrated + videoCodec: h265 + +? "[h265 - HEVC] Riddick Unrated Director Cut French [1080p DTS].mkv" +: audioCodec: DTS + edition: Director's cut + language: fr + screenSize: 1080p + title: Riddick Unrated + videoCodec: h265 + +? 
Barbecue-2014-French-mHD-1080p +: options: -n + language: fr + other: mHD + screenSize: 1080p + title: Barbecue + year: 2014 + +? Underworld Quadrilogie VO+VFF+VFQ 1080p HDlight.x264~Tonyk~Monde Infernal +: options: -n + language: + - fr + - vo + other: HDLight + screenSize: 1080p + title: Underworld Quadrilogie + videoCodec: h264 + +? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ +: options: -n + format: DVD + language: mul + releaseGroup: KZ + title: A Bout Portant + +? "Mise à Sac (Alain Cavalier, 1967) [Vhs.Rip.Vff]" +: options: -n + format: VHS + language: fr + title: "Mise à Sac" + year: 1967 + +? A Bout Portant (The Killers).PAL.Multi.DVD-R-KZ +: options: -n + format: DVD + other: PAL + language: mul + releaseGroup: KZ + title: A Bout Portant + +? Youth.In.Revolt.(Be.Bad).2009.MULTI.1080p.LAME3*92-MEDIOZZ +: options: -n + audioCodec: MP3 + language: mul + releaseGroup: MEDIOZZ + screenSize: 1080p + title: Youth In Revolt + year: 2009 + +? La Defense Lincoln (The Lincoln Lawyer) 2011 [DVDRIP][Vostfr] +: options: -n + format: DVD + subtitleLanguage: fr + title: La Defense Lincoln + year: 2011 + +? '[h265 - HEVC] Fight Club French 1080p DTS.' +: options: -n + audioCodec: DTS + language: fr + screenSize: 1080p + title: Fight Club + videoCodec: h265 + +? Love Gourou (Mike Myers) - FR +: options: -n + language: fr + title: Love Gourou + +? '[h265 - hevc] transformers 2 1080p french ac3 6ch.' +: options: -n + audioChannels: '5.1' + audioCodec: AC3 + language: fr + screenSize: 1080p + title: transformers 2 + videoCodec: h265 + +? 1.Angry.Man.1957.mkv +: title: 1 Angry Man + year: 1957 + +? 12.Angry.Men.1957.mkv +: title: 12 Angry Men + year: 1957 + +? 123.Angry.Men.1957.mkv +: title: 123 Angry Men + year: 1957 + +? 
"Looney Tunes 1444x866 Porky's Last Stand.mkv" +: screenSize: 1444x866 + title: Looney Tunes diff -Nru guessit-0.8/guessit/test/test_api.py guessit-0.11.0/guessit/test/test_api.py --- guessit-0.8/guessit/test/test_api.py 2014-02-21 19:03:49.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_api.py 2015-06-06 12:39:15.000000000 +0000 @@ -32,23 +32,19 @@ episode_info = guessit.guess_episode_info(movie_path) file_info = guessit.guess_file_info(movie_path) - self.assertEqual(guessit.guess_file_info(movie_path, type='movie'), movie_info) - self.assertEqual(guessit.guess_file_info(movie_path, type='video'), video_info) - self.assertEqual(guessit.guess_file_info(movie_path, type='episode'), episode_info) + assert guessit.guess_file_info(movie_path, type='movie') == movie_info + assert guessit.guess_file_info(movie_path, type='video') == video_info + assert guessit.guess_file_info(movie_path, type='episode') == episode_info + + assert guessit.guess_file_info(movie_path, options={'type': 'movie'}) == movie_info + assert guessit.guess_file_info(movie_path, options={'type': 'video'}) == video_info + assert guessit.guess_file_info(movie_path, options={'type': 'episode'}) == episode_info - self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'movie'}), movie_info) - self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'video'}), video_info) - self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}), episode_info) - - self.assertEqual(guessit.guess_file_info(movie_path, options={'type': 'episode'}, type='movie'), episode_info) # kwargs priority other options + # kwargs priority other options + assert guessit.guess_file_info(movie_path, options={'type': 'episode'}, type='movie') == episode_info movie_path_name_only = 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD' file_info_name_only = guessit.guess_file_info(movie_path_name_only, options={"name_only": True}) - self.assertFalse('container' in 
file_info_name_only) - self.assertTrue('container' in file_info) - -suite = allTests(TestApi) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) + assert 'container' not in file_info_name_only + assert 'container' in file_info diff -Nru guessit-0.8/guessit/test/test_autodetect_all.py guessit-0.11.0/guessit/test/test_autodetect_all.py --- guessit-0.8/guessit/test/test_autodetect_all.py 2014-02-20 19:26:42.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_autodetect_all.py 2015-06-06 12:39:15.000000000 +0000 @@ -38,9 +38,3 @@ def testAutoMatcherEpisodes(self): self.checkMinimumFieldsCorrect(filename='episodes.yaml', exclude_files=IGNORE_EPISODES) - - -suite = allTests(TestAutoDetectAll) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_autodetect.py guessit-0.11.0/guessit/test/test_autodetect.py --- guessit-0.8/guessit/test/test_autodetect.py 2014-02-20 19:26:42.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_autodetect.py 2015-06-06 12:39:15.000000000 +0000 @@ -26,20 +26,14 @@ class TestAutoDetect(TestGuessit): def testEmpty(self): result = guessit.guess_file_info('') - self.assertEqual(result, {}) + assert result == {} result = guessit.guess_file_info('___-__') - self.assertEqual(result, {}) + assert result == {} result = guessit.guess_file_info('__-.avc') - self.assertEqual(result, {'type': 'unknown', 'extension': 'avc'}) + assert result == {'type': 'unknown', 'extension': 'avc'} def testAutoDetect(self): self.checkMinimumFieldsCorrect(filename='autodetect.yaml', remove_type=False) - - -suite = allTests(TestAutoDetect) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_doctests.py guessit-0.11.0/guessit/test/test_doctests.py --- guessit-0.8/guessit/test/test_doctests.py 2014-02-21 19:24:02.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_doctests.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ 
-#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# GuessIt - A library for guessing information from filenames -# Copyright (c) 2014 Nicolas Wack -# -# GuessIt is free software; you can redistribute it and/or modify it under -# the terms of the Lesser GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# GuessIt is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# Lesser GNU General Public License for more details. -# -# You should have received a copy of the Lesser GNU General Public License -# along with this program. If not, see . -# - -from __future__ import absolute_import, division, print_function, unicode_literals - -from guessit.test.guessittest import * -import guessit -import guessit.hash_ed2k -import unittest -import doctest - - -def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(guessit)) - tests.addTests(doctest.DocTestSuite(guessit.date)) - tests.addTests(doctest.DocTestSuite(guessit.fileutils)) - tests.addTests(doctest.DocTestSuite(guessit.guess)) - tests.addTests(doctest.DocTestSuite(guessit.hash_ed2k)) - tests.addTests(doctest.DocTestSuite(guessit.language)) - tests.addTests(doctest.DocTestSuite(guessit.matchtree)) - tests.addTests(doctest.DocTestSuite(guessit.textutils)) - return tests - -suite = unittest.TestSuite() -load_tests(None, suite, None) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_episode.py guessit-0.11.0/guessit/test/test_episode.py --- guessit-0.8/guessit/test/test_episode.py 2013-12-12 20:29:08.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_episode.py 2015-06-06 12:39:15.000000000 +0000 @@ -27,9 +27,3 @@ def testEpisodes(self): self.checkMinimumFieldsCorrect(filetype='episode', filename='episodes.yaml') 
- - -suite = allTests(TestEpisode) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_hashes.py guessit-0.11.0/guessit/test/test_hashes.py --- guessit-0.8/guessit/test/test_hashes.py 2014-02-20 19:26:42.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_hashes.py 2015-06-06 12:39:15.000000000 +0000 @@ -37,10 +37,5 @@ for hash_type, filename, expected_value in hashes: guess = guess_file_info(file_in_same_dir(__file__, filename), hash_type) computed_value = guess.get(hash_type) - self.assertEqual(expected_value, guess.get(hash_type), "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value)) - - -suite = allTests(TestHashes) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) + assert expected_value == guess.get(hash_type), \ + "Invalid %s for %s: %s != %s" % (hash_type, filename, computed_value, expected_value) diff -Nru guessit-0.8/guessit/test/test_language.py guessit-0.11.0/guessit/test/test_language.py --- guessit-0.8/guessit/test/test_language.py 2014-06-21 22:55:04.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_language.py 2015-06-06 12:39:15.000000000 +0000 @@ -22,15 +22,12 @@ from guessit.test.guessittest import * -import io - class TestLanguage(TestGuessit): def check_languages(self, languages): for lang1, lang2 in languages.items(): - self.assertEqual(Language.fromguessit(lang1), - Language.fromguessit(lang2)) + assert Language.fromguessit(lang1) == Language.fromguessit(lang2) def test_addic7ed(self): languages = {'English': 'en', @@ -91,7 +88,7 @@ if int(upload_enabled) and int(web_enabled): # check that we recognize the opensubtitles language code correctly # and that we are able to output this code from a language - self.assertEqual(idlang, Language.fromguessit(idlang).opensubtitles) + assert idlang == Language.fromguessit(idlang).opensubtitles if alpha2: # check we recognize the opensubtitles 2-letter code correctly 
self.check_languages({idlang: alpha2}) @@ -99,7 +96,7 @@ def test_tmdb(self): # examples from http://api.themoviedb.org/2.1/language-tags for lang in ['en-US', 'en-CA', 'es-MX', 'fr-PF']: - self.assertEqual(lang, str(Language.fromguessit(lang))) + assert lang == str(Language.fromguessit(lang)) def test_subtitulos(self): languages = {'English (US)': 'en-US', 'English (UK)': 'en-UK', 'English': 'en', @@ -118,13 +115,5 @@ self.check_languages(languages) def test_exceptions(self): - self.assertEqual(Language.fromguessit('br'), Language.fromguessit('pt(br)')) - - self.assertEqual(Language.fromguessit('unknown'), - Language.fromguessit('und')) - - -suite = allTests(TestLanguage) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) + assert Language.fromguessit('br') == Language.fromguessit('pt(br)') + assert Language.fromguessit('unknown') == Language.fromguessit('und') diff -Nru guessit-0.8/guessit/test/test_main.py guessit-0.11.0/guessit/test/test_main.py --- guessit-0.8/guessit/test/test_main.py 2014-02-21 19:03:49.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_main.py 2015-06-06 12:39:15.000000000 +0000 @@ -21,8 +21,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals from guessit.test.guessittest import * -from guessit.fileutils import split_path, file_in_same_dir -from guessit.textutils import strip_brackets, str_replace, str_fill +from guessit.fileutils import file_in_same_dir from guessit import PY2 from guessit import __main__ @@ -43,15 +42,14 @@ def test_list_properties(self): __main__.main(["-p"], False) - __main__.main(["-l"], False) + __main__.main(["-V"], False) def test_list_transformers(self): __main__.main(["--transformers"], False) - __main__.main(["-l", "--transformers"], False) + __main__.main(["-V", "--transformers"], False) def test_demo(self): __main__.main(["-d"], False) - __main__.main(["-l"], False) def test_filename(self): __main__.main(["A.Movie.2014.avi"], False) @@ -63,8 +61,3 @@ 
__main__.main(["-t", "episode", "A.Serie.S02E06.avi"], False) __main__.main(["-i", "hash_mpc", file_in_same_dir(__file__, "1MB")], False) __main__.main(["-i", "hash_md5", file_in_same_dir(__file__, "1MB")], False) - -suite = allTests(TestMain) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_matchtree.py guessit-0.11.0/guessit/test/test_matchtree.py --- guessit-0.8/guessit/test/test_matchtree.py 2014-05-30 21:05:55.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_matchtree.py 2015-08-31 16:57:28.000000000 +0000 @@ -31,10 +31,12 @@ ? Xvid PROPER : videoCodec: Xvid other: PROPER + properCount: 1 ? PROPER-Xvid : videoCodec: Xvid other: PROPER + properCount: 1 """) @@ -52,42 +54,36 @@ t.partition((3, 7, 20)) leaves = list(t.leaves()) - self.assertEqual(leaves[0].span, (0, 3)) + assert leaves[0].span == (0, 3) - self.assertEqual('One', leaves[0].value) - self.assertEqual(' Two', leaves[1].value) - self.assertEqual(' Three(Three)', leaves[2].value) - self.assertEqual(' Four', leaves[3].value) + assert 'One' == leaves[0].value + assert ' Two' == leaves[1].value + assert ' Three(Three)' == leaves[2].value + assert ' Four' == leaves[3].value leaves[2].partition((1, 6, 7, 12)) three_leaves = list(leaves[2].leaves()) - self.assertEqual('Three', three_leaves[1].value) - self.assertEqual('Three', three_leaves[3].value) + assert 'Three' == three_leaves[1].value + assert 'Three' == three_leaves[3].value leaves = list(t.leaves()) - self.assertEqual(len(leaves), 8) + assert len(leaves) == 8 - self.assertEqual(leaves[5], three_leaves[3]) + assert leaves[5] == three_leaves[3] - self.assertEqual(t.previous_leaf(leaves[5]), leaves[4]) - self.assertEqual(t.next_leaf(leaves[5]), leaves[6]) + assert t.previous_leaf(leaves[5]) == leaves[4] + assert t.next_leaf(leaves[5]) == leaves[6] - self.assertEqual(t.next_leaves(leaves[5]), [leaves[6], leaves[7]]) - self.assertEqual(t.previous_leaves(leaves[5]), [leaves[4], leaves[3], 
leaves[2], leaves[1], leaves[0]]) + assert t.next_leaves(leaves[5]) == [leaves[6], leaves[7]] + assert t.previous_leaves(leaves[5]) == [leaves[4], leaves[3], leaves[2], leaves[1], leaves[0]] - self.assertEqual(t.next_leaf(leaves[7]), None) - self.assertEqual(t.previous_leaf(leaves[0]), None) + assert t.next_leaf(leaves[7]) is None + assert t.previous_leaf(leaves[0]) is None - self.assertEqual(t.next_leaves(leaves[7]), []) - self.assertEqual(t.previous_leaves(leaves[0]), []) + assert t.next_leaves(leaves[7]) == [] + assert t.previous_leaves(leaves[0]) == [] def test_match(self): self.checkFields(keywords, guess_info) - - -suite = allTests(TestMatchTree) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_movie.py guessit-0.11.0/guessit/test/test_movie.py --- guessit-0.8/guessit/test/test_movie.py 2013-12-12 20:29:08.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_movie.py 2015-06-06 12:39:15.000000000 +0000 @@ -27,9 +27,3 @@ def testMovies(self): self.checkMinimumFieldsCorrect(filetype='movie', filename='movies.yaml') - - -suite = allTests(TestMovie) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) diff -Nru guessit-0.8/guessit/test/test_quality.py guessit-0.11.0/guessit/test/test_quality.py --- guessit-0.8/guessit/test/test_quality.py 2014-02-22 17:04:16.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_quality.py 2015-06-06 12:39:15.000000000 +0000 @@ -47,12 +47,12 @@ g3['color'] = 'orange' q3 = container.rate_quality(g3) - self.assertEqual(q3, 20, "ORANGE should be rated 20. Don't ask why!") + assert q3 == 20, "ORANGE should be rated 20. Don't ask why!" q1 = container.rate_quality(g1) q2 = container.rate_quality(g2) - self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!") + assert q2 > q1, "GREEN should be greater than RED. Don't ask why!" 
g1['context'] = 'sex' g2['context'] = 'sun' @@ -60,18 +60,20 @@ q1 = container.rate_quality(g1) q2 = container.rate_quality(g2) - self.assertTrue(q1 > q2, "SEX should be greater than SUN. Don't ask why!") + assert q1 > q2, "SEX should be greater than SUN. Don't ask why!" - self.assertEqual(container.best_quality(g1, g2), g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!") + assert container.best_quality(g1, g2) == g1, "RED&SEX should be better than GREEN&SUN. Don't ask why!" - self.assertEqual(container.best_quality_properties(['color'], g1, g2), g2, "GREEN should be better than RED. Don't ask why!") + assert container.best_quality_properties(['color'], g1, g2) == g2, \ + "GREEN should be better than RED. Don't ask why!" - self.assertEqual(container.best_quality_properties(['context'], g1, g2), g1, "SEX should be better than SUN. Don't ask why!") + assert container.best_quality_properties(['context'], g1, g2) == g1, \ + "SEX should be better than SUN. Don't ask why!" q1 = container.rate_quality(g1, 'color') q2 = container.rate_quality(g2, 'color') - self.assertTrue(q2 > q1, "GREEN should be greater than RED. Don't ask why!") + assert q2 > q1, "GREEN should be greater than RED. Don't ask why!" container.unregister_quality('context', 'sex') container.unregister_quality('context', 'sun') @@ -79,48 +81,43 @@ q1 = container.rate_quality(g1) q2 = container.rate_quality(g2) - self.assertTrue(q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!") + assert q2 > q1, "GREEN&SUN should be greater than RED&SEX. Don't ask why!" g3['context'] = 'sea' container.unregister_quality('context', 'sea') q3 = container.rate_quality(g3, 'context') - self.assertEqual(q3, 0, "Context should be unregistered.") + assert q3 == 0, "Context should be unregistered." container.unregister_quality('color') q3 = container.rate_quality(g3, 'color') - self.assertEqual(q3, 0, "Color should be unregistered.") + assert q3 == 0, "Color should be unregistered." 
container.clear_qualities() q1 = container.rate_quality(g1) q2 = container.rate_quality(g2) - self.assertTrue(q1 == q2 == 0, "Empty quality container should rate each guess to 0") + assert q1 == q2 == 0, "Empty quality container should rate each guess to 0" def test_quality_transformers(self): guess_720p = guessit.guess_file_info("2012.2009.720p.BluRay.x264.DTS WiKi.mkv") guess_1080p = guessit.guess_file_info("2012.2009.1080p.BluRay.x264.MP3 WiKi.mkv") - self.assertTrue('audioCodec' in guess_720p, "audioCodec should be present") - self.assertTrue('audioCodec' in guess_1080p, "audioCodec should be present") - self.assertTrue('screenSize' in guess_720p, "screenSize should be present") - self.assertTrue('screenSize' in guess_1080p, "screenSize should be present") + assert 'audioCodec' in guess_720p, "audioCodec should be present" + assert 'audioCodec' in guess_1080p, "audioCodec should be present" + assert 'screenSize' in guess_720p, "screenSize should be present" + assert 'screenSize' in guess_1080p, "screenSize should be present" best_quality_guess = best_quality(guess_720p, guess_1080p) - self.assertTrue(guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality") + assert guess_1080p == best_quality_guess, "1080p+MP3 is not the best global quality" best_quality_guess = best_quality_properties(['screenSize'], guess_720p, guess_1080p) - self.assertTrue(guess_1080p == best_quality_guess, "1080p is not the best screenSize") + assert guess_1080p == best_quality_guess, "1080p is not the best screenSize" best_quality_guess = best_quality_properties(['audioCodec'], guess_720p, guess_1080p) - self.assertTrue(guess_720p == best_quality_guess, "DTS is not the best audioCodec") - -suite = allTests(TestQuality) - -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) + assert guess_720p == best_quality_guess, "DTS is not the best audioCodec" diff -Nru guessit-0.8/guessit/test/test_utils.py guessit-0.11.0/guessit/test/test_utils.py --- 
guessit-0.8/guessit/test/test_utils.py 2014-02-24 10:56:25.000000000 +0000 +++ guessit-0.11.0/guessit/test/test_utils.py 2015-06-06 12:39:15.000000000 +0000 @@ -20,13 +20,15 @@ from __future__ import absolute_import, division, print_function, unicode_literals +from datetime import date, timedelta + from guessit.test.guessittest import * + from guessit.fileutils import split_path from guessit.textutils import strip_brackets, str_replace, str_fill, from_camel, is_camel,\ levenshtein, reorder_title from guessit import PY2 from guessit.date import search_date, search_year -from datetime import datetime, date, timedelta class TestUtils(TestGuessit): @@ -48,7 +50,7 @@ } tests = alltests[sys.platform == 'win32'] for path, split in tests.items(): - self.assertEqual(split, split_path(path)) + assert split == split_path(path) def test_strip_brackets(self): allTests = (('', ''), @@ -59,59 +61,60 @@ ) for i, e in allTests: - self.assertEqual(e, strip_brackets(i)) + assert e == strip_brackets(i) def test_levenshtein(self): - self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno"), 0) - self.assertEqual(levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno"), 1) - self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn"), 1) - self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp"), 1) - self.assertEqual(levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq"), 1) - self.assertEqual(levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq"), 2) - self.assertEqual(levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq"), 3) + assert levenshtein("abcdef ghijk lmno", "abcdef ghijk lmno") == 0 + assert levenshtein("abcdef ghijk lmnop", "abcdef ghijk lmno") == 1 + assert levenshtein("abcdef ghijk lmno", "abcdef ghijk lmn") == 1 + assert levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnp") == 1 + assert levenshtein("abcdef ghijk lmno", "abcdef ghijk lmnq") == 1 + assert levenshtein("cbcdef ghijk lmno", "abcdef ghijk lmnq") == 2 + assert 
levenshtein("cbcdef ghihk lmno", "abcdef ghijk lmnq") == 3 def test_reorder_title(self): - self.assertEqual(reorder_title("Simpsons, The"), "The Simpsons") - self.assertEqual(reorder_title("Simpsons,The"), "The Simpsons") - self.assertEqual(reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons") - self.assertEqual(reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')), "Les Simpsons") + assert reorder_title("Simpsons, The") == "The Simpsons" + assert reorder_title("Simpsons,The") == "The Simpsons" + assert reorder_title("Simpsons,Les", articles=('the', 'le', 'la', 'les')) == "Les Simpsons" + assert reorder_title("Simpsons, Les", articles=('the', 'le', 'la', 'les')) == "Les Simpsons" def test_camel(self): - self.assertEqual("", from_camel("")) + assert "" == from_camel("") - self.assertEqual("Hello world", str_replace("Hello World", 6, 'w')) - self.assertEqual("Hello *****", str_fill("Hello World", (6, 11), '*')) + assert "Hello world" == str_replace("Hello World", 6, 'w') + assert "Hello *****" == str_fill("Hello World", (6, 11), '*') - self.assertTrue("This is camel", from_camel("ThisIsCamel")) + assert "This is camel" == from_camel("ThisIsCamel") - self.assertEqual('camel case', from_camel('camelCase')) - self.assertEqual('A case', from_camel('ACase')) - self.assertEqual('MiXedCaSe is not camel case', from_camel('MiXedCaSe is not camelCase')) + assert 'camel case' == from_camel('camelCase') + assert 'A case' == from_camel('ACase') + assert 'MiXedCaSe is not camel case' == from_camel('MiXedCaSe is not camelCase') - self.assertEqual("This is camel cased title", from_camel("ThisIsCamelCasedTitle")) - self.assertEqual("This is camel CASED title", from_camel("ThisIsCamelCASEDTitle")) + assert "This is camel cased title" == from_camel("ThisIsCamelCasedTitle") + assert "This is camel CASED title" == from_camel("ThisIsCamelCASEDTitle") - self.assertEqual("These are camel CASED title", from_camel("TheseAreCamelCASEDTitle")) + 
assert "These are camel CASED title" == from_camel("TheseAreCamelCASEDTitle") - self.assertEqual("Give a camel case string", from_camel("GiveACamelCaseString")) + assert "Give a camel case string" == from_camel("GiveACamelCaseString") - self.assertEqual("Death TO camel case", from_camel("DeathTOCamelCase")) - self.assertEqual("But i like java too:)", from_camel("ButILikeJavaToo:)")) + assert "Death TO camel case" == from_camel("DeathTOCamelCase") + assert "But i like java too:)" == from_camel("ButILikeJavaToo:)") - self.assertEqual("Beatdown french DVD rip.mkv", from_camel("BeatdownFrenchDVDRip.mkv")) - self.assertEqual("DO NOTHING ON UPPER CASE", from_camel("DO NOTHING ON UPPER CASE")) + assert "Beatdown french DVD rip.mkv" == from_camel("BeatdownFrenchDVDRip.mkv") + assert "DO NOTHING ON UPPER CASE" == from_camel("DO NOTHING ON UPPER CASE") - self.assertFalse(is_camel("this_is_not_camel")) - self.assertTrue(is_camel("ThisIsCamel")) + assert not is_camel("this_is_not_camel") + assert is_camel("ThisIsCamel") - self.assertEqual("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv", from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv")) - self.assertFalse(is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv")) + assert "Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv" == \ + from_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv") + assert not is_camel("Dark.City.(1998).DC.BDRIP.720p.DTS.X264-CHD.mkv") - self.assertEqual("A2LiNE", from_camel("A2LiNE")) + assert "A2LiNE" == from_camel("A2LiNE") def test_date(self): - self.assertEqual(search_year(' in the year 2000... '), (2000, (13, 17))) - self.assertEqual(search_year(' they arrived in 1492. '), (None, None)) + assert search_year(' in the year 2000... ') == (2000, (13, 17)) + assert search_year(' they arrived in 1492. 
') == (None, None) today = date.today() today_year_2 = int(str(today.year)[2:]) @@ -122,34 +125,36 @@ past = today - timedelta(days=10000) past_year_2 = int(str(past.year)[2:]) - self.assertEqual(search_date(' Something before 2002-04-22 '), (date(2002, 4, 22), (18, 28))) - self.assertEqual(search_date(' 2002-04-22 Something after '), (date(2002, 4, 22), (1, 11))) - - self.assertEqual(search_date(' This happened on 2002-04-22. '), (date(2002, 4, 22), (18, 28))) - self.assertEqual(search_date(' This happened on 22-04-2002. '), (date(2002, 4, 22), (18, 28))) + assert search_date(' Something before 2002-04-22 ') == (date(2002, 4, 22), (18, 28)) + assert search_date(' 2002-04-22 Something after ') == (date(2002, 4, 22), (1, 11)) - self.assertEqual(search_date(' This happened on 13-04-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26))) - self.assertEqual(search_date(' This happened on 22-04-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26))) - self.assertEqual(search_date(' This happened on 20-04-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26))) + assert search_date(' This happened on 2002-04-22. ') == (date(2002, 4, 22), (18, 28)) + assert search_date(' This happened on 22-04-2002. ') == (date(2002, 4, 22), (18, 28)) - self.assertEqual(search_date(' This happened on 04-13-%s. ' % (today_year_2,)), (date(today.year, 4, 13), (18, 26))) - self.assertEqual(search_date(' This happened on 04-22-%s. ' % (future_year_2,)), (date(future.year, 4, 22), (18, 26))) - self.assertEqual(search_date(' This happened on 04-20-%s. ' % (past_year_2)), (date(past.year, 4, 20), (18, 26))) + assert search_date(' This happened on 13-04-%s. ' % (today_year_2,)) == (date(today.year, 4, 13), (18, 26)) + assert search_date(' This happened on 22-04-%s. ' % (future_year_2,)) == (date(future.year, 4, 22), (18, 26)) + assert search_date(' This happened on 20-04-%s. 
' % past_year_2) == (date(past.year, 4, 20), (18, 26)) - self.assertEqual(search_date(' This happened on 35-12-%s. ' % (today_year_2,)), (None, None)) - self.assertEqual(search_date(' This happened on 37-18-%s. ' % (future_year_2,)), (None, None)) - self.assertEqual(search_date(' This happened on 44-42-%s. ' % (past_year_2)), (None, None)) + assert search_date(' This happened on 13-06-14. ', year_first=True) == (date(2013, 6, 14), (18, 26)) + assert search_date(' This happened on 13-05-14. ', year_first=False) == (date(2014, 5, 13), (18, 26)) - self.assertEqual(search_date(' This happened on %s. ' % (today, )), (today, (18, 28))) - self.assertEqual(search_date(' This happened on %s. ' % (future, )), (future, (18, 28))) - self.assertEqual(search_date(' This happened on %s. ' % (past, )), (past, (18, 28))) + assert search_date(' This happened on 04-13-%s. ' % (today_year_2,)) == (date(today.year, 4, 13), (18, 26)) + assert search_date(' This happened on 04-22-%s. ' % (future_year_2,)) == (date(future.year, 4, 22), (18, 26)) + assert search_date(' This happened on 04-20-%s. ' % past_year_2) == (date(past.year, 4, 20), (18, 26)) - self.assertEqual(search_date(' released date: 04-03-1901? '), (None, None)) + assert search_date(' This happened on 35-12-%s. ' % (today_year_2,)) == (None, None) + assert search_date(' This happened on 37-18-%s. ' % (future_year_2,)) == (None, None) + assert search_date(' This happened on 44-42-%s. ' % past_year_2) == (None, None) - self.assertEqual(search_date(' There\'s no date in here. '), (None, None)) + assert search_date(' This happened on %s. ' % (today, )) == (today, (18, 28)) + assert search_date(' This happened on %s. ' % (future, )) == (future, (18, 28)) + assert search_date(' This happened on %s. ' % (past, )) == (past, (18, 28)) + assert search_date(' released date: 04-03-1901? ') == (None, None) -suite = allTests(TestUtils) + assert search_date(' There\'s no date in here. 
') == (None, None) -if __name__ == '__main__': - TextTestRunner(verbosity=2).run(suite) + assert search_date(' Something 01-02-03 ') == (date(2003, 2, 1), (11, 19)) + assert search_date(' Something 01-02-03 ', year_first=False, day_first=True) == (date(2003, 2, 1), (11, 19)) + assert search_date(' Something 01-02-03 ', year_first=True) == (date(2001, 2, 3), (11, 19)) + assert search_date(' Something 01-02-03 ', day_first=False) == (date(2003, 1, 2), (11, 19)) diff -Nru guessit-0.8/guessit/textutils.py guessit-0.11.0/guessit/textutils.py --- guessit-0.8/guessit/textutils.py 2014-05-06 19:42:08.000000000 +0000 +++ guessit-0.11.0/guessit/textutils.py 2015-06-06 12:39:15.000000000 +0000 @@ -38,8 +38,8 @@ return s if ((s[0] == '[' and s[-1] == ']') or - (s[0] == '(' and s[-1] == ')') or - (s[0] == '{' and s[-1] == '}')): + (s[0] == '(' and s[-1] == ')') or + (s[0] == '{' and s[-1] == '}')): return s[1:-1] return s @@ -48,7 +48,7 @@ _dotted_rexp = re.compile(r'(?:\W|^)(([A-Za-z]\.){2,}[A-Za-z]\.?)') -def clean_string(st): +def clean_default(st): for c in sep: # do not remove certain chars if c in ['-', ',']: @@ -79,7 +79,6 @@ return result - _words_rexp = re.compile('\w+', re.UNICODE) @@ -248,8 +247,8 @@ return split_on_groups(string, groups) -_camel_word2_set = set(('is', 'to',)) -_camel_word3_set = set(('the',)) +_camel_word2_set = {'is', 'to'} +_camel_word3_set = {'the'} def _camel_split_and_lower(string, i): @@ -305,7 +304,7 @@ need_lower = not uppercase_word and not mixedcase_word and need_split - return (need_split, need_lower) + return need_split, need_lower def is_camel(string): @@ -354,3 +353,13 @@ else: pieces.append(char) return ''.join(pieces) + + +def common_words(s1, s2): + common = [] + words1 = set(s1.split()) + for word in s2.split(): + # strip some chars here, e.g. 
as in [1] + if word in words1: + common.append(word) + return common diff -Nru guessit-0.8/guessit/transfo/expected_series.py guessit-0.11.0/guessit/transfo/expected_series.py --- guessit-0.8/guessit/transfo/expected_series.py 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/expected_series.py 2015-06-06 12:39:15.000000000 +0000 @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2013 Nicolas Wack +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see . 
+# + +from __future__ import absolute_import, division, print_function, unicode_literals +import re + +from guessit.containers import PropertiesContainer +from guessit.matcher import GuessFinder +from guessit.plugins.transformers import Transformer + + +class ExpectedSeries(Transformer): + def __init__(self): + Transformer.__init__(self, 230) + + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-S', '--expected-series', action='append', dest='expected_series', + help='Expected series to parse (can be used multiple times)') + + def should_process(self, mtree, options=None): + return options and options.get('expected_series') + + @staticmethod + def expected_series(string, node=None, options=None): + container = PropertiesContainer(enhance=True, canonical_from_pattern=False) + + for expected_serie in options.get('expected_series'): + if expected_serie.startswith('re:'): + expected_serie = expected_serie[3:] + expected_serie = expected_serie.replace(' ', '-') + container.register_property('series', expected_serie, enhance=True) + else: + expected_serie = re.escape(expected_serie) + container.register_property('series', expected_serie, enhance=False) + + found = container.find_properties(string, node, options) + return container.as_guess(found, string) + + def supported_properties(self): + return ['series'] + + def process(self, mtree, options=None): + GuessFinder(self.expected_series, None, self.log, options).process_nodes(mtree.unidentified_leaves()) diff -Nru guessit-0.8/guessit/transfo/expected_title.py guessit-0.11.0/guessit/transfo/expected_title.py --- guessit-0.8/guessit/transfo/expected_title.py 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/expected_title.py 2015-06-06 12:39:15.000000000 +0000 @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2013 
Nicolas Wack +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see . +# + +from __future__ import absolute_import, division, print_function, unicode_literals + +import re + +from guessit.containers import PropertiesContainer +from guessit.matcher import GuessFinder +from guessit.plugins.transformers import Transformer + + +class ExpectedTitle(Transformer): + def __init__(self): + Transformer.__init__(self, 225) + + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title', + help='Expected title (can be used multiple times)') + + def should_process(self, mtree, options=None): + return options and options.get('expected_title') + + @staticmethod + def expected_titles(string, node=None, options=None): + container = PropertiesContainer(enhance=True, canonical_from_pattern=False) + + for expected_title in options.get('expected_title'): + if expected_title.startswith('re:'): + expected_title = expected_title[3:] + expected_title = expected_title.replace(' ', '-') + container.register_property('title', expected_title, enhance=True) + else: + expected_title = re.escape(expected_title) + container.register_property('title', expected_title, enhance=False) + + found = container.find_properties(string, node, options) + return container.as_guess(found, string) + + def 
supported_properties(self): + return ['title'] + + def process(self, mtree, options=None): + GuessFinder(self.expected_titles, None, self.log, options).process_nodes(mtree.unidentified_leaves()) diff -Nru guessit-0.8/guessit/transfo/guess_bonus_features.py guessit-0.11.0/guessit/transfo/guess_bonus_features.py --- guessit-0.8/guessit/transfo/guess_bonus_features.py 2014-05-30 22:02:29.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_bonus_features.py 2015-04-04 08:00:22.000000000 +0000 @@ -47,18 +47,20 @@ bonus = [node for node in mtree.leaves() if 'bonusNumber' in node.guess] if bonus: - bonusTitle = next_group(bonus[0]) - if bonusTitle and same_group(bonusTitle, bonus[0]): - found_property(bonusTitle, 'bonusTitle', confidence=0.8) + bonus_title = next_group(bonus[0]) + if bonus_title and same_group(bonus_title, bonus[0]): + found_property(bonus_title, 'bonusTitle', confidence=0.8) - filmNumber = [node for node in mtree.leaves() + film_number = [node for node in mtree.leaves() if 'filmNumber' in node.guess] - if filmNumber: - filmSeries = previous_group(filmNumber[0]) - found_property(filmSeries, 'filmSeries', confidence=0.9) + if film_number: + film_series = previous_group(film_number[0]) + if film_series: + found_property(film_series, 'filmSeries', confidence=0.9) - title = next_group(filmNumber[0]) - found_property(title, 'title', confidence=0.9) + title = next_group(film_number[0]) + if title: + found_property(title, 'title', confidence=0.9) season = [node for node in mtree.leaves() if 'season' in node.guess] if season and 'bonusNumber' in mtree.info: diff -Nru guessit-0.8/guessit/transfo/guess_country.py guessit-0.11.0/guessit/transfo/guess_country.py --- guessit-0.8/guessit/transfo/guess_country.py 2014-06-21 23:11:12.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_country.py 2015-09-01 06:18:53.000000000 +0000 @@ -20,6 +20,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import logging + from 
guessit.plugins.transformers import Transformer from babelfish import Country from guessit import Guess @@ -27,7 +29,7 @@ from guessit.matcher import GuessFinder, found_guess from guessit.language import LNG_COMMON_WORDS import babelfish -import logging + log = logging.getLogger(__name__) @@ -37,14 +39,19 @@ Transformer.__init__(self, -170) self.replace_language = frozenset(['uk']) + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-C', '--allowed-country', action='append', dest='allowed_countries', + help='Allowed country (can be used multiple times)') + def supported_properties(self): return ['country'] def should_process(self, mtree, options=None): options = options or {} - return 'nocountry' not in options.keys() + return options.get('country', True) - def _scan_country(self, country, strict=False): + @staticmethod + def _scan_country(country, strict=False): """ Find a country if it is at the start or end of country string """ @@ -73,18 +80,23 @@ except babelfish.Error: continue - return Country.fromguessit(country), None + return Country.fromguessit(country), (start, end) - def is_valid_country(self, country): - return (country.name.lower() not in LNG_COMMON_WORDS and - country.alpha2.lower() not in LNG_COMMON_WORDS) + @staticmethod + def is_valid_country(country, options=None): + if options and options.get('allowed_countries'): + allowed_countries = options.get('allowed_countries') + return country.name.lower() in allowed_countries or country.alpha2.lower() in allowed_countries + else: + return (country.name.lower() not in LNG_COMMON_WORDS and + country.alpha2.lower() not in LNG_COMMON_WORDS) def guess_country(self, string, node=None, options=None): c = string.strip().lower() - if not c in LNG_COMMON_WORDS: + if c not in LNG_COMMON_WORDS: try: country, country_span = self._scan_country(c, True) - if self.is_valid_country(country): + if self.is_valid_country(country, 
options): guess = Guess(country=country, confidence=1.0, input=node.value, span=(country_span[0] + 1, country_span[1] + 1)) return guess except babelfish.Error: @@ -99,7 +111,7 @@ node.guess.set('language', None) try: country = Country.fromguessit(c) - if self.is_valid_country(country): + if self.is_valid_country(country, options): guess = Guess(country=country, confidence=0.9, input=node.value, span=node.span) found_guess(node, guess, logger=log) except babelfish.Error: diff -Nru guessit-0.8/guessit/transfo/guess_date.py guessit-0.11.0/guessit/transfo/guess_date.py --- guessit-0.8/guessit/transfo/guess_date.py 2014-02-22 17:04:16.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_date.py 2015-08-31 16:57:28.000000000 +0000 @@ -19,6 +19,7 @@ # from __future__ import absolute_import, division, print_function, unicode_literals +from guessit.containers import DefaultValidator from guessit.plugins.transformers import Transformer from guessit.matcher import GuessFinder @@ -29,15 +30,21 @@ def __init__(self): Transformer.__init__(self, 50) + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None, + help='If short date is found, consider the first digits as the year.') + naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None, + help='If short date is found, consider the second digits as the day.') + def supported_properties(self): return ['date'] - def guess_date(self, string, node=None, options=None): - date, span = search_date(string) - if date: + @staticmethod + def guess_date(string, node=None, options=None): + date, span = search_date(string, options.get('date_year_first') if options else False, options.get('date_day_first') if options else False) + if date and span and DefaultValidator.validate_string(string, span): # ensure we have a 
separator before and after date return {'date': date}, span - else: - return None, None + return None, None def process(self, mtree, options=None): GuessFinder(self.guess_date, 1.0, self.log, options).process_nodes(mtree.unidentified_leaves()) diff -Nru guessit-0.8/guessit/transfo/guess_episode_details.py guessit-0.11.0/guessit/transfo/guess_episode_details.py --- guessit-0.8/guessit/transfo/guess_episode_details.py 2014-05-30 22:11:06.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_episode_details.py 2015-06-06 12:39:15.000000000 +0000 @@ -20,10 +20,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import itertools + from guessit.plugins.transformers import Transformer from guessit.matcher import found_guess from guessit.containers import PropertiesContainer -import itertools class GuessEpisodeDetails(Transformer): @@ -34,14 +35,14 @@ self.container.register_property('episodeDetails', 'Extras?', canonical_form='Extras') def guess_details(self, string, node=None, options=None): - properties = self.container.find_properties(string, node, 'episodeDetails', multiple=True) + properties = self.container.find_properties(string, node, options, 'episodeDetails', multiple=True) guesses = self.container.as_guess(properties, multiple=True) return guesses def second_pass_options(self, mtree, options=None): if not mtree.guess.get('type', '').startswith('episode'): for unidentified_leaf in mtree.unidentified_leaves(): - properties = self.container.find_properties(unidentified_leaf.value, unidentified_leaf, 'episodeDetails') + properties = self.container.find_properties(unidentified_leaf.value, unidentified_leaf, options, 'episodeDetails') guess = self.container.as_guess(properties) if guess: return {'type': 'episode'} diff -Nru guessit-0.8/guessit/transfo/guess_episode_info_from_position.py guessit-0.11.0/guessit/transfo/guess_episode_info_from_position.py --- guessit-0.8/guessit/transfo/guess_episode_info_from_position.py 
2014-06-21 19:30:15.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_episode_info_from_position.py 2015-09-04 20:33:12.000000000 +0000 @@ -24,6 +24,8 @@ from guessit.textutils import reorder_title from guessit.matcher import found_property +from guessit.patterns.list import all_separators +from guessit.language import all_lang_prefixes_suffixes class GuessEpisodeInfoFromPosition(Transformer): @@ -33,33 +35,51 @@ def supported_properties(self): return ['title', 'series'] - def match_from_epnum_position(self, mtree, node): - epnum_idx = node.node_idx + @staticmethod + def excluded_word(*values): + for value in values: + if value.clean_value.lower() in (all_separators + all_lang_prefixes_suffixes): + return True + return False + + def match_from_epnum_position(self, path_node, ep_node, options): + epnum_idx = ep_node.node_idx # a few helper functions to be able to filter using high-level semantics def before_epnum_in_same_pathgroup(): - return [leaf for leaf in mtree.unidentified_leaves() - if (leaf.node_idx[0] == epnum_idx[0] and - leaf.node_idx[1:] < epnum_idx[1:])] + return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1) + if (leaf.node_idx[0] == epnum_idx[0] and + leaf.node_idx[1:] < epnum_idx[1:] and + not GuessEpisodeInfoFromPosition.excluded_word(leaf))] def after_epnum_in_same_pathgroup(): - return [leaf for leaf in mtree.unidentified_leaves() - if (leaf.node_idx[0] == epnum_idx[0] and - leaf.node_idx[1:] > epnum_idx[1:])] + return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1) + if (leaf.node_idx[0] == epnum_idx[0] and + leaf.node_idx[1:] > epnum_idx[1:] and + not GuessEpisodeInfoFromPosition.excluded_word(leaf))] def after_epnum_in_same_explicitgroup(): - return [leaf for leaf in mtree.unidentified_leaves() - if (leaf.node_idx[:2] == epnum_idx[:2] and - leaf.node_idx[2:] > epnum_idx[2:])] + return [leaf for leaf in path_node.unidentified_leaves(lambda x: len(x.clean_value) > 1) + if 
(leaf.node_idx[:2] == epnum_idx[:2] and + leaf.node_idx[2:] > epnum_idx[2:] and + not GuessEpisodeInfoFromPosition.excluded_word(leaf))] # epnumber is the first group and there are only 2 after it in same # path group # -> series title - episode title - title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup()) + title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options) + + if ('title' not in path_node.info and # no title + 'series' in path_node.info and # series present + before_epnum_in_same_pathgroup() == [] and # no groups before + len(title_candidates) == 1): # only 1 group after + + found_property(title_candidates[0], 'title', confidence=0.4) + return - if ('title' not in mtree.info and # no title - before_epnum_in_same_pathgroup() == [] and # no groups before - len(title_candidates) == 2): # only 2 groups after + if ('title' not in path_node.info and # no title + before_epnum_in_same_pathgroup() == [] and # no groups before + len(title_candidates) == 2): # only 2 groups after found_property(title_candidates[0], 'series', confidence=0.4) found_property(title_candidates[1], 'title', confidence=0.4) @@ -69,17 +89,17 @@ # probably the series name series_candidates = before_epnum_in_same_pathgroup() if len(series_candidates) >= 1: - found_property(series_candidates[0], 'series', confidence=0.7) + found_property(series_candidates[0], 'series', confidence=0.7) # only 1 group after (in the same path group) and it's probably the # episode title. 
- title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup()) + title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options) if len(title_candidates) == 1: found_property(title_candidates[0], 'title', confidence=0.5) return else: # try in the same explicit group, with lower confidence - title_candidates = self._filter_candidates(after_epnum_in_same_explicitgroup()) + title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_explicitgroup(), options) if len(title_candidates) == 1: found_property(title_candidates[0], 'title', confidence=0.4) return @@ -88,7 +108,7 @@ return # get the one with the longest value - title_candidates = self._filter_candidates(after_epnum_in_same_pathgroup()) + title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(after_epnum_in_same_pathgroup(), options) if title_candidates: maxidx = -1 maxv = -1 @@ -96,18 +116,20 @@ if len(c.clean_value) > maxv: maxidx = i maxv = len(c.clean_value) - found_property(title_candidates[maxidx], 'title', confidence=0.3) + if maxidx > -1: + found_property(title_candidates[maxidx], 'title', confidence=0.3) def should_process(self, mtree, options=None): options = options or {} return not options.get('skip_title') and mtree.guess.get('type', '').startswith('episode') - def _filter_candidates(self, candidates): + @staticmethod + def _filter_candidates(candidates, options): episode_details_transformer = get_transformer('guess_episode_details') if episode_details_transformer: - return [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, re_match=True)] - else: - return list(candidates) + candidates = [n for n in candidates if not episode_details_transformer.container.find_properties(n.value, n, options, re_match=True)] + candidates = list(filter(lambda n: not GuessEpisodeInfoFromPosition.excluded_word(n), candidates)) + return candidates def process(self, mtree, 
options=None): """ @@ -115,31 +137,47 @@ position relative to other known elements """ eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess] + + if not eps: + eps = [node for node in mtree.leaves() if 'date' in node.guess] + + eps = sorted(eps, key=lambda ep: -ep.guess.confidence()) if eps: - self.match_from_epnum_position(mtree, eps[0]) + performed_path_nodes = [] + for ep_node in eps: + # Perform only first episode node for each path node + path_node = [node for node in ep_node.ancestors if node.category == 'path'] + if len(path_node) > 0: + path_node = path_node[0] + else: + path_node = ep_node.root + if path_node not in performed_path_nodes: + self.match_from_epnum_position(path_node, ep_node, options) + performed_path_nodes.append(path_node) else: # if we don't have the episode number, but at least 2 groups in the # basename, then it's probably series - eptitle - basename = mtree.node_at((-2,)) + basename = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-2] - title_candidates = self._filter_candidates(basename.unidentified_leaves()) + title_candidates = GuessEpisodeInfoFromPosition._filter_candidates(basename.unidentified_leaves(), options) - if len(title_candidates) >= 2: + if len(title_candidates) >= 2 and 'series' not in mtree.info: found_property(title_candidates[0], 'series', confidence=0.4) found_property(title_candidates[1], 'title', confidence=0.4) elif len(title_candidates) == 1: # but if there's only one candidate, it's probably the series name - found_property(title_candidates[0], 'series', confidence=0.4) + found_property(title_candidates[0], 'series' if 'series' not in mtree.info else 'title', confidence=0.4) # if we only have 1 remaining valid group in the folder containing the # file, then it's likely that it is the series name + path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes())) try: - series_candidates = list(mtree.node_at((-3,)).unidentified_leaves()) - except ValueError: + 
series_candidates = list(path_nodes[-3].unidentified_leaves()) + except IndexError: series_candidates = [] - if len(series_candidates) == 1: + if len(series_candidates) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(series_candidates[0]): found_property(series_candidates[0], 'series', confidence=0.3) # if there's a path group that only contains the season info, then the @@ -150,7 +188,7 @@ if eps: previous = [node for node in mtree.unidentified_leaves() if node.node_idx[0] == eps[0].node_idx[0] - 1] - if len(previous) == 1: + if len(previous) == 1 and not GuessEpisodeInfoFromPosition.excluded_word(previous[0]): found_property(previous[0], 'series', confidence=0.5) # If we have found title without any serie name, replace it by the serie name. diff -Nru guessit-0.8/guessit/transfo/guess_episodes_rexps.py guessit-0.11.0/guessit/transfo/guess_episodes_rexps.py --- guessit-0.8/guessit/transfo/guess_episodes_rexps.py 2014-06-21 19:30:15.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_episodes_rexps.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,64 +20,130 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import re +from guessit.patterns.list import list_parser, all_separators_re + from guessit.plugins.transformers import Transformer from guessit.matcher import GuessFinder -from guessit.patterns import sep -from guessit.containers import PropertiesContainer, WeakValidator, NoValidator, ChainedValidator, DefaultValidator +from guessit.patterns import sep, build_or_pattern +from guessit.containers import PropertiesContainer, WeakValidator, NoValidator, ChainedValidator, DefaultValidator, \ + FormatterValidator from guessit.patterns.numeral import numeral, digital_numeral, parse_numeral -from re import split as re_split class GuessEpisodesRexps(Transformer): def __init__(self): Transformer.__init__(self, 20) + of_separators = ['of', 'sur', '/', '\\'] + of_separators_re = re.compile(build_or_pattern(of_separators, 
escape=True), re.IGNORECASE) + + season_words = ['seasons?', 'saisons?', 'series?'] + episode_words = ['episodes?'] + + season_markers = ['s'] + episode_markers = ['e', 'ep'] + self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False) + season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE) + episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE) + + season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE) + episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE) + + def episode_parser_x(value): + return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE)) + + def episode_parser_e(value): + return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True) + def episode_parser(value): - values = re_split('[a-zA-Z]', value) - values = [x for x in values if x] - ret = [] - for letters_elt in values: - dashed_values = letters_elt.split('-') - dashed_values = [x for x in dashed_values if x] - if len(dashed_values) > 1: - for _ in range(0, len(dashed_values) - 1): - start_dash_ep = parse_numeral(dashed_values[0]) - end_dash_ep = parse_numeral(dashed_values[1]) - for dash_ep in range(start_dash_ep, end_dash_ep + 1): - ret.append(dash_ep) - else: - ret.append(parse_numeral(letters_elt)) - if len(ret) > 1: - return {None: ret[0], 'episodeList': ret} # TODO: Should support seasonList also - elif len(ret) > 0: - return ret[0] - else: - return None + return list_parser(value, 'episodeList') - class ResolutionCollisionValidator(object): - def validate(self, prop, string, node, match, entry_start, entry_end): - return len(match.group(2)) < 3 + def season_parser(value): + return list_parser(value, 'seasonList') - self.container.register_property(None, r'((?:season|saison)' + sep + '?(?P' + numeral + '))', confidence=1.0, formatter=parse_numeral) - 
self.container.register_property(None, r'(s(?P' + digital_numeral + ')[^0-9]?' + sep + '?(?P(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=NoValidator()) - self.container.register_property(None, r'[^0-9]((?P' + digital_numeral + ')[^0-9 .-]?-?(?P(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator())) - self.container.register_property(None, r'(s(?P' + digital_numeral + '))[^0-9]', confidence=0.6, formatter=parse_numeral, validator=NoValidator()) - self.container.register_property(None, r'((?P' + digital_numeral + ')v[23])', confidence=0.6, formatter=parse_numeral) - self.container.register_property(None, r'((?:ep)' + sep + r'(?P' + numeral + '))[^0-9]', confidence=0.7, formatter=parse_numeral) - self.container.register_property(None, r'(e(?P' + digital_numeral + '))', confidence=0.6, formatter=parse_numeral) - self.container.register_property(None, r'\A ?((?P' + '\d{2}' + '))' + sep, confidence=0.4, formatter=parse_numeral) - self.container.register_property(None, r'\A ?(0(?P' + '\d+' + '))' + sep, confidence=0.4, formatter=parse_numeral) + class ResolutionCollisionValidator(object): + @staticmethod + def validate(prop, string, node, match, entry_start, entry_end): + # Invalidate when season or episode is more than 100. + try: + season_value = season_parser(match.group(2)) + episode_value = episode_parser_x(match.group(3)) + return season_value < 100 or episode_value < 100 + except: + # This may occur for 1xAll or patterns like this. + return True + + self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P' + numeral + ')' + sep + '?' 
+ season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral) + self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False))) + + self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P' + digital_numeral + ')[^0-9]?' + sep + '?(?P(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator()) + self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P' + digital_numeral + ')[^0-9]?' + sep + '?(?P(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' 
+ digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator()) + + self.container.register_property(None, sep + r'((?P' + digital_numeral + ')' + sep + '' + '(?P(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator())) + self.container.register_property(None, r'((?P' + digital_numeral + ')' + '(?P(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator())) + self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator()) + + self.container.register_property(None, r'((?P' + digital_numeral + ')' + sep + '?v(?P\d+))', confidence=0.6, formatter=parse_numeral) + self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator()) + self.container.register_property(None, r'(ep' + sep + r'?(?P' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral) + self.container.register_property(None, r'(ep' + sep + r'?(?P' + digital_numeral + ')' + sep + '?v(?P\d+))', confidence=0.7, formatter=parse_numeral) + + self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' 
+ digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser}) + self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser}) + + self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P' + digital_numeral + ')' + sep + '?v(?P\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser}) + self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P' + digital_numeral + ')' + sep + '?v(?P\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser}) + + self.container.register_property('episodeNumber', r'^' + sep + '+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser) + self.container.register_property('episodeNumber', r'^' + sep + '+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser) + self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser) + self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser) + + self.container.register_property(None, r'((?P' + numeral + ')' + sep + '?' 
+ of_separators_re.pattern + sep + '?(?P' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral) + self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P' + numeral + '))', confidence=0.7, formatter=parse_numeral) + self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P' + numeral + '))', confidence=0.7, formatter=parse_numeral) + self.container.register_property(None, r'((?P' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral) self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator()) + self.container.register_property(None, r'[^0-9]((?P' + digital_numeral + ')[^0-9 .-]?-?(?PxAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator())) + + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', default=False, + help='Guess "serie.213.avi" as the episodeNumber 213. 
Without this option, ' + 'it will be guessed as season 2, episodeNumber 13') + def supported_properties(self): - return ['episodeNumber', 'season'] + return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount', 'version', 'other'] def guess_episodes_rexps(self, string, node=None, options=None): - found = self.container.find_properties(string, node) - return self.container.as_guess(found, string) + found = self.container.find_properties(string, node, options) + guess = self.container.as_guess(found, string) + if guess and node: + if 'season' in guess and 'episodeNumber' in guess: + # If two guesses contains both season and episodeNumber in same group, create an episodeList + for existing_guess in node.group_node().guesses: + if 'season' in existing_guess and 'episodeNumber' in existing_guess: + if 'episodeList' not in existing_guess: + existing_guess['episodeList'] = [existing_guess['episodeNumber']] + existing_guess['episodeList'].append(guess['episodeNumber']) + existing_guess['episodeList'].sort() + if existing_guess['episodeNumber'] > guess['episodeNumber']: + existing_guess.set_confidence('episodeNumber', 0) + else: + guess.set_confidence('episodeNumber', 0) + guess['episodeList'] = list(existing_guess['episodeList']) + elif 'episodeNumber' in guess: + # If two guesses contains only episodeNumber in same group, remove the existing one. 
+ for existing_guess in node.group_node().guesses: + if 'episodeNumber' in existing_guess: + for k, v in existing_guess.items(): + if k in guess: + del guess[k] + return guess def should_process(self, mtree, options=None): return mtree.guess.get('type', '').startswith('episode') diff -Nru guessit-0.8/guessit/transfo/guess_filetype.py guessit-0.11.0/guessit/transfo/guess_filetype.py --- guessit-0.8/guessit/transfo/guess_filetype.py 2014-06-01 22:23:26.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_filetype.py 2015-08-31 16:57:28.000000000 +0000 @@ -29,12 +29,11 @@ from guessit.transfo import TransformerException from guessit.plugins.transformers import Transformer, get_transformer from guessit.matcher import log_found_guess, found_guess -from guessit.textutils import clean_string class GuessFiletype(Transformer): def __init__(self): - Transformer.__init__(self, 250) + Transformer.__init__(self, 200) # List of well known movies and series, hardcoded because they cannot be # guessed appropriately otherwise @@ -103,15 +102,14 @@ else: if fileext and not options.get('name_only'): other = {'extension': fileext} + list(mtree.unidentified_leaves())[-1].guess = Guess(other) # check whether we are in a 'Movies', 'Tv Shows', ... 
folder - folder_rexps = [ - (r'Movies?', upgrade_movie), + folder_rexps = [(r'Movies?', upgrade_movie), (r'Films?', upgrade_movie), (r'Tv[ _-]?Shows?', upgrade_episode), (r'Series?', upgrade_episode), - (r'Episodes?', upgrade_episode), - ] + (r'Episodes?', upgrade_episode)] for frexp, upgrade_func in folder_rexps: frexp = re.compile(frexp, re.IGNORECASE) for pathgroup in mtree.children: @@ -122,7 +120,7 @@ # check for a few specific cases which will unintentionally make the # following heuristics confused (eg: OSS 117 will look like an episode, # season 1, epnum 17, when it is in fact a movie) - fname = clean_string(filename).lower() + fname = mtree.clean_string(filename).lower() for m in self.MOVIES: if m in fname: self.log.debug('Found in exception list of movies -> type = movie') @@ -134,27 +132,48 @@ upgrade_episode() return filetype_container[0], other - # now look whether there are some specific hints for episode vs movie # if we have an episode_rexp (eg: s02e13), it is an episode episode_transformer = get_transformer('guess_episodes_rexps') if episode_transformer: - guess = episode_transformer.guess_episodes_rexps(filename) - if guess: - self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess) - upgrade_episode() - return filetype_container[0], other + filename_parts = list(x.value for x in mtree.unidentified_leaves()) + filename_parts.append(filename) + for filename_part in filename_parts: + guess = episode_transformer.guess_episodes_rexps(filename_part) + if guess: + self.log.debug('Found guess_episodes_rexps: %s -> type = episode', guess) + upgrade_episode() + return filetype_container[0], other properties_transformer = get_transformer('guess_properties') if properties_transformer: # if we have certain properties characteristic of episodes, it is an ep - found = properties_transformer.container.find_properties(filename, mtree, 'episodeFormat') + found = properties_transformer.container.find_properties(filename, mtree, options, 
'episodeFormat') guess = properties_transformer.container.as_guess(found, filename) if guess: self.log.debug('Found characteristic property of episodes: %s"', guess) upgrade_episode() return filetype_container[0], other - found = properties_transformer.container.find_properties(filename, mtree, 'format') + weak_episode_transformer = get_transformer('guess_weak_episodes_rexps') + if weak_episode_transformer: + found = weak_episode_transformer.container.find_properties(filename, mtree, options, 'episodeNumber') + guess = weak_episode_transformer.container.as_guess(found, filename) + if guess and (guess.raw('episodeNumber')[0] == '0' or guess['episodeNumber'] >= 10): + self.log.debug('Found characteristic property of episodes: %s"', guess) + upgrade_episode() + return filetype_container[0], other + + found = properties_transformer.container.find_properties(filename, mtree, options, 'crc32') + guess = properties_transformer.container.as_guess(found, filename) + if guess: + found = weak_episode_transformer.container.find_properties(filename, mtree, options) + guess = weak_episode_transformer.container.as_guess(found, filename) + if guess: + self.log.debug('Found characteristic property of episodes: %s"', guess) + upgrade_episode() + return filetype_container[0], other + + found = properties_transformer.container.find_properties(filename, mtree, options, 'format') guess = properties_transformer.container.as_guess(found, filename) if guess and guess['format'] in ('HDTV', 'WEBRip', 'WEB-DL', 'DVB'): # Use weak episodes only if TV or WEB source @@ -168,7 +187,7 @@ website_transformer = get_transformer('guess_website') if website_transformer: - found = website_transformer.container.find_properties(filename, mtree, 'website') + found = website_transformer.container.find_properties(filename, mtree, options, 'website') guess = website_transformer.container.as_guess(found, filename) if guess: for namepart in ('tv', 'serie', 'episode'): @@ -205,7 +224,8 @@ if mime is not None: 
filetype_info.update({'mimetype': mime}, confidence=1.0) - node_ext = mtree.node_at((-1,)) + # Retrieve the last node of category path (extension node) + node_ext = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1] found_guess(node_ext, filetype_info) if mtree.guess.get('type') in [None, 'unknown']: @@ -213,3 +233,22 @@ mtree.guess.set('type', 'movie', confidence=0.6) else: raise TransformerException(__name__, 'Unknown file type') + + def second_pass_options(self, mtree, options=None): + if 'type' not in options or not options['type']: + if mtree.info.get('type') != 'episode': + # now look whether there are some specific hints for episode vs movie + # If we have a date and no year, this is a TV Show. + if 'date' in mtree.info and 'year' not in mtree.info: + return {'type': 'episode'} + + if mtree.info.get('type') != 'movie': + # If we have a year, no season but raw episodeNumber is a number not starting with '0', this is a movie. + if 'year' in mtree.info and 'episodeNumber' in mtree.info and not 'season' in mtree.info: + try: + int(mtree.raw['episodeNumber']) + return {'type': 'movie'} + except ValueError: + pass + + diff -Nru guessit-0.8/guessit/transfo/guess_idnumber.py guessit-0.11.0/guessit/transfo/guess_idnumber.py --- guessit-0.8/guessit/transfo/guess_idnumber.py 2014-02-22 17:04:16.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_idnumber.py 2015-06-06 12:39:15.000000000 +0000 @@ -20,14 +20,20 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import re + from guessit.plugins.transformers import Transformer from guessit.matcher import GuessFinder -import re + + +_DIGIT = 0 +_LETTER = 1 +_OTHER = 2 class GuessIdnumber(Transformer): def __init__(self): - Transformer.__init__(self, -180) + Transformer.__init__(self, 220) def supported_properties(self): return ['idNumber'] @@ -39,17 +45,22 @@ if match is not None: result = match.groupdict() switch_count = 0 - DIGIT = 0 - LETTER = 1 - OTHER = 2 - last = 
LETTER + switch_letter_count = 0 + letter_count = 0 + last_letter = None + + last = _LETTER for c in result['idNumber']: if c in '0123456789': - ci = DIGIT + ci = _DIGIT elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': - ci = LETTER + ci = _LETTER + if c != last_letter: + switch_letter_count += 1 + last_letter = c + letter_count += 1 else: - ci = OTHER + ci = _OTHER if ci != last: switch_count += 1 @@ -57,10 +68,11 @@ last = ci switch_ratio = float(switch_count) / len(result['idNumber']) + letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1 # only return the result as probable if we alternate often between # char type (more likely for hash values than for common words) - if switch_ratio > 0.4: + if switch_ratio > 0.4 and letters_ratio > 0.4: return result, match.span() return None, None diff -Nru guessit-0.8/guessit/transfo/guess_language.py guessit-0.11.0/guessit/transfo/guess_language.py --- guessit-0.8/guessit/transfo/guess_language.py 2014-05-30 21:05:55.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_language.py 2015-09-04 19:43:28.000000000 +0000 @@ -22,7 +22,7 @@ from guessit.language import search_language, subtitle_prefixes, subtitle_suffixes from guessit.patterns.extension import subtitle_exts -from guessit.textutils import clean_string, find_words +from guessit.textutils import find_words from guessit.plugins.transformers import Transformer from guessit.matcher import GuessFinder @@ -31,14 +31,29 @@ def __init__(self): Transformer.__init__(self, 30) + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages', + help='Allowed language (can be used multiple times)') + def supported_properties(self): return ['language', 'subtitleLanguage'] - def guess_language(self, string, node=None, options=None): - guess = search_language(string) + @staticmethod 
+ def guess_language(string, node=None, options=None): + allowed_languages = None + if options and 'allowed_languages' in options: + allowed_languages = options.get('allowed_languages') + + directory = list(filter(lambda x: x.category == 'path', node.ancestors))[0] + if len(directory.clean_value) <= 3: + # skip if we have a langage code as directory + return None + + guess = search_language(string, allowed_languages) return guess - def _skip_language_on_second_pass(self, mtree, node): + @staticmethod + def _skip_language_on_second_pass(mtree, node): """Check if found node is a valid language node, or if it's a false positive. :param mtree: Tree detected on first pass. @@ -59,8 +74,10 @@ title_ends = {} for unidentified_node in mtree.unidentified_leaves(): - unidentified_starts[unidentified_node.span[0]] = unidentified_node - unidentified_ends[unidentified_node.span[1]] = unidentified_node + if len(unidentified_node.clean_value) > 1: + # only consider unidentified leaves that have some meaningful content + unidentified_starts[unidentified_node.span[0]] = unidentified_node + unidentified_ends[unidentified_node.span[1]] = unidentified_node for property_node in mtree.leaves_containing('year'): property_starts[property_node.span[0]] = property_node @@ -70,74 +87,60 @@ title_starts[title_node.span[0]] = title_node title_ends[title_node.span[1]] = title_node - return node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or node.span[1] + 1 in property_starts.keys()) or\ - node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or node.span[0] in unidentified_ends.keys() or node.span[0] in property_ends.keys()) + return (node.span[0] in title_ends.keys() and (node.span[1] in unidentified_starts.keys() or + node.span[1] + 1 in property_starts.keys()) or + node.span[1] in title_starts.keys() and (node.span[0] == node.group_node().span[0] or + node.span[0] in unidentified_ends.keys() or + node.span[0] in 
property_ends.keys())) def second_pass_options(self, mtree, options=None): m = mtree.matched() - to_skip_language_nodes = [] + to_skip_langs = set() for lang_key in ('language', 'subtitleLanguage'): - langs = {} lang_nodes = set(mtree.leaves_containing(lang_key)) for lang_node in lang_nodes: - lang = lang_node.guess.get(lang_key, None) if self._skip_language_on_second_pass(mtree, lang_node): # Language probably split the title. Add to skip for 2nd pass. # if filetype is subtitle and the language appears last, just before # the extension, then it is likely a subtitle language - parts = clean_string(lang_node.root.value).split() - if (m.get('type') in ['moviesubtitle', 'episodesubtitle'] and - (parts.index(lang_node.value) == len(parts) - 2)): - continue - - to_skip_language_nodes.append(lang_node) - elif not lang in langs: - langs[lang] = lang_node - else: - # The same language was found. Keep the more confident one, - # and add others to skip for 2nd pass. - existing_lang_node = langs[lang] - to_skip = None - if (existing_lang_node.guess.confidence('language') >= - lang_node.guess.confidence('language')): - # lang_node is to remove - to_skip = lang_node - else: - # existing_lang_node is to remove - langs[lang] = lang_node - to_skip = existing_lang_node - to_skip_language_nodes.append(to_skip) + parts = mtree.clean_string(lang_node.root.value).split() + if m.get('type') in ['moviesubtitle', 'episodesubtitle']: + if (lang_node.value in parts and parts.index(lang_node.value) == len(parts) - 2): + continue + + to_skip_langs.add(lang_node.value) - if to_skip_language_nodes: + if to_skip_langs: # Also skip same value nodes - skipped_values = [skip_node.value for skip_node in to_skip_language_nodes] + lang_nodes = (set(mtree.leaves_containing('language')) | + set(mtree.leaves_containing('subtitleLanguage'))) - for lang_key in ('language', 'subtitleLanguage'): - lang_nodes = set(mtree.leaves_containing(lang_key)) + to_skip = [node for node in lang_nodes if node.value in 
to_skip_langs] + return {'skip_nodes': to_skip} - for lang_node in lang_nodes: - if lang_node not in to_skip_language_nodes and lang_node.value in skipped_values: - to_skip_language_nodes.append(lang_node) - return {'skip_nodes': to_skip_language_nodes} return None def should_process(self, mtree, options=None): options = options or {} - return 'nolanguage' not in options + return options.get('language', True) def process(self, mtree, options=None): GuessFinder(self.guess_language, None, self.log, options).process_nodes(mtree.unidentified_leaves()) - def promote_subtitle(self, node): - node.guess.set('subtitleLanguage', node.guess['language'], - confidence=node.guess.confidence('language')) - del node.guess['language'] + @staticmethod + def promote_subtitle(node): + if 'language' in node.guess: + node.guess.set('subtitleLanguage', node.guess['language'], + confidence=node.guess.confidence('language')) + del node.guess['language'] def post_process(self, mtree, options=None): # 1- try to promote language to subtitle language where it makes sense + prefixes = [] + for node in mtree.nodes(): if 'language' not in node.guess: continue @@ -146,8 +149,9 @@ # the group is the last group of the filename, it is probably the # language of the subtitle # (eg: 'xxx.english.srt') - if (mtree.node_at((-1,)).value.lower() in subtitle_exts and - node == list(mtree.leaves())[-2]): + ext_node = list(filter(lambda x: x.category == 'path', mtree.nodes()))[-1] + if (ext_node.value.lower() in subtitle_exts and + node == list(mtree.leaves())[-2]): self.promote_subtitle(node) # - if we find in the same explicit group @@ -159,12 +163,8 @@ for sub_prefix in subtitle_prefixes: if (sub_prefix in find_words(group_str) and - 0 <= group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])): - self.promote_subtitle(node) - - for sub_suffix in subtitle_suffixes: - if (sub_suffix in find_words(group_str) and - (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)): + 0 <= 
group_str.find(sub_prefix) < (node.span[0] - explicit_group.span[0])): + prefixes.append((explicit_group, sub_prefix)) self.promote_subtitle(node) # - if a language is in an explicit group just preceded by "st", @@ -176,3 +176,21 @@ self.promote_subtitle(node) except IndexError: pass + + for node in mtree.nodes(): + if 'language' not in node.guess: + continue + + explicit_group = mtree.node_at(node.node_idx[:2]) + group_str = explicit_group.value.lower() + + for sub_suffix in subtitle_suffixes: + if (sub_suffix in find_words(group_str) and + (node.span[0] - explicit_group.span[0]) < group_str.find(sub_suffix)): + is_a_prefix = False + for prefix in prefixes: + if prefix[0] == explicit_group and group_str.find(prefix[1]) == group_str.find(sub_suffix): + is_a_prefix = True + break + if not is_a_prefix: + self.promote_subtitle(node) diff -Nru guessit-0.8/guessit/transfo/guess_movie_title_from_position.py guessit-0.11.0/guessit/transfo/guess_movie_title_from_position.py --- guessit-0.8/guessit/transfo/guess_movie_title_from_position.py 2014-05-30 22:04:42.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_movie_title_from_position.py 2015-09-04 20:33:23.000000000 +0000 @@ -23,6 +23,8 @@ from guessit.plugins.transformers import Transformer from guessit.matcher import found_property from guessit import u +from guessit.patterns.list import all_separators +from guessit.language import all_lang_prefixes_suffixes class GuessMovieTitleFromPosition(Transformer): @@ -36,19 +38,31 @@ options = options or {} return not options.get('skip_title') and not mtree.guess.get('type', '').startswith('episode') + @staticmethod + def excluded_word(*values): + for value in values: + if value.clean_value.lower() in all_separators + all_lang_prefixes_suffixes: + return True + return False + def process(self, mtree, options=None): """ try to identify the remaining unknown groups by looking at their position relative to other known elements """ - basename = mtree.node_at((-2,)) + if 
'title' in mtree.info: + return + + path_nodes = list(filter(lambda x: x.category == 'path', mtree.nodes())) + + basename = path_nodes[-2] all_valid = lambda leaf: len(leaf.clean_value) > 0 basename_leftover = list(basename.unidentified_leaves(valid=all_valid)) try: - folder = mtree.node_at((-3,)) + folder = path_nodes[-3] folder_leftover = list(folder.unidentified_leaves()) - except ValueError: + except IndexError: folder = None folder_leftover = [] @@ -59,8 +73,8 @@ # if we find the same group both in the folder name and the filename, # it's a good candidate for title if (folder_leftover and basename_leftover and - folder_leftover[0].clean_value == basename_leftover[0].clean_value): - + folder_leftover[0].clean_value == basename_leftover[0].clean_value and + not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])): found_property(folder_leftover[0], 'title', confidence=0.8) return @@ -69,51 +83,45 @@ # group, and the folder only contains 1 unidentified one, then we have # a series # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv - try: + if len(folder_leftover) > 0 and len(basename_leftover) > 1: series = folder_leftover[0] - filmNumber = basename_leftover[0] + film_number = basename_leftover[0] title = basename_leftover[1] basename_leaves = list(basename.leaves()) - num = int(filmNumber.clean_value) - - self.log.debug('series: %s' % series.clean_value) - self.log.debug('title: %s' % title.clean_value) - if (series.clean_value != title.clean_value and - series.clean_value != filmNumber.clean_value and - basename_leaves.index(filmNumber) == 0 and - basename_leaves.index(title) == 1): - - found_property(title, 'title', confidence=0.6) - found_property(series, 'filmSeries', confidence=0.6) - found_property(filmNumber, 'filmNumber', num, confidence=0.6) - return - except Exception: - pass - - # specific cases: - # - movies/tttttt (yyyy)/tttttt.ccc - try: - if mtree.node_at((-4, 0)).value.lower() == 'movies': - folder = 
mtree.node_at((-3,)) - - # Note:too generic, might solve all the unittests as they all - # contain 'movies' in their path - # - # if containing_folder.is_leaf() and not containing_folder.guess: - # containing_folder.guess = - # Guess({ 'title': clean_string(containing_folder.value) }, - # confidence=0.7) - - year_group = folder.first_leaf_containing('year') - groups_before = folder.previous_unidentified_leaves(year_group) - - found_property(next(groups_before), 'title', confidence=0.8) + num = None + try: + num = int(film_number.clean_value) + except ValueError: + pass + + if num: + self.log.debug('series: %s' % series.clean_value) + self.log.debug('title: %s' % title.clean_value) + if (series.clean_value != title.clean_value and + series.clean_value != film_number.clean_value and + basename_leaves.index(film_number) == 0 and + basename_leaves.index(title) == 1 and + not GuessMovieTitleFromPosition.excluded_word(title, series)): + + found_property(title, 'title', confidence=0.6) + found_property(series, 'filmSeries', confidence=0.6) + found_property(film_number, 'filmNumber', num, confidence=0.6) return - except Exception: - pass + if folder: + year_group = folder.first_leaf_containing('year') + if year_group: + groups_before = folder.previous_unidentified_leaves(year_group) + if groups_before: + try: + node = next(groups_before) + if not GuessMovieTitleFromPosition.excluded_word(node): + found_property(node, 'title', confidence=0.8) + return + except StopIteration: + pass # if we have either format or videoCodec in the folder containing the # file or one of its parents, then we should probably look for the title @@ -132,8 +140,10 @@ # if they're all in the same group, take leftover info from there leftover = mtree.node_at((group_idx,)).unidentified_leaves() try: - found_property(next(leftover), 'title', confidence=0.7) - return + node = next(leftover) + if not GuessMovieTitleFromPosition.excluded_word(node): + found_property(node, 'title', confidence=0.7) + return 
except StopIteration: pass @@ -145,8 +155,8 @@ # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi <-- TODO: gets caught here? if (basename_leftover[0].clean_value.count(' ') == 0 and - folder_leftover and - folder_leftover[0].clean_value.count(' ') >= 2): + folder_leftover and folder_leftover[0].clean_value.count(' ') >= 2 and + not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0])): found_property(folder_leftover[0], 'title', confidence=0.7) return @@ -156,26 +166,28 @@ # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi if basename_leftover[0].is_explicit(): for basename_leftover_elt in basename_leftover: - if not basename_leftover_elt.is_explicit(): + if not basename_leftover_elt.is_explicit() and not GuessMovieTitleFromPosition.excluded_word(basename_leftover_elt): found_property(basename_leftover_elt, 'title', confidence=0.8) return # if all else fails, take the first remaining unidentified group in the # basename as title - found_property(basename_leftover[0], 'title', confidence=0.6) - return + if not GuessMovieTitleFromPosition.excluded_word(basename_leftover[0]): + found_property(basename_leftover[0], 'title', confidence=0.6) + return # if there are no leftover groups in the basename, look in the folder name - if folder_leftover: + if folder_leftover and not GuessMovieTitleFromPosition.excluded_word(folder_leftover[0]): found_property(folder_leftover[0], 'title', confidence=0.5) return # if nothing worked, look if we have a very small group at the beginning # of the basename - basename = mtree.node_at((-2,)) basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True) try: - found_property(next(basename_leftover), 'title', confidence=0.4) - return + node = next(basename_leftover) + if not GuessMovieTitleFromPosition.excluded_word(node): + found_property(node, 'title', confidence=0.4) + return except StopIteration: pass diff -Nru 
guessit-0.8/guessit/transfo/guess_properties.py guessit-0.11.0/guessit/transfo/guess_properties.py --- guessit-0.8/guessit/transfo/guess_properties.py 2014-05-30 20:52:37.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_properties.py 2015-08-31 16:59:30.000000000 +0000 @@ -20,10 +20,14 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer +import re + +from guessit.containers import PropertiesContainer, WeakValidator, LeavesValidator, QualitiesContainer, ChainedValidator, DefaultValidator, OnlyOneValidator, LeftValidator, NeighborValidator, FullMatchValidator +from guessit.patterns import sep, build_or_pattern from guessit.patterns.extension import subtitle_exts, video_exts, info_exts +from guessit.patterns.numeral import numeral, parse_numeral from guessit.plugins.transformers import Transformer -from guessit.matcher import GuessFinder +from guessit.matcher import GuessFinder, found_property class GuessProperties(Transformer): @@ -33,15 +37,22 @@ self.container = PropertiesContainer() self.qualities = QualitiesContainer() - def register_property(propname, props): + def register_property(propname, props, **kwargs): """props a dict of {value: [patterns]}""" for canonical_form, patterns in props.items(): if isinstance(patterns, tuple): - patterns2, kwargs = patterns - kwargs = dict(kwargs) - kwargs['canonical_form'] = canonical_form - self.container.register_property(propname, *patterns2, **kwargs) - + patterns2, pattern_kwarg = patterns + if kwargs: + current_kwarg = dict(kwargs) + current_kwarg.update(pattern_kwarg) + else: + current_kwarg = dict(pattern_kwarg) + current_kwarg['canonical_form'] = canonical_form + self.container.register_property(propname, *patterns2, **current_kwarg) + elif kwargs: + current_kwarg = dict(kwargs) + current_kwarg['canonical_form'] = canonical_form + self.container.register_property(propname, 
*patterns, **current_kwarg) else: self.container.register_property(propname, *patterns, canonical_form=canonical_form) @@ -50,25 +61,24 @@ for canonical_form, quality in quality_dict.items(): self.qualities.register_quality(propname, canonical_form, quality) - register_property('container', {'mp4': ['MP4']}) # http://en.wikipedia.org/wiki/Pirated_movie_release_types - register_property('format', {'VHS': ['VHS'], - 'Cam': ['CAM', 'CAMRip'], + register_property('format', {'VHS': ['VHS', 'VHS-Rip'], + 'Cam': ['CAM', 'CAMRip', 'HD-CAM'], #'Telesync': ['TELESYNC', 'PDVD'], - 'Telesync': (['TS'], {'confidence': 0.2}), + 'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}), 'Workprint': ['WORKPRINT', 'WP'], 'Telecine': ['TELECINE', 'TC'], 'PPV': ['PPV', 'PPV-Rip'], # Pay Per View 'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'], 'DVB': ['DVB-Rip', 'DVB', 'PD-TV'], - 'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'], - 'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'], + 'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'], + 'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'], 'VOD': ['VOD', 'VOD-Rip'], 'WEBRip': ['WEB-Rip'], - 'WEB-DL': ['WEB-DL'], - 'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'], - 'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50'] + 'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'], + 'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'], + 'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50'] }) register_quality('format', {'VHS': -100, @@ -96,9 +106,18 @@ '720p': ['(?:\d{3,}(?:\\|\/|x|\*))?720(?:i|p?x?)'], '900p': ['(?:\d{3,}(?:\\|\/|x|\*))?900(?:i|p?x?)'], '1080i': ['(?:\d{3,}(?:\\|\/|x|\*))?1080i'], - '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080(?:p?x?)'], + '1080p': ['(?:\d{3,}(?:\\|\/|x|\*))?1080p?x?'], '4K': ['(?:\d{3,}(?:\\|\/|x|\*))?2160(?:i|p?x?)'] - }) + }, + validator=ChainedValidator(DefaultValidator(), OnlyOneValidator())) + + _digits_re = re.compile('\d+') + + def resolution_formatter(value): + digits = _digits_re.findall(value) 
+ return 'x'.join(digits) + + self.container.register_property('screenSize', '\d{3,4}-?[x\*]-?\d{3,4}', canonical_from_pattern=False, formatter=resolution_formatter) register_quality('screenSize', {'360p': -300, '368p': -200, @@ -135,7 +154,8 @@ self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) self.container.register_property('videoProfile', 'MP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) - self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) + self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit') + self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit') self.container.register_property('videoProfile', 'Hi422P', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) self.container.register_property('videoProfile', 'Hi444PP', validator=LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])) @@ -152,12 +172,12 @@ # releases use it and it helps to identify release groups, so we adapt register_property('videoApi', {'DXVA': ['DXVA']}) - register_property('audioCodec', {'MP3': ['MP3'], + register_property('audioCodec', {'MP3': ['MP3', 'LAME', 'LAME(?:\d)+-(?:\d)+'], 'DolbyDigital': ['DD'], 'AAC': ['AAC'], 'AC3': ['AC3'], 'Flac': ['FLAC'], - 'DTS': ['DTS'], + 'DTS': (['DTS'], {'validator': LeftValidator()}), 'TrueHD': ['True-HD'] }) @@ -183,8 +203,8 @@ 'HE': 20 }) - register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch'], - '5.1': ['5[\W_]1', '5ch'], + register_property('audioChannels', {'7.1': ['7[\W_]1', '7ch', '8ch'], + '5.1': ['5[\W_]1', '5ch', 
'6ch'], '2.0': ['2[\W_]0', '2ch', 'stereo'], '1.0': ['1[\W_]0', '1ch', 'mono'] }) @@ -197,6 +217,11 @@ self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode') + self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}', enhance=False, canonical_from_pattern=False) + + part_words = ['pt', 'part'] + self.container.register_property(None, '(' + build_or_pattern(part_words) + sep + '?(?P' + numeral + '))[^0-9]', enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral) + register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'], 'SyncFix': ['Sync-Fix', 'Sync-Fixed'], 'DualAudio': ['Dual-Audio'], @@ -204,10 +229,19 @@ 'Netflix': ['Netflix', 'NF'] }) - self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator()) - self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper") - - self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC', 'HR') + self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper', validator=ChainedValidator(FullMatchValidator(), NeighborValidator())) + self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper') + self.container.register_property('other', 'Fansub', canonical_form='Fansub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator())) + self.container.register_property('other', 'Fastsub', canonical_form='Fastsub', validator=ChainedValidator(FullMatchValidator(), NeighborValidator())) + self.container.register_property('other', '(?:Seasons?' 
+ sep + '?)?Complete', canonical_form='Complete') + self.container.register_property('other', 'R5', 'RC', canonical_form='R5') + self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair') + self.container.register_property('other', 'CC') # Close Caption + self.container.register_property('other', 'LD', 'MD') # Line/Mic Dubbed + + self.container.register_canonical_properties('other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ', + 'DDC', + 'HR', 'PAL', 'SECAM', 'NTSC') self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator()) for prop in self.container.get_properties('format'): @@ -218,14 +252,39 @@ self.container.register_property('container', container, confidence=0.3) def guess_properties(self, string, node=None, options=None): - found = self.container.find_properties(string, node) - return self.container.as_guess(found, string) + found = self.container.find_properties(string, node, options) + guess = self.container.as_guess(found, string) + + if guess and node: + if 'part' in guess: + # If two guesses contains both part in same group, create an partList + for existing_guess in node.group_node().guesses: + if 'part' in existing_guess: + if 'partList' not in existing_guess: + existing_guess['partList'] = [existing_guess['part']] + existing_guess['partList'].append(guess['part']) + existing_guess['partList'].sort() + if existing_guess['part'] > guess['part']: + existing_guess.set_confidence('part', 0) + else: + guess.set_confidence('part', 0) + guess['partList'] = list(existing_guess['partList']) + + return guess def supported_properties(self): - return self.container.get_supported_properties() + supported_properties = list(self.container.get_supported_properties()) + supported_properties.append('partList') + return supported_properties def process(self, mtree, options=None): GuessFinder(self.guess_properties, 1.0, self.log, 
options).process_nodes(mtree.unidentified_leaves()) + proper_count = 0 + for other_leaf in mtree.leaves_containing('other'): + if 'other' in other_leaf.info and 'Proper' in other_leaf.info['other']: + proper_count += 1 + if proper_count: + found_property(mtree, 'properCount', proper_count) def rate_quality(self, guess, *props): return self.qualities.rate_quality(guess, *props) diff -Nru guessit-0.8/guessit/transfo/guess_release_group.py guessit-0.11.0/guessit/transfo/guess_release_group.py --- guessit-0.8/guessit/transfo/guess_release_group.py 2014-05-30 21:06:09.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_release_group.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,8 +20,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import re + from guessit.plugins.transformers import Transformer -from guessit.matcher import GuessFinder +from guessit.matcher import GuessFinder, build_guess from guessit.containers import PropertiesContainer from guessit.patterns import sep from guessit.guess import Guess @@ -31,25 +33,35 @@ class GuessReleaseGroup(Transformer): def __init__(self): Transformer.__init__(self, -190) + self.container = PropertiesContainer(canonical_from_pattern=False) - self._allowed_groupname_pattern = '[\w@#€£$&]' + self._allowed_groupname_pattern = '[\w@#€£$&!\?]' self._forbidden_groupname_lambda = [lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'], - lambda elt: self._is_number(elt), - ] + lambda elt: self._is_number(elt)] # If the previous property in this list, the match will be considered as safe # and group name can contain a separator. 
- self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels'] - + self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile', 'videoProfile', 'audioChannels', 'screenSize', 'other'] + self.previous_safe_values = {'other': ['Complete']} + self.next_safe_properties = ['extension', 'website'] + self.next_safe_values = {'format': ['Telesync']} + self.next_unsafe_properties = list(self.previous_safe_properties) + self.next_unsafe_properties.extend(['episodeNumber', 'season']) self.container.sep_replace_char = '-' self.container.canonical_from_pattern = False self.container.enhance = True self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+') self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+') + self.re_sep = re.compile('(' + sep + ')') + + def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options): + naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group', + help='Expected release group (can be used multiple times)') def supported_properties(self): return self.container.get_supported_properties() - def _is_number(self, s): + @staticmethod + def _is_number(s): try: int(s) return True @@ -58,27 +70,36 @@ def validate_group_name(self, guess): val = guess['releaseGroup'] - if len(val) >= 2: - - if '-' in val: - checked_val = "" - for elt in val.split('-'): + if len(val) > 1: + checked_val = "" + forbidden = False + for elt in self.re_sep.split(val): # separators are in the list because of capturing group + if forbidden: + # Previous was forbidden, don't had separator forbidden = False - for forbidden_lambda in self._forbidden_groupname_lambda: - forbidden = forbidden_lambda(elt.lower()) - if forbidden: - break - if not forbidden: + continue + for forbidden_lambda in 
self._forbidden_groupname_lambda: + forbidden = forbidden_lambda(elt.lower()) + if forbidden: if checked_val: - checked_val += '-' - checked_val += elt - else: + # Removing previous separator + checked_val = checked_val[0:len(checked_val) - 1] break - val = checked_val - if not val: - return False - guess['releaseGroup'] = val + if not forbidden: + checked_val += elt + val = checked_val + if not val: + return False + if self.re_sep.match(val[-1]): + val = val[:len(val)-1] + if not val: + return False + if self.re_sep.match(val[0]): + val = val[1:] + if not val: + return False + guess['releaseGroup'] = val forbidden = False for forbidden_lambda in self._forbidden_groupname_lambda: forbidden = forbidden_lambda(val.lower()) @@ -88,23 +109,70 @@ return True return False - def is_leaf_previous(self, leaf, node): + @staticmethod + def is_leaf_previous(leaf, node): if leaf.span[1] <= node.span[0]: for idx in range(leaf.span[1], node.span[0]): - if not leaf.root.value[idx] in sep: + if leaf.root.value[idx] not in sep: return False return True return False + def validate_next_leaves(self, node): + if 'series' in node.root.info or 'title' in node.root.info: + # --expected-series or --expected-title is used. + return True + + next_leaf = node.root.next_leaf(node) + node_idx = node.node_last_idx + while next_leaf and next_leaf.node_last_idx >= node_idx: + node_idx = next_leaf.node_last_idx + # Check next properties in the same group are not in unsafe properties list + for next_unsafe_property in self.next_unsafe_properties: + if next_unsafe_property in next_leaf.info: + return False + next_leaf = next_leaf.root.next_leaf(next_leaf) + + # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise. 
+ leaves = node.root.unidentified_leaves() + return len(list(leaves)) > 1 + + def validate_node(self, leaf, node, safe=False): + if not self.is_leaf_previous(leaf, node): + return False + if not self.validate_next_leaves(node): + return False + if safe: + for k, v in leaf.guess.items(): + if k in self.previous_safe_values and v not in self.previous_safe_values[k]: + return False + return True + def guess_release_group(self, string, node=None, options=None): - found = self.container.find_properties(string, node, 'releaseGroup') - guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-') + if options and options.get('expected_group'): + expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False) + for expected_group in options.get('expected_group'): + if expected_group.startswith('re:'): + expected_group = expected_group[3:] + expected_group = expected_group.replace(' ', '-') + expected_container.register_property('releaseGroup', expected_group, enhance=True) + else: + expected_group = re.escape(expected_group) + expected_container.register_property('releaseGroup', expected_group, enhance=False) + + found = expected_container.find_properties(string, node, options, 'releaseGroup') + guess = expected_container.as_guess(found, string, self.validate_group_name) + if guess: + return guess + + found = self.container.find_properties(string, node, options, 'releaseGroup') + guess = self.container.as_guess(found, string, self.validate_group_name) validated_guess = None if guess: - explicit_group_node = node.group_node() - if explicit_group_node: - for leaf in explicit_group_node.leaves_containing(self.previous_safe_properties): - if self.is_leaf_previous(leaf, node): + group_node = node.group_node() + if group_node: + for leaf in group_node.leaves_containing(self.previous_safe_properties): + if self.validate_node(leaf, node, True): if leaf.root.value[leaf.span[1]] == '-': guess.metadata().confidence = 1 else: @@ 
-117,7 +185,7 @@ previous_group_node = node.previous_group_node() if previous_group_node: for leaf in previous_group_node.leaves_containing(self.previous_safe_properties): - if self.is_leaf_previous(leaf, node): + if self.validate_node(leaf, node, False): guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value))) if self.validate_group_name(guess): node.guess = guess @@ -139,6 +207,12 @@ else: break + if not validated_guess and node.is_explicit() and node.node_last_idx == 0: # first node from group + validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1]) + validated_guess.metadata().confidence = 0.4 + validated_guess.metadata().span = 1, len(node.value) + node.guess = validated_guess + if validated_guess: # Strip brackets validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup']) diff -Nru guessit-0.8/guessit/transfo/guess_video_rexps.py guessit-0.11.0/guessit/transfo/guess_video_rexps.py --- guessit-0.8/guessit/transfo/guess_video_rexps.py 2014-02-22 17:04:16.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_video_rexps.py 2015-04-04 08:19:11.000000000 +0000 @@ -51,7 +51,7 @@ return self.container.get_supported_properties() def guess_video_rexps(self, string, node=None, options=None): - found = self.container.find_properties(string, node) + found = self.container.find_properties(string, node, options) return self.container.as_guess(found, string) def process(self, mtree, options=None): diff -Nru guessit-0.8/guessit/transfo/guess_weak_episodes_rexps.py guessit-0.11.0/guessit/transfo/guess_weak_episodes_rexps.py --- guessit-0.8/guessit/transfo/guess_weak_episodes_rexps.py 2014-02-22 17:04:16.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_weak_episodes_rexps.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,9 +20,13 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import re +from guessit.patterns.list import 
list_parser, all_separators_re + from guessit.plugins.transformers import Transformer + from guessit.matcher import GuessFinder -from guessit.patterns import sep +from guessit.patterns import sep, build_or_pattern from guessit.containers import PropertiesContainer from guessit.patterns.numeral import numeral, parse_numeral from guessit.date import valid_year @@ -32,10 +36,18 @@ def __init__(self): Transformer.__init__(self, 15) - self.properties = PropertiesContainer(enhance=False, canonical_from_pattern=False) + of_separators = ['of', 'sur', '/', '\\'] + of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE) + + self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True) - def _formater(episodeNumber): - epnum = parse_numeral(episodeNumber) + episode_words = ['episodes?'] + + def episode_list_parser(value): + return list_parser(value, 'episodeList') + + def season_episode_parser(episode_number): + epnum = parse_numeral(episode_number) if not valid_year(epnum): if epnum > 100: season, epnum = epnum // 100, epnum % 100 @@ -47,18 +59,45 @@ else: return epnum - self.properties.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=_formater) - self.properties.register_property('episodeNumber', '(?:episode)' + sep + '(' + numeral + ')[^0-9]', confidence=0.3) + self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}', confidence=0.6, formatter=season_episode_parser, disabler=lambda options: options.get('episode_prefer_number') if options else False) + self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}', confidence=0.6, formatter=season_episode_parser) + self.container.register_property(None, '(' + build_or_pattern(episode_words) + sep + '?(?P' + numeral + '))[^0-9]', confidence=0.4, formatter=parse_numeral) + self.container.register_property(None, r'(?P' + numeral + ')' + sep + '?' 
+ of_separators_re.pattern + sep + '?(?P' + numeral +')', confidence=0.6, formatter=parse_numeral) + self.container.register_property('episodeNumber', '[^0-9](\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True) + self.container.register_property('episodeNumber', r'^' + sep + '?(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep, confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True) + self.container.register_property('episodeNumber', sep + r'(\d{2,3}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2,3}' + ')*)' + sep + '?$', confidence=0.4, formatter=episode_list_parser, disabler=lambda options: not options.get('episode_prefer_number') if options else True) def supported_properties(self): - return self.properties.get_supported_properties() + return self.container.get_supported_properties() def guess_weak_episodes_rexps(self, string, node=None, options=None): - if node and 'episodeNumber' in node.root.info: - return None + properties = self.container.find_properties(string, node, options) + guess = self.container.as_guess(properties, string) - properties = self.properties.find_properties(string, node) - guess = self.properties.as_guess(properties, string) + if node and guess: + if 'episodeNumber' in guess and 'season' in guess: + existing_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses)) + if existing_guesses: + return None + elif 'episodeNumber' in guess: + # If we only have episodeNumber in the guess, and another node contains both season and episodeNumber + # keep only the second. 
+ safe_guesses = list(filter(lambda x: 'season' in x and 'episodeNumber' in x, node.group_node().guesses)) + if safe_guesses: + return None + else: + # If we have other nodes containing episodeNumber, create an episodeList. + existing_guesses = list(filter(lambda x: 'season' not in x and 'episodeNumber' in x, node.group_node().guesses)) + for existing_guess in existing_guesses: + if 'episodeList' not in existing_guess: + existing_guess['episodeList'] = [existing_guess['episodeNumber']] + existing_guess['episodeList'].append(guess['episodeNumber']) + existing_guess['episodeList'].sort() + if existing_guess['episodeNumber'] > guess['episodeNumber']: + existing_guess.set_confidence('episodeNumber', 0) + else: + guess.set_confidence('episodeNumber', 0) + guess['episodeList'] = list(existing_guess['episodeList']) return guess diff -Nru guessit-0.8/guessit/transfo/guess_website.py guessit-0.11.0/guessit/transfo/guess_website.py --- guessit-0.8/guessit/transfo/guess_website.py 2014-06-22 12:06:20.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_website.py 2015-06-06 12:39:15.000000000 +0000 @@ -19,16 +19,20 @@ # from __future__ import absolute_import, division, print_function, unicode_literals + +from pkg_resources import resource_stream # @UnresolvedImport + from guessit.patterns import build_or_pattern from guessit.containers import PropertiesContainer from guessit.plugins.transformers import Transformer from guessit.matcher import GuessFinder -from pkg_resources import resource_stream # @UnresolvedImport + TLDS = [l.strip().decode('utf-8') for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines() if b'--' not in l][1:] + class GuessWebsite(Transformer): def __init__(self): Transformer.__init__(self, 45) @@ -48,7 +52,7 @@ return self.container.get_supported_properties() def guess_website(self, string, node=None, options=None): - found = self.container.find_properties(string, node, 'website') + found = self.container.find_properties(string, 
node, options, 'website') return self.container.as_guess(found, string) def process(self, mtree, options=None): diff -Nru guessit-0.8/guessit/transfo/guess_year.py guessit-0.11.0/guessit/transfo/guess_year.py --- guessit-0.8/guessit/transfo/guess_year.py 2014-05-30 19:51:12.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/guess_year.py 2015-08-31 16:57:28.000000000 +0000 @@ -32,7 +32,8 @@ def supported_properties(self): return ['year'] - def guess_year(self, string, node=None, options=None): + @staticmethod + def guess_year(string, node=None, options=None): year, span = search_year(string) if year: return {'year': year}, span @@ -41,8 +42,13 @@ def second_pass_options(self, mtree, options=None): year_nodes = list(mtree.leaves_containing('year')) - if len(year_nodes) > 1: - return {'skip_nodes': year_nodes[:len(year_nodes) - 1]} + # if we found a year, let's try by ignoring all instances of that year + # as a candidate, let's take the one that appears last in the filename + if year_nodes: + year_candidate = year_nodes[-1].guess['year'] + year_nodes = [year for year in year_nodes if year.guess['year'] != year_candidate] + if year_nodes: + return {'skip_nodes': year_nodes} return None def process(self, mtree, options=None): diff -Nru guessit-0.8/guessit/transfo/split_explicit_groups.py guessit-0.11.0/guessit/transfo/split_explicit_groups.py --- guessit-0.8/guessit/transfo/split_explicit_groups.py 2014-02-13 23:04:33.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/split_explicit_groups.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,15 +20,16 @@ from __future__ import absolute_import, division, print_function, unicode_literals +from functools import reduce + from guessit.plugins.transformers import Transformer from guessit.textutils import find_first_level_groups from guessit.patterns import group_delimiters -from functools import reduce class SplitExplicitGroups(Transformer): def __init__(self): - Transformer.__init__(self, 245) + Transformer.__init__(self, 250) 
def process(self, mtree, options=None): """split each of those into explicit groups (separated by parentheses or square brackets) @@ -36,7 +37,7 @@ :return: return the string split into explicit groups, that is, those either between parenthese, square brackets or curly braces, and those separated by a dash.""" - for c in mtree.children: + for c in mtree.unidentified_leaves(): groups = find_first_level_groups(c.value, group_delimiters[0]) for delimiters in group_delimiters: flatten = lambda l, x: l + find_first_level_groups(x, delimiters) @@ -46,4 +47,24 @@ # patterns, such as dates, etc... # groups = functools.reduce(lambda l, x: l + x.split('-'), groups, []) - c.split_on_components(groups) + c.split_on_components(groups, category='explicit') + + def post_process(self, mtree, options=None): + """ + Decrease confidence for properties found in explicit groups. + + :param mtree: + :param options: + :return: + """ + if not options.get('name_only'): + explicit_nodes = [node for node in mtree.nodes() if node.category == 'explicit' and node.is_explicit()] + + for explicit_node in explicit_nodes: + self.alter_confidence(explicit_node, 0.5) + + def alter_confidence(self, node, factor): + for guess in node.guesses: + for k in guess.keys(): + confidence = guess.confidence(k) + guess.set_confidence(k, confidence * factor) diff -Nru guessit-0.8/guessit/transfo/split_on_dash.py guessit-0.11.0/guessit/transfo/split_on_dash.py --- guessit-0.8/guessit/transfo/split_on_dash.py 2014-02-13 23:04:33.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/split_on_dash.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,14 +20,15 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import re + from guessit.plugins.transformers import Transformer from guessit.patterns import sep -import re class SplitOnDash(Transformer): def __init__(self): - Transformer.__init__(self, 190) + Transformer.__init__(self, 245) def process(self, mtree, options=None): """split into '-' 
separated subgroups (with required separator chars @@ -44,4 +45,4 @@ match = pattern.search(node.value, span[1]) if indices: - node.partition(indices) + node.partition(indices, category='dash') diff -Nru guessit-0.8/guessit/transfo/split_path_components.py guessit-0.11.0/guessit/transfo/split_path_components.py --- guessit-0.8/guessit/transfo/split_path_components.py 2014-02-20 19:26:42.000000000 +0000 +++ guessit-0.11.0/guessit/transfo/split_path_components.py 2015-08-31 16:57:28.000000000 +0000 @@ -20,9 +20,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals +from os.path import splitext + from guessit.plugins.transformers import Transformer from guessit import fileutils -from os.path import splitext class SplitPathComponents(Transformer): @@ -40,6 +41,32 @@ components += list(splitext(basename)) components[-1] = components[-1][1:] # remove the '.' from the extension - mtree.split_on_components(components) + mtree.split_on_components(components, category='path') else: - mtree.split_on_components([mtree.value, '']) + mtree.split_on_components([mtree.value, ''], category='path') + + def post_process(self, mtree, options=None): + """ + Decrease confidence for properties found in directories, filename should always have priority. 
+ + :param mtree: + :param options: + :return: + """ + if not options.get('name_only'): + path_nodes = [node for node in mtree.nodes() if node.category == 'path'] + + for path_node in path_nodes[:-2]: + self.alter_confidence(path_node, 0.3) + + try: + last_directory_node = path_nodes[-2] + self.alter_confidence(last_directory_node, 0.6) + except IndexError: + pass + + def alter_confidence(self, node, factor): + for guess in node.guesses: + for k in guess.keys(): + confidence = guess.confidence(k) + guess.set_confidence(k, confidence * factor) diff -Nru guessit-0.8/guessit/__version__.py guessit-0.11.0/guessit/__version__.py --- guessit-0.8/guessit/__version__.py 2014-07-06 14:00:33.000000000 +0000 +++ guessit-0.11.0/guessit/__version__.py 2015-09-04 20:44:49.000000000 +0000 @@ -17,4 +17,4 @@ # You should have received a copy of the Lesser GNU General Public License # along with this program. If not, see . # -__version__ = '0.8' +__version__ = '0.11.0' diff -Nru guessit-0.8/guessit.egg-info/pbr.json guessit-0.11.0/guessit.egg-info/pbr.json --- guessit-0.8/guessit.egg-info/pbr.json 1970-01-01 00:00:00.000000000 +0000 +++ guessit-0.11.0/guessit.egg-info/pbr.json 2015-09-04 20:51:34.000000000 +0000 @@ -0,0 +1 @@ +{"git_version": "da60e4b", "is_release": false} \ No newline at end of file diff -Nru guessit-0.8/guessit.egg-info/PKG-INFO guessit-0.11.0/guessit.egg-info/PKG-INFO --- guessit-0.8/guessit.egg-info/PKG-INFO 2014-07-06 14:02:07.000000000 +0000 +++ guessit-0.11.0/guessit.egg-info/PKG-INFO 2015-09-04 20:51:34.000000000 +0000 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: guessit -Version: 0.8 +Version: 0.11.0 Summary: GuessIt - a library for guessing information from video files. 
Home-page: http://guessit.readthedocs.org/ Author: Nicolas Wack Author-email: wackou@gmail.com License: LGPLv3 -Download-URL: https://pypi.python.org/packages/source/g/guessit/guessit-0.8.tar.gz +Download-URL: https://pypi.python.org/packages/source/g/guessit/guessit-0.11.0.tar.gz Description: GuessIt ======= @@ -27,11 +27,11 @@ :alt: Coveralls - GuessIt is a python library that tries to extract as much information as + GuessIt is a python library that extracts as much information as possible from a video file. It has a very powerful filename matcher that allows to guess a lot of - metadata from a video using only its filename. This matcher works with + metadata from a video using its filename only. This matcher works with both movies and tv shows episodes. For example, GuessIt can do the following:: @@ -52,34 +52,57 @@ } + Install + ------- + + Installing GuessIt is simple with `pip `_:: + + $ pip install guessit + + or, with `easy_install `_:: + + $ easy_install guessit + + But, you really `shouldn't do that `_. + + You can now launch a demo:: + + $ guessit -d + + and guess your own filename:: + + $ guessit "Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv" + For: Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv + GuessIt found: { + [1.00] "mimetype": "video/x-matroska", + [1.00] "episodeNumber": 8, + [0.30] "container": "mkv", + [1.00] "format": "BluRay", + [0.70] "series": "Breaking Bad", + [1.00] "releaseGroup": "KoTuWa", + [1.00] "screenSize": "720p", + [1.00] "season": 5, + [1.00] "type": "episode" + } + + + + Filename matcher + ---------------- - Features - -------- + The filename matcher is based on pattern matching and is able to recognize many properties from the filename, + like ``title``, ``year``, ``series``, ``episodeNumber``, ``seasonNumber``, + ``videoCodec``, ``screenSize``, ``language``. Guessed values are cleaned up and given in a readable format + which may not match exactly the raw filename. 
- At the moment, the filename matcher is able to recognize the following - property types:: + The full list of available properties can be seen in the + `main documentation `_. - [ title, # for movies and episodes - series, season, # for episodes only - episodeNumber, episodeDetails, # for episodes only - date, year, # 'date' instance of datetime.date - language, subtitleLanguage, # instances of babelfish.Language - country, # instances of babelfish.Country - fileSize, duration, # when detecting video file metadata - container, format, - videoCodec, audioCodec, - videoProfile, audioProfile, - audioChannels, screenSize, - releaseGroup, website, - cdNumber, cdNumberTotal, - filmNumber, filmSeries, - bonusNumber, edition, - idNumber, # tries to identify a hash or a serial number - other - ] + Other features + -------------- - GuessIt also allows you to compute a whole lof of hashes from a file, + GuessIt also allows you to compute a whole lot of hashes from a file, namely all the ones you can find in the hashlib python module (md5, sha1, ...), but also the Media Player Classic hash that is used (amongst others) by OpenSubtitles and SMPlayer, as well as the ed2k hash. @@ -91,19 +114,88 @@ properties from the actual video file metadata. - Install - ------- + Usage + ----- - Installing GuessIt is simple with `pip `_:: - - $ pip install guessit - - or, with `easy_install `_:: - - $ easy_install guessit - - But, you really `shouldn't do that `_. 
+ guessit can be use from command line:: + $ guessit + usage: guessit [-h] [-t TYPE] [-n] [-c] [-X DISABLED_TRANSFORMERS] [-v] + [-P SHOW_PROPERTY] [-u] [-a] [-y] [-f INPUT_FILE] [-d] [-p] + [-V] [-s] [--version] [-b] [-i INFO] [-S EXPECTED_SERIES] + [-T EXPECTED_TITLE] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-E] + [-C ALLOWED_COUNTRIES] [-G EXPECTED_GROUP] + [filename [filename ...]] + + positional arguments: + filename Filename or release name to guess + + optional arguments: + -h, --help show this help message and exit + + Naming: + -t TYPE, --type TYPE The suggested file type: movie, episode. If undefined, + type will be guessed. + -n, --name-only Parse files as name only. Disable folder parsing, + extension parsing, and file content analysis. + -c, --split-camel Split camel case part of filename. + -X DISABLED_TRANSFORMERS, --disabled-transformer DISABLED_TRANSFORMERS + Transformer to disable (can be used multiple time) + -S EXPECTED_SERIES, --expected-series EXPECTED_SERIES + Expected series to parse (can be used multiple times) + -T EXPECTED_TITLE, --expected-title EXPECTED_TITLE + Expected title (can be used multiple times) + -Y, --date-year-first + If short date is found, consider the first digits as + the year. + -D, --date-day-first If short date is found, consider the second digits as + the day. + -L ALLOWED_LANGUAGES, --allowed-languages ALLOWED_LANGUAGES + Allowed language (can be used multiple times) + -E, --episode-prefer-number + Guess "serie.213.avi" as the episodeNumber 213. + Without this option, it will be guessed as season 2, + episodeNumber 13 + -C ALLOWED_COUNTRIES, --allowed-country ALLOWED_COUNTRIES + Allowed country (can be used multiple times) + -G EXPECTED_GROUP, --expected-group EXPECTED_GROUP + Expected release group (can be used multiple times) + + Output: + -v, --verbose Display debug output + -P SHOW_PROPERTY, --show-property SHOW_PROPERTY + Display the value of a single property (title, series, + videoCodec, year, type ...) 
+ -u, --unidentified Display the unidentified parts. + -a, --advanced Display advanced information for filename guesses, as + json output + -y, --yaml Display information for filename guesses as yaml + output (like unit-test) + -f INPUT_FILE, --input-file INPUT_FILE + Read filenames from an input file. + -d, --demo Run a few builtin tests instead of analyzing a file + + Information: + -p, --properties Display properties that can be guessed. + -V, --values Display property values that can be guessed. + -s, --transformers Display transformers that can be used. + --version Display the guessit version. + + guessit.io: + -b, --bug Submit a wrong detection to the guessit.io service + + Other features: + -i INFO, --info INFO The desired information type: filename, video, + hash_mpc or a hash from python's hashlib module, such + as hash_md5, hash_sha1, ...; or a list of any of them, + comma-separated + + + It can also be used as a python module:: + + >>> from guessit import guess_file_info + >>> guess_file_info('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi') + {u'mimetype': 'video/x-msvideo', u'episodeNumber': 3, u'videoCodec': u'XviD', u'container': u'avi', u'format': u'HDTV', u'series': u'Treme', u'title': u'Right Place, Wrong Time', u'releaseGroup': u'NoTV', u'season': 1, u'type': u'episode'} Support @@ -142,17 +234,147 @@ GuessIt is licensed under the `LGPLv3 license `_. - .. This is your project NEWS file which will contain the release notes. - .. Example: http://www.python.org/download/releases/2.6/NEWS.txt - .. The content of this file, along with README.rst, will appear in your - .. project's PyPI page. 
- History ======= + 0.11.0 (2015-09-04) + ------------------- + + * Fixed year-season episodes with 'x' separator + * Fixed name guessing when a subdirectory contains a number + * Fixed possible IndexError in release_group plugin + * Fixed infinite recursion when multiple languages from same node are ignored in the second pass + * Added skip of language guess for 2-3 letters directories + * Added exclusion of common words from title guessing + * Added a higher confidence on filename over directories + + + 0.10.4 (2015-08-19) + ------------------- + * Added ``LD``/``MD`` properties + * Added better support for ``episodeList`` + * Added more rules for filetype autodetection + * Added support for ``episodeList`` on weak episode patterns + * Added ``partList`` property (list for ``part`` property) + * Added vob to supported file extensions + * Added more ignore words to language detection + * Added string options support for API methods (will be parsed like command-line) + * Added better subtitle detection (prefix priority over suffix) + * Fixed ``version`` property no detected when detached from ``episodeNumber`` + * Fixed ``releaseGroup`` property no detected when prefixed by ``screenSize`` + * Fixed single digit detected as an ``episodeNumber`` + * Fixed an internal issue in matcher causing absolute and relative group spans confusion + * Fixed an internal issue in properties container causing invalid ordering of found patterns + * Fixed raw value for some properties (--advanced) + * Use pytest as test runner + * Remove support for python 2.6 + + + 0.10.3 (2015-04-04) + ------------------- + + * Fix issues related to unicode encoding/decoding + * Fix possible crashes in guess_video_rexps + * Fix invalid guess result when crc32 contains 6 digits than can be parsed as a date + + + 0.10.2 (2015-03-08) + ------------------- + + * Use common words to resolve conflicts on strings + * Bump babelfish version + * Fix setuptools deprecation warning + * Package argparse 
dependency only if python<2.7 + + + 0.10.1 (2015-01-05) + ------------------- + + * Avoid word Stay to be recognized as AY subtitle + * Fixed exception when no unidentified leaves remains + * Avoid usage of deprecated EntryPoint.load() require argument + * Fixed invalid raw data for some properties (title, series and maybe others) + + + 0.10.0 (2014-12-27) + ------------------- + * Fixed exception when serie title starts with Ep + * Fixed exception when trying to parse a full length country name + * Removed deprecated optparse module, replaced by argparse + + + 0.9.4 (2014-11-10) + ------------------ + + * Fixed exception when filename contains multiple languages ISO codes + * Fixed transformers initialization logging + * Fixed possible exception in language transformer + * Added more words to common english words + + + 0.9.3 (2014-09-14) + ------------------ + + * Added ``Preair`` and ``Remux`` to ``other`` property + * Better detection of ``audioProfile`` = ``HD`` / ``HDMA`` for ``audioCodec`` = ``DTS`` + * Better detection of ``format``` = ``BluRay`` (when followed by Rip) + * Recognize ``RC`` as ``R5`` + * Recognize ``WEB-HD```and ``ẀEB`` as ``WEB-DL`` + + + 0.9.2 (2014-09-13) + ------------------ + + * Added support of option registration on transformers + * Better detection of ``releaseGroup`` when using ``expected-series`` or ``expected-title`` option + * Better ``audioChannel`` = ``5.1`` / ``7.1`` guessing (``6ch``, ``8ch``) + * Fixed usage not showing when invalid options were passed + * Added ``PAL``, ``SECAM`` and ``NTSC`` to ``other`` possible values + * Recognize DVD-9 and DVD-5 as ``format`` = ``DVD`` property + + + 0.9.1 (2014-09-06) + ------------------ + + * Added ``--unidentified`` option to display unidentified parts of the filename + This option affects command line only - From API `unidentified` properties will + always be grabbed regardless this settings + * Better guessing of ``releaseGroup`` property + * Added ``mHD`` and ``HDLight`` to 
``other properties`` + * Better guessing of ``format`` = ``DVD`` property (DVD-R pattern) + * Some ``info`` logs changed to ``debug`` for quiet integration + * Small fixes + + + 0.9.0 (2014-09-05) + ------------------ + + * Better auto-detection of anime episodes, containing a ``crc32`` or a digits ``episodeNumber``. + * Better listing of options on ``guessit -h`` + * Added ``--allowed-countries`` and ``--allowed-languages`` to avoid two or three + letters words to be guessed as ``country`` or ``language`` + * Added ``--disabled-transformers`` option to disable transformer plugin at runtime. + * Added ``--episode-prefer-number`` option, for ``guess -t episode 'serie.123.avi'`` + to return ``episodeNumber`` = ``123`` instead of ``season`` = ``1`` + ``episodeNumber`` = 23`` + * Added ``--split-camel`` option (now disabled by default) + * Added ``episodeCount`` and ``seasonCount`` properties (x-of-n notation) + * Added ``--date-year-first``` and ``--date-day-first`` options + * Added ``--expected-title``, ``--expected-series`` and ``--expected-groups`` + to help finding values when those properties are known + * Added ``10bit`` value to ``videoProfile`` + * Added ``--show-property`` option to only show a single property + * Added ``--input-file`` option to parse a list of + * Added ``--version`` option + * Added ``ass```to subtitle extensions + * Added ``Fansub`` value for ``other`` property + * Added more date formats support with ``dateutil`` dependency + * Added customizable ``clean_function`` (API) + * Added ``default_options`` (API) + * Fixed ``--yaml`` option to support ``language`` and ``country`` + * Fixed ``transformers.add_transformer()`` function (API) - 0.8 (2014-06-06) + 0.8 (2014-07-06) ---------------- * New webservice that allows to use GuessIt just by sending a POST request to @@ -346,9 +568,9 @@ Classifier: Operating System :: OS Independent Classifier: Intended Audience :: Developers Classifier: Programming Language :: Python :: 2 -Classifier: 
Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 Classifier: Topic :: Multimedia Classifier: Topic :: Software Development :: Libraries :: Python Modules diff -Nru guessit-0.8/guessit.egg-info/requires.txt guessit-0.11.0/guessit.egg-info/requires.txt --- guessit-0.8/guessit.egg-info/requires.txt 2014-07-06 14:02:07.000000000 +0000 +++ guessit-0.11.0/guessit.egg-info/requires.txt 2015-09-04 20:51:34.000000000 +0000 @@ -1,9 +1,10 @@ -babelfish>=0.5.3 +babelfish>=0.5.4 stevedore>=0.14 requests +python-dateutil>=2.1 [language_detection] guess-language>=0.2 [video_metadata] -enzyme \ No newline at end of file +enzyme diff -Nru guessit-0.8/guessit.egg-info/SOURCES.txt guessit-0.11.0/guessit.egg-info/SOURCES.txt --- guessit-0.8/guessit.egg-info/SOURCES.txt 2014-07-06 14:02:08.000000000 +0000 +++ guessit-0.11.0/guessit.egg-info/SOURCES.txt 2015-09-04 20:51:34.000000000 +0000 @@ -8,36 +8,6 @@ docs/index.rst docs/presentation.rst docs/projectinfo.rst -docs/_build/html/genindex.html -docs/_build/html/index.html -docs/_build/html/presentation.html -docs/_build/html/projectinfo.html -docs/_build/html/py-modindex.html -docs/_build/html/search.html -docs/_build/html/_static/comment-bright.png -docs/_build/html/_static/comment-close.png -docs/_build/html/_static/comment.png -docs/_build/html/_static/down-pressed.png -docs/_build/html/_static/down.png -docs/_build/html/_static/file.png -docs/_build/html/_static/guessit-logo.png -docs/_build/html/_static/lgplv3-88x31.png -docs/_build/html/_static/minus.png -docs/_build/html/_static/plus.png -docs/_build/html/_static/up-pressed.png -docs/_build/html/_static/up.png -docs/_build/html/_static/coinwidget/icon_bitcoin.png -docs/_build/html/_static/coinwidget/icon_litecoin.png -docs/_build/html/_static/coinwidget/icon_qrcode.png 
-docs/_build/html/_static/coinwidget/icon_wallet.png -docs/_build/html/_themes/README.html -docs/_build/html/api/guess.html -docs/_build/html/api/matcher.html -docs/_build/html/api/matchtree.html -docs/_build/html/dev/internals.html -docs/_build/html/user/commandline.html -docs/_build/html/user/install.html -docs/_build/html/user/python.html docs/_static/guessit-logo.png docs/_static/lgplv3-88x31.png docs/_static/coinwidget/icon_bitcoin.png @@ -65,6 +35,7 @@ docs/dev/internals.rst docs/user/commandline.rst docs/user/install.rst +docs/user/properties.rst docs/user/python.rst guessit/__init__.py guessit/__main__.py @@ -87,15 +58,16 @@ guessit.egg-info/SOURCES.txt guessit.egg-info/dependency_links.txt guessit.egg-info/entry_points.txt +guessit.egg-info/pbr.json guessit.egg-info/requires.txt guessit.egg-info/top_level.txt guessit/patterns/__init__.py guessit/patterns/extension.py +guessit/patterns/list.py guessit/patterns/numeral.py guessit/plugins/__init__.py guessit/plugins/transformers.py guessit/test/__init__.py -guessit/test/__main__.py guessit/test/autodetect.yaml guessit/test/dummy.srt guessit/test/episodes.yaml @@ -105,7 +77,6 @@ guessit/test/test_api.py guessit/test/test_autodetect.py guessit/test/test_autodetect_all.py -guessit/test/test_doctests.py guessit/test/test_episode.py guessit/test/test_hashes.py guessit/test/test_language.py @@ -115,6 +86,8 @@ guessit/test/test_quality.py guessit/test/test_utils.py guessit/transfo/__init__.py +guessit/transfo/expected_series.py +guessit/transfo/expected_title.py guessit/transfo/guess_bonus_features.py guessit/transfo/guess_country.py guessit/transfo/guess_date.py diff -Nru guessit-0.8/HISTORY.rst guessit-0.11.0/HISTORY.rst --- guessit-0.8/HISTORY.rst 2014-07-06 14:00:15.000000000 +0000 +++ guessit-0.11.0/HISTORY.rst 2015-09-04 20:49:46.000000000 +0000 @@ -1,14 +1,144 @@ -.. This is your project NEWS file which will contain the release notes. -.. Example: http://www.python.org/download/releases/2.6/NEWS.txt -.. 
The content of this file, along with README.rst, will appear in your -.. project's PyPI page. - History ======= +0.11.0 (2015-09-04) +------------------- + +* Fixed year-season episodes with 'x' separator +* Fixed name guessing when a subdirectory contains a number +* Fixed possible IndexError in release_group plugin +* Fixed infinite recursion when multiple languages from same node are ignored in the second pass +* Added skip of language guess for 2-3 letters directories +* Added exclusion of common words from title guessing +* Added a higher confidence on filename over directories + + +0.10.4 (2015-08-19) +------------------- +* Added ``LD``/``MD`` properties +* Added better support for ``episodeList`` +* Added more rules for filetype autodetection +* Added support for ``episodeList`` on weak episode patterns +* Added ``partList`` property (list for ``part`` property) +* Added vob to supported file extensions +* Added more ignore words to language detection +* Added string options support for API methods (will be parsed like command-line) +* Added better subtitle detection (prefix priority over suffix) +* Fixed ``version`` property no detected when detached from ``episodeNumber`` +* Fixed ``releaseGroup`` property no detected when prefixed by ``screenSize`` +* Fixed single digit detected as an ``episodeNumber`` +* Fixed an internal issue in matcher causing absolute and relative group spans confusion +* Fixed an internal issue in properties container causing invalid ordering of found patterns +* Fixed raw value for some properties (--advanced) +* Use pytest as test runner +* Remove support for python 2.6 + + +0.10.3 (2015-04-04) +------------------- + +* Fix issues related to unicode encoding/decoding +* Fix possible crashes in guess_video_rexps +* Fix invalid guess result when crc32 contains 6 digits than can be parsed as a date + + +0.10.2 (2015-03-08) +------------------- + +* Use common words to resolve conflicts on strings +* Bump babelfish version +* Fix 
setuptools deprecation warning +* Package argparse dependency only if python<2.7 + + +0.10.1 (2015-01-05) +------------------- + +* Avoid word Stay to be recognized as AY subtitle +* Fixed exception when no unidentified leaves remains +* Avoid usage of deprecated EntryPoint.load() require argument +* Fixed invalid raw data for some properties (title, series and maybe others) + + +0.10.0 (2014-12-27) +------------------- +* Fixed exception when serie title starts with Ep +* Fixed exception when trying to parse a full length country name +* Removed deprecated optparse module, replaced by argparse + + +0.9.4 (2014-11-10) +------------------ + +* Fixed exception when filename contains multiple languages ISO codes +* Fixed transformers initialization logging +* Fixed possible exception in language transformer +* Added more words to common english words + + +0.9.3 (2014-09-14) +------------------ + +* Added ``Preair`` and ``Remux`` to ``other`` property +* Better detection of ``audioProfile`` = ``HD`` / ``HDMA`` for ``audioCodec`` = ``DTS`` +* Better detection of ``format``` = ``BluRay`` (when followed by Rip) +* Recognize ``RC`` as ``R5`` +* Recognize ``WEB-HD```and ``ẀEB`` as ``WEB-DL`` + + +0.9.2 (2014-09-13) +------------------ + +* Added support of option registration on transformers +* Better detection of ``releaseGroup`` when using ``expected-series`` or ``expected-title`` option +* Better ``audioChannel`` = ``5.1`` / ``7.1`` guessing (``6ch``, ``8ch``) +* Fixed usage not showing when invalid options were passed +* Added ``PAL``, ``SECAM`` and ``NTSC`` to ``other`` possible values +* Recognize DVD-9 and DVD-5 as ``format`` = ``DVD`` property + + +0.9.1 (2014-09-06) +------------------ + +* Added ``--unidentified`` option to display unidentified parts of the filename + This option affects command line only - From API `unidentified` properties will + always be grabbed regardless this settings +* Better guessing of ``releaseGroup`` property +* Added ``mHD`` and 
``HDLight`` to ``other properties`` +* Better guessing of ``format`` = ``DVD`` property (DVD-R pattern) +* Some ``info`` logs changed to ``debug`` for quiet integration +* Small fixes + + +0.9.0 (2014-09-05) +------------------ + +* Better auto-detection of anime episodes, containing a ``crc32`` or a digits ``episodeNumber``. +* Better listing of options on ``guessit -h`` +* Added ``--allowed-countries`` and ``--allowed-languages`` to avoid two or three + letters words to be guessed as ``country`` or ``language`` +* Added ``--disabled-transformers`` option to disable transformer plugin at runtime. +* Added ``--episode-prefer-number`` option, for ``guess -t episode 'serie.123.avi'`` + to return ``episodeNumber`` = ``123`` instead of ``season`` = ``1`` + ``episodeNumber`` = 23`` +* Added ``--split-camel`` option (now disabled by default) +* Added ``episodeCount`` and ``seasonCount`` properties (x-of-n notation) +* Added ``--date-year-first``` and ``--date-day-first`` options +* Added ``--expected-title``, ``--expected-series`` and ``--expected-groups`` + to help finding values when those properties are known +* Added ``10bit`` value to ``videoProfile`` +* Added ``--show-property`` option to only show a single property +* Added ``--input-file`` option to parse a list of +* Added ``--version`` option +* Added ``ass```to subtitle extensions +* Added ``Fansub`` value for ``other`` property +* Added more date formats support with ``dateutil`` dependency +* Added customizable ``clean_function`` (API) +* Added ``default_options`` (API) +* Fixed ``--yaml`` option to support ``language`` and ``country`` +* Fixed ``transformers.add_transformer()`` function (API) -0.8 (2014-06-06) +0.8 (2014-07-06) ---------------- * New webservice that allows to use GuessIt just by sending a POST request to diff -Nru guessit-0.8/PKG-INFO guessit-0.11.0/PKG-INFO --- guessit-0.8/PKG-INFO 2014-07-06 14:02:08.000000000 +0000 +++ guessit-0.11.0/PKG-INFO 2015-09-04 20:51:34.000000000 +0000 @@ -1,12 
+1,12 @@ Metadata-Version: 1.1 Name: guessit -Version: 0.8 +Version: 0.11.0 Summary: GuessIt - a library for guessing information from video files. Home-page: http://guessit.readthedocs.org/ Author: Nicolas Wack Author-email: wackou@gmail.com License: LGPLv3 -Download-URL: https://pypi.python.org/packages/source/g/guessit/guessit-0.8.tar.gz +Download-URL: https://pypi.python.org/packages/source/g/guessit/guessit-0.11.0.tar.gz Description: GuessIt ======= @@ -27,11 +27,11 @@ :alt: Coveralls - GuessIt is a python library that tries to extract as much information as + GuessIt is a python library that extracts as much information as possible from a video file. It has a very powerful filename matcher that allows to guess a lot of - metadata from a video using only its filename. This matcher works with + metadata from a video using its filename only. This matcher works with both movies and tv shows episodes. For example, GuessIt can do the following:: @@ -52,34 +52,57 @@ } + Install + ------- + + Installing GuessIt is simple with `pip `_:: + + $ pip install guessit + + or, with `easy_install `_:: + + $ easy_install guessit + + But, you really `shouldn't do that `_. + + You can now launch a demo:: + + $ guessit -d + + and guess your own filename:: + + $ guessit "Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv" + For: Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv + GuessIt found: { + [1.00] "mimetype": "video/x-matroska", + [1.00] "episodeNumber": 8, + [0.30] "container": "mkv", + [1.00] "format": "BluRay", + [0.70] "series": "Breaking Bad", + [1.00] "releaseGroup": "KoTuWa", + [1.00] "screenSize": "720p", + [1.00] "season": 5, + [1.00] "type": "episode" + } + + + + Filename matcher + ---------------- - Features - -------- + The filename matcher is based on pattern matching and is able to recognize many properties from the filename, + like ``title``, ``year``, ``series``, ``episodeNumber``, ``seasonNumber``, + ``videoCodec``, ``screenSize``, ``language``. 
Guessed values are cleaned up and given in a readable format + which may not match exactly the raw filename. - At the moment, the filename matcher is able to recognize the following - property types:: + The full list of available properties can be seen in the + `main documentation `_. - [ title, # for movies and episodes - series, season, # for episodes only - episodeNumber, episodeDetails, # for episodes only - date, year, # 'date' instance of datetime.date - language, subtitleLanguage, # instances of babelfish.Language - country, # instances of babelfish.Country - fileSize, duration, # when detecting video file metadata - container, format, - videoCodec, audioCodec, - videoProfile, audioProfile, - audioChannels, screenSize, - releaseGroup, website, - cdNumber, cdNumberTotal, - filmNumber, filmSeries, - bonusNumber, edition, - idNumber, # tries to identify a hash or a serial number - other - ] + Other features + -------------- - GuessIt also allows you to compute a whole lof of hashes from a file, + GuessIt also allows you to compute a whole lot of hashes from a file, namely all the ones you can find in the hashlib python module (md5, sha1, ...), but also the Media Player Classic hash that is used (amongst others) by OpenSubtitles and SMPlayer, as well as the ed2k hash. @@ -91,19 +114,88 @@ properties from the actual video file metadata. - Install - ------- + Usage + ----- - Installing GuessIt is simple with `pip `_:: - - $ pip install guessit - - or, with `easy_install `_:: - - $ easy_install guessit - - But, you really `shouldn't do that `_. 
+ guessit can be use from command line:: + $ guessit + usage: guessit [-h] [-t TYPE] [-n] [-c] [-X DISABLED_TRANSFORMERS] [-v] + [-P SHOW_PROPERTY] [-u] [-a] [-y] [-f INPUT_FILE] [-d] [-p] + [-V] [-s] [--version] [-b] [-i INFO] [-S EXPECTED_SERIES] + [-T EXPECTED_TITLE] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-E] + [-C ALLOWED_COUNTRIES] [-G EXPECTED_GROUP] + [filename [filename ...]] + + positional arguments: + filename Filename or release name to guess + + optional arguments: + -h, --help show this help message and exit + + Naming: + -t TYPE, --type TYPE The suggested file type: movie, episode. If undefined, + type will be guessed. + -n, --name-only Parse files as name only. Disable folder parsing, + extension parsing, and file content analysis. + -c, --split-camel Split camel case part of filename. + -X DISABLED_TRANSFORMERS, --disabled-transformer DISABLED_TRANSFORMERS + Transformer to disable (can be used multiple time) + -S EXPECTED_SERIES, --expected-series EXPECTED_SERIES + Expected series to parse (can be used multiple times) + -T EXPECTED_TITLE, --expected-title EXPECTED_TITLE + Expected title (can be used multiple times) + -Y, --date-year-first + If short date is found, consider the first digits as + the year. + -D, --date-day-first If short date is found, consider the second digits as + the day. + -L ALLOWED_LANGUAGES, --allowed-languages ALLOWED_LANGUAGES + Allowed language (can be used multiple times) + -E, --episode-prefer-number + Guess "serie.213.avi" as the episodeNumber 213. + Without this option, it will be guessed as season 2, + episodeNumber 13 + -C ALLOWED_COUNTRIES, --allowed-country ALLOWED_COUNTRIES + Allowed country (can be used multiple times) + -G EXPECTED_GROUP, --expected-group EXPECTED_GROUP + Expected release group (can be used multiple times) + + Output: + -v, --verbose Display debug output + -P SHOW_PROPERTY, --show-property SHOW_PROPERTY + Display the value of a single property (title, series, + videoCodec, year, type ...) 
+ -u, --unidentified Display the unidentified parts. + -a, --advanced Display advanced information for filename guesses, as + json output + -y, --yaml Display information for filename guesses as yaml + output (like unit-test) + -f INPUT_FILE, --input-file INPUT_FILE + Read filenames from an input file. + -d, --demo Run a few builtin tests instead of analyzing a file + + Information: + -p, --properties Display properties that can be guessed. + -V, --values Display property values that can be guessed. + -s, --transformers Display transformers that can be used. + --version Display the guessit version. + + guessit.io: + -b, --bug Submit a wrong detection to the guessit.io service + + Other features: + -i INFO, --info INFO The desired information type: filename, video, + hash_mpc or a hash from python's hashlib module, such + as hash_md5, hash_sha1, ...; or a list of any of them, + comma-separated + + + It can also be used as a python module:: + + >>> from guessit import guess_file_info + >>> guess_file_info('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi') + {u'mimetype': 'video/x-msvideo', u'episodeNumber': 3, u'videoCodec': u'XviD', u'container': u'avi', u'format': u'HDTV', u'series': u'Treme', u'title': u'Right Place, Wrong Time', u'releaseGroup': u'NoTV', u'season': 1, u'type': u'episode'} Support @@ -142,17 +234,147 @@ GuessIt is licensed under the `LGPLv3 license `_. - .. This is your project NEWS file which will contain the release notes. - .. Example: http://www.python.org/download/releases/2.6/NEWS.txt - .. The content of this file, along with README.rst, will appear in your - .. project's PyPI page. 
- History ======= + 0.11.0 (2015-09-04) + ------------------- + + * Fixed year-season episodes with 'x' separator + * Fixed name guessing when a subdirectory contains a number + * Fixed possible IndexError in release_group plugin + * Fixed infinite recursion when multiple languages from same node are ignored in the second pass + * Added skip of language guess for 2-3 letters directories + * Added exclusion of common words from title guessing + * Added a higher confidence on filename over directories + + + 0.10.4 (2015-08-19) + ------------------- + * Added ``LD``/``MD`` properties + * Added better support for ``episodeList`` + * Added more rules for filetype autodetection + * Added support for ``episodeList`` on weak episode patterns + * Added ``partList`` property (list for ``part`` property) + * Added vob to supported file extensions + * Added more ignore words to language detection + * Added string options support for API methods (will be parsed like command-line) + * Added better subtitle detection (prefix priority over suffix) + * Fixed ``version`` property no detected when detached from ``episodeNumber`` + * Fixed ``releaseGroup`` property no detected when prefixed by ``screenSize`` + * Fixed single digit detected as an ``episodeNumber`` + * Fixed an internal issue in matcher causing absolute and relative group spans confusion + * Fixed an internal issue in properties container causing invalid ordering of found patterns + * Fixed raw value for some properties (--advanced) + * Use pytest as test runner + * Remove support for python 2.6 + + + 0.10.3 (2015-04-04) + ------------------- + + * Fix issues related to unicode encoding/decoding + * Fix possible crashes in guess_video_rexps + * Fix invalid guess result when crc32 contains 6 digits than can be parsed as a date + + + 0.10.2 (2015-03-08) + ------------------- + + * Use common words to resolve conflicts on strings + * Bump babelfish version + * Fix setuptools deprecation warning + * Package argparse 
dependency only if python<2.7 + + + 0.10.1 (2015-01-05) + ------------------- + + * Avoid word Stay to be recognized as AY subtitle + * Fixed exception when no unidentified leaves remains + * Avoid usage of deprecated EntryPoint.load() require argument + * Fixed invalid raw data for some properties (title, series and maybe others) + + + 0.10.0 (2014-12-27) + ------------------- + * Fixed exception when serie title starts with Ep + * Fixed exception when trying to parse a full length country name + * Removed deprecated optparse module, replaced by argparse + + + 0.9.4 (2014-11-10) + ------------------ + + * Fixed exception when filename contains multiple languages ISO codes + * Fixed transformers initialization logging + * Fixed possible exception in language transformer + * Added more words to common english words + + + 0.9.3 (2014-09-14) + ------------------ + + * Added ``Preair`` and ``Remux`` to ``other`` property + * Better detection of ``audioProfile`` = ``HD`` / ``HDMA`` for ``audioCodec`` = ``DTS`` + * Better detection of ``format``` = ``BluRay`` (when followed by Rip) + * Recognize ``RC`` as ``R5`` + * Recognize ``WEB-HD```and ``ẀEB`` as ``WEB-DL`` + + + 0.9.2 (2014-09-13) + ------------------ + + * Added support of option registration on transformers + * Better detection of ``releaseGroup`` when using ``expected-series`` or ``expected-title`` option + * Better ``audioChannel`` = ``5.1`` / ``7.1`` guessing (``6ch``, ``8ch``) + * Fixed usage not showing when invalid options were passed + * Added ``PAL``, ``SECAM`` and ``NTSC`` to ``other`` possible values + * Recognize DVD-9 and DVD-5 as ``format`` = ``DVD`` property + + + 0.9.1 (2014-09-06) + ------------------ + + * Added ``--unidentified`` option to display unidentified parts of the filename + This option affects command line only - From API `unidentified` properties will + always be grabbed regardless this settings + * Better guessing of ``releaseGroup`` property + * Added ``mHD`` and ``HDLight`` to 
``other properties`` + * Better guessing of ``format`` = ``DVD`` property (DVD-R pattern) + * Some ``info`` logs changed to ``debug`` for quiet integration + * Small fixes + + + 0.9.0 (2014-09-05) + ------------------ + + * Better auto-detection of anime episodes, containing a ``crc32`` or a digits ``episodeNumber``. + * Better listing of options on ``guessit -h`` + * Added ``--allowed-countries`` and ``--allowed-languages`` to avoid two or three + letters words to be guessed as ``country`` or ``language`` + * Added ``--disabled-transformers`` option to disable transformer plugin at runtime. + * Added ``--episode-prefer-number`` option, for ``guess -t episode 'serie.123.avi'`` + to return ``episodeNumber`` = ``123`` instead of ``season`` = ``1`` + ``episodeNumber`` = 23`` + * Added ``--split-camel`` option (now disabled by default) + * Added ``episodeCount`` and ``seasonCount`` properties (x-of-n notation) + * Added ``--date-year-first``` and ``--date-day-first`` options + * Added ``--expected-title``, ``--expected-series`` and ``--expected-groups`` + to help finding values when those properties are known + * Added ``10bit`` value to ``videoProfile`` + * Added ``--show-property`` option to only show a single property + * Added ``--input-file`` option to parse a list of + * Added ``--version`` option + * Added ``ass```to subtitle extensions + * Added ``Fansub`` value for ``other`` property + * Added more date formats support with ``dateutil`` dependency + * Added customizable ``clean_function`` (API) + * Added ``default_options`` (API) + * Fixed ``--yaml`` option to support ``language`` and ``country`` + * Fixed ``transformers.add_transformer()`` function (API) - 0.8 (2014-06-06) + 0.8 (2014-07-06) ---------------- * New webservice that allows to use GuessIt just by sending a POST request to @@ -346,9 +568,9 @@ Classifier: Operating System :: OS Independent Classifier: Intended Audience :: Developers Classifier: Programming Language :: Python :: 2 -Classifier: 
Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 Classifier: Topic :: Multimedia Classifier: Topic :: Software Development :: Libraries :: Python Modules diff -Nru guessit-0.8/README.rst guessit-0.11.0/README.rst --- guessit-0.8/README.rst 2014-05-31 10:03:31.000000000 +0000 +++ guessit-0.11.0/README.rst 2015-09-01 05:20:25.000000000 +0000 @@ -18,11 +18,11 @@ :alt: Coveralls -GuessIt is a python library that tries to extract as much information as +GuessIt is a python library that extracts as much information as possible from a video file. It has a very powerful filename matcher that allows to guess a lot of -metadata from a video using only its filename. This matcher works with +metadata from a video using its filename only. This matcher works with both movies and tv shows episodes. For example, GuessIt can do the following:: @@ -43,34 +43,57 @@ } +Install +------- + +Installing GuessIt is simple with `pip `_:: + + $ pip install guessit + +or, with `easy_install `_:: -Features --------- + $ easy_install guessit + +But, you really `shouldn't do that `_. 
+ +You can now launch a demo:: + + $ guessit -d -At the moment, the filename matcher is able to recognize the following -property types:: +and guess your own filename:: + + $ guessit "Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv" + For: Breaking.Bad.S05E08.720p.MP4.BDRip.[KoTuWa].mkv + GuessIt found: { + [1.00] "mimetype": "video/x-matroska", + [1.00] "episodeNumber": 8, + [0.30] "container": "mkv", + [1.00] "format": "BluRay", + [0.70] "series": "Breaking Bad", + [1.00] "releaseGroup": "KoTuWa", + [1.00] "screenSize": "720p", + [1.00] "season": 5, + [1.00] "type": "episode" + } - [ title, # for movies and episodes - series, season, # for episodes only - episodeNumber, episodeDetails, # for episodes only - date, year, # 'date' instance of datetime.date - language, subtitleLanguage, # instances of babelfish.Language - country, # instances of babelfish.Country - fileSize, duration, # when detecting video file metadata - container, format, - videoCodec, audioCodec, - videoProfile, audioProfile, - audioChannels, screenSize, - releaseGroup, website, - cdNumber, cdNumberTotal, - filmNumber, filmSeries, - bonusNumber, edition, - idNumber, # tries to identify a hash or a serial number - other - ] -GuessIt also allows you to compute a whole lof of hashes from a file, +Filename matcher +---------------- + +The filename matcher is based on pattern matching and is able to recognize many properties from the filename, +like ``title``, ``year``, ``series``, ``episodeNumber``, ``seasonNumber``, +``videoCodec``, ``screenSize``, ``language``. Guessed values are cleaned up and given in a readable format +which may not match exactly the raw filename. + +The full list of available properties can be seen in the +`main documentation `_. 
+ + +Other features +-------------- + +GuessIt also allows you to compute a whole lot of hashes from a file, namely all the ones you can find in the hashlib python module (md5, sha1, ...), but also the Media Player Classic hash that is used (amongst others) by OpenSubtitles and SMPlayer, as well as the ed2k hash. @@ -82,19 +105,88 @@ properties from the actual video file metadata. -Install -------- - -Installing GuessIt is simple with `pip `_:: - - $ pip install guessit - -or, with `easy_install `_:: - - $ easy_install guessit +Usage +----- -But, you really `shouldn't do that `_. +guessit can be use from command line:: + $ guessit + usage: guessit [-h] [-t TYPE] [-n] [-c] [-X DISABLED_TRANSFORMERS] [-v] + [-P SHOW_PROPERTY] [-u] [-a] [-y] [-f INPUT_FILE] [-d] [-p] + [-V] [-s] [--version] [-b] [-i INFO] [-S EXPECTED_SERIES] + [-T EXPECTED_TITLE] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-E] + [-C ALLOWED_COUNTRIES] [-G EXPECTED_GROUP] + [filename [filename ...]] + + positional arguments: + filename Filename or release name to guess + + optional arguments: + -h, --help show this help message and exit + + Naming: + -t TYPE, --type TYPE The suggested file type: movie, episode. If undefined, + type will be guessed. + -n, --name-only Parse files as name only. Disable folder parsing, + extension parsing, and file content analysis. + -c, --split-camel Split camel case part of filename. + -X DISABLED_TRANSFORMERS, --disabled-transformer DISABLED_TRANSFORMERS + Transformer to disable (can be used multiple time) + -S EXPECTED_SERIES, --expected-series EXPECTED_SERIES + Expected series to parse (can be used multiple times) + -T EXPECTED_TITLE, --expected-title EXPECTED_TITLE + Expected title (can be used multiple times) + -Y, --date-year-first + If short date is found, consider the first digits as + the year. + -D, --date-day-first If short date is found, consider the second digits as + the day. 
+ -L ALLOWED_LANGUAGES, --allowed-languages ALLOWED_LANGUAGES + Allowed language (can be used multiple times) + -E, --episode-prefer-number + Guess "serie.213.avi" as the episodeNumber 213. + Without this option, it will be guessed as season 2, + episodeNumber 13 + -C ALLOWED_COUNTRIES, --allowed-country ALLOWED_COUNTRIES + Allowed country (can be used multiple times) + -G EXPECTED_GROUP, --expected-group EXPECTED_GROUP + Expected release group (can be used multiple times) + + Output: + -v, --verbose Display debug output + -P SHOW_PROPERTY, --show-property SHOW_PROPERTY + Display the value of a single property (title, series, + videoCodec, year, type ...) + -u, --unidentified Display the unidentified parts. + -a, --advanced Display advanced information for filename guesses, as + json output + -y, --yaml Display information for filename guesses as yaml + output (like unit-test) + -f INPUT_FILE, --input-file INPUT_FILE + Read filenames from an input file. + -d, --demo Run a few builtin tests instead of analyzing a file + + Information: + -p, --properties Display properties that can be guessed. + -V, --values Display property values that can be guessed. + -s, --transformers Display transformers that can be used. + --version Display the guessit version. 
+ + guessit.io: + -b, --bug Submit a wrong detection to the guessit.io service + + Other features: + -i INFO, --info INFO The desired information type: filename, video, + hash_mpc or a hash from python's hashlib module, such + as hash_md5, hash_sha1, ...; or a list of any of them, + comma-separated + + +It can also be used as a python module:: + + >>> from guessit import guess_file_info + >>> guess_file_info('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi') + {u'mimetype': 'video/x-msvideo', u'episodeNumber': 3, u'videoCodec': u'XviD', u'container': u'avi', u'format': u'HDTV', u'series': u'Treme', u'title': u'Right Place, Wrong Time', u'releaseGroup': u'NoTV', u'season': 1, u'type': u'episode'} Support diff -Nru guessit-0.8/setup.cfg guessit-0.11.0/setup.cfg --- guessit-0.8/setup.cfg 2014-07-06 14:02:08.000000000 +0000 +++ guessit-0.11.0/setup.cfg 2015-09-04 20:51:34.000000000 +0000 @@ -2,7 +2,7 @@ python-file-with-version = guessit/__version__.py [egg_info] -tag_date = 0 -tag_svn_revision = 0 tag_build = +tag_svn_revision = 0 +tag_date = 0 diff -Nru guessit-0.8/setup.py guessit-0.11.0/setup.py --- guessit-0.8/setup.py 2014-06-21 23:52:22.000000000 +0000 +++ guessit-0.11.0/setup.py 2015-06-06 12:39:15.000000000 +0000 @@ -19,8 +19,10 @@ # from setuptools import setup, find_packages +from setuptools.command.test import test as TestCommand import os +import sys here = os.path.abspath(os.path.dirname(__file__)) @@ -28,9 +30,12 @@ HISTORY = open(os.path.join(here, 'HISTORY.rst')).read() -install_requires = ['babelfish>=0.5.3', 'stevedore>=0.14', 'requests'] +install_requires = ['babelfish>=0.5.4', 'stevedore>=0.14', 'requests', 'python-dateutil>=2.1'] +if sys.version_info < (2, 7): + # argparse is part of the standard library in python 2.7+ + install_requires.append('argparse') -tests_require = ['PyYAML'] # Fabric not available (yet!) for python3 +tests_require = ['pytest', 'PyYAML'] # Fabric not available (yet!) 
for python3 setup_requires = [] @@ -45,6 +50,24 @@ dependency_links = [] + +class PyTest(TestCommand): + user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] + + def initialize_options(self): + TestCommand.initialize_options(self) + self.pytest_args = [] + + def finalize_options(self): + TestCommand.finalize_options(self) + self.test_args = [] + + def run(self): + import pytest + errno = pytest.main(self.pytest_args) + exit(errno) + + exec(open("guessit/__version__.py").read()) # load version without importing guessit args = dict(name='guessit', @@ -57,10 +80,10 @@ 'Operating System :: OS Independent', 'Intended Audience :: Developers', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', 'Topic :: Multimedia', 'Topic :: Software Development :: Libraries :: Python Modules' ], @@ -71,6 +94,7 @@ download_url='https://pypi.python.org/packages/source/g/guessit/guessit-%s.tar.gz' % __version__, license='LGPLv3', packages=find_packages(), + cmdclass={"test": PyTest}, include_package_data=True, install_requires=install_requires, setup_requires=setup_requires,