Merge lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select into lp:~duplicity-team/duplicity/0.7-series

Proposed by Aaron Whitehouse
Status: Merged
Merged at revision: 1199
Proposed branch: lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select
Merge into: lp:~duplicity-team/duplicity/0.7-series
Diff against target: 382 lines (+191/-127)
4 files modified
duplicity/globmatch.py (+135/-0)
duplicity/selection.py (+15/-115)
testing/unit/test_globmatch.py (+41/-0)
testing/unit/test_selection.py (+0/-12)
To merge this branch: bzr merge lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select
Reviewer Review Type Date Requested Status
duplicity-team Pending
Review via email: mp+288206@code.launchpad.net

Commit message

Move glob matching code out of selection.py's Select class and into globmatch.py.

Description of the change

Move glob matching code out of selection.py's Select class and into globmatch.py to improve the maintainability of the code base.

The Select class in selection.py is quite difficult to follow and is currently very long. Much of its bulk comprises glob matching, which does not require any state from the class. Moving this out into globmatch.py will allow these more straightforward functions to be maintained without the need to understand how the Select class works or interacts with other parts of duplicity.

This should also make it easier to replace parts of the glob matching code, such as replacing the regular expression approach with fnmatch, without unexpected effects on the rest of the code base.

To post a comment you must log in.
1203. By Aaron Whitehouse <email address hidden>

Re-added import of re, as re.compile still used in selection.py.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added file 'duplicity/globmatch.py'
--- duplicity/globmatch.py 1970-01-01 00:00:00 +0000
+++ duplicity/globmatch.py 2016-03-05 18:56:41 +0000
@@ -0,0 +1,135 @@
1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
6#
7# This file is part of duplicity.
8#
9# Duplicity is free software; you can redistribute it and/or modify it
10# under the terms of the GNU General Public License as published by the
11# Free Software Foundation; either version 2 of the License, or (at your
12# option) any later version.
13#
14# Duplicity is distributed in the hope that it will be useful, but
15# WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17# General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with duplicity; if not, write to the Free Software Foundation,
21# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23import re
24
25
26class GlobbingError(Exception):
27 """Something has gone wrong when parsing a glob string"""
28 pass
29
30
31class FilePrefixError(GlobbingError):
32 """Signals that a specified file doesn't start with correct prefix"""
33 pass
34
35
36def _glob_get_prefix_regexs(glob_str):
37 """Return list of regexps equivalent to prefixes of glob_str"""
38 # Internal. Used by path_matches_glob.
39 glob_parts = glob_str.split("/")
40 if "" in glob_parts[1:-1]:
41 # "" OK if comes first or last, as in /foo/
42 raise GlobbingError("Consecutive '/'s found in globbing string "
43 + glob_str)
44
45 prefixes = ["/".join(glob_parts[:i + 1]) for i in range(len(glob_parts))]
46 # we must make exception for root "/", only dir to end in slash
47 if prefixes[0] == "":
48 prefixes[0] = "/"
49 return list(map(glob_to_regex, prefixes))
50
51
52def path_matches_glob(path, glob_str, include):
53 """Tests whether path matches glob, as per the Unix shell rules, taking as
54 arguments a path, a glob string and include (0 indicating that the glob
55 string is an exclude glob and 1 indicating that it is an include glob),
56 returning:
57 0 - if the file should be excluded
58 1 - if the file should be included
59 2 - if the folder should be scanned for any included/excluded files
60 None - if the selection function has nothing to say about the file
61 """
62 match_only_dirs = False
63
64 # ToDo: Test behaviour of "/" on its own - should always match
65 if glob_str != "/" and glob_str[-1] == "/":
66 match_only_dirs = True
67 # Remove trailing / from directory name (unless that is the entire
68 # string)
69 glob_str = glob_str[:-1]
70
71 re_comp = lambda r: re.compile(r, re.S)
72
73 # matches what glob matches and any files in directory
74 glob_comp_re = re_comp("^%s($|/)" % glob_to_regex(glob_str))
75
76 if glob_str.find("**") != -1:
77 glob_str = glob_str[:glob_str.find("**") + 2] # truncate after **
78
79 scan_comp_re = re_comp("^(%s)$" %
80 "|".join(_glob_get_prefix_regexs(glob_str)))
81
82 if match_only_dirs and not path.isdir():
83 # If the glob ended with a /, only match directories
84 return None
85 elif glob_comp_re.match(path.name):
86 return include
87 elif include == 1 and scan_comp_re.match(path.name):
88 return 2
89 else:
90 return None
91
92
93def glob_to_regex(pat):
94 """Return a regular expression equivalent to shell glob pat
95
96 Currently only the ?, *, [], and ** expressions are supported.
97 Ranges like [a-z] are also currently unsupported. There is no
98 way to quote these special characters.
99
100 This function taken with minor modifications from efnmatch.py
101 by Donovan Baarda.
102
103 """
104 # Internal. Used by path_matches_glob, _glob_get_prefix_regexs and unit tests.
105 i, n, res = 0, len(pat), ''
106 while i < n:
107 c, s = pat[i], pat[i:i + 2]
108 i = i + 1
109 if s == '**':
110 res = res + '.*'
111 i = i + 1
112 elif c == '*':
113 res = res + '[^/]*'
114 elif c == '?':
115 res = res + '[^/]'
116 elif c == '[':
117 j = i
118 if j < n and pat[j] in '!^':
119 j = j + 1
120 if j < n and pat[j] == ']':
121 j = j + 1
122 while j < n and pat[j] != ']':
123 j = j + 1
124 if j >= n:
125 res = res + '\\[' # interpret the [ literally
126 else:
127 # Deal with inside of [..]
128 stuff = pat[i:j].replace('\\', '\\\\')
129 i = j + 1
130 if stuff[0] in '!^':
131 stuff = '^' + stuff[1:]
132 res = res + '[' + stuff + ']'
133 else:
134 res = res + re.escape(c)
135 return res
0136
=== modified file 'duplicity/selection.py'
--- duplicity/selection.py 2016-01-29 11:43:58 +0000
+++ duplicity/selection.py 2016-03-05 18:56:41 +0000
@@ -23,15 +23,17 @@
23from future_builtins import filter, map23from future_builtins import filter, map
2424
25import os # @UnusedImport25import os # @UnusedImport
26import re # @UnusedImport
27import stat # @UnusedImport26import stat # @UnusedImport
28import sys27import sys
28import re
2929
30from duplicity.path import * # @UnusedWildImport30from duplicity.path import * # @UnusedWildImport
31from duplicity import log # @Reimport31from duplicity import log # @Reimport
32from duplicity import globals # @Reimport32from duplicity import globals # @Reimport
33from duplicity import diffdir33from duplicity import diffdir
34from duplicity import util # @Reimport34from duplicity import util # @Reimport
35from duplicity.globmatch import GlobbingError, FilePrefixError, \
36 path_matches_glob
3537
36"""Iterate exactly the requested files in a directory38"""Iterate exactly the requested files in a directory
3739
@@ -46,16 +48,6 @@
46 pass48 pass
4749
4850
49class FilePrefixError(SelectError):
50 """Signals that a specified file doesn't start with correct prefix"""
51 pass
52
53
54class GlobbingError(SelectError):
55 """Something has gone wrong when parsing a glob string"""
56 pass
57
58
59class Select:51class Select:
60 """Iterate appropriate Paths in given directory52 """Iterate appropriate Paths in given directory
6153
@@ -540,57 +532,24 @@
540 """532 """
541 # Internal. Used by glob_get_sf and unit tests.533 # Internal. Used by glob_get_sf and unit tests.
542534
543 match_only_dirs = False535 ignore_case = False
544
545 if glob_str != "/" and glob_str[-1] == "/":
546 match_only_dirs = True
547 # Remove trailing / from directory name (unless that is the entire
548 # string)
549 glob_str = glob_str[:-1]
550536
551 if glob_str.lower().startswith("ignorecase:"):537 if glob_str.lower().startswith("ignorecase:"):
552 re_comp = lambda r: re.compile(r, re.I | re.S)538 # Glob string starts with ignorecase, so remove that from the
553 glob_str = glob_str[len("ignorecase:"):]539 # string and change it to lowercase.
554 else:540 glob_str = glob_str[len("ignorecase:"):].lower()
555 re_comp = lambda r: re.compile(r, re.S)541 ignore_case = True
556
557 # matches what glob matches and any files in directory
558 glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str))
559
560 if glob_str.find("**") != -1:
561 glob_str = glob_str[:glob_str.find("**") + 2] # truncate after **
562
563 scan_comp_re = re_comp("^(%s)$" %
564 "|".join(self.glob_get_prefix_res(glob_str)))
565
566 def include_sel_func(path):
567 if match_only_dirs and not path.isdir():
568 # If the glob ended with a /, only match directories
569 return None
570 elif glob_comp_re.match(path.name):
571 return 1
572 elif scan_comp_re.match(path.name):
573 return 2
574 else:
575 return None
576
577 def exclude_sel_func(path):
578 if match_only_dirs and not path.isdir():
579 # If the glob ended with a /, only match directories
580 return None
581 elif glob_comp_re.match(path.name):
582 return 0
583 else:
584 return None
585542
586 # Check to make sure prefix is ok543 # Check to make sure prefix is ok
587 if not include_sel_func(self.rootpath):544 if not path_matches_glob(self.rootpath, glob_str, include=1):
588 raise FilePrefixError(glob_str)545 raise FilePrefixError(glob_str)
589546
590 if include:547 def sel_func(path):
591 return include_sel_func548 if ignore_case:
592 else:549 path.name = path.name.lower()
593 return exclude_sel_func550 return path_matches_glob(path, glob_str, include)
551
552 return sel_func
594553
595 def exclude_older_get_sf(self, date):554 def exclude_older_get_sf(self, date):
596 """Return selection function based on files older than modification date """555 """Return selection function based on files older than modification date """
@@ -609,62 +568,3 @@
609 sel_func.exclude = True568 sel_func.exclude = True
610 sel_func.name = "Select older than %s" % (date,)569 sel_func.name = "Select older than %s" % (date,)
611 return sel_func570 return sel_func
612
613 def glob_get_prefix_res(self, glob_str):
614 """Return list of regexps equivalent to prefixes of glob_str"""
615 # Internal. Used by glob_get_normal_sf.
616 glob_parts = glob_str.split("/")
617 if "" in glob_parts[1:-1]:
618 # "" OK if comes first or last, as in /foo/
619 raise GlobbingError("Consecutive '/'s found in globbing string "
620 + glob_str)
621
622 prefixes = ["/".join(glob_parts[:i + 1]) for i in range(len(glob_parts))]
623 # we must make exception for root "/", only dir to end in slash
624 if prefixes[0] == "":
625 prefixes[0] = "/"
626 return map(self.glob_to_re, prefixes)
627
628 def glob_to_re(self, pat):
629 """Returned regular expression equivalent to shell glob pat
630
631 Currently only the ?, *, [], and ** expressions are supported.
632 Ranges like [a-z] are also currently unsupported. There is no
633 way to quote these special characters.
634
635 This function taken with minor modifications from efnmatch.py
636 by Donovan Baarda.
637
638 """
639 # Internal. Used by glob_get_normal_sf, glob_get_prefix_res and unit tests.
640 i, n, res = 0, len(pat), ''
641 while i < n:
642 c, s = pat[i], pat[i:i + 2]
643 i = i + 1
644 if s == '**':
645 res = res + '.*'
646 i = i + 1
647 elif c == '*':
648 res = res + '[^/]*'
649 elif c == '?':
650 res = res + '[^/]'
651 elif c == '[':
652 j = i
653 if j < n and pat[j] in '!^':
654 j = j + 1
655 if j < n and pat[j] == ']':
656 j = j + 1
657 while j < n and pat[j] != ']':
658 j = j + 1
659 if j >= n:
660 res = res + '\\[' # interpret the [ literally
661 else:
662 # Deal with inside of [..]
663 stuff = pat[i:j].replace('\\', '\\\\')
664 i = j + 1
665 if stuff[0] in '!^':
666 stuff = '^' + stuff[1:]
667 res = res + '[' + stuff + ']'
668 else:
669 res = res + re.escape(c)
670 return res
671571
=== added file 'testing/unit/test_globmatch.py'
--- testing/unit/test_globmatch.py 1970-01-01 00:00:00 +0000
+++ testing/unit/test_globmatch.py 2016-03-05 18:56:41 +0000
@@ -0,0 +1,41 @@
1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
6#
7# This file is part of duplicity.
8#
9# Duplicity is free software; you can redistribute it and/or modify it
10# under the terms of the GNU General Public License as published by the
11# Free Software Foundation; either version 2 of the License, or (at your
12# option) any later version.
13#
14# Duplicity is distributed in the hope that it will be useful, but
15# WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17# General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with duplicity; if not, write to the Free Software Foundation,
21# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23import unittest
24from duplicity.globmatch import *
25from . import UnitTestCase
26
27
28class MatchingTest(UnitTestCase):
29 """Test matching of file names against various selection functions"""
30
31 def test_glob_re(self):
32 """test_glob_re - test translation of shell pattern to regular exp"""
33 assert glob_to_regex("hello") == "hello"
34 assert glob_to_regex(".e?ll**o") == "\\.e[^/]ll.*o"
35 r = glob_to_regex("[abc]el[^de][!fg]h")
36 assert r == "[abc]el[^de][^fg]h", r
37 r = glob_to_regex("/usr/*/bin/")
38 assert r == "\\/usr\\/[^/]*\\/bin\\/", r
39 assert glob_to_regex("[a.b/c]") == "[a.b/c]"
40 r = glob_to_regex("[a*b-c]e[!]]")
41 assert r == "[a*b-c]e[^]]", r
0\ No newline at end of file42\ No newline at end of file
143
=== modified file 'testing/unit/test_selection.py'
--- testing/unit/test_selection.py 2016-01-24 21:44:51 +0000
+++ testing/unit/test_selection.py 2016-03-05 18:56:41 +0000
@@ -116,18 +116,6 @@
116 assert sf2(self.makeext("what/ever.py")) == 0116 assert sf2(self.makeext("what/ever.py")) == 0
117 assert sf2(self.makeext("what/ever.py/foo")) == 0117 assert sf2(self.makeext("what/ever.py/foo")) == 0
118118
119 def test_glob_re(self):
120 """test_glob_re - test translation of shell pattern to regular exp"""
121 assert self.Select.glob_to_re("hello") == "hello"
122 assert self.Select.glob_to_re(".e?ll**o") == "\\.e[^/]ll.*o"
123 r = self.Select.glob_to_re("[abc]el[^de][!fg]h")
124 assert r == "[abc]el[^de][^fg]h", r
125 r = self.Select.glob_to_re("/usr/*/bin/")
126 assert r == "\\/usr\\/[^/]*\\/bin\\/", r
127 assert self.Select.glob_to_re("[a.b/c]") == "[a.b/c]"
128 r = self.Select.glob_to_re("[a*b-c]e[!]]")
129 assert r == "[a*b-c]e[^]]", r
130
131 def test_simple_glob_double_asterisk(self):119 def test_simple_glob_double_asterisk(self):
132 """test_simple_glob_double_asterisk - primarily to check that the defaults used by the error tests work"""120 """test_simple_glob_double_asterisk - primarily to check that the defaults used by the error tests work"""
133 assert self.Select.glob_get_normal_sf("**", 1)121 assert self.Select.glob_get_normal_sf("**", 1)

Subscribers

People subscribed via source and target branches