Merge lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select into lp:~duplicity-team/duplicity/0.7-series

Proposed by Aaron Whitehouse
Status: Merged
Merged at revision: 1199
Proposed branch: lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select
Merge into: lp:~duplicity-team/duplicity/0.7-series
Diff against target: 382 lines (+191/-127)
4 files modified
duplicity/globmatch.py (+135/-0)
duplicity/selection.py (+15/-115)
testing/unit/test_globmatch.py (+41/-0)
testing/unit/test_selection.py (+0/-12)
To merge this branch: bzr merge lp:~aaron-whitehouse/duplicity/split_glob_matching_from_select
Reviewer | Review Type | Date Requested | Status
duplicity-team | | | Pending
Review via email: mp+288206@code.launchpad.net

Commit message

Move glob matching code out of selection.py's Select function and into globmatch.py.

Description of the change

Move the glob matching code out of the Select class in selection.py and into a new module, globmatch.py, to improve the maintainability of the code base.

The Select class in selection.py is quite difficult to follow and is currently very long. Much of its bulk comprises glob matching, which does not require any state from the class. Moving this out into globmatch.py will allow these self-contained functions to be maintained without the need to understand how the Select class works or how it interacts with other parts of duplicity.

This should also make it easier to replace parts of the glob matching code, such as replacing the regular expression approach with fnmatch, without unexpected effects on the rest of the code base.

To post a comment you must log in.
1203. By Aaron Whitehouse <email address hidden>

Re-added import of re, as re.compile still used in selection.py.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'duplicity/globmatch.py'
2--- duplicity/globmatch.py 1970-01-01 00:00:00 +0000
3+++ duplicity/globmatch.py 2016-03-05 18:56:41 +0000
4@@ -0,0 +1,135 @@
5+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
6+#
7+# Copyright 2002 Ben Escoto <ben@emerose.org>
8+# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
9+# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
10+#
11+# This file is part of duplicity.
12+#
13+# Duplicity is free software; you can redistribute it and/or modify it
14+# under the terms of the GNU General Public License as published by the
15+# Free Software Foundation; either version 2 of the License, or (at your
16+# option) any later version.
17+#
18+# Duplicity is distributed in the hope that it will be useful, but
19+# WITHOUT ANY WARRANTY; without even the implied warranty of
20+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21+# General Public License for more details.
22+#
23+# You should have received a copy of the GNU General Public License
24+# along with duplicity; if not, write to the Free Software Foundation,
25+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26+
27+import re
28+
29+
30+class GlobbingError(Exception):
31+ """Something has gone wrong when parsing a glob string"""
32+ pass
33+
34+
35+class FilePrefixError(GlobbingError):
36+ """Signals that a specified file doesn't start with correct prefix"""
37+ pass
38+
39+
40+def _glob_get_prefix_regexs(glob_str):
41+ """Return list of regexps equivalent to prefixes of glob_str"""
42+ # Internal. Used by glob_get_normal_sf.
43+ glob_parts = glob_str.split("/")
44+ if "" in glob_parts[1:-1]:
45+ # "" OK if comes first or last, as in /foo/
46+ raise GlobbingError("Consecutive '/'s found in globbing string "
47+ + glob_str)
48+
49+ prefixes = ["/".join(glob_parts[:i + 1]) for i in range(len(glob_parts))]
50+ # we must make exception for root "/", only dir to end in slash
51+ if prefixes[0] == "":
52+ prefixes[0] = "/"
53+ return list(map(glob_to_regex, prefixes))
54+
55+
56+def path_matches_glob(path, glob_str, include):
57+ """Tests whether path matches glob, as per the Unix shell rules, taking as
58+ arguments a path, a glob string and include (0 indicating that the glob
59+ string is an exclude glob and 1 indicating that it is an include glob,
60+ returning:
61+ 0 - if the file should be excluded
62+ 1 - if the file should be included
63+ 2 - if the folder should be scanned for any included/excluded files
64+ None - if the selection function has nothing to say about the file
65+ """
66+ match_only_dirs = False
67+
68+ # ToDo: Test behaviour of "/" on its own - should always match
69+ if glob_str != "/" and glob_str[-1] == "/":
70+ match_only_dirs = True
71+ # Remove trailing / from directory name (unless that is the entire
72+ # string)
73+ glob_str = glob_str[:-1]
74+
75+ re_comp = lambda r: re.compile(r, re.S)
76+
77+ # matches what glob matches and any files in directory
78+ glob_comp_re = re_comp("^%s($|/)" % glob_to_regex(glob_str))
79+
80+ if glob_str.find("**") != -1:
81+ glob_str = glob_str[:glob_str.find("**") + 2] # truncate after **
82+
83+ scan_comp_re = re_comp("^(%s)$" %
84+ "|".join(_glob_get_prefix_regexs(glob_str)))
85+
86+ if match_only_dirs and not path.isdir():
87+ # If the glob ended with a /, only match directories
88+ return None
89+ elif glob_comp_re.match(path.name):
90+ return include
91+ elif include == 1 and scan_comp_re.match(path.name):
92+ return 2
93+ else:
94+ return None
95+
96+
97+def glob_to_regex(pat):
98+ """Returned regular expression equivalent to shell glob pat
99+
100+ Currently only the ?, *, [], and ** expressions are supported.
101+ Ranges like [a-z] are also currently unsupported. There is no
102+ way to quote these special characters.
103+
104+ This function taken with minor modifications from efnmatch.py
105+ by Donovan Baarda.
106+
107+ """
108+ # Internal. Used by glob_get_normal_sf, glob_get_prefix_res and unit tests.
109+ i, n, res = 0, len(pat), ''
110+ while i < n:
111+ c, s = pat[i], pat[i:i + 2]
112+ i = i + 1
113+ if s == '**':
114+ res = res + '.*'
115+ i = i + 1
116+ elif c == '*':
117+ res = res + '[^/]*'
118+ elif c == '?':
119+ res = res + '[^/]'
120+ elif c == '[':
121+ j = i
122+ if j < n and pat[j] in '!^':
123+ j = j + 1
124+ if j < n and pat[j] == ']':
125+ j = j + 1
126+ while j < n and pat[j] != ']':
127+ j = j + 1
128+ if j >= n:
129+ res = res + '\\[' # interpret the [ literally
130+ else:
131+ # Deal with inside of [..]
132+ stuff = pat[i:j].replace('\\', '\\\\')
133+ i = j + 1
134+ if stuff[0] in '!^':
135+ stuff = '^' + stuff[1:]
136+ res = res + '[' + stuff + ']'
137+ else:
138+ res = res + re.escape(c)
139+ return res
140
141=== modified file 'duplicity/selection.py'
142--- duplicity/selection.py 2016-01-29 11:43:58 +0000
143+++ duplicity/selection.py 2016-03-05 18:56:41 +0000
144@@ -23,15 +23,17 @@
145 from future_builtins import filter, map
146
147 import os # @UnusedImport
148-import re # @UnusedImport
149 import stat # @UnusedImport
150 import sys
151+import re
152
153 from duplicity.path import * # @UnusedWildImport
154 from duplicity import log # @Reimport
155 from duplicity import globals # @Reimport
156 from duplicity import diffdir
157 from duplicity import util # @Reimport
158+from duplicity.globmatch import GlobbingError, FilePrefixError, \
159+ path_matches_glob
160
161 """Iterate exactly the requested files in a directory
162
163@@ -46,16 +48,6 @@
164 pass
165
166
167-class FilePrefixError(SelectError):
168- """Signals that a specified file doesn't start with correct prefix"""
169- pass
170-
171-
172-class GlobbingError(SelectError):
173- """Something has gone wrong when parsing a glob string"""
174- pass
175-
176-
177 class Select:
178 """Iterate appropriate Paths in given directory
179
180@@ -540,57 +532,24 @@
181 """
182 # Internal. Used by glob_get_sf and unit tests.
183
184- match_only_dirs = False
185-
186- if glob_str != "/" and glob_str[-1] == "/":
187- match_only_dirs = True
188- # Remove trailing / from directory name (unless that is the entire
189- # string)
190- glob_str = glob_str[:-1]
191+ ignore_case = False
192
193 if glob_str.lower().startswith("ignorecase:"):
194- re_comp = lambda r: re.compile(r, re.I | re.S)
195- glob_str = glob_str[len("ignorecase:"):]
196- else:
197- re_comp = lambda r: re.compile(r, re.S)
198-
199- # matches what glob matches and any files in directory
200- glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str))
201-
202- if glob_str.find("**") != -1:
203- glob_str = glob_str[:glob_str.find("**") + 2] # truncate after **
204-
205- scan_comp_re = re_comp("^(%s)$" %
206- "|".join(self.glob_get_prefix_res(glob_str)))
207-
208- def include_sel_func(path):
209- if match_only_dirs and not path.isdir():
210- # If the glob ended with a /, only match directories
211- return None
212- elif glob_comp_re.match(path.name):
213- return 1
214- elif scan_comp_re.match(path.name):
215- return 2
216- else:
217- return None
218-
219- def exclude_sel_func(path):
220- if match_only_dirs and not path.isdir():
221- # If the glob ended with a /, only match directories
222- return None
223- elif glob_comp_re.match(path.name):
224- return 0
225- else:
226- return None
227+ # Glob string starts with ignorecase, so remove that from the
228+ # string and change it to lowercase.
229+ glob_str = glob_str[len("ignorecase:"):].lower()
230+ ignore_case = True
231
232 # Check to make sure prefix is ok
233- if not include_sel_func(self.rootpath):
234+ if not path_matches_glob(self.rootpath, glob_str, include=1):
235 raise FilePrefixError(glob_str)
236
237- if include:
238- return include_sel_func
239- else:
240- return exclude_sel_func
241+ def sel_func(path):
242+ if ignore_case:
243+ path.name = path.name.lower()
244+ return path_matches_glob(path, glob_str, include)
245+
246+ return sel_func
247
248 def exclude_older_get_sf(self, date):
249 """Return selection function based on files older than modification date """
250@@ -609,62 +568,3 @@
251 sel_func.exclude = True
252 sel_func.name = "Select older than %s" % (date,)
253 return sel_func
254-
255- def glob_get_prefix_res(self, glob_str):
256- """Return list of regexps equivalent to prefixes of glob_str"""
257- # Internal. Used by glob_get_normal_sf.
258- glob_parts = glob_str.split("/")
259- if "" in glob_parts[1:-1]:
260- # "" OK if comes first or last, as in /foo/
261- raise GlobbingError("Consecutive '/'s found in globbing string "
262- + glob_str)
263-
264- prefixes = ["/".join(glob_parts[:i + 1]) for i in range(len(glob_parts))]
265- # we must make exception for root "/", only dir to end in slash
266- if prefixes[0] == "":
267- prefixes[0] = "/"
268- return map(self.glob_to_re, prefixes)
269-
270- def glob_to_re(self, pat):
271- """Returned regular expression equivalent to shell glob pat
272-
273- Currently only the ?, *, [], and ** expressions are supported.
274- Ranges like [a-z] are also currently unsupported. There is no
275- way to quote these special characters.
276-
277- This function taken with minor modifications from efnmatch.py
278- by Donovan Baarda.
279-
280- """
281- # Internal. Used by glob_get_normal_sf, glob_get_prefix_res and unit tests.
282- i, n, res = 0, len(pat), ''
283- while i < n:
284- c, s = pat[i], pat[i:i + 2]
285- i = i + 1
286- if s == '**':
287- res = res + '.*'
288- i = i + 1
289- elif c == '*':
290- res = res + '[^/]*'
291- elif c == '?':
292- res = res + '[^/]'
293- elif c == '[':
294- j = i
295- if j < n and pat[j] in '!^':
296- j = j + 1
297- if j < n and pat[j] == ']':
298- j = j + 1
299- while j < n and pat[j] != ']':
300- j = j + 1
301- if j >= n:
302- res = res + '\\[' # interpret the [ literally
303- else:
304- # Deal with inside of [..]
305- stuff = pat[i:j].replace('\\', '\\\\')
306- i = j + 1
307- if stuff[0] in '!^':
308- stuff = '^' + stuff[1:]
309- res = res + '[' + stuff + ']'
310- else:
311- res = res + re.escape(c)
312- return res
313
314=== added file 'testing/unit/test_globmatch.py'
315--- testing/unit/test_globmatch.py 1970-01-01 00:00:00 +0000
316+++ testing/unit/test_globmatch.py 2016-03-05 18:56:41 +0000
317@@ -0,0 +1,41 @@
318+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
319+#
320+# Copyright 2002 Ben Escoto <ben@emerose.org>
321+# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
322+# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
323+#
324+# This file is part of duplicity.
325+#
326+# Duplicity is free software; you can redistribute it and/or modify it
327+# under the terms of the GNU General Public License as published by the
328+# Free Software Foundation; either version 2 of the License, or (at your
329+# option) any later version.
330+#
331+# Duplicity is distributed in the hope that it will be useful, but
332+# WITHOUT ANY WARRANTY; without even the implied warranty of
333+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
334+# General Public License for more details.
335+#
336+# You should have received a copy of the GNU General Public License
337+# along with duplicity; if not, write to the Free Software Foundation,
338+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
339+
340+import unittest
341+from duplicity.globmatch import *
342+from . import UnitTestCase
343+
344+
345+class MatchingTest(UnitTestCase):
346+ """Test matching of file names against various selection functions"""
347+
348+ def test_glob_re(self):
349+ """test_glob_re - test translation of shell pattern to regular exp"""
350+ assert glob_to_regex("hello") == "hello"
351+ assert glob_to_regex(".e?ll**o") == "\\.e[^/]ll.*o"
352+ r = glob_to_regex("[abc]el[^de][!fg]h")
353+ assert r == "[abc]el[^de][^fg]h", r
354+ r = glob_to_regex("/usr/*/bin/")
355+ assert r == "\\/usr\\/[^/]*\\/bin\\/", r
356+ assert glob_to_regex("[a.b/c]") == "[a.b/c]"
357+ r = glob_to_regex("[a*b-c]e[!]]")
358+ assert r == "[a*b-c]e[^]]", r
359\ No newline at end of file
360
361=== modified file 'testing/unit/test_selection.py'
362--- testing/unit/test_selection.py 2016-01-24 21:44:51 +0000
363+++ testing/unit/test_selection.py 2016-03-05 18:56:41 +0000
364@@ -116,18 +116,6 @@
365 assert sf2(self.makeext("what/ever.py")) == 0
366 assert sf2(self.makeext("what/ever.py/foo")) == 0
367
368- def test_glob_re(self):
369- """test_glob_re - test translation of shell pattern to regular exp"""
370- assert self.Select.glob_to_re("hello") == "hello"
371- assert self.Select.glob_to_re(".e?ll**o") == "\\.e[^/]ll.*o"
372- r = self.Select.glob_to_re("[abc]el[^de][!fg]h")
373- assert r == "[abc]el[^de][^fg]h", r
374- r = self.Select.glob_to_re("/usr/*/bin/")
375- assert r == "\\/usr\\/[^/]*\\/bin\\/", r
376- assert self.Select.glob_to_re("[a.b/c]") == "[a.b/c]"
377- r = self.Select.glob_to_re("[a*b-c]e[!]]")
378- assert r == "[a*b-c]e[^]]", r
379-
380 def test_simple_glob_double_asterisk(self):
381 """test_simple_glob_double_asterisk - primarily to check that the defaults used by the error tests work"""
382 assert self.Select.glob_get_normal_sf("**", 1)

Subscribers

People subscribed via source and target branches