diff -Nru python-nameparser-1.0.6/debian/changelog python-nameparser-1.1.1/debian/changelog --- python-nameparser-1.0.6/debian/changelog 2021-03-12 21:22:23.000000000 +0000 +++ python-nameparser-1.1.1/debian/changelog 2022-02-01 15:25:35.000000000 +0000 @@ -1,3 +1,11 @@ +python-nameparser (1.1.1-1) unstable; urgency=medium + + * New upstream release. + * Update copyright year. + * Update Standards-Version. + + -- Edward Betts Tue, 01 Feb 2022 15:25:35 +0000 + python-nameparser (1.0.6-2) unstable; urgency=medium [ Debian Janitor ] diff -Nru python-nameparser-1.0.6/debian/control python-nameparser-1.1.1/debian/control --- python-nameparser-1.0.6/debian/control 2021-03-12 21:20:22.000000000 +0000 +++ python-nameparser-1.1.1/debian/control 2022-02-01 15:21:32.000000000 +0000 @@ -10,7 +10,7 @@ python3-nose, python3-setuptools Rules-Requires-Root: no -Standards-Version: 4.5.1 +Standards-Version: 4.6.0 Homepage: https://github.com/derek73/python-nameparser Vcs-Browser: https://salsa.debian.org/python-team/packages/python-nameparser Vcs-Git: https://salsa.debian.org/python-team/packages/python-nameparser.git diff -Nru python-nameparser-1.0.6/debian/copyright python-nameparser-1.1.1/debian/copyright --- python-nameparser-1.0.6/debian/copyright 2021-03-12 21:20:34.000000000 +0000 +++ python-nameparser-1.1.1/debian/copyright 2022-02-01 15:20:23.000000000 +0000 @@ -8,7 +8,7 @@ License: LGPL-2.1+ Files: debian/* -Copyright: 2016-2021, Edward Betts +Copyright: 2016-2022, Edward Betts License: LGPL-2.1+ diff -Nru python-nameparser-1.0.6/nameparser/config/__init__.py python-nameparser-1.1.1/nameparser/config/__init__.py --- python-nameparser-1.0.6/nameparser/config/__init__.py 2019-12-12 03:33:09.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/config/__init__.py 2022-01-29 02:29:27.000000000 +0000 @@ -49,35 +49,37 @@ DEFAULT_ENCODING = 'UTF-8' + class SetManager(Set): ''' Easily add and remove config variables per module or instance. Subclass of ``collections.abc.Set``. 
- + Only special functionality beyond that provided by set() is to normalize constants for comparison (lower case, no periods) when they are add()ed and remove()d and allow passing multiple string arguments to the :py:func:`add()` and :py:func:`remove()` methods. - + ''' + def __init__(self, elements): self.elements = set(elements) - + def __call__(self): return self.elements - + def __repr__(self): - return "SetManager({})".format(self.elements) # used for docs - + return "SetManager({})".format(self.elements) # used for docs + def __iter__(self): return iter(self.elements) - + def __contains__(self, value): return value in self.elements - + def __len__(self): return len(self.elements) - + def next(self): return self.__next__() @@ -89,7 +91,7 @@ c = self.count self.count = c + 1 return getattr(self, self.elements[c]) or next(self) - + def add_with_encoding(self, s, encoding=None): """ Add the lower case and no-period version of the string to the set. Pass an @@ -111,7 +113,7 @@ """ [self.add_with_encoding(s) for s in strings] return self - + def remove(self, *strings): """ Remove the lower case and no-period version of the string arguments from the set. @@ -126,10 +128,11 @@ A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants more friendly. ''' + def __getattr__(self, attr): return self.get(attr) - __setattr__= dict.__setitem__ - __delattr__= dict.__delitem__ + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ def __getstate__(self): return dict(self) @@ -140,6 +143,7 @@ def __reduce__(self): return (TupleManager, (), self.__getstate__()) + class Constants(object): """ An instance of this class hold all of the configuration constants for the parser. @@ -163,11 +167,23 @@ :param regexes: :py:attr:`regexes` wrapped with :py:class:`TupleManager`. """ - + string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" """ The default string format use for all new `HumanName` instances. 
""" + + initials_format = "{first} {middle} {last}" + """ + The default initials format used for all new `HumanName` instances. + """ + + initials_delimiter = "." + """ + The default initials delimiter used for all new `HumanName` instances. + Will be used to add a delimiter between each initial. + """ + empty_attribute_default = '' """ Default return value for empty attributes. @@ -183,6 +199,7 @@ 'John' """ + capitalize_name = False """ If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to @@ -197,6 +214,7 @@ 'Bob V. de la MacDole-Eisenhower Ph.D.' """ + force_mixed_case_capitalization = False """ If set, forces the capitalization of mixed case strings when @@ -213,27 +231,26 @@ """ - - def __init__(self, - prefixes=PREFIXES, - suffix_acronyms=SUFFIX_ACRONYMS, - suffix_not_acronyms=SUFFIX_NOT_ACRONYMS, - titles=TITLES, - first_name_titles=FIRST_NAME_TITLES, - conjunctions=CONJUNCTIONS, - capitalization_exceptions=CAPITALIZATION_EXCEPTIONS, - regexes=REGEXES - ): - self.prefixes = SetManager(prefixes) - self.suffix_acronyms = SetManager(suffix_acronyms) + def __init__(self, + prefixes=PREFIXES, + suffix_acronyms=SUFFIX_ACRONYMS, + suffix_not_acronyms=SUFFIX_NOT_ACRONYMS, + titles=TITLES, + first_name_titles=FIRST_NAME_TITLES, + conjunctions=CONJUNCTIONS, + capitalization_exceptions=CAPITALIZATION_EXCEPTIONS, + regexes=REGEXES + ): + self.prefixes = SetManager(prefixes) + self.suffix_acronyms = SetManager(suffix_acronyms) self.suffix_not_acronyms = SetManager(suffix_not_acronyms) - self.titles = SetManager(titles) - self.first_name_titles = SetManager(first_name_titles) - self.conjunctions = SetManager(conjunctions) + self.titles = SetManager(titles) + self.first_name_titles = SetManager(first_name_titles) + self.conjunctions = SetManager(conjunctions) self.capitalization_exceptions = TupleManager(capitalization_exceptions) - self.regexes = TupleManager(regexes) + self.regexes = TupleManager(regexes) self._pst = None - + @property def 
suffixes_prefixes_titles(self): if not self._pst: @@ -242,15 +259,16 @@ def __repr__(self): return "" - + def __setstate__(self, state): self.__init__(state) - + def __getstate__(self): attrs = [x for x in dir(self) if not x.startswith('_')] - return dict([(a,getattr(self, a)) for a in attrs]) + return dict([(a, getattr(self, a)) for a in attrs]) + -#: A module-level instance of the :py:class:`Constants()` class. +#: A module-level instance of the :py:class:`Constants()` class. #: Provides a common instance for the module to share #: to easily adjust configuration for the entire module. #: See `Customizing the Parser with Your Own Configuration `_. diff -Nru python-nameparser-1.0.6/nameparser/config/prefixes.py python-nameparser-1.1.1/nameparser/config/prefixes.py --- python-nameparser-1.0.6/nameparser/config/prefixes.py 2019-12-12 02:56:45.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/config/prefixes.py 2022-01-29 02:29:27.000000000 +0000 @@ -12,11 +12,13 @@ #: correct parsing of the last name "von bergen wessels". 
PREFIXES = set([ 'abu', + 'al', 'bin', 'bon', 'da', 'dal', 'de', + 'de\'', 'degli', 'dei', 'del', @@ -34,11 +36,15 @@ 'ibn', 'la', 'le', + 'mac', + 'mc', 'san', 'santa', 'st', 'ste', 'van', + 'vander', 'vel', 'von', + 'vom', ]) diff -Nru python-nameparser-1.0.6/nameparser/config/suffixes.py python-nameparser-1.1.1/nameparser/config/suffixes.py --- python-nameparser-1.0.6/nameparser/config/suffixes.py 2019-12-12 05:00:40.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/config/suffixes.py 2022-01-29 02:29:27.000000000 +0000 @@ -239,6 +239,7 @@ 'cpm', 'cpo', 'cpp', + 'cppm', 'cprc', 'cpre', 'cprp', diff -Nru python-nameparser-1.0.6/nameparser/config/titles.py python-nameparser-1.1.1/nameparser/config/titles.py --- python-nameparser-1.0.6/nameparser/config/titles.py 2019-12-12 04:35:59.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/config/titles.py 2022-01-29 02:29:27.000000000 +0000 @@ -117,6 +117,7 @@ 'banner', 'bard', 'baron', + 'baroness', 'barrister', 'baseball', 'bearer', @@ -136,6 +137,7 @@ 'bodhisattva', 'bookseller', 'botanist', + 'bp', 'brigadier', 'briggen', 'british', @@ -223,6 +225,7 @@ 'cwo5', 'cyclist', 'dancer', + 'dcn', 'deacon', 'delegate', 'deputy', @@ -249,7 +252,7 @@ 'druid', 'drummer', 'duchesse', - 'duke', + # 'duke', # a common first name 'dutchess', 'ecologist', 'economist', @@ -288,6 +291,7 @@ 'foreign', 'forester', 'founder', + 'fr', 'friar', 'gaf', 'gen', @@ -395,6 +399,7 @@ 'member', 'memoirist', 'merchant', + 'met', 'metropolitan', 'mg', 'mgr', @@ -425,6 +430,7 @@ 'murshid', 'musician', 'musicologist', + 'mx', 'mystery', 'nanny', 'narrator', @@ -568,6 +574,7 @@ 'srta', 'ssg', 'ssgt', + 'st', 'staff', 'state', 'states', diff -Nru python-nameparser-1.0.6/nameparser/__init__.py python-nameparser-1.1.1/nameparser/__init__.py --- python-nameparser-1.0.6/nameparser/__init__.py 2020-02-08 21:35:00.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/__init__.py 2022-01-29 02:29:27.000000000 +0000 @@ -1,4 +1,4 @@ -VERSION = (1, 0, 
6) +VERSION = (1, 1, 1) __version__ = '.'.join(map(str, VERSION)) __author__ = "Derek Gulbranson" __author_email__ = 'derek73@gmail.com' diff -Nru python-nameparser-1.0.6/nameparser/parser.py python-nameparser-1.1.1/nameparser/parser.py --- python-nameparser-1.0.6/nameparser/parser.py 2020-02-08 21:32:27.000000000 +0000 +++ python-nameparser-1.1.1/nameparser/parser.py 2022-01-29 02:29:27.000000000 +0000 @@ -2,6 +2,7 @@ from __future__ import unicode_literals import sys +import re from operator import itemgetter from itertools import groupby @@ -15,6 +16,7 @@ ENCODING = 'utf-8' + def group_contiguous_integers(data): """ return list of tuples containing first and last index @@ -27,6 +29,7 @@ ranges.append((group[0], group[-1])) return ranges + class HumanName(object): """ Parse a person's name into individual components. @@ -51,6 +54,8 @@ `per-instance config `_. :param str encoding: string representing the encoding of your input :param str string_format: python string formatting + :param str initials_format: python initials string formatting + :param str initials_delimter: string delimiter for initials """ C = CONSTANTS @@ -67,18 +72,20 @@ """ _count = 0 - _members = ['title','first','middle','last','suffix','nickname'] + _members = ['title', 'first', 'middle', 'last', 'suffix', 'nickname'] unparsable = True _full_name = '' def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING, - string_format=None): + string_format=None, initials_format=None, initials_delimiter=None): self.C = constants if type(self.C) is not type(CONSTANTS): self.C = Constants() self.encoding = encoding self.string_format = string_format or self.C.string_format + self.initials_format = initials_format or self.C.initials_format + self.initials_delimiter = initials_delimiter or self.C.initials_delimiter # full_name setter triggers the parse self.full_name = full_name @@ -130,7 +137,7 @@ # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" _s = 
self.string_format.format(**self.as_dict()) # remove trailing punctuation from missing nicknames - _s = _s.replace(str(self.C.empty_attribute_default),'').replace(" ()","").replace(" ''","").replace(' ""',"") + _s = _s.replace(str(self.C.empty_attribute_default), '').replace(" ()", "").replace(" ''", "").replace(' ""', "") return self.collapse_whitespace(_s).strip(', ') return " ".join(self) @@ -141,7 +148,7 @@ def __repr__(self): if self.unparsable: - _string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__,} + _string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, } else: _string = "<%(class)s : [\n\ttitle: '%(title)s' \n\tfirst: '%(first)s' \n\tmiddle: '%(middle)s' \n\tlast: '%(last)s' \n\tsuffix: '%(suffix)s'\n\tnickname: '%(nickname)s'\n]>" % { 'class': self.__class__.__name__, @@ -182,6 +189,73 @@ d[m] = val return d + def __process_initial__(self, name_part, firstname=False): + """ + Name parts may include prefixes or conjuctions. This function filters these from the name unless it is + a first name, since first names cannot be conjunctions or prefixes. + """ + parts = name_part.split(" ") + initials = [] + if len(parts) and isinstance(parts, list): + for part in parts: + if not (self.is_prefix(part) or self.is_conjunction(part)) or firstname == True: + initials.append(part[0]) + if len(initials) > 0: + return " ".join(initials) + else: + return self.C.empty_attribute_default + + def initials_list(self): + """ + Returns the initials as a list + + .. doctest:: + + >>> name = HumanName("Sir Bob Andrew Dole") + >>> name.initials_list() + ["B", "A", "D"] + >>> name = HumanName("J. 
Doe") + >>> name.initials_list() + ["J", "D"] + """ + first_initials_list = [self.__process_initial__(name, True) for name in self.first_list if name] + middle_initials_list = [self.__process_initial__(name) for name in self.middle_list if name] + last_initials_list = [self.__process_initial__(name) for name in self.last_list if name] + return first_initials_list + middle_initials_list + last_initials_list + + def initials(self): + """ + Return period-delimited initials of the first, middle and optionally last name. + + :param bool include_last_name: Include the last name as part of the initials + :rtype: str + + .. doctest:: + + >>> name = HumanName("Sir Bob Andrew Dole") + >>> name.initials() + "B. A. D." + >>> name = HumanName("Sir Bob Andrew Dole", initials_format="{first} {middle}") + >>> name.initials() + "B. A." + """ + + first_initials_list = [self.__process_initial__(name, True) for name in self.first_list if name] + middle_initials_list = [self.__process_initial__(name) for name in self.middle_list if name] + last_initials_list = [self.__process_initial__(name) for name in self.last_list if name] + + initials_dict = { + "first": (self.initials_delimiter + " ").join(first_initials_list) + self.initials_delimiter + if len(first_initials_list) else self.C.empty_attribute_default, + "middle": (self.initials_delimiter + " ").join(middle_initials_list) + self.initials_delimiter + if len(middle_initials_list) else self.C.empty_attribute_default, + "last": (self.initials_delimiter + " ").join(last_initials_list) + self.initials_delimiter + if len(last_initials_list) else self.C.empty_attribute_default + } + + _s = self.initials_format.format(**initials_dict) + return self.collapse_whitespace(_s) + @property def has_own_config(self): """ @@ -190,7 +264,7 @@ """ return self.C is not CONSTANTS - ### attributes + # attributes @property def title(self): @@ -259,7 +333,7 @@ """ return " ".join(self.surnames_list) or self.C.empty_attribute_default - ### setter methods + 
# setter methods def _set_list(self, attr, value): if isinstance(value, list): @@ -270,8 +344,8 @@ val = [] else: raise TypeError( - "Can only assign strings, lists or None to name attributes." - " Got {0}".format(type(value))) + "Can only assign strings, lists or None to name attributes." + " Got {0}".format(type(value))) setattr(self, attr+"_list", self.parse_pieces(val)) @title.setter @@ -298,22 +372,32 @@ def nickname(self, value): self._set_list('nickname', value) - ### Parse helpers + # Parse helpers def is_title(self, value): """Is in the :py:data:`~nameparser.config.titles.TITLES` set.""" return lc(value) in self.C.titles def is_conjunction(self, piece): - """Is in the conjuctions set and not :py:func:`is_an_initial()`.""" - return piece.lower() in self.C.conjunctions and not self.is_an_initial(piece) + """Is in the conjunctions set and not :py:func:`is_an_initial()`.""" + if isinstance(piece, list): + for item in piece: + if self.is_conjunction(item): + return True + else: + return piece.lower() in self.C.conjunctions and not self.is_an_initial(piece) def is_prefix(self, piece): """ Lowercase and no periods version of piece is in the :py:data:`~nameparser.config.prefixes.PREFIXES` set. """ - return lc(piece) in self.C.prefixes + if isinstance(piece, list): + for item in piece: + if self.is_prefix(item): + return True + else: + return lc(piece) in self.C.prefixes def is_roman_numeral(self, value): """ @@ -331,9 +415,14 @@ `C.suffix_acronyms`. """ # suffixes may have periods inside them like "M.D." 
- return ((lc(piece).replace('.','') in self.C.suffix_acronyms) \ - or (lc(piece) in self.C.suffix_not_acronyms)) \ - and not self.is_an_initial(piece) + if isinstance(piece, list): + for item in piece: + if self.is_suffix(item): + return True + else: + return ((lc(piece).replace('.', '') in self.C.suffix_acronyms) + or (lc(piece) in self.C.suffix_not_acronyms)) \ + and not self.is_an_initial(piece) def are_suffixes(self, pieces): """Return True if all pieces are suffixes.""" @@ -358,8 +447,7 @@ """ return bool(self.C.regexes.initial.match(value)) - - ### full_name parser + # full_name parser @property def full_name(self): @@ -376,7 +464,7 @@ def collapse_whitespace(self, string): # collapse multiple spaces into single space - string = self.C.regexes.spaces.sub(" ", string.strip()) + string = self.C.regexes.spaces.sub(" ", string.strip()) if string.endswith(","): string = string[:-1] return string @@ -404,11 +492,14 @@ self.handle_capitalization() def fix_phd(self): - _re = self.C.regexes.phd - match = _re.search(self._full_name) - if match: - self.suffix_list.append(match.group(1)) - self._full_name = _re.sub('', self._full_name) + try: + _re = self.C.regexes.phd + match = _re.search(self._full_name) + if match: + self.suffix_list.append(match.group(1)) + self._full_name = _re.sub('', self._full_name) + except AttributeError: + pass def parse_nicknames(self): """ @@ -422,10 +513,12 @@ Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`; `quoted_word`, `double_quotes` and `parenthesis`. 
""" - - re_quoted_word = self.C.regexes.quoted_word - re_double_quotes = self.C.regexes.double_quotes - re_parenthesis = self.C.regexes.parenthesis + + empty_re = re.compile("") + + re_quoted_word = self.C.regexes.quoted_word or empty_re + re_double_quotes = self.C.regexes.double_quotes or empty_re + re_parenthesis = self.C.regexes.parenthesis or empty_re for _re in (re_quoted_word, re_double_quotes, re_parenthesis): if _re.search(self._full_name): @@ -474,7 +567,6 @@ self.nickname_list = [] self.unparsable = True - self.pre_process() self._full_name = self.collapse_whitespace(self._full_name) @@ -516,7 +608,7 @@ # numeral but this piece is not an initial self.is_roman_numeral(nxt) and i == p_len - 2 and not self.is_an_initial(piece) - ): + ): self.last_list.append(piece) self.suffix_list += pieces[i+1:] break @@ -540,7 +632,6 @@ # title first middle last [suffix], suffix [suffix] [, suffix] # parts[0], parts[1:...] - self.suffix_list += parts[1:] pieces = self.parse_pieces(parts[0].split(' ')) log.debug("pieces: %s", u(pieces)) @@ -614,7 +705,6 @@ self.unparsable = False self.post_process() - def parse_pieces(self, parts, additional_parts_count=0): """ Split parts on spaces and remove commas, join on conjunctions and @@ -644,11 +734,11 @@ # constants so they get parsed correctly later for part in output: # if this part has a period not at the beginning or end - if self.C.regexes.period_not_at_end.match(part): + if self.C.regexes.period_not_at_end and self.C.regexes.period_not_at_end.match(part): # split on periods, any of the split pieces titles or suffixes? 
# ("Lt.Gov.") period_chunks = part.split(".") - titles = list(filter(self.is_title, period_chunks)) + titles = list(filter(self.is_title, period_chunks)) suffixes = list(filter(self.is_suffix, period_chunks)) # add the part to the constant so it will be found @@ -695,7 +785,7 @@ # other, then join those newly joined conjunctions and any single # conjunctions to the piece before and after it conj_index = [i for i, piece in enumerate(pieces) - if self.is_conjunction(piece)] + if self.is_conjunction(piece)] contiguous_conj_i = [] for i, val in enumerate(conj_index): @@ -710,14 +800,14 @@ delete_i = [] for i in contiguous_conj_i: if type(i) == tuple: - new_piece = " ".join(pieces[ i[0] : i[1]+1] ) - delete_i += list(range( i[0]+1, i[1]+1 )) + new_piece = " ".join(pieces[i[0]: i[1]+1]) + delete_i += list(range(i[0]+1, i[1]+1)) pieces[i[0]] = new_piece else: - new_piece = " ".join(pieces[ i : i+2 ]) + new_piece = " ".join(pieces[i: i+2]) delete_i += [i+1] pieces[i] = new_piece - #add newly joined conjunctions to constants to be found later + # add newly joined conjunctions to constants to be found later self.C.conjunctions.add(new_piece) for i in reversed(delete_i): @@ -747,9 +837,9 @@ pieces[i] = new_piece pieces.pop(i+1) # subtract 1 from the index of all the remaining conjunctions - for j,val in enumerate(conj_index): + for j, val in enumerate(conj_index): if val > i: - conj_index[j]=val-1 + conj_index[j] = val-1 else: new_piece = " ".join(pieces[i-1:i+2]) @@ -766,11 +856,10 @@ # subtract the number of removed pieces from the index # of all the remaining conjunctions - for j,val in enumerate(conj_index): + for j, val in enumerate(conj_index): if val > i: conj_index[j] = val - rm_count - # join prefixes to following lastnames: ['de la Vega'], ['van Buren'] prefixes = list(filter(self.is_prefix, pieces)) if prefixes: @@ -790,6 +879,9 @@ # join everything after the prefix until the next prefix or suffix try: + if i == 0 and total_length >= 1: + # If it's the first piece 
and there are more than 1 rootnames, assume it's a first name + continue next_prefix = next(iter(filter(self.is_prefix, pieces[i + 1:]))) j = pieces.index(next_prefix) if j == i + 1: @@ -813,12 +905,11 @@ log.debug("pieces: %s", pieces) return pieces - - ### Capitalization Support + # Capitalization Support def cap_word(self, word, attribute): - if (self.is_prefix(word) and attribute in ('last','middle')) \ - or self.is_conjunction(word): + if (self.is_prefix(word) and attribute in ('last', 'middle')) \ + or self.is_conjunction(word): return word.lower() exceptions = self.C.capitalization_exceptions if lc(word) in exceptions: @@ -834,7 +925,8 @@ def cap_piece(self, piece, attribute): if not piece: return "" - replacement = lambda m: self.cap_word(m.group(0), attribute) + + def replacement(m): return self.cap_word(m.group(0), attribute) return self.C.regexes.word.sub(replacement, piece) def capitalize(self, force=None): @@ -872,10 +964,10 @@ if not force and not (name == name.upper() or name == name.lower()): return - self.title_list = self.cap_piece(self.title , 'title').split(' ') - self.first_list = self.cap_piece(self.first , 'first').split(' ') + self.title_list = self.cap_piece(self.title, 'title').split(' ') + self.first_list = self.cap_piece(self.first, 'first').split(' ') self.middle_list = self.cap_piece(self.middle, 'middle').split(' ') - self.last_list = self.cap_piece(self.last , 'last').split(' ') + self.last_list = self.cap_piece(self.last, 'last').split(' ') self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') def handle_capitalization(self): diff -Nru python-nameparser-1.0.6/nameparser.egg-info/PKG-INFO python-nameparser-1.1.1/nameparser.egg-info/PKG-INFO --- python-nameparser-1.0.6/nameparser.egg-info/PKG-INFO 2020-02-08 21:41:38.000000000 +0000 +++ python-nameparser-1.1.1/nameparser.egg-info/PKG-INFO 2022-01-29 02:29:36.000000000 +0000 @@ -1,158 +1,11 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: nameparser -Version: 1.0.6 
+Version: 1.1.1 Summary: A simple Python module for parsing human names into their individual components. Home-page: https://github.com/derek73/python-nameparser Author: Derek Gulbranson Author-email: derek73@gmail.com License: LGPL -Description: Name Parser - =========== - - |Build Status| |PyPI| |PyPI version| |Documentation| - - A simple Python (3.2+ & 2.6+) module for parsing human names into their - individual components. - - * hn.title - * hn.first - * hn.middle - * hn.last - * hn.suffix - * hn.nickname - * hn.surnames *(middle + last)* - - Supported Name Structures - ~~~~~~~~~~~~~~~~~~~~~~~~~ - - The supported name structure is generally "Title First Middle Last Suffix", where all pieces - are optional. Comma-separated format like "Last, First" is also supported. - - 1. Title Firstname "Nickname" Middle Middle Lastname Suffix - 2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix] - 3. Title Firstname M Lastname [Suffix], Suffix [Suffix] [, Suffix] - - Instantiating the `HumanName` class with a string splits on commas and then spaces, - classifying name parts based on placement in the string and matches against known name - pieces like titles and suffixes. - - It correctly handles some common conjunctions and special prefixes to last names - like "del". Titles and conjunctions can be chained together to handle complex - titles like "Asst Secretary of State". It can also try to correct capitalization - of names that are all upper- or lowercase names. - - It attempts the best guess that can be made with a simple, rule-based approach. - Its main use case is English and it is not likely to be useful for languages - that do not conform to the supported name structure. It's not perfect, but it - gets you pretty far. - - Installation - ------------ - - :: - - pip install nameparser - - If you want to try out the latest code from GitHub you can - install with pip using the command below. 
- - ``pip install -e git+git://github.com/derek73/python-nameparser.git#egg=nameparser`` - - If you need to handle lists of names, check out - `namesparser `_, a - compliment to this module that handles multiple names in a string. - - - Quick Start Example - ------------------- - - :: - - >>> from nameparser import HumanName - >>> name = HumanName("Dr. Juan Q. Xavier de la Vega III (Doc Vega)") - >>> name - - >>> name.last - 'de la Vega' - >>> name.as_dict() - {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'} - >>> str(name) - 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)' - >>> name.string_format = "{first} {last}" - >>> str(name) - 'Juan de la Vega' - - - The parser does not attempt to correct mistakes in the input. It mostly just splits on white - space and puts things in buckets based on their position in the string. This also means - the difference between 'title' and 'suffix' is positional, not semantic. "Dr" is a title - when it comes before the name and a suffix when it comes after. ("Pre-nominal" - and "post-nominal" would probably be better names.) - - :: - - >>> name = HumanName("1 & 2, 3 4 5, Mr.") - >>> name - - - Customization - ------------- - - Your project may need some adjustment for your dataset. You can - do this in your own pre- or post-processing, by `customizing the configured pre-defined - sets`_ of titles, prefixes, etc., or by subclassing the `HumanName` class. See the - `full documentation`_ for more information. - - - `Full documentation`_ - ~~~~~~~~~~~~~~~~~~~~~ - - .. _customizing the configured pre-defined sets: http://nameparser.readthedocs.org/en/latest/customize.html - .. _Full documentation: http://nameparser.readthedocs.org/en/latest/ - - - Contributing - ------------ - - If you come across name piece that you think should be in the default config, you're - probably right. `Start a New Issue`_ and we can get them added. 
- - Please let me know if there are ways this library could be structured to make - it easier for you to use in your projects. Read CONTRIBUTING.md_ for more info - on running the tests and contributing to the project. - - **GitHub Project** - - https://github.com/derek73/python-nameparser - - .. _CONTRIBUTING.md: https://github.com/derek73/python-nameparser/tree/master/CONTRIBUTING.md - .. _Start a New Issue: https://github.com/derek73/python-nameparser/issues - .. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py - - - .. |Build Status| image:: https://travis-ci.org/derek73/python-nameparser.svg?branch=master - :target: https://travis-ci.org/derek73/python-nameparser - .. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg - :target: https://pypi.org/project/nameparser/ - .. |Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest - :target: http://nameparser.readthedocs.io/en/latest/?badge=latest - .. |PyPI version| image:: https://img.shields.io/pypi/pyversions/nameparser.svg - :target: https://pypi.org/project/nameparser/ - Keywords: names,parser Platform: UNKNOWN Classifier: Intended Audience :: Developers @@ -165,3 +18,154 @@ Classifier: Natural Language :: English Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing :: Linguistic +License-File: LICENSE +License-File: AUTHORS + +Name Parser +=========== + +|Build Status| |PyPI| |PyPI version| |Documentation| + +A simple Python (3.2+ & 2.6+) module for parsing human names into their +individual components. + +* hn.title +* hn.first +* hn.middle +* hn.last +* hn.suffix +* hn.nickname +* hn.surnames *(middle + last)* +* hn.initials *(first initial of each name part)* + +Supported Name Structures +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The supported name structure is generally "Title First Middle Last Suffix", where all pieces +are optional. 
Comma-separated format like "Last, First" is also supported. + +1. Title Firstname "Nickname" Middle Middle Lastname Suffix +2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix] +3. Title Firstname M Lastname [Suffix], Suffix [Suffix] [, Suffix] + +Instantiating the `HumanName` class with a string splits on commas and then spaces, +classifying name parts based on placement in the string and matches against known name +pieces like titles and suffixes. + +It correctly handles some common conjunctions and special prefixes to last names +like "del". Titles and conjunctions can be chained together to handle complex +titles like "Asst Secretary of State". It can also try to correct capitalization +of names that are all upper- or lowercase names. + +It attempts the best guess that can be made with a simple, rule-based approach. +Its main use case is English and it is not likely to be useful for languages +that do not conform to the supported name structure. It's not perfect, but it +gets you pretty far. + +Installation +------------ + +:: + + pip install nameparser + +If you want to try out the latest code from GitHub you can +install with pip using the command below. + +``pip install -e git+git://github.com/derek73/python-nameparser.git#egg=nameparser`` + +If you need to handle lists of names, check out +`namesparser `_, a +compliment to this module that handles multiple names in a string. + + +Quick Start Example +------------------- + +:: + + >>> from nameparser import HumanName + >>> name = HumanName("Dr. Juan Q. Xavier de la Vega III (Doc Vega)") + >>> name + + >>> name.last + 'de la Vega' + >>> name.as_dict() + {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'} + >>> str(name) + 'Dr. Juan Q. 
Xavier de la Vega III (Doc Vega)' + >>> name.string_format = "{first} {last}" + >>> str(name) + 'Juan de la Vega' + + +The parser does not attempt to correct mistakes in the input. It mostly just splits on white +space and puts things in buckets based on their position in the string. This also means +the difference between 'title' and 'suffix' is positional, not semantic. "Dr" is a title +when it comes before the name and a suffix when it comes after. ("Pre-nominal" +and "post-nominal" would probably be better names.) + +:: + + >>> name = HumanName("1 & 2, 3 4 5, Mr.") + >>> name + + +Customization +------------- + +Your project may need some adjustment for your dataset. You can +do this in your own pre- or post-processing, by `customizing the configured pre-defined +sets`_ of titles, prefixes, etc., or by subclassing the `HumanName` class. See the +`full documentation`_ for more information. + + +`Full documentation`_ +~~~~~~~~~~~~~~~~~~~~~ + +.. _customizing the configured pre-defined sets: http://nameparser.readthedocs.org/en/latest/customize.html +.. _Full documentation: http://nameparser.readthedocs.org/en/latest/ + + +Contributing +------------ + +If you come across name piece that you think should be in the default config, you're +probably right. `Start a New Issue`_ and we can get them added. + +Please let me know if there are ways this library could be structured to make +it easier for you to use in your projects. Read CONTRIBUTING.md_ for more info +on running the tests and contributing to the project. + +**GitHub Project** + +https://github.com/derek73/python-nameparser + +.. _CONTRIBUTING.md: https://github.com/derek73/python-nameparser/tree/master/CONTRIBUTING.md +.. _Start a New Issue: https://github.com/derek73/python-nameparser/issues +.. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py + +.. 
|Build Status| image:: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml/badge.svg + :target: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml +.. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg + :target: https://pypi.org/project/nameparser/ +.. |Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest + :target: http://nameparser.readthedocs.io/en/latest/?badge=latest +.. |PyPI version| image:: https://img.shields.io/pypi/pyversions/nameparser.svg + :target: https://pypi.org/project/nameparser/ + + diff -Nru python-nameparser-1.0.6/PKG-INFO python-nameparser-1.1.1/PKG-INFO --- python-nameparser-1.0.6/PKG-INFO 2020-02-08 21:41:39.000000000 +0000 +++ python-nameparser-1.1.1/PKG-INFO 2022-01-29 02:29:36.415858500 +0000 @@ -1,158 +1,11 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: nameparser -Version: 1.0.6 +Version: 1.1.1 Summary: A simple Python module for parsing human names into their individual components. Home-page: https://github.com/derek73/python-nameparser Author: Derek Gulbranson Author-email: derek73@gmail.com License: LGPL -Description: Name Parser - =========== - - |Build Status| |PyPI| |PyPI version| |Documentation| - - A simple Python (3.2+ & 2.6+) module for parsing human names into their - individual components. - - * hn.title - * hn.first - * hn.middle - * hn.last - * hn.suffix - * hn.nickname - * hn.surnames *(middle + last)* - - Supported Name Structures - ~~~~~~~~~~~~~~~~~~~~~~~~~ - - The supported name structure is generally "Title First Middle Last Suffix", where all pieces - are optional. Comma-separated format like "Last, First" is also supported. - - 1. Title Firstname "Nickname" Middle Middle Lastname Suffix - 2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix] - 3. 
Title Firstname M Lastname [Suffix], Suffix [Suffix] [, Suffix] - - Instantiating the `HumanName` class with a string splits on commas and then spaces, - classifying name parts based on placement in the string and matches against known name - pieces like titles and suffixes. - - It correctly handles some common conjunctions and special prefixes to last names - like "del". Titles and conjunctions can be chained together to handle complex - titles like "Asst Secretary of State". It can also try to correct capitalization - of names that are all upper- or lowercase names. - - It attempts the best guess that can be made with a simple, rule-based approach. - Its main use case is English and it is not likely to be useful for languages - that do not conform to the supported name structure. It's not perfect, but it - gets you pretty far. - - Installation - ------------ - - :: - - pip install nameparser - - If you want to try out the latest code from GitHub you can - install with pip using the command below. - - ``pip install -e git+git://github.com/derek73/python-nameparser.git#egg=nameparser`` - - If you need to handle lists of names, check out - `namesparser `_, a - compliment to this module that handles multiple names in a string. - - - Quick Start Example - ------------------- - - :: - - >>> from nameparser import HumanName - >>> name = HumanName("Dr. Juan Q. Xavier de la Vega III (Doc Vega)") - >>> name - - >>> name.last - 'de la Vega' - >>> name.as_dict() - {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'} - >>> str(name) - 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)' - >>> name.string_format = "{first} {last}" - >>> str(name) - 'Juan de la Vega' - - - The parser does not attempt to correct mistakes in the input. It mostly just splits on white - space and puts things in buckets based on their position in the string. 
This also means - the difference between 'title' and 'suffix' is positional, not semantic. "Dr" is a title - when it comes before the name and a suffix when it comes after. ("Pre-nominal" - and "post-nominal" would probably be better names.) - - :: - - >>> name = HumanName("1 & 2, 3 4 5, Mr.") - >>> name - - - Customization - ------------- - - Your project may need some adjustment for your dataset. You can - do this in your own pre- or post-processing, by `customizing the configured pre-defined - sets`_ of titles, prefixes, etc., or by subclassing the `HumanName` class. See the - `full documentation`_ for more information. - - - `Full documentation`_ - ~~~~~~~~~~~~~~~~~~~~~ - - .. _customizing the configured pre-defined sets: http://nameparser.readthedocs.org/en/latest/customize.html - .. _Full documentation: http://nameparser.readthedocs.org/en/latest/ - - - Contributing - ------------ - - If you come across name piece that you think should be in the default config, you're - probably right. `Start a New Issue`_ and we can get them added. - - Please let me know if there are ways this library could be structured to make - it easier for you to use in your projects. Read CONTRIBUTING.md_ for more info - on running the tests and contributing to the project. - - **GitHub Project** - - https://github.com/derek73/python-nameparser - - .. _CONTRIBUTING.md: https://github.com/derek73/python-nameparser/tree/master/CONTRIBUTING.md - .. _Start a New Issue: https://github.com/derek73/python-nameparser/issues - .. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py - - - .. |Build Status| image:: https://travis-ci.org/derek73/python-nameparser.svg?branch=master - :target: https://travis-ci.org/derek73/python-nameparser - .. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg - :target: https://pypi.org/project/nameparser/ - .. 
|Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest - :target: http://nameparser.readthedocs.io/en/latest/?badge=latest - .. |PyPI version| image:: https://img.shields.io/pypi/pyversions/nameparser.svg - :target: https://pypi.org/project/nameparser/ - Keywords: names,parser Platform: UNKNOWN Classifier: Intended Audience :: Developers @@ -165,3 +18,154 @@ Classifier: Natural Language :: English Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing :: Linguistic +License-File: LICENSE +License-File: AUTHORS + +Name Parser +=========== + +|Build Status| |PyPI| |PyPI version| |Documentation| + +A simple Python (3.2+ & 2.6+) module for parsing human names into their +individual components. + +* hn.title +* hn.first +* hn.middle +* hn.last +* hn.suffix +* hn.nickname +* hn.surnames *(middle + last)* +* hn.initials *(first initial of each name part)* + +Supported Name Structures +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The supported name structure is generally "Title First Middle Last Suffix", where all pieces +are optional. Comma-separated format like "Last, First" is also supported. + +1. Title Firstname "Nickname" Middle Middle Lastname Suffix +2. Lastname [Suffix], Title Firstname (Nickname) Middle Middle[,] Suffix [, Suffix] +3. Title Firstname M Lastname [Suffix], Suffix [Suffix] [, Suffix] + +Instantiating the `HumanName` class with a string splits on commas and then spaces, +classifying name parts based on placement in the string and matches against known name +pieces like titles and suffixes. + +It correctly handles some common conjunctions and special prefixes to last names +like "del". Titles and conjunctions can be chained together to handle complex +titles like "Asst Secretary of State". It can also try to correct capitalization +of names that are all upper- or lowercase names. + +It attempts the best guess that can be made with a simple, rule-based approach. 
+Its main use case is English and it is not likely to be useful for languages +that do not conform to the supported name structure. It's not perfect, but it +gets you pretty far. + +Installation +------------ + +:: + + pip install nameparser + +If you want to try out the latest code from GitHub you can +install with pip using the command below. + +``pip install -e git+git://github.com/derek73/python-nameparser.git#egg=nameparser`` + +If you need to handle lists of names, check out +`namesparser <https://github.com/gwu-libraries/namesparser>`_, a +complement to this module that handles multiple names in a string. + + +Quick Start Example +------------------- + +:: + + >>> from nameparser import HumanName + >>> name = HumanName("Dr. Juan Q. Xavier de la Vega III (Doc Vega)") + >>> name + + >>> name.last + 'de la Vega' + >>> name.as_dict() + {'last': 'de la Vega', 'suffix': 'III', 'title': 'Dr.', 'middle': 'Q. Xavier', 'nickname': 'Doc Vega', 'first': 'Juan'} + >>> str(name) + 'Dr. Juan Q. Xavier de la Vega III (Doc Vega)' + >>> name.string_format = "{first} {last}" + >>> str(name) + 'Juan de la Vega' + + +The parser does not attempt to correct mistakes in the input. It mostly just splits on white +space and puts things in buckets based on their position in the string. This also means +the difference between 'title' and 'suffix' is positional, not semantic. "Dr" is a title +when it comes before the name and a suffix when it comes after. ("Pre-nominal" +and "post-nominal" would probably be better names.) + +:: + + >>> name = HumanName("1 & 2, 3 4 5, Mr.") + >>> name + + +Customization +------------- + +Your project may need some adjustment for your dataset. You can +do this in your own pre- or post-processing, by `customizing the configured pre-defined +sets`_ of titles, prefixes, etc., or by subclassing the `HumanName` class. See the +`full documentation`_ for more information. + + +`Full documentation`_ +~~~~~~~~~~~~~~~~~~~~~ + +.. 
_customizing the configured pre-defined sets: http://nameparser.readthedocs.org/en/latest/customize.html +.. _Full documentation: http://nameparser.readthedocs.org/en/latest/ + + +Contributing +------------ + +If you come across name pieces that you think should be in the default config, you're +probably right. `Start a New Issue`_ and we can get them added. + +Please let me know if there are ways this library could be structured to make +it easier for you to use in your projects. Read CONTRIBUTING.md_ for more info +on running the tests and contributing to the project. + +**GitHub Project** + +https://github.com/derek73/python-nameparser + +.. _CONTRIBUTING.md: https://github.com/derek73/python-nameparser/tree/master/CONTRIBUTING.md +.. _Start a New Issue: https://github.com/derek73/python-nameparser/issues +.. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py + +.. |Build Status| image:: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml/badge.svg + :target: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml +.. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg + :target: https://pypi.org/project/nameparser/ +.. |Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest + :target: http://nameparser.readthedocs.io/en/latest/?badge=latest +.. 
|PyPI version| image:: https://img.shields.io/pypi/pyversions/nameparser.svg + :target: https://pypi.org/project/nameparser/ + + diff -Nru python-nameparser-1.0.6/README.rst python-nameparser-1.1.1/README.rst --- python-nameparser-1.0.6/README.rst 2018-09-22 20:34:08.000000000 +0000 +++ python-nameparser-1.1.1/README.rst 2022-01-29 02:29:27.000000000 +0000 @@ -13,6 +13,7 @@ * hn.suffix * hn.nickname * hn.surnames *(middle + last)* +* hn.initials *(first initial of each name part)* Supported Name Structures ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -135,9 +136,8 @@ .. _Start a New Issue: https://github.com/derek73/python-nameparser/issues .. _click here to propose changes to the titles: https://github.com/derek73/python-nameparser/edit/master/nameparser/config/titles.py - -.. |Build Status| image:: https://travis-ci.org/derek73/python-nameparser.svg?branch=master - :target: https://travis-ci.org/derek73/python-nameparser +.. |Build Status| image:: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml/badge.svg + :target: https://github.com/derek73/python-nameparser/actions/workflows/python-package.yml .. |PyPI| image:: https://img.shields.io/pypi/v/nameparser.svg :target: https://pypi.org/project/nameparser/ .. |Documentation| image:: https://readthedocs.org/projects/nameparser/badge/?version=latest diff -Nru python-nameparser-1.0.6/tests.py python-nameparser-1.1.1/tests.py --- python-nameparser-1.0.6/tests.py 2019-12-12 05:08:04.000000000 +0000 +++ python-nameparser-1.1.1/tests.py 2022-01-29 02:29:27.000000000 +0000 @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import unittest """ Run this file to run the tests. 
@@ -19,6 +20,7 @@ """ import logging +import re try: import dill except ImportError: @@ -26,11 +28,10 @@ from nameparser import HumanName from nameparser.util import u -from nameparser.config import Constants +from nameparser.config import Constants, TupleManager log = logging.getLogger('HumanName') -import unittest try: unittest.expectedFailure except AttributeError: @@ -114,7 +115,6 @@ hn.first_list = ["Larry"] self.m(hn.full_name, "Larry Williams", hn) - def test_assignment_to_attribute(self): hn = HumanName("John A. Kenneth Doe, Jr.") hn.last = "de la Vega" @@ -200,6 +200,71 @@ hn = HumanName("John Edgar Casey Williams III") self.m(hn.surnames, "Edgar Casey Williams", hn) + def test_is_prefix_with_list(self): + hn = HumanName() + items = ['firstname', 'lastname', 'del'] + self.assertTrue(hn.is_prefix(items)) + self.assertTrue(hn.is_prefix(items[1:])) + + def test_is_conjunction_with_list(self): + hn = HumanName() + items = ['firstname', 'lastname', 'and'] + self.assertTrue(hn.is_conjunction(items)) + self.assertTrue(hn.is_conjunction(items[1:])) + + def test_override_constants(self): + C = Constants() + hn = HumanName(constants=C) + self.assertTrue(hn.C is C) + + def test_override_regex(self): + var = TupleManager([("spaces", re.compile(r"\s+", re.U)),]) + C = Constants(regexes=var) + hn = HumanName(constants=C) + self.assertTrue(hn.C.regexes == var) + + def test_override_titles(self): + var = ["abc","def"] + C = Constants(titles=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.titles) == sorted(var)) + + def test_override_first_name_titles(self): + var = ["abc","def"] + C = Constants(first_name_titles=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.first_name_titles) == sorted(var)) + + def test_override_prefixes(self): + var = ["abc","def"] + C = Constants(prefixes=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.prefixes) == sorted(var)) + + def test_override_suffix_acronyms(self): + var = ["abc","def"] + C = 
Constants(suffix_acronyms=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.suffix_acronyms) == sorted(var)) + + def test_override_suffix_not_acronyms(self): + var = ["abc","def"] + C = Constants(suffix_not_acronyms=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.suffix_not_acronyms) == sorted(var)) + + def test_override_conjunctions(self): + var = ["abc","def"] + C = Constants(conjunctions=var) + hn = HumanName(constants=C) + self.assertTrue(sorted(hn.C.conjunctions) == sorted(var)) + + def test_override_capitalization_exceptions(self): + var = TupleManager([("spaces", re.compile(r"\s+", re.U)),]) + C = Constants(capitalization_exceptions=var) + hn = HumanName(constants=C) + self.assertTrue(hn.C.capitalization_exceptions == var) + class FirstNameHandlingTests(HumanNameTestBase): def test_first_name(self): @@ -210,16 +275,16 @@ hn = HumanName("Rev Andrews") self.m(hn.title, "Rev", hn) self.m(hn.last, "Andrews", hn) - + # TODO: Seems "Andrews, M.D.", Andrews should be treated as a last name - # but other suffixes like "George Jr." should be first names. Might be + # but other suffixes like "George Jr." should be first names. 
Might be # related to https://github.com/derek73/python-nameparser/issues/2 @unittest.expectedFailure def test_assume_suffix_title_and_one_other_name_is_last_name(self): hn = HumanName("Andrews, M.D.") self.m(hn.suffix, "M.D.", hn) self.m(hn.last, "Andrews", hn) - + def test_suffix_in_lastname_part_of_lastname_comma_format(self): hn = HumanName("Smith Jr., John") self.m(hn.last, "Smith", hn) @@ -230,22 +295,22 @@ hn = HumanName("Sir Gerald") self.m(hn.title, "Sir", hn) self.m(hn.first, "Gerald", hn) - + def test_king_exception_to_first_name_rule(self): hn = HumanName("King Henry") self.m(hn.title, "King", hn) self.m(hn.first, "Henry", hn) - + def test_queen_exception_to_first_name_rule(self): hn = HumanName("Queen Elizabeth") self.m(hn.title, "Queen", hn) self.m(hn.first, "Elizabeth", hn) - + def test_dame_exception_to_first_name_rule(self): hn = HumanName("Dame Mary") self.m(hn.title, "Dame", hn) self.m(hn.first, "Mary", hn) - + def test_first_name_is_not_prefix_if_only_two_parts(self): """When there are only two parts, don't join prefixes or conjunctions""" hn = HumanName("Van Nguyen") @@ -263,7 +328,7 @@ hn = HumanName("Mr. 
Van Nguyen") self.m(hn.first, "Van", hn) self.m(hn.last, "Nguyen", hn) - + class HumanNameBruteForceTests(HumanNameTestBase): @@ -1084,7 +1149,7 @@ def test_multiple_conjunctions2(self): hn = HumanName("part1 of and The part2 of the part3 And part4") self.m(hn.first, "part1 of and The part2 of the part3 And part4", hn) - + def test_ends_with_conjunction(self): hn = HumanName("Jon Dough and") self.m(hn.first, "Jon", hn) @@ -1242,12 +1307,12 @@ self.m(hn.first, "Yin", hn) self.m(hn.middle, "a", hn) self.m(hn.last, "Le", hn) - + def test_conjunction_in_an_address_with_a_title(self): hn = HumanName("His Excellency Lord Duncan") self.m(hn.title, "His Excellency Lord", hn) self.m(hn.last, "Duncan", hn) - + @unittest.expectedFailure def test_conjunction_in_an_address_with_a_first_name_title(self): hn = HumanName("Her Majesty Queen Elizabeth") @@ -1272,7 +1337,7 @@ self.m(hn.title, "Te", hn) self.m(hn.first, "Awanui-a-Rangi", hn) self.m(hn.last, "Black", hn) - + def test_remove_title(self): hn = HumanName("Hon Solo", constants=None) start_len = len(hn.C.titles) @@ -1282,7 +1347,7 @@ hn.parse_full_name() self.m(hn.first, "Hon", hn) self.m(hn.last, "Solo", hn) - + def test_add_multiple_arguments(self): hn = HumanName("Assoc Dean of Chemistry Robert Johns", constants=None) hn.C.titles.add('dean', 'Chemistry') @@ -1310,7 +1375,7 @@ self.assertEqual(hn2.has_own_config, False) # clean up so we don't mess up other tests hn.C.titles.add('hon') - + def test_remove_multiple_arguments(self): hn = HumanName("Ms Hon Solo", constants=None) hn.C.titles.remove('hon', 'ms') @@ -1370,7 +1435,7 @@ self.m(hn.middle, "", hn) self.m(hn.last, "Franklin", hn) self.m(hn.nickname, "Ben", hn) - + def test_two_word_nickname_in_parenthesis(self): hn = HumanName("Benjamin (Big Ben) Franklin") self.m(hn.first, "Benjamin", hn) @@ -1391,7 +1456,7 @@ self.m(hn.middle, "", hn) self.m(hn.last, "Franklin", hn) self.m(hn.nickname, "Ben", hn) - + def test_nickname_in_parenthesis_with_comma_and_suffix(self): hn 
= HumanName("Franklin, Benjamin (Ben), Jr.") self.m(hn.first, "Benjamin", hn) @@ -1399,7 +1464,7 @@ self.m(hn.last, "Franklin", hn) self.m(hn.suffix, "Jr.", hn) self.m(hn.nickname, "Ben", hn) - + def test_nickname_in_single_quotes(self): hn = HumanName("Benjamin 'Ben' Franklin") self.m(hn.first, "Benjamin", hn) @@ -1413,28 +1478,28 @@ self.m(hn.middle, "", hn) self.m(hn.last, "Franklin", hn) self.m(hn.nickname, "Ben", hn) - + def test_single_quotes_on_first_name_not_treated_as_nickname(self): hn = HumanName("Brian Andrew O'connor") self.m(hn.first, "Brian", hn) self.m(hn.middle, "Andrew", hn) self.m(hn.last, "O'connor", hn) self.m(hn.nickname, "", hn) - + def test_single_quotes_on_both_name_not_treated_as_nickname(self): hn = HumanName("La'tanya O'connor") self.m(hn.first, "La'tanya", hn) self.m(hn.middle, "", hn) self.m(hn.last, "O'connor", hn) self.m(hn.nickname, "", hn) - + def test_single_quotes_on_end_of_last_name_not_treated_as_nickname(self): hn = HumanName("Mari' Aube'") self.m(hn.first, "Mari'", hn) self.m(hn.middle, "", hn) self.m(hn.last, "Aube'", hn) self.m(hn.nickname, "", hn) - + def test_okina_inside_name_not_treated_as_nickname(self): hn = HumanName("Harrieta Keōpūolani Nāhiʻenaʻena") self.m(hn.first, "Harrieta", hn) @@ -1492,7 +1557,6 @@ self.m(hn.nickname, "Rick", hn) - # class MaidenNameTestCase(HumanNameTestBase): # # def test_parenthesis_and_quotes_together(self): @@ -1542,17 +1606,28 @@ hn = HumanName("Juan del Sur") self.m(hn.first, "Juan", hn) self.m(hn.last, "del Sur", hn) - + def test_prefix_with_period(self): hn = HumanName("Jill St. John") self.m(hn.first, "Jill", hn) self.m(hn.last, "St. 
John", hn) - + def test_prefix_before_two_part_last_name(self): hn = HumanName("pennie von bergen wessels") self.m(hn.first, "pennie", hn) self.m(hn.last, "von bergen wessels", hn) + def test_prefix_is_first_name(self): + hn = HumanName("Van Johnson") + self.m(hn.first, "Van", hn) + self.m(hn.last, "Johnson", hn) + + def test_prefix_is_first_name_with_middle_name(self): + hn = HumanName("Van Jeremy Johnson") + self.m(hn.first, "Van", hn) + self.m(hn.middle, "Jeremy", hn) + self.m(hn.last, "Johnson", hn) + def test_prefix_before_two_part_last_name_with_suffix(self): hn = HumanName("pennie von bergen wessels III") self.m(hn.first, "pennie", hn) @@ -1641,7 +1716,7 @@ class SuffixesTestCase(HumanNameTestBase): - + def test_suffix(self): hn = HumanName("Joe Franklin Jr") self.m(hn.first, "Joe", hn) @@ -1716,13 +1791,13 @@ self.m(hn.first, "Adolph", hn) self.m(hn.last, "D", hn) - # http://en.wikipedia.org/wiki/Ma_(surname) + def test_potential_suffix_that_is_also_last_name(self): hn = HumanName("Jack Ma") self.m(hn.first, "Jack", hn) self.m(hn.last, "Ma", hn) - + def test_potential_suffix_that_is_also_last_name_comma(self): hn = HumanName("Ma, Jack") self.m(hn.first, "Jack", hn) @@ -1784,8 +1859,8 @@ self.m(hn.suffix, "Jr.", hn) def test_last_name_is_also_title_with_comma(self): - hn = HumanName("Duke Martin Luther King, Jr.") - self.m(hn.title, "Duke", hn) + hn = HumanName("Dr Martin Luther King, Jr.") + self.m(hn.title, "Dr", hn) self.m(hn.first, "Martin", hn) self.m(hn.middle, "Luther", hn) self.m(hn.last, "King", hn) @@ -1820,27 +1895,27 @@ self.m(hn.first, "Marc", hn) self.m(hn.middle, "Thomas", hn) self.m(hn.last, "Treadwell", hn) - + def test_conflict_with_chained_title_first_name_initial(self): hn = HumanName("U. S. 
Grant") self.m(hn.first, "U.", hn) self.m(hn.middle, "S.", hn) self.m(hn.last, "Grant", hn) - + def test_chained_title_first_name_initial_with_no_period(self): hn = HumanName("US Magistrate Judge T Michael Putnam") self.m(hn.title, "US Magistrate Judge", hn) self.m(hn.first, "T", hn) self.m(hn.middle, "Michael", hn) self.m(hn.last, "Putnam", hn) - + def test_chained_hyphenated_title(self): hn = HumanName("US Magistrate-Judge Elizabeth E Campbell") self.m(hn.title, "US Magistrate-Judge", hn) self.m(hn.first, "Elizabeth", hn) self.m(hn.middle, "E", hn) self.m(hn.last, "Campbell", hn) - + def test_chained_hyphenated_title_with_comma_suffix(self): hn = HumanName("Mag-Judge Harwell G Davis, III") self.m(hn.title, "Mag-Judge", hn) @@ -1883,7 +1958,7 @@ self.m(hn.title, "King", hn) self.m(hn.first, "John", hn) self.m(hn.last, "V.", hn) - + def test_initials_also_suffix(self): hn = HumanName("Smith, J.R.") self.m(hn.first, "J.R.", hn) @@ -1981,6 +2056,21 @@ self.m(hn.first, "John", hn) self.m(hn.last, "Doe", hn) + def test_mac_with_spaces(self): + hn = HumanName("Jane Mac Beth") + self.m(hn.first, "Jane", hn) + self.m(hn.last, "Mac Beth", hn) + + def test_mac_as_first_name(self): + hn = HumanName("Mac Miller") + self.m(hn.first, "Mac", hn) + self.m(hn.last, "Miller", hn) + + def test_multiple_prefixes(self): + hn = HumanName("Mike van der Velt") + self.m(hn.first, "Mike", hn) + self.m(hn.last, "van der Velt", hn) + class HumanNameCapitalizationTestCase(HumanNameTestBase): def test_capitalization_exception_for_III(self): @@ -2062,10 +2152,10 @@ class HumanNameOutputFormatTests(HumanNameTestBase): - + def test_formatting_init_argument(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)", - string_format="TEST1") + string_format="TEST1") self.assertEqual(u(hn), "TEST1") def test_formatting_constants_attribute(self): @@ -2160,7 +2250,7 @@ self.assertEqual(u(hn), "Rev John (Kenny) A. Kenneth Doe III") hn.nickname = '' self.assertEqual(u(hn), "Rev John A. 
Kenneth Doe III") - + def test_remove_emojis(self): hn = HumanName("Sam Smith 😊") self.m(hn.first, "Sam", hn) @@ -2184,6 +2274,77 @@ # test cleanup +class InitialsTestCase(HumanNameTestBase): + def test_initials(self): + hn = HumanName("Andrew Boris Petersen") + self.m(hn.initials(), "A. B. P.", hn) + + def test_initials_simple_name(self): + hn = HumanName("John Doe") + self.m(hn.initials(), "J. D.", hn) + hn = HumanName("John Doe", initials_format="{first} {last}") + self.m(hn.initials(), "J. D.", hn) + hn = HumanName("John Doe", initials_format="{last}") + self.m(hn.initials(), "D.", hn) + hn = HumanName("John Doe", initials_format="{first}") + self.m(hn.initials(), "J.", hn) + hn = HumanName("John Doe", initials_format="{middle}") + self.m(hn.initials(), "", hn) + + def test_initials_complex_name(self): + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials(), "J. A. K. D.", hn) + + def test_initials_format(self): + hn = HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first} {middle}") + self.m(hn.initials(), "J. A. K.", hn) + hn = HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first} {last}") + self.m(hn.initials(), "J. D.", hn) + hn = HumanName("Doe, John A. Kenneth, Jr.", initials_format="{middle} {last}") + self.m(hn.initials(), "A. K. D.", hn) + hn = HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first}, {last}") + self.m(hn.initials(), "J., D.", hn) + + def test_initials_format_constants(self): + from nameparser.config import CONSTANTS + _orig = CONSTANTS.initials_format + CONSTANTS.initials_format = "{first} {last}" + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials(), "J. D.", hn) + CONSTANTS.initials_format = "{first} {last}" + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials(), "J. D.", hn) + CONSTANTS.initials_format = _orig + + def test_initials_delimiter(self): + hn = HumanName("Doe, John A. 
Kenneth, Jr.", initials_delimiter=";") + self.m(hn.initials(), "J; A; K; D;", hn) + + def test_initials_delimiter_constants(self): + from nameparser.config import CONSTANTS + _orig = CONSTANTS.initials_delimiter + CONSTANTS.initials_delimiter = ";" + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials(), "J; A; K; D;", hn) + CONSTANTS.initials_delimiter = _orig + + def test_initials_list(self): + hn = HumanName("Andrew Boris Petersen") + self.m(hn.initials_list(), ["A", "B", "P"], hn) + + def test_initials_list_complex_name(self): + hn = HumanName("Doe, John A. Kenneth, Jr.") + self.m(hn.initials_list(), ["J", "A", "K", "D"], hn) + + def test_initials_with_prefix_firstname(self): + hn = HumanName("Van Jeremy Johnson") + self.m(hn.initials_list(), ["V", "J", "J"], hn) + + def test_initials_with_prefix(self): + hn = HumanName("Alex van Johnson") + self.m(hn.initials_list(), ["A", "J"], hn) + + TEST_NAMES = ( "John Doe", "John Doe, Jr.", @@ -2359,7 +2520,7 @@ "U.S. District Judge Marc Thomas Treadwell", "Dra. Andréia da Silva", "Srta. Andréia da Silva", - + ) @@ -2411,6 +2572,7 @@ print((repr(hn_instance))) hn_instance.capitalize() print((repr(hn_instance))) + print("Initials: " + hn_instance.initials()) else: print("-"*80) print("Running tests")