Binary files /tmp/TIkJHYDtUv/ruby-text-1.2.1/checksums.yaml.gz and /tmp/yj7adBX0RK/ruby-text-1.3.0/checksums.yaml.gz differ diff -Nru ruby-text-1.2.1/COPYING.txt ruby-text-1.3.0/COPYING.txt --- ruby-text-1.2.1/COPYING.txt 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/COPYING.txt 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,21 @@ +== Licence (MIT) + +Copyright (c) 2006-2013 Paul Battley, Michael Neumann, Tim Fletcher + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -Nru ruby-text-1.2.1/debian/changelog ruby-text-1.3.0/debian/changelog --- ruby-text-1.2.1/debian/changelog 2014-04-30 12:06:45.000000000 +0000 +++ ruby-text-1.3.0/debian/changelog 2014-10-13 22:20:20.000000000 +0000 @@ -1,8 +1,16 @@ -ruby-text (1.2.1-1build1) utopic; urgency=medium +ruby-text (1.3.0-1) unstable; urgency=low - * No-change rebuild to update the Ruby-Version attribute. + * Team upload. + * Imported Upstream version 1.3.0 + * d/control: + - bumped up standards version to 3.9.6 (no changes needed) + - added testsuite header + - switched homepage to https + * d/copyright: + - update copyright to Expat + - switched homepage to https - -- Matthias Klose Wed, 30 Apr 2014 12:06:45 +0000 + -- Jonas Genannt Mon, 13 Oct 2014 16:07:18 +0200 ruby-text (1.2.1-1) unstable; urgency=low diff -Nru ruby-text-1.2.1/debian/control ruby-text-1.3.0/debian/control --- ruby-text-1.2.1/debian/control 2013-09-03 22:06:38.000000000 +0000 +++ ruby-text-1.3.0/debian/control 2014-10-13 22:20:20.000000000 +0000 @@ -4,11 +4,12 @@ Maintainer: Debian Ruby Extras Maintainers Uploaders: Olivier Sallou , Cédric Boutillier Build-Depends: debhelper (>= 7.0.50~), gem2deb (>= 0.2.10~), rake -Standards-Version: 3.9.4 +Standards-Version: 3.9.6 Vcs-Git: git://anonscm.debian.org/pkg-ruby-extras/ruby-text.git Vcs-Browser: http://anonscm.debian.org/gitweb?p=pkg-ruby-extras/ruby-text.git;a=summary -Homepage: http://github.com/threedaymonk/text +Homepage: https://github.com/threedaymonk/text XS-Ruby-Versions: all +Testsuite: autopkgtest-pkg-ruby Package: ruby-text Architecture: all diff -Nru ruby-text-1.2.1/debian/copyright ruby-text-1.3.0/debian/copyright --- ruby-text-1.2.1/debian/copyright 2012-06-20 22:42:56.000000000 +0000 +++ ruby-text-1.3.0/debian/copyright 2014-10-13 22:20:20.000000000 +0000 @@ -1,14 +1,14 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: text -Source: http://github.com/threedaymonk/text +Source: https://github.com/threedaymonk/text Files: * -Copyright: 2009 Paul Battley - 2009 Michael Neumann - 2009 Tim Fletcher -License: GPL-2 or Ruby +Copyright: 2006-2013 Paul Battley + 2006-2013 Michael Neumann + 2006-2013 Tim Fletcher +License: Expat -Files: lib/text/porter_Stemming +Files: lib/text/porter_stemming.rb Copyright: 2003 Ray Pereda License: BSD-2-clause Redistribution and use in source and binary forms, with or without modification, are @@ -36,9 +36,28 @@ or implied, of copyright holder. Files: debian/* -Copyright: 2011 +Copyright: 2011 Olivier Sallou License: GPL-2 or Ruby +License: Expat + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + License: GPL-2 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public diff -Nru ruby-text-1.2.1/lib/text/double_metaphone.rb ruby-text-1.3.0/lib/text/double_metaphone.rb --- ruby-text-1.2.1/lib/text/double_metaphone.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/lib/text/double_metaphone.rb 2014-07-17 08:18:45.000000000 +0000 @@ -5,7 +5,7 @@ # # Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/ # -# Author: Tim Fletcher (twoggle@gmail.com) +# Author: Tim Fletcher (mail@tfletcher.com) # module Text # :nodoc: diff -Nru ruby-text-1.2.1/lib/text/levenshtein.rb ruby-text-1.3.0/lib/text/levenshtein.rb --- ruby-text-1.2.1/lib/text/levenshtein.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/lib/text/levenshtein.rb 2014-07-17 08:18:45.000000000 +0000 @@ -16,29 +16,107 @@ # Calculate the Levenshtein distance between two strings +str1+ and +str2+. # + # The optional argument max_distance can reduce the number of iterations by + # stopping if the Levenshtein distance exceeds this value. This increases + # performance where it is only necessary to compare the distance with a + # reference value instead of calculating the exact distance. # - # In Ruby 1.8, +str1+ and +str2+ should be ASCII, UTF-8, or a one-byte-per - # character encoding such as ISO-8859-*. They will be treated as UTF-8 if - # $KCODE is set appropriately (i.e. 'u'). Otherwise, the comparison will be - # performed byte-by-byte. There is no specific support for Shift-JIS or EUC - # strings. + # The distance is calculated in terms of Unicode codepoints. Be aware that + # this algorithm does not perform normalisation: if there is a possibility + # of different normalised forms being used, normalisation should be performed + # beforehand. # - # In Ruby 1.9+, the strings will be processed as UTF-8. - # - # When using Unicode text, be aware that this algorithm does not perform - # normalisation. If there is a possibility of different normalised forms - # being used, normalisation should be performed beforehand. - # - def distance(str1, str2) - prepare = - if "ruby".respond_to?(:encoding) - lambda { |str| str.encode(Encoding::UTF_8).unpack("U*") } + def distance(str1, str2, max_distance = nil) + if max_distance + distance_with_maximum(str1, str2, max_distance) + else + distance_without_maximum(str1, str2) + end + end + +private + def distance_with_maximum(str1, str2, max_distance) # :nodoc: + s, t = [str1, str2].sort_by(&:length). + map{ |str| str.encode(Encoding::UTF_8).unpack("U*") } + n = s.length + m = t.length + big_int = n * m + return m if n.zero? + return n if m.zero? + return 0 if s == t + + # If the length difference is already greater than the max_distance, then + # there is nothing else to check + if (n - m).abs >= max_distance + return max_distance + end + + # The values necessary for our threshold are written; the ones after must + # be filled with large integers since the tailing member of the threshold + # window in the bottom array will run min across them + d = (m + 1).times.map { |i| + if i < m || i < max_distance + 1 + i + else + big_int + end + } + x = nil + e = nil + + n.times do |i| + # Since we're reusing arrays, we need to be sure to wipe the value left + # of the starting index; we don't have to worry about the value above the + # ending index as the arrays were initially filled with large integers + # and we progress to the right + if e.nil? + e = i + 1 else - rule = $KCODE.match(/^U/i) ? "U*" : "C*" - lambda { |str| str.unpack(rule) } + e = big_int end - s, t = [str1, str2].map(&prepare) + diag_index = t.length - s.length + i + + # If max_distance was specified, we can reduce second loop. So we set + # up our threshold window. + # See: + # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences: + # computer science and computational biology. + # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8. + # pp. 263–264. + min = [0, i - max_distance - 1].max + max = [m - 1, i + max_distance].min + + (min .. max).each do |j| + # If the diagonal value is already greater than the max_distance + # then we can safety return: the diagonal will never go lower again. + # See: http://www.levenshtein.net/ + if j == diag_index && d[j] >= max_distance + return max_distance + end + + cost = s[i] == t[j] ? 0 : 1 + x = [ + d[j+1] + 1, # insertion + e + 1, # deletion + d[j] + cost # substitution + ].min + + d[j] = e + e = x + end + d[m] = x + end + + if x > max_distance + return max_distance + else + return x + end + end + + def distance_without_maximum(str1, str2) # :nodoc: + s, t = [str1, str2].map{ |str| str.encode(Encoding::UTF_8).unpack("U*") } n = s.length m = t.length return m if n.zero? diff -Nru ruby-text-1.2.1/lib/text/metaphone.rb ruby-text-1.3.0/lib/text/metaphone.rb --- ruby-text-1.2.1/lib/text/metaphone.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/lib/text/metaphone.rb 2014-07-17 08:18:45.000000000 +0000 @@ -45,7 +45,7 @@ [ /c(?=ia)/, 'X' ], [ /[st](?=i[ao])/, 'X' ], [ /s?c(?=[iey])/, 'S' ], - [ /[cq]/, 'K' ], + [ /(ck?|q)/, 'K' ], [ /dg(?=[iey])/, 'J' ], [ /d/, 'T' ], [ /g(?=h[^aeiou])/, '' ], @@ -94,4 +94,4 @@ extend self end -end \ No newline at end of file +end diff -Nru ruby-text-1.2.1/lib/text/soundex.rb ruby-text-1.3.0/lib/text/soundex.rb --- ruby-text-1.2.1/lib/text/soundex.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/lib/text/soundex.rb 2014-07-17 08:18:45.000000000 +0000 @@ -27,9 +27,9 @@ # do not change the parameter "str" # def soundex_str(str) + str = str.upcase.gsub(/[^A-Z]/, "") return nil if str.empty? - str = str.upcase last_code = get_code(str[0,1]) soundex_code = str[0,1] @@ -40,15 +40,13 @@ if code == "0" then last_code = nil - elsif code == nil then - return nil elsif code != last_code then soundex_code += code last_code = code end end # for - return soundex_code + "000"[0,4-soundex_code.size] + return soundex_code.ljust(4, "0") end module_function :soundex_str diff -Nru ruby-text-1.2.1/lib/text/version.rb ruby-text-1.3.0/lib/text/version.rb --- ruby-text-1.2.1/lib/text/version.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/lib/text/version.rb 2014-07-17 08:18:45.000000000 +0000 @@ -1,8 +1,8 @@ module Text module VERSION #:nodoc: MAJOR = 1 - MINOR = 2 - TINY = 1 + MINOR = 3 + TINY = 0 STRING = [MAJOR, MINOR, TINY].join('.') end diff -Nru ruby-text-1.2.1/metadata.yml ruby-text-1.3.0/metadata.yml --- ruby-text-1.2.1/metadata.yml 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/metadata.yml 2014-07-17 08:18:47.000000000 +0000 @@ -1,8 +1,7 @@ --- !ruby/object:Gem::Specification name: text version: !ruby/object:Gem::Version - version: 1.2.1 - prerelease: + version: 1.3.0 platform: ruby authors: - Paul Battley @@ -11,64 +10,78 @@ autorequire: bindir: bin cert_chain: [] -date: 2012-06-10 00:00:00.000000000 Z -dependencies: [] -description: ! 'A collection of text algorithms: Levenshtein, Soundex, Metaphone, - Double Metaphone, Porter Stemming' +date: 2014-06-23 00:00:00.000000000 Z +dependencies: +- !ruby/object:Gem::Dependency + name: rake + requirement: !ruby/object:Gem::Requirement + requirements: + - - "~>" + - !ruby/object:Gem::Version + version: '10.0' + type: :development + prerelease: false + version_requirements: !ruby/object:Gem::Requirement + requirements: + - - "~>" + - !ruby/object:Gem::Version + version: '10.0' +description: 'A collection of text algorithms: Levenshtein, Soundex, Metaphone, Double + Metaphone, Porter Stemming' email: pbattley@gmail.com executables: [] extensions: [] extra_rdoc_files: - README.rdoc +- COPYING.txt files: +- COPYING.txt +- README.rdoc +- Rakefile +- lib/text.rb +- lib/text/double_metaphone.rb - lib/text/levenshtein.rb - lib/text/metaphone.rb -- lib/text/soundex.rb -- lib/text/white_similarity.rb -- lib/text/double_metaphone.rb - lib/text/porter_stemming.rb +- lib/text/soundex.rb - lib/text/version.rb -- lib/text.rb -- test/test_porter_stemming.rb -- test/data/chunky.flf -- test/data/porter_stemming_input.txt -- test/data/metaphone.txt +- lib/text/white_similarity.rb - test/data/double_metaphone.csv +- test/data/metaphone.yml +- test/data/metaphone_buggy.yml +- test/data/porter_stemming_input.txt - test/data/porter_stemming_output.txt -- test/data/metaphone_buggy.txt -- test/data/chunky.txt -- test/data/big.txt -- test/preamble.rb -- test/test_soundex.rb -- test/test_text.rb -- test/test_white_similarity.rb -- test/test_metaphone.rb -- test/test_levenshtein.rb -- test/test_double_metaphone.rb -- README.rdoc -- Rakefile +- test/data/soundex.yml +- test/double_metaphone_test.rb +- test/levenshtein_test.rb +- test/metaphone_test.rb +- test/porter_stemming_test.rb +- test/soundex_test.rb +- test/test_helper.rb +- test/text_test.rb +- test/white_similarity_test.rb homepage: http://github.com/threedaymonk/text -licenses: [] +licenses: +- MIT +metadata: {} post_install_message: rdoc_options: [] require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement - none: false requirements: - - - ! '>=' + - - ">=" - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement - none: false requirements: - - - ! '>=' + - - ">=" - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: text -rubygems_version: 1.8.11 +rubygems_version: 2.2.2 signing_key: -specification_version: 3 +specification_version: 4 summary: A collection of text algorithms test_files: [] diff -Nru ruby-text-1.2.1/Rakefile ruby-text-1.3.0/Rakefile --- ruby-text-1.2.1/Rakefile 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/Rakefile 2014-07-17 08:18:45.000000000 +0000 @@ -3,7 +3,14 @@ Rake::TestTask.new do |t| t.libs << 'test' + t.test_files = FileList['test/*_test.rb'] t.verbose = false end +desc "Run benchmark" +task :benchmark do |t| + system "ruby -v" + system "ruby perf/benchmark.rb" +end + task :default => :test diff -Nru ruby-text-1.2.1/README.rdoc ruby-text-1.3.0/README.rdoc --- ruby-text-1.2.1/README.rdoc 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/README.rdoc 2014-07-17 08:18:45.000000000 +0000 @@ -12,6 +12,10 @@ # => 0 Text::Levenshtein.distance('test', 'tent') # => 1 + Text::Levenshtein.distance('test', 'testing') + # => 3 + Text::Levenshtein.distance('test', 'testing', 2) + # => 2 === Metaphone @@ -52,4 +56,4 @@ == License -Same as Ruby. +MIT. See COPYING.txt for details. diff -Nru ruby-text-1.2.1/test/data/big.txt ruby-text-1.3.0/test/data/big.txt --- ruby-text-1.2.1/test/data/big.txt 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/data/big.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ - _ _ _ _ __ __ _ _ - | | | | | | | \ \ / / | | | | - | |__| | ___| | | ___ \ \ /\ / /__ _ __| | __| | - | __ |/ _ \ | |/ _ \ \ \/ \/ / _ \| '__| |/ _` | - | | | | __/ | | (_) | \ /\ / (_) | | | | (_| | - |_| |_|\___|_|_|\___/ \/ \/ \___/|_| |_|\__,_| - - \ No newline at end of file diff -Nru ruby-text-1.2.1/test/data/chunky.flf ruby-text-1.3.0/test/data/chunky.flf --- ruby-text-1.2.1/test/data/chunky.flf 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/data/chunky.flf 1970-01-01 00:00:00.000000000 +0000 @@ -1,512 +0,0 @@ -flf2a$ 5 4 20 15 1 -Square by Chris Gill, 30-JUN-94 -- based on .sig of Jeb Hagan. -$@ -$@ -$@ -$@ -$@@ - __ @ -| |@ -|__|@ -|__|@ - @@ - ____ @ -| | |@ - |_|_|@ - @ - @@ - _____ @ - _| | |_ @ -|_ _|@ -|_ _|@ - |__|__| @@ - __,-,__ @ -| ' '__|@ -|__ |@ -|_______|@ - |_| @@ - __ ___ @ -|__| |@ -| __|@ -|___|__|@ - @@ - __,-,__ @ -| ' '__|@ -| __|@ -|_______|@ - |_| @@ - __ @ -| |@ - |_|@ - @ - @@ - ___ @ -,' _|@ -| | @ -| |_ @ -`.___|@@ - ___ @ -|_ `.@ - | |@ - _| |@ -|___,'@@ - __ _ __ @ -| | | |@ - > < @ -|__|_|__|@ - @@ - __ @ - _| |_ @ -|_ _|@ - |__| @ - @@ - @ - @ - __ @ -| |@ - |_|@@ - @ - ______ @ -|______|@ - @ - @@ - @ - @ - __ @ -|__|@ - @@ - ___@ - / /@ - ,' ,' @ -/__/ @ - @@ - ______ @ -| |@ -| -- |@ -|______|@ - @@ - ____ @ -|_ | @ - _| |_ @ -|______|@ - @@ - ______ @ -|__ |@ -| __|@ -|______|@ - @@ - ______ @ -|__ |@ -|__ |@ -|______|@ - @@ - _____ @ -| | | @ -|__ |@ - |__| @ - @@ - ______ @ -| __|@ -|__ |@ -|______|@ - @@ - ______ @ -| __|@ -| __ |@ -|______|@ - @@ - ______ @ -| |@ -|_ |@ - |____|@ - @@ - ______ @ -| __ |@ -| __ |@ -|______|@ - @@ - ______ @ -| __ |@ -|__ |@ -|______|@ - @@ - __ @ -|__|@ - __ @ -|__|@ - @@ - __ @ -|__|@ - __ @ -| |@ - |_|@@ - __ @ - ,' _|@ -/ / @ -\ \_ @ - `.__|@@ - @ - ______ @ -|______|@ -|______|@ - @@ - __ @ -|_ `. @ - \ \@ - _/ /@ -|__,' @@ - _____ @ - |__ |@ - ', ,-'@ - |--| @ - '--' @@ - _________ @ -| ___ |@ -| | _ |@ -| |______|@ -|_________|@@ - _______ @ -| _ |@ -| |@ -|___|___|@ - @@ - ______ @ -| __ \@ -| __ <@ -|______/@ - @@ - ______ @ -| |@ -| ---|@ -|______|@ - @@ - _____ @ -| \ @ -| -- |@ -|_____/ @ - @@ - _______ @ -| ___|@ -| ___|@ -|_______|@ - @@ - _______ @ -| ___|@ -| ___|@ -|___| @ - @@ - _______ @ -| __|@ -| | |@ -|_______|@ - @@ - _______ @ -| | |@ -| |@ -|___|___|@ - @@ - _______ @ -|_ _|@ - _| |_ @ -|_______|@ - @@ - _____ @ - _| |@ -| |@ -|_______|@ - @@ - __ __ @ -| |/ |@ -| < @ -|__|\__|@ - @@ - _____ @ -| |_ @ -| |@ -|_______|@ - @@ - _______ @ -| | |@ -| |@ -|__|_|__|@ - @@ - _______ @ -| | |@ -| |@ -|__|____|@ - @@ - _______ @ -| |@ -| - |@ -|_______|@ - @@ - ______ @ -| __ \@ -| __/@ -|___| @ - @@ - _______ @ -| |@ -| - _|@ -|_______|@ - @@ - ______ @ -| __ \@ -| <@ -|___|__|@ - @@ - _______ @ -| __|@ -|__ |@ -|_______|@ - @@ - _______ @ -|_ _|@ - | | @ - |___| @ - @@ - _______ @ -| | |@ -| | |@ -|_______|@ - @@ - ___ ___ @ -| | |@ -| | |@ - \_____/ @ - @@ - ________ @ -| | | |@ -| | | |@ -|________|@ - @@ - ___ ___ @ -| | |@ -|- -|@ -|___|___|@ - @@ - ___ ___ @ -| | |@ - \ / @ - |___| @ - @@ - _______ @ -|__ |@ -| __|@ -|_______|@ - @@ - ____ @ -| _|@ -| | @ -| |_ @ -|____|@@ -___ @ -\ \ @ - `. `. @ - \__\@ - @@ - ____ @ -|_ |@ - | |@ - _| |@ -|____|@@ - ____ @ -| |@ -|_/\_|@ - @ - @@ - @ - @ - @ - ______ @ -|______|@@ - __ @ -| |@ -|_| @ - @ - @@ - @ -.---.-.@ -| _ |@ -|___._|@ - @@ - __ @ -| |--.@ -| _ |@ -|_____|@ - @@ - @ -.----.@ -| __|@ -|____|@ - @@ - __ @ -.--| |@ -| _ |@ -|_____|@ - @@ - @ -.-----.@ -| -__|@ -|_____|@ - @@ - ___ @ -.' _|@ -| _|@ -|__| @ - @@ - @ -.-----.@ -| _ |@ -|___ |@ -|_____|@@ - __ @ -| |--.@ -| |@ -|__|__|@ - @@ - __ @ -|__|@ -| |@ -|__|@ - @@ - __ @ - |__|@ - | |@ - | |@ -|___|@@ - __ @ -| |--.@ -| < @ -|__|__|@ - @@ - __ @ -| |@ -| |@ -|__|@ - @@ - @ -.--------.@ -| |@ -|__|__|__|@ - @@ - @ -.-----.@ -| |@ -|__|__|@ - @@ - @ -.-----.@ -| _ |@ -|_____|@ - @@ - @ -.-----.@ -| _ |@ -| __|@ -|__| @@ - @ -.-----.@ -| _ |@ -|__ |@ - |__|@@ - @ -.----.@ -| _|@ -|__| @ - @@ - @ -.-----.@ -|__ --|@ -|_____|@ - @@ - __ @ -| |_ @ -| _|@ -|____|@ - @@ - @ -.--.--.@ -| | |@ -|_____|@ - @@ - @ -.--.--.@ -| | |@ - \___/ @ - @@ - @ -.--.--.--.@ -| | | |@ -|________|@ - @@ - @ -.--.--.@ -|_ _|@ -|__.__|@ - @@ - @ -.--.--.@ -| | |@ -|___ |@ -|_____|@@ - @ -.-----.@ -|-- __|@ -|_____|@ - @@ - ___ @ - | _|@ -/ / @ -\ \_ @ - |___|@@ - __ @ -| |@ -| |@ -| |@ -|__|@@ - ___ @ -|_ | @ - \ \@ - _/ /@ -|___| @@ - ___ @ - | ' |@ -|_,_| @ - @ - @@ -.--.--.@ -|-----|@ -| - |@ -|__|__|@ - @@ -.--.--.@ -|-----|@ -| _ |@ -|_____|@ - @@ -.--.--.@ -|--|--|@ -| | |@ -|_____|@ - @@ -.--.--.@ -|---.-|@ -| _ |@ -|___._|@ - @@ -.--.--.@ -|-----|@ -| _ |@ -|_____|@ - @@ -.--.--.@ -|--|--|@ -| | |@ -|_____|@ - @@ - _______ @ -| __ \@ -| __ <@ -| |____/@ -|__| @@ diff -Nru ruby-text-1.2.1/test/data/chunky.txt ruby-text-1.3.0/test/data/chunky.txt --- ruby-text-1.2.1/test/data/chunky.txt 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/data/chunky.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ - ______ __ __ ______ -| || |--..--.--..-----.| |--..--.--. | __ \.---.-..----..-----..-----. -| ---|| || | || || < | | | | __ <| _ || __|| _ || | -|______||__|__||_____||__|__||__|__||___ | |______/|___._||____||_____||__|__| - |_____| \ No newline at end of file diff -Nru ruby-text-1.2.1/test/data/metaphone_buggy.txt ruby-text-1.3.0/test/data/metaphone_buggy.txt --- ruby-text-1.2.1/test/data/metaphone_buggy.txt 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/data/metaphone_buggy.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -# -# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt, -# this mimics the behaviour of Lawrence Philips's BASIC implementation, -# which appears to contain bugs when compared to his description of the -# algorithm. -# -ANASTHA: ANS0 -DAVIS-CARTER: TFSKRTR -ESCARMANT: ESKRMNT -MCCALL: MKKL -MCCROREY: MKKRR -MERSEAL: MRSL -PIEURISSAINT: PRSNT -ROTMAN: RTMN -SCHEVEL: SXFL -SCHROM: SXRM -SEAL: SL -SPARR: SPR -STARLEPER: STRLPR -THRASH: 0RX -LOGGING: LKNK -LOGIC: LJK -JUDGES: JJS -SHOOS: XS -SHOES: XS -CHUTE: XT -SCHUSS: SXS -OTTO: OT -ERIC: ERK -DAVE: TF -CATHERINE: K0RN -KATHERINE: K0RN -AUBREY: ABR -BRYAN: BRYN -BRYCE: BRS -STEVEN: STFN -RICHARD: RXRT -HEIDI: HT -AUTO: AT -MAURICE: MRS -RANDY: RNT -CAMBRILLO: KMRL -BRIAN: BRN -RAY: R -GEOFF: JF -BOB: BB -AHA: AH -AAH: A -PAUL: PL -BATTLEY: BTL -WROTE: RT -THIS: 0S diff -Nru ruby-text-1.2.1/test/data/metaphone_buggy.yml ruby-text-1.3.0/test/data/metaphone_buggy.yml --- ruby-text-1.2.1/test/data/metaphone_buggy.yml 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/data/metaphone_buggy.yml 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,52 @@ +# +# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt, +# this mimics the behaviour of Lawrence Philips's BASIC implementation, +# which appears to contain bugs when compared to his description of the +# algorithm. +# +ANASTHA: ANS0 +DAVIS-CARTER: TFSKRTR +ESCARMANT: ESKRMNT +MCCALL: MKKL +MCCROREY: MKKRR +MERSEAL: MRSL +PIEURISSAINT: PRSNT +ROTMAN: RTMN +SCHEVEL: SXFL +SCHROM: SXRM +SEAL: SL +SPARR: SPR +STARLEPER: STRLPR +THRASH: 0RX +LOGGING: LKNK +LOGIC: LJK +JUDGES: JJS +SHOOS: XS +SHOES: XS +CHUTE: XT +SCHUSS: SXS +OTTO: OT +ERIC: ERK +DAVE: TF +CATHERINE: K0RN +KATHERINE: K0RN +AUBREY: ABR +BRYAN: BRYN +BRYCE: BRS +STEVEN: STFN +RICHARD: RXRT +HEIDI: HT +AUTO: AT +MAURICE: MRS +RANDY: RNT +CAMBRILLO: KMRL +BRIAN: BRN +RAY: R +GEOFF: JF +BOB: BB +AHA: AH +AAH: A +PAUL: PL +BATTLEY: BTL +WROTE: RT +THIS: 0S diff -Nru ruby-text-1.2.1/test/data/metaphone.txt ruby-text-1.3.0/test/data/metaphone.txt --- ruby-text-1.2.1/test/data/metaphone.txt 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/data/metaphone.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# -# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt, -# with surprising results changed to 'correct' ones (according to my interpretation -# of the algorithm description), and some more results from around the web: -# -ANASTHA: ANS0 -DAVIS-CARTER: TFSKRTR -ESCARMANT: ESKRMNT -MCCALL: MKL -MCCROREY: MKRR -MERSEAL: MRSL -PIEURISSAINT: PRSNT -ROTMAN: RTMN -SCHEVEL: SXFL -SCHROM: SXRM -SEAL: SL -SPARR: SPR -STARLEPER: STRLPR -THRASH: 0RX -LOGGING: LKNK -LOGIC: LJK -JUDGES: JJS -SHOOS: XS -SHOES: XS -CHUTE: XT -SCHUSS: SXS -OTTO: OT -ERIC: ERK -DAVE: TF -CATHERINE: K0RN -KATHERINE: K0RN -AUBREY: ABR -BRYAN: BRYN -BRYCE: BRS -STEVEN: STFN -RICHARD: RXRT -HEIDI: HT -AUTO: AT -MAURICE: MRS -RANDY: RNT -CAMBRILLO: KMBRL -BRIAN: BRN -RAY: R -GEOFF: JF -BOB: BB -AHA: AH -AAH: A -PAUL: PL -BATTLEY: BTL -WROTE: RT -THIS: 0S diff -Nru ruby-text-1.2.1/test/data/metaphone.yml ruby-text-1.3.0/test/data/metaphone.yml --- ruby-text-1.2.1/test/data/metaphone.yml 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/data/metaphone.yml 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,53 @@ +# +# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt, +# with surprising results changed to 'correct' ones (according to my interpretation +# of the algorithm description), and some more results from around the web: +# +ANASTHA: ANS0 +DAVIS-CARTER: TFSKRTR +ESCARMANT: ESKRMNT +MCCALL: MKL +MCCROREY: MKRR +MERSEAL: MRSL +PIEURISSAINT: PRSNT +ROTMAN: RTMN +SCHEVEL: SXFL +SCHROM: SXRM +SEAL: SL +SPARR: SPR +STARLEPER: STRLPR +THRASH: 0RX +LOGGING: LKNK +LOGIC: LJK +JUDGES: JJS +SHOOS: XS +SHOES: XS +CHUTE: XT +SCHUSS: SXS +OTTO: OT +ERIC: ERK +BUCK: BK +COCK: KK +DAVE: TF +CATHERINE: K0RN +KATHERINE: K0RN +AUBREY: ABR +BRYAN: BRYN +BRYCE: BRS +STEVEN: STFN +RICHARD: RXRT +HEIDI: HT +AUTO: AT +MAURICE: MRS +RANDY: RNT +CAMBRILLO: KMBRL +BRIAN: BRN +RAY: R +GEOFF: JF +BOB: BB +AHA: AH +AAH: A +PAUL: PL +BATTLEY: BTL +WROTE: RT +THIS: 0S diff -Nru ruby-text-1.2.1/test/data/soundex.yml ruby-text-1.3.0/test/data/soundex.yml --- ruby-text-1.2.1/test/data/soundex.yml 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/data/soundex.yml 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,14 @@ +Euler: E460 +Ellery: E460 +Gauss: G200 +Ghosh: G200 +Hilbert: H416 +Heilbronn: H416 +Knuth: K530 +Kant: K530 +Lloyd: L300 +Ladd: L300 +Lukasiewicz: L222 +Lissajous: L222 +SanFrancisco: S516 +"San Francisco": S516 diff -Nru ruby-text-1.2.1/test/double_metaphone_test.rb ruby-text-1.3.0/test/double_metaphone_test.rb --- ruby-text-1.2.1/test/double_metaphone_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/double_metaphone_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,17 @@ +require_relative "./test_helper" +require "text/double_metaphone" + +require 'csv' + +class DoubleMetaphoneTest < Test::Unit::TestCase + + def test_cases + CSV.open(data_file_path('double_metaphone.csv'), 'r').to_a.each do |row| + primary, secondary = Text::Metaphone.double_metaphone(row[0]) + + assert_equal row[1], primary + assert_equal row[2], secondary.nil?? primary : secondary + end + end + +end diff -Nru ruby-text-1.2.1/test/levenshtein_test.rb ruby-text-1.3.0/test/levenshtein_test.rb --- ruby-text-1.2.1/test/levenshtein_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/levenshtein_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,363 @@ +# coding: UTF-8 + +require_relative "./test_helper" +require "text/levenshtein" + +class LevenshteinTest < Test::Unit::TestCase + include Text::Levenshtein + + def iso_8859_1(s) + s.force_encoding(Encoding::ISO_8859_1) + end + + def test_should_calculate_lengths_for_basic_examples + assert_equal 0, distance("test", "test") + assert_equal 1, distance("test", "tent") + assert_equal 2, distance("gumbo", "gambol") + assert_equal 3, distance("kitten", "sitting") + end + + def test_should_give_full_distances_for_empty_strings + assert_equal 3, distance("foo", "") + assert_equal 0, distance("", "") + assert_equal 1, distance("a", "") + end + + def test_should_treat_utf_8_codepoints_as_one_element + assert_equal 1, distance("föo", "foo") + assert_equal 1, distance("français", "francais") + assert_equal 1, distance("français", "franæais") + assert_equal 2, distance("私の名前はポールです", "ぼくの名前はポールです") + end + + def test_should_process_single_byte_encodings + assert_equal 1, distance(iso_8859_1("f\xF6o"), iso_8859_1("foo")) + assert_equal 1, distance(iso_8859_1("fran\xE7ais"), iso_8859_1("francais")) + assert_equal 1, distance(iso_8859_1("fran\xE7ais"), iso_8859_1("fran\xE6ais")) + end + + def test_should_process_edge_cases_as_expected + assert_equal 0, distance("a", "a") + assert_equal 26, distance("0123456789", "abcdefghijklmnopqrstuvwxyz") + end + + def test_should_return_calculated_distance_when_less_than_maximum + assert_equal 0, distance("test", "test", 1) + assert_equal 1, distance("test", "tent", 2) + assert_equal 2, distance("gumbo", "gambol", 3) + assert_equal 3, distance("kitten", "sitting", 4) + end + + def test_should_return_calculated_distance_when_same_as_maximum + assert_equal 0, distance("test", "test", 0) + assert_equal 1, distance("test", "tent", 1) + assert_equal 2, distance("gumbo", "gambol", 2) + assert_equal 3, distance("kitten", "sitting", 3) + end + + def test_should_return_specified_maximum_if_distance_is_more + assert_equal 1, distance("gumbo", "gambol", 1) + assert_equal 2, distance("kitten", "sitting", 2) + assert_equal 1, distance("test", "tasf", 1) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start + assert_equal 1, distance("1234", "01234") + assert_equal 0, distance("1234", "01234", 0) + assert_equal 1, distance("1234", "01234", 1) + assert_equal 1, distance("1234", "01234", 2) + assert_equal 1, distance("1234", "01234", 3) + assert_equal 1, distance("1234", "01234", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_end + assert_equal 2, distance("1234", "123400") + assert_equal 0, distance("1234", "123400", 0) + assert_equal 1, distance("1234", "123400", 1) + assert_equal 2, distance("1234", "123400", 2) + assert_equal 2, distance("1234", "123400", 3) + assert_equal 2, distance("1234", "123400", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_in_the_middle + assert_equal 1, distance("1234", "12034") + assert_equal 0, distance("1234", "12034", 0) + assert_equal 1, distance("1234", "12034", 1) + assert_equal 1, distance("1234", "12034", 2) + assert_equal 1, distance("1234", "12034", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start_and_in_the_middle + assert_equal 2, distance("1234", "012034") + assert_equal 0, distance("1234", "012034", 0) + assert_equal 1, distance("1234", "012034", 1) + assert_equal 2, distance("1234", "012034", 2) + assert_equal 2, distance("1234", "012034", 3) + assert_equal 2, distance("1234", "012034", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_end_and_in_the_middle + assert_equal 2, distance("1234", "120340") + assert_equal 0, distance("1234", "120340", 0) + assert_equal 1, distance("1234", "120340", 1) + assert_equal 2, distance("1234", "120340", 2) + assert_equal 2, distance("1234", "120340", 3) + assert_equal 2, distance("1234", "120340", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start_at_end_and_in_the_middle + assert_equal 3, distance("1234", "0120340") + assert_equal 0, distance("1234", "0120340", 0) + assert_equal 3, distance("1234", "0120340", 3) + assert_equal 3, distance("1234", "0120340", 4) + assert_equal 3, distance("1234", "0120340", 6) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start_and_char_changes + assert_equal 3, distance("1234", "001233") + assert_equal 0, distance("1234", "001233", 0) + assert_equal 2, distance("1234", "001233", 2) + assert_equal 3, distance("1234", "001233", 3) + assert_equal 3, distance("1234", "001233", 4) + assert_equal 3, distance("1234", "001233", 5) + end + + def test_should_return_maximum_distance_for_strings_with_deletions_at_end + assert_equal 1, distance("1234", "123") + assert_equal 0, distance("1234", "123", 0) + assert_equal 1, distance("1234", "123", 1) + assert_equal 1, distance("1234", "123", 2) + assert_equal 1, distance("1234", "123", 5) + end + + def test_should_return_maximum_distance_for_strings_with_deletions_at_start + assert_equal 1, distance("1234", "234") + assert_equal 0, distance("1234", "234", 0) + assert_equal 1, distance("1234", "234", 1) + assert_equal 1, distance("1234", "234", 2) + assert_equal 1, distance("1234", "234", 5) + end + + def test_should_return_maximum_distance_for_strings_with_deletions_at_start_and_in_the_middle + assert_equal 2, distance("1234", "24") + assert_equal 0, distance("1234", "24", 0) + assert_equal 1, distance("1234", "24", 1) + assert_equal 2, distance("1234", "24", 2) + assert_equal 2, distance("1234", "24", 3) + assert_equal 2, distance("1234", "24", 5) + end + + def test_should_return_maximum_distance_for_strings_with_deletions_at_end_and_in_the_middle + assert_equal 2, distance("1234", "13") + assert_equal 0, distance("1234", "13", 0) + assert_equal 1, distance("1234", "13", 1) + assert_equal 2, distance("1234", "13", 2) + assert_equal 2, distance("1234", "13", 3) + assert_equal 2, distance("1234", "13", 5) + end + + def test_should_return_maximum_distance_for_strings_with_deletions_at_start_at_end_and_in_the_middle + assert_equal 3, distance("12345", "24") + assert_equal 0, distance("12345", "24", 0) + assert_equal 2, distance("12345", "24", 2) + assert_equal 3, distance("12345", "24", 3) + assert_equal 3, distance("12345", "24", 4) + assert_equal 3, distance("12345", "24", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start_and_deletions_in_the_middle + assert_equal 2, distance("1234", "0124") + assert_equal 0, distance("1234", "0124", 0) + assert_equal 1, distance("1234", "0124", 1) + assert_equal 2, distance("1234", "0124", 2) + assert_equal 2, distance("1234", "0124", 3) + assert_equal 2, distance("1234", "0124", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_start_and_deletions_at_end + assert_equal 2, distance("1234", "0123") + assert_equal 0, distance("1234", "0123", 0) + assert_equal 1, distance("1234", "0123", 1) + assert_equal 2, distance("1234", "0123", 2) + assert_equal 2, distance("1234", "0123", 3) + assert_equal 2, distance("1234", "0123", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_in_the_middle_and_deletions_at_end + assert_equal 2, distance("1234", "1293") + assert_equal 0, distance("1234", "1293", 0) + assert_equal 1, distance("1234", "1293", 1) + assert_equal 2, distance("1234", "1293", 2) + assert_equal 2, distance("1234", "1293", 3) + assert_equal 2, distance("1234", "1293", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_in_the_middle_and_deletions_at_start + assert_equal 2, distance("1234", "2934") + assert_equal 0, distance("1234", "2934", 0) + assert_equal 1, distance("1234", "2934", 1) + assert_equal 2, distance("1234", "2934", 2) + assert_equal 2, distance("1234", "2934", 3) + assert_equal 2, distance("1234", "2934", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_end_and_deletions_at_start + assert_equal 2, distance("1234", "2345") + assert_equal 0, distance("1234", "2345", 0) + assert_equal 1, distance("1234", "2345", 1) + assert_equal 2, distance("1234", "2345", 2) + assert_equal 2, distance("1234", "2345", 3) + assert_equal 2, distance("1234", "2345", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_at_end_and_deletions_in_the_middle + assert_equal 2, distance("1234", "1245") + assert_equal 0, distance("1234", "1245", 0) + assert_equal 1, distance("1234", "1245", 1) + assert_equal 2, distance("1234", "1245", 2) + assert_equal 2, distance("1234", "1245", 3) + assert_equal 2, distance("1234", "1245", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_in_the_middle_and_deletions_in_the_middle + assert_equal 2, distance("12345", "12035") + assert_equal 0, distance("12345", "12035", 0) + assert_equal 1, distance("12345", "12035", 1) + assert_equal 2, distance("12345", "12035", 2) + assert_equal 2, distance("12345", "12035", 3) + assert_equal 2, distance("12345", "12035", 5) + end + + def test_should_return_maximum_distance_for_strings_with_additions_deletions_and_char_changes + assert_equal 3, distance("1234", "0193") + assert_equal 0, distance("1234", "0193", 0) + assert_equal 1, distance("1234", "0193", 1) + assert_equal 2, distance("1234", "0193", 2) + assert_equal 3, distance("1234", "0193", 3) + assert_equal 3, distance("1234", "0193", 4) + assert_equal 3, distance("1234", "0193", 5) + + assert_equal 3, distance("1234", "2395") + assert_equal 0, distance("1234", "2395", 0) + assert_equal 1, distance("1234", "2395", 1) + assert_equal 2, distance("1234", "2395", 2) + assert_equal 3, distance("1234", "2395", 3) + assert_equal 3, distance("1234", "2395", 4) + assert_equal 3, distance("1234", "2395", 5) + end + + def test_should_return_maximum_distance_for_strings_with_only_one_char + assert_equal 1, distance("t", "a") + assert_equal 0, distance("t", "a", 0) + assert_equal 1, distance("t", "a", 1) + assert_equal 1, distance("t", "a", 2) + assert_equal 1, distance("t", "a", 10) + + assert_equal 0, distance("t", "t") + assert_equal 0, distance("t", "t", 1) + assert_equal 0, distance("t", "t", 4) + + assert_equal 1, distance("te", "t") + assert_equal 0, distance("te", "t", 0) + assert_equal 1, distance("te", "t", 1) + assert_equal 1, distance("te", "t", 2) + assert_equal 1, distance("te", "t", 4) + end + + def test_should_return_maximum_distance_for_a_long_string + assert_equal 440, distance( "Having a catchy name, easy reminder for all is fundamental when choosing the name for a new product. A bad name can be the beginning of the end product and immediately forget this.

Primary keys to choose a good brand name are, first: choose a name that only has one word and at most three, such being the optimum. Try to make it easier to read and pronounce, as this will be easier to remember for all the time to talk about your product. Remember, too, that the use of capitalization also influence, you should treat the name of your product as if it were the same logo. And finally, you should avoid using numbers in your product name, unless it is a very easy to remember because this number were tied deeply with your product. Always think globally, independent of which only sell locally, you never know when it can come out in sales and need to make a point.", + "All product lines work with tags that identify its products and differentiate it from the others or with labels for packaged, or perhaps labels to be placed in the envelopes that you send to your customers. There are thousands options, shapes, designs and colors that you can use and advantage of these is that they can also be adhesive. If you need a label that serve you and that you identify will have your order. You will receive many proposals that you can discard if they don't like you or you keep it if you like and fits your needs. Don't miss the opportunity to innovate and use all the tools that allow you to continue to grow as a company. REMEMBER! a good label, with a good design can increase your sales by 20% just by its appearance.", + 440 ) + end + +end + +class LevenshteinGeneratedDataTest < Test::Unit::TestCase + Element = Struct.new(:char, :added) do + def to_s + char + end + end + + def one_of(str) + str[rand(str.length)] + end + + def letter + one_of "abcdefghijklmnopqrstuvwxyzáéíóúあいうえお日月火水木" + end + + def word + (rand(10) + 2).times.map { letter }.join("") + end + + def sentence + (rand(10) + 2).times.map { word }.join(" ") + end + + def sequence + sentence.scan(/./).map { |c| Element.new(c, true) } + end + + def insert(seq) + elem = Element.new(letter, true) + pos = rand(seq.length) + return [seq[0, pos] + [elem] + seq[pos .. -1], 1] + end + + # Delete an element, but only if we didn't add it - that would make the + # calculations complicated + def delete(seq) + pos = rand(seq.length) + if seq[pos].added + return [seq, 0] + else + return [seq[0, pos] + seq[(pos + 1) .. -1], 1] + end + end + + def substitute(seq) + pos = rand(seq.length) + if seq[pos].added + return [seq, 0] + else + elem = Element.new(letter, false) + return [seq[0, pos] + [elem] + se[(pos + 1) .. -1], 1] + end + end + + def mutate(seq) + distance = 0 + rand(seq.length).times do + method = [:insert, :delete, :substitute][rand(2)] + seq, d = send(method, seq) + distance += d + end + return [seq, distance] + end + + def test_generated_samples + 100.times do + input = sequence + output, distance = mutate(input) + a = input.map(&:to_s).join("") + b = output.map(&:to_s).join("") + assert_equal distance, Text::Levenshtein.distance(a, b) + end + end + + def test_generated_samples_with_maximum_distance + 100.times do + input = sequence + output, distance = mutate(input) + a = input.map(&:to_s).join("") + b = output.map(&:to_s).join("") + (0 .. distance).each do |d| + assert_equal d, Text::Levenshtein.distance(a, b, d) + end + (distance .. sequence.length).each do |d| + assert_equal distance, Text::Levenshtein.distance(a, b, d) + end + end + end +end diff -Nru ruby-text-1.2.1/test/metaphone_test.rb ruby-text-1.3.0/test/metaphone_test.rb --- ruby-text-1.2.1/test/metaphone_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/metaphone_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,40 @@ +require_relative "./test_helper" +require "text/metaphone" +require 'yaml' + +class MetaphoneTest < Test::Unit::TestCase + + def test_cases + YAML.load(data_file('metaphone.yml')).each do |input, expected_output| + assert_equal expected_output, Text::Metaphone.metaphone(input) + end + end + + def test_cases_for_buggy_implementation + YAML.load(data_file('metaphone_buggy.yml')).each do |input, expected_output| + assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true) + end + end + + def test_junk + assert_equal Text::Metaphone.metaphone('foobar'), + Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&') + assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true), + Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true) + end + + def test_caps + assert_equal Text::Metaphone.metaphone('foobar'), + Text::Metaphone.metaphone('FOOBAR') + assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true), + Text::Metaphone.metaphone('FOOBAR', :buggy=>true) + end + + def test_string + assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz') + assert_equal 'N WT', Text::Metaphone.metaphone('gnu what') + assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz', :buggy=>true) + assert_equal 'N WT', Text::Metaphone.metaphone('gnu what', :buggy=>true) + end + +end diff -Nru ruby-text-1.2.1/test/porter_stemming_test.rb ruby-text-1.3.0/test/porter_stemming_test.rb --- ruby-text-1.2.1/test/porter_stemming_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/porter_stemming_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,15 @@ +require_relative "./test_helper" +require "text/porter_stemming" + +class PorterStemmingTest < Test::Unit::TestCase + + def test_cases + inputs = data_file('porter_stemming_input.txt').split(/\n/) + outputs = data_file('porter_stemming_output.txt').split(/\n/) + + inputs.zip(outputs).each do |word, expected_output| + assert_equal expected_output, Text::PorterStemming.stem(word) + end + end + +end diff -Nru ruby-text-1.2.1/test/preamble.rb ruby-text-1.3.0/test/preamble.rb --- ruby-text-1.2.1/test/preamble.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/preamble.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -require 'test/unit' - -lib = File.expand_path("../../lib") -$:.unshift lib unless $:.include?(lib) - -class Test::Unit::TestCase - def data_file_path(*path) - File.join(File.dirname(__FILE__), "data", *path) - end - - def data_file(*path) - File.read(data_file_path(*path)) - end -end diff -Nru ruby-text-1.2.1/test/soundex_test.rb ruby-text-1.3.0/test/soundex_test.rb --- ruby-text-1.2.1/test/soundex_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/soundex_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,20 @@ +require_relative "./test_helper" +require "text/soundex" +require 'yaml' + +class SoundexTest < Test::Unit::TestCase + + def test_cases + YAML.load(data_file('soundex.yml')).each do |input, expected_output| + assert_equal expected_output, Text::Soundex.soundex(input) + end + end + + def test_should_return_nil_for_empty_string + assert_nil Text::Soundex.soundex("") + end + + def test_should_return_nil_for_string_with_no_letters + assert_nil Text::Soundex.soundex("!@#123") + end +end diff -Nru ruby-text-1.2.1/test/test_double_metaphone.rb ruby-text-1.3.0/test/test_double_metaphone.rb --- ruby-text-1.2.1/test/test_double_metaphone.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_double_metaphone.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -require "preamble" -require "text/double_metaphone" - -require 'csv' - -class DoubleMetaphoneTest < Test::Unit::TestCase - - def test_cases - CSV.open(data_file_path('double_metaphone.csv'), 'r').to_a.each do |row| - primary, secondary = Text::Metaphone.double_metaphone(row[0]) - - assert_equal row[1], primary - assert_equal row[2], secondary.nil?? primary : secondary - end - end - -end diff -Nru ruby-text-1.2.1/test/test_helper.rb ruby-text-1.3.0/test/test_helper.rb --- ruby-text-1.2.1/test/test_helper.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/test_helper.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,14 @@ +require 'test/unit' + +lib = File.expand_path("../../lib", __FILE__) +$:.unshift lib unless $:.include?(lib) + +class Test::Unit::TestCase + def data_file_path(*path) + File.join(File.dirname(__FILE__), "data", *path) + end + + def data_file(*path) + File.read(data_file_path(*path)) + end +end diff -Nru ruby-text-1.2.1/test/test_levenshtein.rb ruby-text-1.3.0/test/test_levenshtein.rb --- ruby-text-1.2.1/test/test_levenshtein.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_levenshtein.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,93 +0,0 @@ -require "preamble" -require "text/levenshtein" - -class LevenshteinTest < Test::Unit::TestCase - - include Text::Levenshtein - - TEST_CASES = { - :easy => [ - ['test', 'test', 0], - ['test', 'tent', 1], - ['gumbo', 'gambol', 2], - ['kitten', 'sitting', 3] - ], - :empty => [ - ['foo', '', 3], - ['', '', 0], - ['a', '', 1] - ], - :utf8 => [ - ["f\303\266o", 'foo', 1], - ["fran\303\247ais", 'francais', 1], - ["fran\303\247ais", "fran\303\246ais", 1], - [ - "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<< - "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231", - "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<< - "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231", - 2 - ] # Japanese - ], - :iso_8859_1 => [ - ["f\366o", 'foo', 1], - ["fran\347ais", 'francais', 1], - ["fran\347ais", "fran\346ais", 1] - ], - :edge => [ - ['a', 'a', 0], - ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26] - ] - } - - def assert_set(name) - TEST_CASES[name].each do |s, t, x| - if defined?(Encoding) && Encoding.default_internal # Change the encoding if in 1.9 - t.force_encoding(Encoding.default_internal) - s.force_encoding(Encoding.default_internal) - end - - assert_equal x, distance(s, t) - assert_equal x, distance(t, s) - end - end - - def with_encoding(kcode, encoding) - if "ruby".respond_to?(:encoding) - old_encoding = Encoding.default_internal - Encoding.default_internal = encoding - yield - Encoding.default_internal = old_encoding - else # 1.8 backwards compat - old_kcode = $KCODE - $KCODE = kcode - yield - $KCODE = old_kcode - end - end - - def test_easy_cases - assert_set(:easy) - end - - def test_empty_cases - assert_set(:empty) - end - - def test_edge_cases - assert_set(:edge) - end - - def test_utf8_cases - with_encoding('U', 'UTF-8') do - assert_set(:utf8) - end - end - - def test_iso_8859_1_cases - with_encoding('NONE', 'ISO-8859-1') do - assert_set(:iso_8859_1) - end - end - -end diff -Nru ruby-text-1.2.1/test/test_metaphone.rb ruby-text-1.3.0/test/test_metaphone.rb --- ruby-text-1.2.1/test/test_metaphone.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_metaphone.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -require "preamble" -require "text/metaphone" -require 'yaml' - -class MetaphoneTest < Test::Unit::TestCase - - def test_cases - YAML.load(data_file('metaphone.txt')).each do |input, expected_output| - assert_equal expected_output, Text::Metaphone.metaphone(input) - end - end - - def test_cases_for_buggy_implementation - YAML.load(data_file('metaphone_buggy.txt')).each do |input, expected_output| - assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true) - end - end - - def test_junk - assert_equal Text::Metaphone.metaphone('foobar'), - Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&') - assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true), - Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true) - end - - def test_caps - assert_equal Text::Metaphone.metaphone('foobar'), - Text::Metaphone.metaphone('FOOBAR') - assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true), - Text::Metaphone.metaphone('FOOBAR', :buggy=>true) - end - - def test_string - assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz') - assert_equal 'N WT', Text::Metaphone.metaphone('gnu what') - assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz', :buggy=>true) - assert_equal 'N WT', Text::Metaphone.metaphone('gnu what', :buggy=>true) - end - -end diff -Nru ruby-text-1.2.1/test/test_porter_stemming.rb ruby-text-1.3.0/test/test_porter_stemming.rb --- ruby-text-1.2.1/test/test_porter_stemming.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_porter_stemming.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -require "preamble" -require "text/porter_stemming" - -class PorterStemmingTest < Test::Unit::TestCase - - def test_cases - inputs = data_file('porter_stemming_input.txt').split(/\n/) - outputs = data_file('porter_stemming_output.txt').split(/\n/) - - inputs.zip(outputs).each do |word, expected_output| - assert_equal expected_output, Text::PorterStemming.stem(word) - end - end - -end diff -Nru ruby-text-1.2.1/test/test_soundex.rb ruby-text-1.3.0/test/test_soundex.rb --- ruby-text-1.2.1/test/test_soundex.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_soundex.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -require "preamble" -require "text/soundex" -require 'yaml' - -class SoundexTest < Test::Unit::TestCase - - def test_cases - YAML.load(%{ - - Euler: E460 - Ellery: E460 - Gauss: G200 - Ghosh: G200 - Hilbert: H416 - Heilbronn: H416 - Knuth: K530 - Kant: K530 - Lloyd: L300 - Ladd: L300 - Lukasiewicz: L222 - Lissajous: L222 - - }).each do |input, expected_output| - assert_equal expected_output, Text::Soundex.soundex(input) - end - end - -end diff -Nru ruby-text-1.2.1/test/test_text.rb ruby-text-1.3.0/test/test_text.rb --- ruby-text-1.2.1/test/test_text.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_text.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -require 'preamble' - -class TextTest < Test::Unit::TestCase - def test_should_load_all_components - require 'text' - assert defined? Text::Levenshtein - assert defined? Text::Metaphone - assert defined? Text::PorterStemming - assert defined? Text::Soundex - assert defined? Text::VERSION - assert defined? Text::WhiteSimilarity - end -end diff -Nru ruby-text-1.2.1/test/test_white_similarity.rb ruby-text-1.3.0/test/test_white_similarity.rb --- ruby-text-1.2.1/test/test_white_similarity.rb 2012-06-20 14:56:13.000000000 +0000 +++ ruby-text-1.3.0/test/test_white_similarity.rb 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -require "preamble" -require "text/white_similarity" - -class WhiteSimilarityTest < Test::Unit::TestCase - - def test_similarity - word = "Healed" - - assert_in_delta 0.8, Text::WhiteSimilarity.similarity(word, "Sealed"), 0.01 - assert_in_delta 0.55, Text::WhiteSimilarity.similarity(word, "Healthy"), 0.01 - assert_in_delta 0.44, Text::WhiteSimilarity.similarity(word, "Heard"), 0.01 - assert_in_delta 0.40, Text::WhiteSimilarity.similarity(word, "Herded"), 0.01 - assert_in_delta 0.25, Text::WhiteSimilarity.similarity(word, "Help"), 0.01 - assert_in_delta 0.0, Text::WhiteSimilarity.similarity(word, "Sold"), 0.01 - end - - def test_similarity_with_caching - word = "Healed" - - white = Text::WhiteSimilarity.new - - assert_in_delta 0.8, white.similarity(word, "Sealed"), 0.01 - assert_in_delta 0.55, white.similarity(word, "Healthy"), 0.01 - assert_in_delta 0.44, white.similarity(word, "Heard"), 0.01 - assert_in_delta 0.40, white.similarity(word, "Herded"), 0.01 - assert_in_delta 0.25, white.similarity(word, "Help"), 0.01 - assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01 - end - - def test_should_not_clobber_cached_values - white = Text::WhiteSimilarity.new - word = "Healed" - assert_equal white.similarity(word, word), white.similarity(word, word) - end - - def test_similarity_with_examples_from_article - assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01 - assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01 - assert_in_delta 0.0, Text::WhiteSimilarity.similarity("FRANCE", "QUEBEC"), 0.01 - assert_in_delta 0.72, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF FRANCE"), 0.01 - assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01 - end - - def test_similarity_with_equal_strings - assert_equal 1.0, Text::WhiteSimilarity.similarity("aaaaa", "aaaaa") - assert_equal 1.0, Text::WhiteSimilarity.similarity("REPUBLIC OF CUBA", "REPUBLIC OF CUBA") - end - -end diff -Nru ruby-text-1.2.1/test/text_test.rb ruby-text-1.3.0/test/text_test.rb --- ruby-text-1.2.1/test/text_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/text_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,13 @@ +require_relative "./test_helper" + +class TextTest < Test::Unit::TestCase + def test_should_load_all_components + require 'text' + assert defined? Text::Levenshtein + assert defined? Text::Metaphone + assert defined? Text::PorterStemming + assert defined? Text::Soundex + assert defined? Text::VERSION + assert defined? Text::WhiteSimilarity + end +end diff -Nru ruby-text-1.2.1/test/white_similarity_test.rb ruby-text-1.3.0/test/white_similarity_test.rb --- ruby-text-1.2.1/test/white_similarity_test.rb 1970-01-01 00:00:00.000000000 +0000 +++ ruby-text-1.3.0/test/white_similarity_test.rb 2014-07-17 08:18:45.000000000 +0000 @@ -0,0 +1,49 @@ +require_relative "./test_helper" +require "text/white_similarity" + +class WhiteSimilarityTest < Test::Unit::TestCase + + def test_similarity + word = "Healed" + + assert_in_delta 0.8, Text::WhiteSimilarity.similarity(word, "Sealed"), 0.01 + assert_in_delta 0.55, Text::WhiteSimilarity.similarity(word, "Healthy"), 0.01 + assert_in_delta 0.44, Text::WhiteSimilarity.similarity(word, "Heard"), 0.01 + assert_in_delta 0.40, Text::WhiteSimilarity.similarity(word, "Herded"), 0.01 + assert_in_delta 0.25, Text::WhiteSimilarity.similarity(word, "Help"), 0.01 + assert_in_delta 0.0, Text::WhiteSimilarity.similarity(word, "Sold"), 0.01 + end + + def test_similarity_with_caching + word = "Healed" + + white = Text::WhiteSimilarity.new + + assert_in_delta 0.8, white.similarity(word, "Sealed"), 0.01 + assert_in_delta 0.55, white.similarity(word, "Healthy"), 0.01 + assert_in_delta 0.44, white.similarity(word, "Heard"), 0.01 + assert_in_delta 0.40, white.similarity(word, "Herded"), 0.01 + assert_in_delta 0.25, white.similarity(word, "Help"), 0.01 + assert_in_delta 0.0, white.similarity(word, "Sold"), 0.01 + end + + def test_should_not_clobber_cached_values + white = Text::WhiteSimilarity.new + word = "Healed" + assert_equal white.similarity(word, word), white.similarity(word, word) + end + + def test_similarity_with_examples_from_article + assert_in_delta 0.4, Text::WhiteSimilarity.similarity("GGGGG", "GG"), 0.01 + assert_in_delta 0.56, Text::WhiteSimilarity.similarity("REPUBLIC OF FRANCE", "FRANCE"), 0.01 + assert_in_delta 0.0, Text::WhiteSimilarity.similarity("FRANCE", "QUEBEC"), 0.01 + assert_in_delta 0.72, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF FRANCE"), 0.01 + assert_in_delta 0.61, Text::WhiteSimilarity.similarity("FRENCH REPUBLIC", "REPUBLIC OF CUBA"), 0.01 + end + + def test_similarity_with_equal_strings + assert_equal 1.0, Text::WhiteSimilarity.similarity("aaaaa", "aaaaa") + assert_equal 1.0, Text::WhiteSimilarity.similarity("REPUBLIC OF CUBA", "REPUBLIC OF CUBA") + end + +end