diff -Nru similarity-tester-2.21/add_run.c similarity-tester-2.26/add_run.c --- similarity-tester-2.21/add_run.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/add_run.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,13 +1,12 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: add_run.c,v 2.5 2001/11/08 12:30:28 dick Exp $ + $Id: add_run.c,v 2.7 2008/09/23 09:07:11 dick Exp $ */ -#include - #include "sim.h" #include "runs.h" #include "percentages.h" +#include "Malloc.h" #include "options.h" #include "error.h" #include "add_run.h" @@ -33,17 +32,18 @@ ) { /* Adds the run of given size to our collection. */ - register struct run *r = (struct run *)malloc(sizeof (struct run)); + struct run *r = new(struct run); - if (!r) fatal("out of memory"); set_chunk(&r->rn_cn0, txt0, i0 - txt0->tx_start, size); set_chunk(&r->rn_cn1, txt1, i1 - txt1->tx_start, size); r->rn_size = size; - if (option_set('p') ? add_to_percentages(r) : add_to_runs(r)) { - /* OK */ + if (option_set('p')) { + add_to_percentages(r); + } + else { + add_to_runs(r); } - else fatal("out of memory"); } static void diff -Nru similarity-tester-2.21/aiso.bdy similarity-tester-2.26/aiso.bdy --- similarity-tester-2.21/aiso.bdy 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/aiso.bdy 2008-04-03 12:24:52.000000000 +0000 @@ -1,37 +1,41 @@ -/* - Module: Arbitrary-In Sorted-Out (AISO) - Author: dick@cs.vu.nl (Dick Grune @ Vrije Universiteit, Amsterdam) +/* This file is part of the module Arbitrary-In Sorted-Out (AISO). + Written by Dick Grune, Vrije Universiteit, Amsterdam. + $Id: aiso.bdy,v 1.3 2008/04/03 12:24:52 dick Exp $ +*/ +/* Description: This is the body of a module that builds an arbitrary-in - sorted-out data structure, to be used as a heap, a priority queue, etc. - See aiso.spc for further info. + sorted-out data structure, to be used as a heap, a priority queue, + etc. + See aiso.spc for further information. 
*/ -#include +#include +#include static struct aiso_node *root; /* root of tree */ -#ifdef AISO_ITERATOR +#ifdef AISO_ITER static struct aiso_node *list; /* start of linked list */ -#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ /* the policy */ static int aiso_size = 0; -static int access_mark = 1; +static int acc_mark = 1; #define add_entry() (aiso_size++) -#define remove_entry() (aiso_size--) -#define reset_access() (access_mark = 1) -#define count_access() (access_mark <<= 1) -#define must_rotate() (access_mark > aiso_size) +#define rem_entry() (aiso_size--) +#define reset_access() (acc_mark = 1) +#define count_access() (acc_mark <<= 1) +#define must_rotate() (acc_mark > aiso_size) int InsertAiso(AISO_TYPE v) { - register struct aiso_node *new_node; - register struct aiso_node **hook = &root; -#ifdef AISO_ITERATOR - register struct aiso_node **prev = &list; -#endif /* AISO_ITERATOR */ + struct aiso_node *new_node; + struct aiso_node **hook = &root; +#ifdef AISO_ITER + struct aiso_node **prev = &list; +#endif /* AISO_ITER */ new_node = (struct aiso_node *)malloc(sizeof (struct aiso_node)); if (!new_node) { @@ -40,7 +44,7 @@ } while (*hook) { - register struct aiso_node *an = *hook; + struct aiso_node *an = *hook; count_access(); if (AISO_BEFORE(v, an->an_value)) { @@ -51,7 +55,7 @@ } else { /* change (l A r) B (C) into (l) A (r B C) */ - register struct aiso_node *anl = an->an_left; + struct aiso_node *anl = an->an_left; an->an_left = anl->an_right; anl->an_right = an; @@ -67,37 +71,37 @@ } else { /* change (A) B (l C r) into (A B l) C (r) */ - register struct aiso_node *anr = an->an_right; + struct aiso_node *anr = an->an_right; an->an_right = anr->an_left; anr->an_left = an; *hook = anr; reset_access(); } -#ifdef AISO_ITERATOR +#ifdef AISO_ITER prev = &an->an_next; -#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ } } new_node->an_left = 0; new_node->an_right = 0; -#ifdef AISO_ITERATOR +#ifdef AISO_ITER new_node->an_next = *prev; *prev = new_node; 
-#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ new_node->an_value = v; *hook = new_node; add_entry(); return 1; } -#ifdef AISO_EXTRACTOR +#ifdef AISO_EXTR int ExtractAiso(AISO_TYPE *vp) { - register struct aiso_node **hook = &root; - register struct aiso_node *an; + struct aiso_node **hook = &root; + struct aiso_node *an; if (!root) return 0; @@ -110,7 +114,7 @@ } else { /* change (l A r) B (C) into (l) A (r B C) */ - register struct aiso_node *anl = an->an_left; + struct aiso_node *anl = an->an_left; an->an_left = anl->an_right; anl->an_right = an; @@ -121,17 +125,17 @@ /* found the first */ *vp = an->an_value; *hook = an->an_right; -#ifdef AISO_ITERATOR +#ifdef AISO_ITER list = an->an_next; -#endif /* AISO_ITERATOR */ - free((char *)an); - remove_entry(); +#endif /* AISO_ITER */ + free((void *)an); + rem_entry(); return 1; } -#endif /* AISO_EXTRACTOR */ +#endif /* AISO_EXTR */ -#ifdef AISO_ITERATOR +#ifdef AISO_ITER void OpenIter(AisoIter *ip) { @@ -140,7 +144,7 @@ int GetAisoItem(AisoIter *ip, AISO_TYPE *vp) { - register struct aiso_node *an = *ip; + struct aiso_node *an = *ip; if (!an) return 0; @@ -154,33 +158,31 @@ *ip = 0; } -#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ -#ifdef AISO_DEBUG - -#include +#ifdef AISO_DEBUG /* requires AISO_FORMAT */ static void -print_inf(int level, char ch, struct aiso_node *an) { - register int i; +pr_inf(int level, char ch, struct aiso_node *an) { + int i; if (!an) return; - print_inf(level+1, '/', an->an_right); + pr_inf(level+1, '/', an->an_right); for (i = 0; i < level; i++) { printf(" "); } printf("%c", ch); printf(AISO_FORMAT, an->an_value); printf("\n"); - print_inf(level+1, '\\', an->an_left); + pr_inf(level+1, '\\', an->an_left); } void -PrintAisoTree(void) -{ - print_inf(0, '-', root); +pr_tree(void) { + pr_inf(0, '-', root); printf("================\n"); } #endif /* AISO_DEBUG */ + diff -Nru similarity-tester-2.21/aiso.spc similarity-tester-2.26/aiso.spc --- similarity-tester-2.21/aiso.spc 2007-08-30 
10:39:53.000000000 +0000 +++ similarity-tester-2.26/aiso.spc 2008-02-05 16:48:42.000000000 +0000 @@ -1,45 +1,51 @@ -/* - Module: Arbitrary-In Sorted-Out (AISO) - Author: dick@cs.vu.nl (Dick Grune @ Vrije Universiteit, Amsterdam) - Version: Tue Aug 23 12:54:22 1988 +/* This file is part of the module Arbitrary-In Sorted-Out (AISO). + Written by Dick Grune, Vrije Universiteit, Amsterdam. + $Id: aiso.spc,v 1.2 2008/02/05 16:48:42 dick Exp $ +*/ +/* Description: - This is the specification of a generic module that builds an - arbitrary-in sorted-out data structure, to be used as a heap, a - priority queue, etc. Elements can be inserted, the first element - extracted and the set scanned at any moment. + This is the specification of a module that builds an arbitrary-in + sorted-out data structure, to be used as a heap, a priority queue, + etc. + Elements can be inserted, the first element extracted and the set + scanned at any moment. + The module is not generic, in that only one copy of it can be + instantiated per program. Instantiation: The module is instantiated as follows. - Create a file M.h for some M, which contains at least: + Create a file X.h, where X is arbitrary, which contains at least: + - a definition of AISO_TYPE, the type of the object to be stored - - a possible definition of AISO_EXTRACTOR; see below - - a possible definition of AISO_ITERATOR; see below + - a possible definition of AISO_EXTR; see below + - a possible definition of AISO_ITER; see below - #include "aiso.spc" - This file M.h is to be included in all files that use the aiso + This file X.h is to be included in all files that use the aiso package. - Create a file M.c which contains at least: - - #include "M.h" + Create a file X.c which contains at least: + + - #include "X.h" - a definition of a routine int AISO_BEFORE(AISO_TYPE v, AISO_TYPE w) which yields non-zero if v is to be sorted before w - #include "aiso.bdy" - This file compiles into the module object. 
+ This file X.c compiles into the module object. Specification: The module always supplies: int InsertAiso(AISO_TYPE value) inserts value in its proper place; fails if out of memory - If AISO_EXTRACTOR is defined, the module will also supply: + If AISO_EXTR is defined, the module will also supply: int ExtractAiso(AISO_TYPE *value) yields the first value in the aiso and removes it; fails if empty - If AISO_ITERATOR is defined, the module also supplies a type AisoIter + If AISO_ITER is defined, the module also supplies a type AisoIter which declares an iterator, i.e., a structure that records a position in the ordered set, plus routines for manipulating the iterator, thus enabling the user to scan the ordered set. The iterator should be @@ -47,19 +53,16 @@ AisoIter iter; and is manipulated by the following commands: - void OpenIter(AisoIter *iter) + OpenIter(AisoIter *iter) opens the iterator for scanning the existing set in order int GetAisoItem(AisoIter *iter, AISO_TYPE *value) yields the next value in the iterator; fails if exhausted - void CloseIter(AisoIter *iter) + CloseIter(AisoIter *iter) closes the iterator - If AISO_DEBUG is defined the module will also supply: - void PrintAisoTree(void) - prints the AISO tree; requires AISO_FORMAT, to be set to - a format suitable to print a value of type AISO_TYPE + For the use of AISO_DEBUG see aiso.bdy. Implementation: The AISO implementation is based on a self-adjusting binary tree. @@ -71,32 +74,26 @@ The iterator is implemented as an additional linear linked list through the tree. This is simpler than and at least as efficient as clever tree-wiring. - -Restrictions: - Due to built-in fixed names, there can only be one AISO per program. 
*/ struct aiso_node { struct aiso_node *an_left; struct aiso_node *an_right; -#ifdef AISO_ITERATOR +#ifdef AISO_ITER struct aiso_node *an_next; -#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ AISO_TYPE an_value; }; extern int InsertAiso(AISO_TYPE value); -#ifdef AISO_EXTRACTOR +#ifdef AISO_EXTR extern int ExtractAiso(AISO_TYPE *value); -#endif /* AISO_EXTRACTOR */ +#endif /* AISO_EXTR */ -#ifdef AISO_ITERATOR +#ifdef AISO_ITER typedef struct aiso_node *AisoIter; extern void OpenIter(AisoIter *iter); extern int GetAisoItem(AisoIter *iter, AISO_TYPE *value); extern void CloseIter(AisoIter *iter); -#endif /* AISO_ITERATOR */ +#endif /* AISO_ITER */ -#ifdef AISO_DEBUG -extern void PrintAisoTree(void); -#endif /* AISO_ITERATOR */ diff -Nru similarity-tester-2.21/algollike.c similarity-tester-2.26/algollike.c --- similarity-tester-2.21/algollike.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/algollike.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: algollike.c,v 2.4 2005/02/20 17:02:59 dick Exp $ + $Id: algollike.c,v 2.5 2008/09/23 09:07:11 dick Exp $ */ /* This module implements the routines InitLanguage, MayBeStartOfRun @@ -87,9 +87,9 @@ Does not check the nesting of the parentheses, but then, sim is syntax-free anyway. 
*/ - register unsigned int mrb_size = 0; /* most recent balancing size */ - register unsigned int pos; - register int i; + unsigned int mrb_size = 0; /* most recent balancing size */ + unsigned int pos; + int i; int balance_count[256]; int n_imbalances; @@ -101,8 +101,8 @@ /* scan str[] and see how far we get */ for (pos = 0; pos < size; pos++) { - register int tkval = TOKEN2int(str[pos]); - register int pp; /* parenthesis position */ + int tkval = TOKEN2int(str[pos]); + int pp; /* parenthesis position */ /* account for openers */ if ((pp = openers[tkval])) { diff -Nru similarity-tester-2.21/ChangeLog similarity-tester-2.26/ChangeLog --- similarity-tester-2.21/ChangeLog 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/ChangeLog 2008-09-23 09:07:10.000000000 +0000 @@ -1,3 +1,30 @@ +2008-09-22 + * added newargs.[ch], to supply file names from standard input, + for those compilers that do not have the @ facility. Implemented + without fixed limits. + +2008-09-21 + * changed default format back to original, and inverted the + -v(erbose) option into a -T(erse) option. + +2008-03-31 Dick Grune + * *.l: the following are not universally recognized; removed. + %option nounput + %option never-interactive + +2008-03-31 + Introduced aiso.* and Malloc.? as imported modules. + +2007-11-21 Carlos Maziero + - output format modified in order to facilitate "grep" filtering + - added option "-v" for a more verbose output + - added option "-tN" to define a threshold %N (only similarities + over N% are shown) + - fixed SEGV on writing to the output file + - the file list can be informed through STDIN (one file per line, + accepts "/" marker); this is useful for compilers that lack the + @ facility + 2007-08-23 Dick Grune LICENSE.txt added. @@ -576,5 +603,5 @@ # This file is part of the software similarity tester SIM. # Written by Dick Grune, Vrije Universiteit, Amsterdam. 
-# $Id: ChangeLog,v 2.12 2007/08/27 09:57:30 dick Exp $ +# $Id: ChangeLog,v 2.15 2008/09/23 09:07:10 dick Exp $ # diff -Nru similarity-tester-2.21/clang.l similarity-tester-2.26/clang.l --- similarity-tester-2.21/clang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/clang.l 2008-09-23 09:07:11.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: clang.l,v 2.9 2007/08/29 09:10:31 dick Exp $ + $Id: clang.l,v 2.11 2008/09/23 09:07:11 dick Exp $ */ /* @@ -68,7 +68,7 @@ static TOKEN idf2token(int hashing) { - register TOKEN tk; + TOKEN tk; tk = idf_in_list(yytext, reserved, sizeof reserved, IDF); if (TOKEN_EQ(tk, IDF) && hashing) { @@ -131,9 +131,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) @@ -191,7 +188,7 @@ } ^#{Layout}*{Idf} { /* a preprocessor line */ - register char *idf = yytext+1; + char *idf = yytext+1; /* skip layout in front of preprocessor identifier */ while (*idf == ' ' || *idf == '\t') { @@ -205,14 +202,14 @@ } {Idf}/"(" { /* identifier in front of ( */ - register TOKEN tk; + TOKEN tk; tk = idf2token(option_set('F')); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); } {Idf} { /* identifier */ - register TOKEN tk; + TOKEN tk; tk = idf2token(0 /* no hashing */); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); diff -Nru similarity-tester-2.21/compare.c similarity-tester-2.26/compare.c --- similarity-tester-2.21/compare.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/compare.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: compare.c,v 2.5 2001/09/28 09:03:47 dick Exp $ + $Id: compare.c,v 2.6 2008/09/23 09:07:11 dick Exp $ */ #include "sim.h" @@ -29,10 +29,10 @@ void Compare(void) { - register int n; + int n; for (n = 0; n < NumberOfNewTexts; n++) { - register int first = + int first = ( option_set('S') ? NumberOfNewTexts + 1 : option_set('s') ? n + 1 : n @@ -40,7 +40,7 @@ if (option_set('e')) { /* from first to NumberOfTexts in steps */ - register int m; + int m; for (m = first; m < NumberOfTexts; m++) { compare1text(n, m, m+1); @@ -61,10 +61,10 @@ int first, /* first text to be compared to */ int limit /* limit text in comparison */ ) { - register unsigned int i_first = Text[first].tx_start; - register unsigned int i_limit = Text[limit-1].tx_limit; - register struct text *txt0 = &Text[n]; - register unsigned int i0 = txt0->tx_start; + unsigned int i_first = Text[first].tx_start; + unsigned int i_limit = Text[limit-1].tx_limit; + struct text *txt0 = &Text[n]; + unsigned int i0 = txt0->tx_start; while ( /* there may still be a useful substring */ i0 + MinRunSize - 1 < txt0->tx_limit @@ -72,7 +72,7 @@ /* see if there really is one */ struct text *txt_best; unsigned int i_best; - register unsigned int size_best = + unsigned int size_best = lcs(txt0, i0, &txt_best, &i_best, i_first, i_limit); if (size_best) { @@ -102,21 +102,21 @@ Writes the position in tbp and ibp and returns the size. Returns 0 if no common substring is found. 
*/ - register struct text *txt1 = txt0; - register unsigned int i1 = i0; - register unsigned int size_best = 0; - register unsigned int txt0limit = txt0->tx_limit; - register unsigned int txt1limit = txt1->tx_limit; + struct text *txt1 = txt0; + unsigned int i1 = i0; + unsigned int size_best = 0; + unsigned int txt0limit = txt0->tx_limit; + unsigned int txt1limit = txt1->tx_limit; while ( /* there is a next opportunity */ (i1 = ForwardReference(i1)) && /* it is still in range */ i1 < i_limit ) { - register unsigned int min_size; - register unsigned int new_size; - register unsigned int j0; - register unsigned int j1; + unsigned int min_size; + unsigned int new_size; + unsigned int j0; + unsigned int j1; if (i1 < i_first) { /* not in range */ continue; @@ -141,7 +141,7 @@ j0 < j1 - min_size + 1 ) { /* there would be room enough */ - register int cnt = min_size; + int cnt = min_size; /* does the text match? */ while ( cnt @@ -156,7 +156,7 @@ /* yes, we are; how long can we make it? */ { - register unsigned int size = min_size; + unsigned int size = min_size; j0 = i0 + min_size; j1 = i1 + min_size; diff -Nru similarity-tester-2.21/debian/changelog similarity-tester-2.26/debian/changelog --- similarity-tester-2.21/debian/changelog 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/changelog 2010-10-15 16:50:30.000000000 +0000 @@ -1,3 +1,19 @@ +similarity-tester (2.26-2) unstable; urgency=low + + * Made build process use correct directory settings. (Closes: #587710) + * Bumped Standard-Version to 3.9.0, no changes needed. + + -- Michael Meskes Thu, 01 Jul 2010 08:38:43 +0200 + +similarity-tester (2.26-1) unstable; urgency=low + + * New upstream version. + * Bumped Standard-Version to 3.8.4, no changes needed. + * Converted to debhelper 7. + * Added watch and source/format files. + + -- Michael Meskes Fri, 25 Jun 2010 10:27:50 +0200 + similarity-tester (2.21-5) unstable; urgency=low * Recompiled against up-to-date version of the build chain. 
Old binary diff -Nru similarity-tester-2.21/debian/compat similarity-tester-2.26/debian/compat --- similarity-tester-2.21/debian/compat 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/compat 2010-10-15 16:50:30.000000000 +0000 @@ -1 +1 @@ -5 +7 diff -Nru similarity-tester-2.21/debian/control similarity-tester-2.26/debian/control --- similarity-tester-2.21/debian/control 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/control 2010-10-15 16:50:30.000000000 +0000 @@ -2,12 +2,12 @@ Section: devel Priority: extra Maintainer: Michael Meskes -Build-Depends: debhelper (>= 5), flex -Standards-Version: 3.8.0 +Build-Depends: debhelper (>= 7.0.50~), flex +Standards-Version: 3.9.0 Package: similarity-tester Architecture: any -Depends: ${shlibs:Depends} +Depends: ${shlibs:Depends}, ${misc:Depends} Description: Find lexical similarities between files Find lexical similarities in texts in C, Java, Pascal, Modula-2, Lisp, Miranda and natural language. This can be used to detect potentially duplicated diff -Nru similarity-tester-2.21/debian/copyright similarity-tester-2.26/debian/copyright --- similarity-tester-2.21/debian/copyright 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/copyright 2010-10-15 16:50:30.000000000 +0000 @@ -44,6 +44,6 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -The Debian packaging is (C) 2007, Michael Meskes and +The Debian packaging is Copyright (C) 2007-2010, Michael Meskes and is licensed under the GPL, see `/usr/share/common-licenses/GPL'. 
diff -Nru similarity-tester-2.21/debian/docs similarity-tester-2.26/debian/docs --- similarity-tester-2.21/debian/docs 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/docs 2010-10-15 16:50:30.000000000 +0000 @@ -1,3 +1,3 @@ -README.1st +README.1ST READ_ME TechnReport diff -Nru similarity-tester-2.21/debian/links similarity-tester-2.26/debian/links --- similarity-tester-2.21/debian/links 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/debian/links 2010-10-15 16:50:30.000000000 +0000 @@ -0,0 +1,7 @@ +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_mira.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_java.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_lisp.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_m2.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_c.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_text.1.gz +usr/share/man/man1/similarity-tester.1.gz /usr/share/man/man1/sim_pasc.1.gz diff -Nru similarity-tester-2.21/debian/rules similarity-tester-2.26/debian/rules --- similarity-tester-2.21/debian/rules 2010-10-15 16:50:30.000000000 +0000 +++ similarity-tester-2.26/debian/rules 2010-10-15 16:50:30.000000000 +0000 @@ -1,84 +1,18 @@ #!/usr/bin/make -f -# -*- makefile -*- -# Sample debian/rules that uses debhelper. -# This file was originally written by Joey Hess and Craig Small. -# As a special exception, when this file is copied by dh-make into a -# dh-make output file, you may use that output file without restriction. -# This special exception was added by Craig Small in version 0.37 of dh-make. - -# Uncomment this to turn on verbose mode. -#export DH_VERBOSE=1 - -CFLAGS = -Wall -g - -ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) - CFLAGS += -O0 -else - CFLAGS += -O2 -endif - -configure: configure-stamp -configure-stamp: - dh_testdir - # Add here commands to configure the package. 
- - touch configure-stamp - +%: + dh $@ build: build-stamp +build-stamp: + dh build + touch build-stamp + +binary-arch: build +binary: binary-arch binary-indep + +override_dh_install: + dh_install + mv $(CURDIR)/debian/similarity-tester/usr/share/man/man1/sim.1 $(CURDIR)/debian/similarity-tester/usr/share/man/man1/similarity-tester.1 -build-stamp: configure-stamp - dh_testdir - - # Add here commands to compile the package. - $(MAKE) all - - touch $@ - -clean: - dh_testdir - dh_testroot - rm -f build-stamp configure-stamp - - # Add here commands to clean up after the build process. - [ ! -f Makefile ] || $(MAKE) clean - - # remove sim.txt because it is deleted during build process anyway - -rm sim.txt - - dh_clean - -install: build - dh_testdir - dh_testroot - dh_clean -k - dh_installdirs - - # Add here commands to install the package into debian/similarity-tester. - $(MAKE) BINDIR=$(CURDIR)/debian/similarity-tester/usr/bin MANDIR=$(CURDIR)/debian/similarity-tester/usr/share/man/man1 install_all - -mv $(CURDIR)/debian/similarity-tester/usr/share/man/man1/sim.1 $(CURDIR)/debian/similarity-tester/usr/share/man/man1/similarity-tester.1 - (cd $(CURDIR)/debian/similarity-tester/usr/share/man/man1; ls ../../../bin | while read i; do ln -s similarity-tester.1.gz $$i.1.gz; done) - -# Build architecture-independent files here. -binary-indep: build install -# We have nothing to do by default. - -# Build architecture-dependent files here. 
-binary-arch: build install - dh_testdir - dh_testroot - dh_installchangelogs ChangeLog - dh_installdocs - dh_installexamples - dh_link - dh_strip - dh_compress - dh_fixperms - dh_installdeb - dh_shlibdeps - dh_gencontrol - dh_md5sums - dh_builddeb - -binary: binary-indep binary-arch -.PHONY: build clean binary-indep binary-arch binary install configure +override_dh_auto_install: + dh_auto_install -- BINDIR=$(CURDIR)/debian/similarity-tester/usr/bin MANDIR=$(CURDIR)/debian/similarity-tester/usr/share/man/man1 COPY=install diff -Nru similarity-tester-2.21/debian/source/format similarity-tester-2.26/debian/source/format --- similarity-tester-2.21/debian/source/format 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/debian/source/format 2010-10-15 16:50:30.000000000 +0000 @@ -0,0 +1 @@ +1.0 diff -Nru similarity-tester-2.21/debian/watch similarity-tester-2.26/debian/watch --- similarity-tester-2.21/debian/watch 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/debian/watch 2010-10-15 16:50:30.000000000 +0000 @@ -0,0 +1,4 @@ +version=3 + +opts=uversionmangle=s/_/./ \ + ftp://ftp.cs.vu.nl/pub/dick/similarity_tester/sim_(.*).zip diff -Nru similarity-tester-2.21/hash.c similarity-tester-2.26/hash.c --- similarity-tester-2.21/hash.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/hash.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: hash.c,v 2.8 2005/02/20 17:03:00 dick Exp $ + $Id: hash.c,v 2.10 2008/09/23 09:07:11 dick Exp $ */ /* Text is compared by comparing every substring to all substrings @@ -32,11 +32,11 @@ */ #include -#include #include "system.par" #include "debug.par" #include "sim.h" +#include "Malloc.h" #include "error.h" #include "language.h" #include "token.h" @@ -45,7 +45,7 @@ #include "hash.h" /* MAIN ENTRIES */ -static unsigned int *forward_references; /* to be filled by malloc() */ +static unsigned int *forward_references; /* to be filled by Malloc() */ static int n_forward_references; static void make_forward_references_hash1(void); @@ -63,12 +63,9 @@ n_forward_references = TextLength(); forward_references = - (unsigned int *)calloc( + (unsigned int *)Calloc( n_forward_references, sizeof (unsigned int) ); - if (!forward_references) { - fatal("out of memory"); - } make_forward_references_hash1(); make_forward_references_hash2(); #ifdef DB_FORW_REF @@ -86,7 +83,7 @@ void FreeForwardReferences(void) { - free((char *)forward_references); + Free((char *)forward_references); } /* HASHING */ @@ -143,7 +140,7 @@ static void init_hash_table(void) { - register int n; + int n; /* find the ideal hash table size */ n = 0; @@ -157,7 +154,7 @@ while (!last_index && n >= 0) { hash_table_size = prime[n]; last_index = (unsigned int *) - calloc(hash_table_size, sizeof (unsigned int)); + TryCalloc(hash_table_size, sizeof (unsigned int)); n--; } if (!last_index) { @@ -178,14 +175,14 @@ static void make_forward_references_hash1(void) { - register int n; + int n; init_hash_table(); /* set up the forward references using the last_index hash table */ for (n = 0; n < NumberOfTexts; n++) { - register struct text *txt = &Text[n]; - register unsigned int j; + struct text *txt = &Text[n]; + unsigned int j; for ( /* all pos'ns in txt except the last MinRunSize-1 */ j = txt->tx_start; /* >= 1 */ @@ -193,7 +190,7 @@ j++ ) { if (MayBeStartOfRun(TokenArray[j])) { - register int h = 
hash1(&TokenArray[j]); + int h = hash1(&TokenArray[j]); if (last_index[h]) { forward_references[last_index[h]] = j; @@ -202,7 +199,7 @@ } } } - free((char *)last_index); + Free((char *)last_index); #ifdef DB_FORW_REF db_forward_references("first hashing"); @@ -215,8 +212,8 @@ tokens starting at p; caller guarantees that there are at least MinRunSize tokens. */ - register int32 h_val; - register int n; + int32 h_val; + int n; h_val = 0; for (n = 0; n < N_SAMPLES; n++) { @@ -237,7 +234,7 @@ static void make_forward_references_hash2(void) { - register unsigned int i; + unsigned int i; /* do a second hash only if the original hash table was reduced */ /* Meanwhile, the quality of the primary hashing is so bad @@ -250,8 +247,8 @@ wrong copy. */ for (i = 0; i+MinRunSize < TextLength(); i++) { - register unsigned int j = i; - register int h2 = hash2(&TokenArray[i]); + unsigned int j = i; + int h2 = hash2(&TokenArray[i]); /* Find the first token sequence in the chain with same secondary hash code. 
@@ -286,13 +283,13 @@ static void make_forward_references_hash3(void) { - register unsigned int i; + unsigned int i; /* do a third hash to check up on the previous two */ /* this time we use a genuine compare */ for (i = 0; i+MinRunSize < TextLength(); i++) { - register unsigned int j = i; + unsigned int j = i; while ( /* there is still a forward reference */ (j = forward_references[j]) @@ -321,9 +318,9 @@ static int db_frw_chain(int n, char *crossed_out) { - register int chain_len = -1; + int chain_len = -1; /* if there are two values, the chain length is still 1 */ - register int fw; + int fw; for (fw = n; fw; fw = forward_references[fw]) { if (crossed_out[fw]) { @@ -350,10 +347,7 @@ fprintf(DebugFile, "hash_table_size = %u\n", hash_table_size); fprintf(DebugFile, "N_SAMPLES = %d\n", N_SAMPLES); - crossed_out = (char *)calloc(TextLength(), sizeof (char)); - if (!crossed_out) { - fatal(">>>> no room for db_forward_references debug table <<<<\n"); - } + crossed_out = (char *)Calloc(TextLength(), sizeof (char)); /* Each forward_references[n] starts in principle a new chain, and these chains never touch each other. @@ -375,7 +369,7 @@ tot_frwc_len += db_frw_chain(n, crossed_out); } } - free((char *)crossed_out); + Free((char *)crossed_out); fprintf(DebugFile, "text length = %u, # forward chains = %d, total frw chain length = %d\n\n", diff -Nru similarity-tester-2.21/idf.c similarity-tester-2.26/idf.c --- similarity-tester-2.21/idf.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/idf.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: idf.c,v 2.8 2005/02/20 17:03:00 dick Exp $ + $Id: idf.c,v 2.9 2008/09/23 09:07:11 dick Exp $ */ #include @@ -16,11 +16,11 @@ unsigned int listsize, TOKEN dflt ) { - register int first = 0; - register int last = (listsize / sizeof (struct idf)) - 1; + int first = 0; + int last = (listsize / sizeof (struct idf)) - 1; while (first < last) { - register int middle = (first + last) / 2; + int middle = (first + last) / 2; if (strcmp(str, list[middle].id_tag) > 0) { first = middle + 1; @@ -37,7 +37,7 @@ TOKEN idf_hashed(const char *str) { - register int32 h = 0; + int32 h = 0; /* let's be careful about ranges; if done wrong it's hard to debug */ while (*str) { diff -Nru similarity-tester-2.21/javalang.l similarity-tester-2.26/javalang.l --- similarity-tester-2.21/javalang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/javalang.l 2008-09-23 09:07:11.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: javalang.l,v 1.4 2007/08/29 09:10:32 dick Exp $ + $Id: javalang.l,v 1.6 2008/09/23 09:07:11 dick Exp $ */ /* @@ -71,7 +71,7 @@ static TOKEN idf2token(int hashing) { - register TOKEN tk; + TOKEN tk; tk = idf_in_list(yytext, reserved, sizeof reserved, IDF); if (TOKEN_EQ(tk, IDF) && hashing) { @@ -151,9 +151,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) @@ -223,14 +220,14 @@ } {Idf}/"(" { /* identifier in front of ( */ - register TOKEN tk; + TOKEN tk; tk = idf2token(option_set('F')); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); } {Idf} { /* identifier */ - register TOKEN tk; + TOKEN tk; tk = idf2token(0 /* no hashing */); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); diff -Nru similarity-tester-2.21/lisplang.l similarity-tester-2.26/lisplang.l --- similarity-tester-2.21/lisplang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/lisplang.l 2008-03-31 10:35:16.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: lisplang.l,v 2.9 2007/08/29 09:10:33 dick Exp $ + $Id: lisplang.l,v 2.10 2008/03/31 10:35:16 dick Exp $ */ /* @@ -62,9 +62,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) diff -Nru similarity-tester-2.21/m2lang.l similarity-tester-2.26/m2lang.l --- similarity-tester-2.21/m2lang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/m2lang.l 2008-09-23 09:07:11.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: m2lang.l,v 2.9 2007/08/29 09:10:33 dick Exp $ + $Id: m2lang.l,v 2.11 2008/09/23 09:07:11 dick Exp $ */ /* @@ -120,7 +120,7 @@ static TOKEN idf2token(int hashing) { - register TOKEN tk; + TOKEN tk; /* the token can be on two lists, reserved and standard */ tk = idf_in_list(yytext, reserved, sizeof reserved, IDF); @@ -200,9 +200,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) @@ -266,14 +263,14 @@ } {Idf}/"(" { /* identifier in front of ( */ - register TOKEN tk; + TOKEN tk; tk = idf2token(option_set('F')); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); } {Idf} { /* identifier */ - register TOKEN tk; + TOKEN tk; tk = idf2token(0 /* no hashing */); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); diff -Nru similarity-tester-2.21/Makefile similarity-tester-2.26/Makefile --- similarity-tester-2.21/Makefile 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/Makefile 2008-09-23 09:07:10.000000000 +0000 @@ -1,6 +1,6 @@ # This file is part of the software similarity tester SIM. # Written by Dick Grune, Vrije Universiteit, Amsterdam. -# $Id: Makefile,v 2.17 2007/08/27 09:57:31 dick Exp $ +# $Id: Makefile,v 2.22 2008/09/23 09:07:10 dick Exp $ # @@ -28,36 +28,60 @@ @echo '' @echo 'sim_exes: create DOS executables in MSDOS; set date; make clean' @echo 'simexe.zip: create DOS executables package in UNIX' - @echo 'VERSION=2.X install_ftp: install in the FTP directory in UNIX' -VERSION = 2_21 # # When you modify any of the following flags, do 'make clean' # -include sysidf.mk - - -# Flags -OPTLEVEL = -O4# # - -CFLAGS = $(SYSTEM) $(OPTLEVEL) $(TESTTOKEN) -LFLAGS = # # loader flags +# System dependencies +# =============== including ../lib/sysidf.mk here +# This file is part of the auxiliary libraries. +# Written by Dick Grune, Vrije Universiteit, Amsterdam. 
+# $Id: sysidf.mk,v 1.4 2008/02/20 20:46:47 dick Exp $ +# -TESTTOKEN = #-DTESTTOKEN# # define to test the token type +# Locations +DIR = /home/dick +BINDIR = $(DIR)/bin.`arch` +MAN1DIR = $(DIR)/man/man1 +DICTDIR = $(DIR)/lib + +# Which Operating System + +SYSTEM = UNIX +EXE = # +LN = ln +ZIP = zip +# =============== end of ../lib/sysidf.mk + +# Compiler Options +C_OPT = -O4# # C optimization level +LIBFLAGS = # # library flags +LINTFLAGS = -h# # lint flags + +# Compilers: GNU + DJGPP v2 +CC = gcc +CFLAGS = -pedantic -D$(SYSTEM) $(C_OPT) $(TESTTOKEN) +LINT = lint -ansi -D$(SYSTEM) # T E S T P A R A M E T E R S # Parameters for two simple test runs, sim.res and stream.res: TEST_LANG = c# # to test sim_X for language X -TEST_OPT = -f -r 20# # options to sim_X -TEST_INP = pass3.c# # guinea pig input +# percentage test TEST_OPT = -p# # options to sim_X TEST_INP = *.l# # guinea pig input -TEST_INP = simple*# # guinea pig input + +# -i option test +TEST_OPT = -f -r 20 -i stream.res - wc stream.res $(TEST_INP) + ./sim_$(TEST_LANG)$(EXE) -- $(TEST_INP) >$@ + wc $@ $(TEST_INP) + +percentages.res:sim_$(TEST_LANG)$(EXE) $(TEST_INP) + ./sim_$(TEST_LANG)$(EXE) -p $(TEST_INP) $(TEST_INP) TEST_GRB = stream.res @@ -313,7 +339,7 @@ ./sim_c$(EXE) -fr 20 $(SRC) # Lint -lint: $(SIM_C_CFS) +lint: $(SIM_SRC) $(LINT) $(LINTFLAGS) $(SIM_C_CFS) | grep -v yy lint.all: $(SIM_C_CFS) $(SIM_JAVA_CFS) $(SIM_PASC_CFS) $(SIM_M2_CFS) \ @@ -368,17 +394,6 @@ $(MANDIR)/sim.1: sim.1 $(COPY) sim.1 $@ -FTPFILES = README.1st READ_ME LICENSE.txt TechnReport - -install_ftp: $(FTPFILES) simsrc.shr simexe.zip sim.pdf - cp -p simsrc.shr sim_$(VERSION).shar - cp -p simexe.zip sim_$(VERSION).zip - cp -p $(FTPFILES) sim_$(VERSION).shar sim_$(VERSION).zip \ - README.1st READ.ME READ_ME sim.pdf \ - $(FTPDIR)/. - rm -f sim_$(VERSION).shar sim_$(VERSION).zip - ls -l $(FTPDIR)/. 
- simsrc.shr: $(ALL_FLS) shar $(ALL_FLS) >$@ @@ -451,6 +466,7 @@ percentages.o: runs.h percentages.o: aiso.spc percentages.o: error.h +percentages.o: Malloc.h percentages.o: percentages.h percentages.o: sortlist.bdy sim.o: settings.par @@ -564,3 +580,9 @@ textlang.o: idf.h textlang.o: lex.h textlang.o: lang.h +text.o: Malloc.h +tokenarray.o: Malloc.h +percentages.o: Malloc.h +hash.o: Malloc.h +add_run.o: Malloc.h +pass3.o: Malloc.h diff -Nru similarity-tester-2.21/Malloc.c similarity-tester-2.26/Malloc.c --- similarity-tester-2.21/Malloc.c 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/Malloc.c 2008-04-24 11:14:14.000000000 +0000 @@ -0,0 +1,340 @@ +/* This file is part of the memory management and leak detector MALLOC. + Written by Dick Grune, Vrije Universiteit, Amsterdam. + $Id: Malloc.c,v 1.2 2008/04/03 12:24:59 dick Exp $ +*/ + +#include +#include +#include +#include + +#include "Malloc.h" +#undef new /* don't call Malloc in Malloc.c */ +#define my_new(type) ((type *)malloc(sizeof (type))) + +static void +fprintloc(FILE *f, const char *fname, int l_nmb) { + fprintf(f, "\"%s\", line %d: ", fname, l_nmb); +} + +static void +out_of_memory(const char *fname, int l_nmb, size_t size) { + fprintloc(stderr, fname, l_nmb); + fprintf(stderr, "Out of memory, requested size = %d\n", size); + exit(1); +} + +#if defined MEMLEAK || defined MEMCLOBBER +/* Both need almost the same information: MEMLEAK obviously needs a list of + all blocks still allocated, but MEMCLOBBER needs the same list to find + the size of a block given to Free(), in order to clobber it. + MEMCLOBBER does not need total, balance and max, but finecombing them out + would be too much. 
+*/ + +static size_t total = 0; +static size_t balance = 0; +static size_t max = 0; + +struct record { + struct record *next; + const char *addr; + size_t size; + const char *fname; + int l_nmb; +}; + +#define HASH_SIZE 16381 /* largest prime under 2^16 */ +static struct record *record_hash[HASH_SIZE]; +#define chain_start(x) record_hash[((unsigned int)(x)%HASH_SIZE)] + +static void +record_alloc(char *addr, size_t size, const char *fname, int l_nmb) { + struct record *new; + struct record **r_hook = &chain_start(addr); + + if (addr == 0) return; + + new = my_new(struct record); + new->addr = addr; + new->size = size; + new->fname = fname; /* no need to copy fname */ + new->l_nmb = l_nmb; + new->next = *r_hook; + *r_hook = new; + + total += size; + balance += size; + if (balance > max) { + max = balance; + } +} + + +static struct record ** +record_pointer_for_address(const char *addr) { + struct record **rp = &chain_start(addr); + + while (*rp) { + if ((*rp)->addr == addr) break; + rp = &(*rp)->next; + } + + return rp; +} + +static size_t +record_free(char *addr) { + struct record **oldp = record_pointer_for_address(addr); + struct record *old = *oldp; + + if (old == 0) { + return 0; + } + + *oldp = old->next; + + balance -= old->size; + + return old->size; +} + +#endif /* defined MEMLEAK || defined MEMCLOBBER */ + +#ifdef MEMCLOBBER +static void +clobber(char *s, size_t size) { + unsigned char byte; + size_t i; + + byte = 0xaa; + for (i = 0; i < size; i++) { + s[i] = byte; + byte ^= 0xff; + } +} +#endif /* MEMCLOBBER */ + +#ifdef MEMLEAK + +struct entry { + struct entry *next; + const char *fname; + int l_nmb; + int n_blocks; + int var_size; /* all blocks have the same size or not */ + int size; /* !var_size: the one size; var_size: sum of sizes */ +}; + +static struct entry * +compacted_leaks(void) { + struct entry *res = 0; + int i; + + for (i = 0; i < HASH_SIZE; i++) { + struct record *r = record_hash[i]; + + while (r) { + struct entry *e = res; + + /* try to 
find an entry for this location */ + while (e) { + if ( e->fname == r->fname + && e->l_nmb == r->l_nmb + ) break; + e = e->next; + } + + if (e) { /* update the entry */ + if (e->var_size) { + e->size += r->size; + } + else if (e->size != r->size) { + /* switch to var_size */ + e->var_size = 1; + e->size = + e->n_blocks*e->size + r->size; + } + e->n_blocks++; + } + else { /* create a new entry */ + e = my_new(struct entry); + e->fname = r->fname; + e->l_nmb = r->l_nmb; + e->n_blocks = 1; + e->var_size = 0; + e->size = r->size; + + e->next = res; + res = e; + } + + r = r->next; + } + } + + return res; +} + +static void +report_actual_leaks(FILE *f) { + struct entry *e = compacted_leaks(); + + fprintf(f, "\nMemory leaks:\n"); + + while (e) { + fprintloc(f, e->fname, e->l_nmb); + fprintf(f, "left allocated: %d block%s of size ", + e->n_blocks, (e->n_blocks == 1 ? "" : "s") + ); + if (e->var_size) { + /* e->size is the sum of the sizes */ + fprintf(f, "%d on average", + (e->size+e->n_blocks/2) / e->n_blocks + ); + } + else { + /* e->size is the single size */ + fprintf(f, "%d", e->size); + } + fprintf(f, "\n"); + + e = e->next; + } + fprintf(f, "\n"); +} + +void +ReportMemoryLeaks(FILE *f) { + if (balance == 0) { + fprintf(f, "\nNo memory leaks\n\n"); + } + else { + report_actual_leaks(f); + } + + fprintf(f, "total memory allocated= %u", total); + fprintf(f, ", maximum allocated = %u", max); + fprintf(f, ", garbage left = %u", balance); + fprintf(f, "\n"); +} + +#else /* no MEMLEAK */ + +/*ARGSUSED*/ +void +ReportMemoryLeaks(FILE *f) { +} + +#endif /* MEMLEAK */ + +void * +_leak_malloc(int chk, size_t size, const char *fname, int l_nmb) { + void *res = malloc(size); + + if (chk && res == 0) { + out_of_memory(fname, l_nmb, size); + /*NOTREACHED*/ + } + +#if defined MEMLEAK || defined MEMCLOBBER + record_alloc(res, size, fname, l_nmb); + +#ifdef MEMCLOBBER + clobber((char *)res, size); +#endif +#endif + + return res; +} + +void * +_leak_calloc(int chk, int n, size_t size, 
const char *fname, int l_nmb) { + void *res = calloc(n, size); + + if (chk && res == 0) { + out_of_memory(fname, l_nmb, n*size); + /*NOTREACHED*/ + } + +#if defined MEMLEAK || defined MEMCLOBBER + record_alloc(res, n*size, fname, l_nmb); +#endif + + return res; +} + +void * +_leak_realloc(int chk, void *addr, size_t size, const char *fname, int l_nmb) { + void *res; +#if defined MEMLEAK || defined MEMCLOBBER + size_t old_size = record_free(addr); + + /* we report first, because the realloc() below may cause a crash */ + if (old_size == 0) { + fprintloc(stderr, fname, l_nmb); + fprintf(stderr, ">>>> unallocated block reallocated <<<<\n"); + } +#endif + + res = realloc(addr, size); + if (chk && res == 0) { + out_of_memory(fname, l_nmb, size); + /*NOTREACHED*/ + } + +#if defined MEMLEAK || defined MEMCLOBBER + record_alloc(res, size, fname, l_nmb); +#endif + +#ifdef MEMCLOBBER + if (old_size > 0 && size > old_size) { + clobber(((char *)res)+old_size, size-old_size); + } +#endif + + return res; +} + +/* ARGSUSED */ +void +_leak_free(void *addr, const char *fname, int l_nmb) { +#if defined MEMLEAK || defined MEMCLOBBER + size_t old_size = record_free(addr); + + /* we report first, because the free() below may cause a crash */ + if (old_size == 0) { + fprintloc(stderr, fname, l_nmb); + fprintf(stderr, ">>>> unallocated block freed "); + fprintf(stderr, "or multiple free of allocated block <<<<\n"); + } + else { +#ifdef MEMCLOBBER + clobber((char *)addr, old_size); +#endif + } +#endif + free(addr); +} + +char * +_new_string(const char *s, const char *fname, int l_nmb) { + return strcpy((char *)(_leak_malloc(1, strlen(s)+1, fname, l_nmb)), s); +} + +#ifdef lint +static void +satisfy_lint(void *x) { + void *v; + + v = _leak_malloc(0, 0, 0, 0); + v = _leak_calloc(0, 0, 0, 0, 0); + v = _leak_realloc(0, 0, 0, 0, 0); + _leak_free(x, 0, 0); + + ReportMemoryLeaks(0); + + v = _new_string(0, 0, 0); + satisfy_lint(v); +} +#endif diff -Nru similarity-tester-2.21/Malloc.h 
similarity-tester-2.26/Malloc.h --- similarity-tester-2.21/Malloc.h 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/Malloc.h 2008-04-24 11:18:13.000000000 +0000 @@ -0,0 +1,89 @@ +/* This file is part of the memory management and leak detector MALLOC. + Written by Dick Grune, Vrije Universiteit, Amsterdam. + $Id: Malloc.h,v 1.2 2008/04/21 14:35:43 dick Exp $ +*/ + +#include + +/***** +The files Malloc.[ch] provide several functionalities: + +- checking for "out of memory": to simplify programming +- allocating memory using new(type) " " " " +- detecting memory leaks: to obtain cleaner programs +- clobbering freshly allocated memory: to obtain safer programs + +The module defines several sets of routines: + +1. void *Malloc(size_t s) + void *Calloc(int n, size_t s) + void *Realloc(void *p, size_t s) + void *Free(void *p) + +2. void *TryMalloc(size_t s) + void *TryCalloc(int n, size_t s) + void *TryRealloc(void *p, size_t s) + +3. T *new(T) + char *new_string(const char *s) + +4. void ReportMemoryLeaks(FILE *f) + +* The members of the first set act like their Unix counterparts, except that + they never return NULL; upon out-of-memory an error message is given on + standard error, showing the file name and the line number of the call. Since + in almost all cases there is nothing more intelligent to do, this is almost + always adequate, and makes for simpler and safer programming. + + In those rare cases that the program *can* continue when out of memory, the + routines in the second set can be used; they act exactly like their Unix + counterparts. + + Note that automatic out-of-memory detection is active, regardless of the + -DMEM... flags described below. + +* A call of new(T), with T any type, yields a pointer of type T* to a block + of type T, allocated using Malloc(). + A call of new_string(s), with s a string, yields a pointer to a copy of s, + allocated using Malloc(); it is equivalent to strdup() except that it uses + Malloc(). 
+ +* Normally, a call of ReportMemoryLeaks() does nothing, but when Malloc.c is + compiled with -DMEMLEAK, it produces a compacted list of allocated but not + yet freed blocks on the stream f, with information about where they were + allocated. + This is useful to get insight into memory use and abuse. + +* When Malloc.c is compiled with -DMEMCLOBBER, it clobbers all newly allocated + memory from Malloc() and Realloc() just after allocation, and all freed + memory just before freeing it. Clobbering means overwriting it with the bit + pattern 1010101001010101. This is done in the hope that improper use of + memory will cause some evident error somewhere. + +* Compiled with any of the -DMEM... flags, Malloc will also produce run-time + error messages for multiple Free()s of the same block, and Realloc()s on + not-allocated blocks. It then allows the program to continue. + +* The system consumes hardly any time and is fast enough to be kept active all + the time. +*****/ + +#define Malloc(s) (_leak_malloc(1, (size_t)(s), __FILE__, __LINE__)) +#define Calloc(n,s) (_leak_calloc(1, (n), (size_t)(s), __FILE__, __LINE__)) +#define Realloc(p,s) (_leak_realloc(1, (void *)(p), (size_t)(s), __FILE__, __LINE__)) +#define TryMalloc(s) (_leak_malloc(0, (size_t)(s), __FILE__, __LINE__)) +#define TryCalloc(n,s) (_leak_calloc(0, (n), (size_t)(s), __FILE__, __LINE__)) +#define TryRealloc(p,s) (_leak_realloc(0, (void *)(p), (size_t)(s), __FILE__, __LINE__)) +#define Free(p) (_leak_free((void *)(p), __FILE__, __LINE__)) + +#define new(type) ((type *)Malloc(sizeof (type))) +#define new_string(s) (_new_string((s), __FILE__, __LINE__)) + +extern void *_leak_malloc(int chk, size_t size, const char *fname, int l_nmb); +extern void *_leak_calloc(int chk, int n, size_t size, const char *fname, int l_nmb); +extern void *_leak_realloc(int chk, void *addr, size_t size, const char *fname, int l_nmb); +extern void _leak_free(void *addr, const char *fname, int l_nmb); + +extern void 
ReportMemoryLeaks(FILE *f); + +extern char *_new_string(const char *s, const char *fname, int l_nmb); diff -Nru similarity-tester-2.21/miralang.l similarity-tester-2.26/miralang.l --- similarity-tester-2.21/miralang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/miralang.l 2008-03-31 10:35:16.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: miralang.l,v 1.3 2007/08/29 09:10:34 dick Exp $ + $Id: miralang.l,v 1.4 2008/03/31 10:35:16 dick Exp $ */ /* @@ -64,9 +64,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) diff -Nru similarity-tester-2.21/newargs.c similarity-tester-2.26/newargs.c --- similarity-tester-2.21/newargs.c 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/newargs.c 2008-09-23 09:07:11.000000000 +0000 @@ -0,0 +1,99 @@ +/* This file is part of the software similarity tester SIM. + Written by Dick Grune, Vrije Universiteit, Amsterdam. 
+ $Id: newargs.c,v 2.1 2008/09/23 09:07:11 dick Exp $ +*/ + +#include +#include + +#include "Malloc.h" +#include "error.h" + +#define ARGS_INCR 1024 + +static char * +std_input(void) { + /* in the form {name [\n|space|tab|...]}* \0 */ + char *args = 0; + int args_free = 0; + int args_size = 0; + + /* get all of standard input */ + int ch; + int last_char = ' '; + + do { + ch = getchar(); + /* omit duplicate layout */ + if (ch != EOF && isspace(ch) && isspace(last_char)) continue; + + /* add ch to args */ + if (args_free == args_size) { + /* allocated array is full; increase its size */ + int new_size = args_size + ARGS_INCR; + char *new_args = (char *)Realloc( + (char *)args, sizeof (char *) * new_size + ); + args = new_args, args_size = new_size; + } + + /* now we are sure there is room enough */ + args[args_free++] = ch; + + last_char = ch; + } while (ch != EOF); + /* now make sure the result conforms to the form above */ + if (args_free == 1 || isspace(args[args_free-2])) { + /* OK */ + } + else fatal("standard input not terminated with newline"); + args[args_free-1] = '\0'; /* overwrites the EOF */ + + return args; +} + +static int +n_names(const char *s) { + int cnt = 0; + + while (*s) { + if (isspace(*s)) { + cnt++; + } + s++; + } + return cnt; +} + +static const char ** +new_argv(int argc, char *args) { + /* converts the layout in args to \0, and constructs an argv list */ + const char **argv = (const char **)Malloc((argc+1) * sizeof (char *)); + char *p = args; + char last_char = ' '; + + argc = 0; + while (*p) { + if (isspace(last_char)) { + /* here a new name starts */ + argv[argc++] = p; + } + last_char = *p; + if (isspace(*p)) { + *p = '\0'; + } + p++; + } + argv[argc] = 0; + + return argv; +} + +void +get_new_args(int *argcp, const char **argvp[]) { + char *args = std_input(); + int argc = n_names(args); + const char **argv = new_argv(argc, args); + + *argcp = argc, *argvp = argv; +} diff -Nru similarity-tester-2.21/newargs.h 
similarity-tester-2.26/newargs.h --- similarity-tester-2.21/newargs.h 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/newargs.h 2008-09-23 09:07:11.000000000 +0000 @@ -0,0 +1,6 @@ +/* This file is part of the software similarity tester SIM. + Written by Dick Grune, Vrije Universiteit, Amsterdam. + $Id: newargs.h,v 2.1 2008/09/23 09:07:11 dick Exp $ +*/ + +extern void get_new_args(int *argcp, const char **argvp[]); diff -Nru similarity-tester-2.21/option-i.inp similarity-tester-2.26/option-i.inp --- similarity-tester-2.21/option-i.inp 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/option-i.inp 2008-09-22 18:55:46.000000000 +0000 @@ -0,0 +1,2 @@ +pass1.c + pass2.c pass3.c diff -Nru similarity-tester-2.21/options.c similarity-tester-2.26/options.c --- similarity-tester-2.21/options.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/options.c 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: options.c,v 1.3 2001/11/13 12:55:53 dick Exp $ + $Id: options.c,v 1.5 2008/09/23 09:07:11 dick Exp $ */ #include @@ -13,17 +13,20 @@ static void bad_option( const char *progname, const struct option *optlist, char *msg, int c ); -static int opt_value(const struct option *op, const char *arg, char *argv[]); +static int opt_value( + const char *progname, const struct option *op, + const char *arg, const char *argv[] +); static int do_arg( const char *progname, const struct option *optlist, - const char *arg, char *argv[] + const char *arg, const char *argv[] ); int do_options( const char *progname, const struct option *optlist, - int argc, char *argv[] + int argc, const char *argv[] ) { int skips = 0; @@ -37,35 +40,30 @@ } int -option_set(char ch) { - return options[(int)ch]; +option_set(int ch) { + return options[ch]; } static int do_arg( const char *progname, const struct option *optlist, - const char *arg, char *argv[] + const char *arg, const char *argv[] ) { int consumed = 0; while (*arg) { /* treat argument character */ - register char opc = *arg++; - register const struct option *op; + char opc = *arg++; + const struct option *op; for (op = optlist; op->op_char; op++) { /* for every allowed option */ if (opc == op->op_char) { options[(int)opc]++; if (op->op_indicator != ' ') { - consumed = opt_value(op, arg, argv); - if (consumed < 0) { - bad_option(progname, (struct option *)0, - " option -%c requires another argument", - op->op_char - ); - /*NOTREACHED*/ - } + consumed = opt_value( + progname, op, arg, argv + ); } break; } @@ -86,19 +84,30 @@ } static int -opt_value(const struct option *op, const char *arg, char *argv[]) { +opt_value( + const char *progname, const struct option *op, + const char *arg, const char *argv[] +) { /* locate the option value */ if (*arg) { /* argument is continuation of option */ *op->op_stringp = arg; return 1; } - else { + else + if (argv[1]) { /* argument follows option */ - if (!argv[1]) return -1; *op->op_stringp = 
argv[1]; return 2; } + else { + bad_option(progname, (struct option *)0, + " option -%c requires another argument", + op->op_char + ); + /*NOTREACHED*/ + } + } static void @@ -110,7 +119,7 @@ fprintf(stderr, "\n"); if (msg[0] != ' ') { - register const struct option *op; + const struct option *op; fprintf(stderr, "Possible options are:\n"); for (op = optlist; op->op_char; op++) { diff -Nru similarity-tester-2.21/options.h similarity-tester-2.26/options.h --- similarity-tester-2.21/options.h 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/options.h 2008-09-23 09:07:11.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: options.h,v 1.3 2001/11/13 12:55:53 dick Exp $ + $Id: options.h,v 1.5 2008/09/23 09:07:11 dick Exp $ */ /* Setting and consulting command line options @@ -13,8 +13,8 @@ const char **op_stringp;/* string value to be picked up */ }; -extern int option_set(char ch); +extern int option_set(int ch); extern int do_options( const char *progname, const struct option *optlist, - int argc, char *argv[] + int argc, const char *argv[] ); diff -Nru similarity-tester-2.21/pascallang.l similarity-tester-2.26/pascallang.l --- similarity-tester-2.21/pascallang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/pascallang.l 2008-09-23 09:07:12.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: pascallang.l,v 2.9 2007/08/29 09:10:35 dick Exp $ + $Id: pascallang.l,v 2.11 2008/09/23 09:07:12 dick Exp $ */ /* @@ -78,7 +78,7 @@ /* Turns upper case into lower case, since Pascal does not distinguish between them. 
*/ - register char *s; + char *s; for (s = str; *s; s++) { if ('A' <= *s && *s <= 'Z') { @@ -89,7 +89,7 @@ static TOKEN idf2token(int hashing) { - register TOKEN tk; + TOKEN tk; lower_case(yytext); tk = idf_in_list(yytext, reserved, sizeof reserved, IDF); @@ -148,9 +148,6 @@ %} -%option nounput -%option never-interactive - %Start Comment Layout ([ \t\r\f]) @@ -195,7 +192,7 @@ } ^#{Layout}*{Idf} { /* a preprocessor line */ - register char *idf = yytext+1; + char *idf = yytext+1; /* skip layout in front of preprocessor identifier */ while (*idf == ' ' || *idf == '\t') { @@ -209,14 +206,14 @@ } {Idf}/"(" { /* identifier in front of ( */ - register TOKEN tk; + TOKEN tk; tk = idf2token(option_set('F')); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); } {Idf} { /* identifier */ - register TOKEN tk; + TOKEN tk; tk = idf2token(0 /* no hashing */); if (!TOKEN_EQ(tk, SKIP)) return_tk(tk); diff -Nru similarity-tester-2.21/pass1.c similarity-tester-2.26/pass1.c --- similarity-tester-2.21/pass1.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/pass1.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: pass1.c,v 2.8 2007/08/27 09:57:32 dick Exp $ + $Id: pass1.c,v 2.10 2008/09/23 09:07:12 dick Exp $ */ #include @@ -12,6 +12,7 @@ #include "tokenarray.h" #include "lex.h" #include "error.h" +#include "options.h" #include "pass1.h" #ifdef DB_TEXT @@ -21,8 +22,8 @@ static void print_count(unsigned int cnt, const char *); void -Pass1(int argc, char *argv[]) { - register int n; +Pass1(int argc, const char *argv[]) { + int n; InitText(argc); InitTokenArray(); @@ -32,22 +33,28 @@ /* read the files */ for (n = 0; n < NumberOfTexts; n++) { - register char *fname = argv[n]; - register struct text *txt = &Text[n]; - - fprintf(OutputFile, "File %s: ", fname); + const char *fname = argv[n]; + struct text *txt = &Text[n]; + + if (!option_set('T')) { + fprintf(OutputFile, "File %s: ", fname); + } txt->tx_fname = fname; txt->tx_pos = 0; txt->tx_start = txt->tx_limit = TextLength(); if (strcmp(fname, "/") == 0) { - fprintf(OutputFile, "separator\n"); + if (!option_set('T')) { + fprintf(OutputFile, "separator\n"); + } NumberOfNewTexts = n; } else { if (!OpenText(First, txt)) { - fprintf(OutputFile, ">>>> cannot open <<<< "); + if (!option_set('T')) { + fprintf(OutputFile, ">>>> cannot open <<<< "); + } /* the file has still been opened with a null file for uniformity */ @@ -61,14 +68,17 @@ txt->tx_limit = TextLength(); /* report */ - print_count(txt->tx_limit - txt->tx_start, "token"); - if (lex_non_ascii_cnt) { - fprintf(DebugFile, ", "); - print_count(lex_non_ascii_cnt, - "non-ASCII character" - ); + if (!option_set('T')) { + print_count(txt->tx_limit - txt->tx_start, "token"); + if (lex_non_ascii_cnt) { + fprintf(DebugFile, ", "); + print_count(lex_non_ascii_cnt, + "non-ASCII character" + ); + } + fprintf(OutputFile, "\n"); } - fprintf(OutputFile, "\n"); + #ifdef DB_TEXT db_print_text(txt); #endif /* DB_TEXT */ @@ -77,10 +87,12 @@ } /* report total */ - fprintf(OutputFile, "Total: "); - print_count(TextLength() - 1, "token"); - fprintf(OutputFile, "\n\n"); - 
fflush(OutputFile); + if (!option_set('T')) { + fprintf(OutputFile, "Total: "); + print_count(TextLength() - 1, "token"); + fprintf(OutputFile, "\n\n"); + fflush(OutputFile); + } } static void @@ -96,7 +108,7 @@ static void db_print_text(const struct text *txt) { /* prints a text (in compressed form) */ - register int i; + int i; fprintf(DebugFile, "\n\n**** DB_PRINT_TEXT ****\n"); diff -Nru similarity-tester-2.21/pass1.h similarity-tester-2.26/pass1.h --- similarity-tester-2.21/pass1.h 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/pass1.h 2008-09-23 09:07:12.000000000 +0000 @@ -1,9 +1,9 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: pass1.h,v 1.3 2001/09/28 09:03:50 dick Exp $ + $Id: pass1.h,v 1.4 2008/09/23 09:07:12 dick Exp $ */ /* Reads the input files; stores the tokens in TOKEN TokenArray[] and the input file descriptions in struct text text[]. */ -extern void Pass1(int argc, char *argv[]); +extern void Pass1(int argc, const char *argv[]); diff -Nru similarity-tester-2.21/pass2.c similarity-tester-2.26/pass2.c --- similarity-tester-2.21/pass2.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/pass2.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: pass2.c,v 2.10 2004/08/05 09:49:46 dick Exp $ + $Id: pass2.c,v 2.11 2008/09/23 09:07:12 dick Exp $ */ #include @@ -37,8 +37,8 @@ static void pass2_txt(struct text *txt) { - register struct position *pos; - register unsigned int old_nl_cnt; + struct position *pos; + unsigned int old_nl_cnt; if (!txt->tx_pos) /* no need to scan the file */ return; diff -Nru similarity-tester-2.21/pass3.c similarity-tester-2.26/pass3.c --- similarity-tester-2.21/pass3.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/pass3.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,16 +1,16 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: pass3.c,v 2.11 2005/02/20 17:03:03 dick Exp $ + $Id: pass3.c,v 2.13 2008/09/23 09:07:12 dick Exp $ */ #include #include -#include #include "system.par" #include "debug.par" #include "sim.h" #include "runs.h" +#include "Malloc.h" #include "error.h" #include "options.h" #include "pass3.h" @@ -34,7 +34,7 @@ static int unslen(unsigned int); static int maxline; /* Actual maximum line length */ -static char *line0; /* by malloc() */ +static char *line0; /* by Malloc() */ static char *line1; void @@ -53,9 +53,8 @@ struct run *run; maxline = PageWidth / 2 - 2; - line0 = malloc((unsigned int)((maxline + 1) * sizeof (char))); - line1 = malloc((unsigned int)((maxline + 1) * sizeof (char))); - if (!line0 || !line1) fatal("out of memory"); + line0 = Malloc((unsigned int)((maxline + 1) * sizeof (char))); + line1 = Malloc((unsigned int)((maxline + 1) * sizeof (char))); OpenIter(&iter); while (GetAisoItem(&iter, &run)) { @@ -67,18 +66,18 @@ } CloseIter(&iter); - free(line0); line0 = 0; - free(line1); line1 = 0; + Free(line0); line0 = 0; + Free(line1); line1 = 0; } static void show_run(const struct run *run) { /* The animals came in two by two ... 
*/ - register const struct chunk *cnk0 = &run->rn_cn0; - register const struct chunk *cnk1 = &run->rn_cn1; - register unsigned int nl_cnt0 = + const struct chunk *cnk0 = &run->rn_cn0; + const struct chunk *cnk1 = &run->rn_cn1; + unsigned int nl_cnt0 = cnk0->ch_last.ps_nl_cnt - cnk0->ch_first.ps_nl_cnt; - register unsigned int nl_cnt1 = + unsigned int nl_cnt1 = cnk1->ch_last.ps_nl_cnt - cnk1->ch_first.ps_nl_cnt; FILE *f0; FILE *f1; @@ -86,8 +85,8 @@ /* display heading of chunk */ if (!option_set('d')) { /* no assumptions about the lengths of the file names! */ - register unsigned int size = run->rn_size; - register int pos = 0; + unsigned int size = run->rn_size; + int pos = 0; pos += prhead(cnk0); while (pos < maxline + 1) { @@ -154,7 +153,7 @@ static int prhead(const struct chunk *cnk) { - register int pos = 0; + int pos = 0; pos += prs(cnk->ch_text->tx_fname); pos += prs(": line "); @@ -178,7 +177,7 @@ static int unslen(unsigned int u) { - register int res = 1; + int res = 1; while (u > 9) { u /= 10, res++; @@ -191,9 +190,9 @@ /* opens the file in which the chunk resides, positions the file at the beginning of the chunk and returns the file pointer */ - register char *fname = cnk->ch_text->tx_fname; - register FILE *f = fopen(fname, "r"); - register unsigned int nl_cnt; + const char *fname = cnk->ch_text->tx_fname; + FILE *f = fopen(fname, "r"); + unsigned int nl_cnt; if (!f) { fprintf(stderr, ">>>> File %s disappeared <<<<\n", fname); @@ -217,8 +216,8 @@ fill_line(FILE *f, char ln[]) { /* Reads one line from f and puts it in condensed form in ln. 
*/ - register int indent = 0, lpos = 0; - register int ch; + int indent = 0, lpos = 0; + int ch; /* condense and skip initial blank */ while ((ch = getc(f)), ch == ' ' || ch == '\t') { @@ -262,7 +261,7 @@ /* displays the contents of the two lines in a two-column format */ - register int i; + int i; for (i = 0; i < maxline && ln0[i] != '\0'; i++) { fputc(ln0[i], OutputFile); @@ -282,7 +281,7 @@ show_1C_line(FILE *f, const char *marker) { /* displays one line from f, preceded by the marker */ - register int ch; + int ch; fprintf(OutputFile, "%s", marker); while ((ch = getc(f)), ch > 0 && ch != '\n') { @@ -298,8 +297,8 @@ static void db_run(const struct run *run) { /* prints detailed data about a run */ - register const struct chunk *cnk0 = &run->rn_cn0; - register const struct chunk *cnk1 = &run->rn_cn1; + const struct chunk *cnk0 = &run->rn_cn0; + const struct chunk *cnk1 = &run->rn_cn1; fprintf(DebugFile, "File %s / file %s:\n", cnk0->ch_text->tx_fname, diff -Nru similarity-tester-2.21/percentages.c similarity-tester-2.26/percentages.c --- similarity-tester-2.21/percentages.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/percentages.c 2008-09-22 10:25:17.000000000 +0000 @@ -1,13 +1,13 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: percentages.c,v 1.3 2007/08/27 09:57:33 dick Exp $ + $Id: percentages.c,v 1.5 2008/09/22 10:24:08 dick Exp $ */ #include -#include #include "sim.h" #include "runs.h" +#include "Malloc.h" #include "error.h" #include "percentages.h" @@ -18,15 +18,14 @@ unsigned int ma_size; }; -static struct match *match_start; /* to be allocated by malloc */ +static struct match *match_start; /* to be allocated by new() */ -int +void add_to_percentages(struct run *r) { - /* fails if out of memory, in line with add_to_run() */ struct match **match_hook = &match_start; /* percentages are only meaningful between different files */ - if (r->rn_cn0.ch_text == r->rn_cn1.ch_text) return 1; + if (r->rn_cn0.ch_text == r->rn_cn1.ch_text) return; /* look (text0, text1) combination up in match list */ while (*match_hook) { @@ -37,21 +36,18 @@ ) { /* found it; now update it */ m->ma_size += r->rn_size; - return 1; + return; } match_hook = &m->ma_next; } { /* it's not there; make a new entry */ - struct match *m = *match_hook = - (struct match *)malloc(sizeof (struct match)); + struct match *m = *match_hook = new(struct match); - if (m == 0) return 0; m->ma_next = 0; m->ma_text0 = r->rn_cn0.ch_text; m->ma_text1 = r->rn_cn1.ch_text; m->ma_size = r->rn_size; - return 1; } } @@ -61,10 +57,8 @@ while (*match_hook) { struct match *m = *match_hook; - struct match *n = - (struct match *)malloc(sizeof (struct match)); + struct match *n = new(struct match); - if (!n) fatal("out of memory"); /* hook in the double */ n->ma_next = m->ma_next; m->ma_next = n; @@ -96,13 +90,15 @@ struct match *m = match_start; while (m) { - fprintf(OutputFile, - "%s consists for %d %% of %s material\n", - m->ma_text0->tx_fname, - (int)(match_percentage(m)*100.0), - m->ma_text1->tx_fname - ); - + if ((int)(match_percentage(m)*100.0) >= ThresholdPerc) { + fprintf(OutputFile, + "%s consists for %d %% of %s material\n", + m->ma_text0->tx_fname, + (int)(match_percentage(m)*100.0), + m->ma_text1->tx_fname + ); + } + m = 
m->ma_next; } } diff -Nru similarity-tester-2.21/percentages.h similarity-tester-2.26/percentages.h --- similarity-tester-2.21/percentages.h 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/percentages.h 2008-03-31 10:35:17.000000000 +0000 @@ -1,7 +1,7 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: percentages.h,v 1.2 2004/08/05 09:49:48 dick Exp $ + $Id: percentages.h,v 1.3 2008/03/31 10:35:17 dick Exp $ */ -extern int add_to_percentages(struct run *r); +extern void add_to_percentages(struct run *r); extern void show_percentages(void); diff -Nru similarity-tester-2.21/READ_ME similarity-tester-2.26/READ_ME --- similarity-tester-2.21/READ_ME 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/READ_ME 2008-09-23 09:07:10.000000000 +0000 @@ -1,17 +1,16 @@ # This file is part of the software similarity tester SIM. # Written by Dick Grune, Vrije Universiteit, Amsterdam. -# $Id: READ_ME,v 2.6 2005/02/20 17:02:59 dick Exp $ +# $Id: READ_ME,v 2.7 2008/09/23 09:07:10 dick Exp $ These programs test for similar (or equal) stretches in one or more program files and can be used to detect common code or plagiarism. See sim.1. Checkers are available for C, Java, Pascal, Modula-2, Lisp, Miranda and natural text. -This READ_ME file describes the UNIX version. The MSDOS version is described -in the file READ.ME. +This READ_ME file describes the UNIX/Linux version. The MSDOS version is +described in the file READ.ME. -To obtain the sources, do - sh sim_2_21.shar +To obtain the sources, unpack the archive sim_2_*.zip To compile and test, just call make diff -Nru similarity-tester-2.21/READ.ME similarity-tester-2.26/READ.ME --- similarity-tester-2.21/READ.ME 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/READ.ME 2008-09-23 09:07:10.000000000 +0000 @@ -1,6 +1,6 @@ # This file is part of the software similarity tester SIM. 
# Written by Dick Grune, Vrije Universiteit, Amsterdam. -# $Id: READ.ME,v 2.8 2005/02/20 17:02:58 dick Exp $ +# $Id: READ.ME,v 2.9 2008/09/23 09:07:10 dick Exp $ These programs test for similar (or equal) stretches in one or more program files and can be used to detect common code or plagiarism. See SIM.DOC. @@ -10,9 +10,9 @@ This READ.ME file describes the MSDOS version. The UNIX version is described in the file READ_ME. -The archive SIM_2_21.ZIP contains: +The FTP directory contains: READ.ME this READ.ME file - SIM.TXT a 2-page manual, UNIX-style + SIM.TXT a short manual, UNIX-style SIM_C.EXE similarity tester for C SIM_JAVA.EXE similarity tester for Java SIM_PASC.EXE similarity tester for Pascal @@ -21,8 +21,8 @@ SIM_MIRA.EXE similarity tester for Miranda SIM_TEXT.EXE similarity tester for text -The MSDOS version does not contain sources. The sources are available from -the UNIX archive sim_2_21.shar, but require a C compiler, flex and make. +The sources are available from the UNIX/Linux archive sim_2_*.zip, but require +a C compiler, flex and make. Dick Grune Vrije Universiteit diff -Nru similarity-tester-2.21/README.1st similarity-tester-2.26/README.1st --- similarity-tester-2.21/README.1st 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/README.1st 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ -This is SIM, Software and text similarity tester, most recent revision - (2.19, 20050220) -by Dick Grune, Vrije Universiteit, Amsterdam, the Netherlands (dick@cs.vu.nl). - -SIM tests lexical similarity in texts in C, Java, Pascal, Modula-2, Lisp, -Miranda and natural language. It can be used - -- to detect potentially duplicated code fragments in large software projects, -- to detect plagiarism in software and text-based projects, educational and - otherwise. - -The program is fast: -the UNIX version on a Sun ULTRA does about 50000 tokens/sec, -the DOS version on a Pentium 166 does about 25000 tokens/sec. 
- -SIM is available for UNIX (in source code) and MSDOS (32-bit executables). - -UNIX: - To obtain the files, do: - sh sim_2_21.shar - This unpacks the sources, the Makefile, sim.1 and READ_ME. - For installation notes and other info then see READ_ME. - -MSDOS: - To obtain the files, do: - [pk]unzip SIM_2_21.zip - This unpacks the executables, SIM.DOC and READ.ME. - For other info then see READ.ME. - -Changes from Release 2.19: - Various changes necessitated by Linux flex being different - -Changes from Release 2.16: - Various updates and adjustments in the code and the installation - procedure. - -Changes from Release 2.13: - Percentage reporting feature added. - -Changes from Release 2.12: - Miranda checker added. - -Changes from Release 2.9: - Java checker added. - The C checker 'sim' was renamed to 'sim_c', for uniformity. - Converted the sources to ANSI C. - All versions now report non_ASCI characters in the input. - -Changes from Release 2.8: - DOS versions can now compare very large files (>400000 tokens) - -Changes from Release 1.21, as posted in comp.sources.unix (1987): - Ported to MSDOS - Significant speed improvements - New options: -e, -S and / , to compare files group-wise - New option: -F , to require function names to match exactly - Lisp version added - Miscellaneous improvements - - - Dick Grune - Vrije Universiteit - de Boelelaan 1081 - 1081 HV Amsterdam - the Netherlands - email: dick@cs.vu.nl - ftp://ftp.cs.vu.nl/pub/dick - http://www.cs.vu.nl/~dick diff -Nru similarity-tester-2.21/README.1ST similarity-tester-2.26/README.1ST --- similarity-tester-2.21/README.1ST 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/README.1ST 2008-09-23 08:18:33.000000000 +0000 @@ -0,0 +1,66 @@ +This is SIM, Software and text similarity tester, most recent revision + (2.26, 20080923) +by Dick Grune, Vrije Universiteit, Amsterdam, the Netherlands (dick@cs.vu.nl). 
+ +SIM tests lexical similarity in texts in C, Java, Pascal, Modula-2, Lisp, +Miranda and natural language. It can be used + +- to detect potentially duplicated code fragments in large software projects, +- to detect plagiarism in software and text-based projects, educational and + otherwise. + +The program is fast: +the UNIX version on a Sun ULTRA does about 50000 tokens/sec, +the DOS version on a Pentium 166 does about 25000 tokens/sec. + +SIM is available for UNIX (in source code) and MSDOS (32-bit executables). + +UNIX: + To obtain the files, unpack sim_2_*.zip + This yields the sources, the Makefile, sim.pdf and READ_ME. + For installation notes and other info then see READ_ME. + +MSDOS: + The executables are available as SIM_*.EXE, where * is the language. + For other info then see READ.ME. + +Changes from Release 2.19: + A more terse output format is available; file names can be read from + standard input; a threshold can be set for percentages. + +Changes from Release 2.16: + Various updates and adjustments in the code and the installation + procedure. + +Changes from Release 2.13: + Percentage reporting feature added. + +Changes from Release 2.12: + Miranda checker added. + +Changes from Release 2.9: + Java checker added. + The C checker 'sim' was renamed to 'sim_c', for uniformity. + Converted the sources to ANSI C. + All versions now report non_ASCI characters in the input. 
+ +Changes from Release 2.8: + DOS versions can now compare very large files (>400000 tokens) + +Changes from Release 1.21, as posted in comp.sources.unix (1987): + Ported to MSDOS + Significant speed improvements + New options: -e, -S and / , to compare files group-wise + New option: -F , to require function names to match exactly + Lisp version added + Miscellaneous improvements + + + Dick Grune + Vrije Universiteit + de Boelelaan 1081 + 1081 HV Amsterdam + the Netherlands + email: dick@cs.vu.nl + ftp://ftp.cs.vu.nl/pub/dick + http://www.cs.vu.nl/~dick diff -Nru similarity-tester-2.21/runs.h similarity-tester-2.26/runs.h --- similarity-tester-2.21/runs.h 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/runs.h 2008-03-31 10:35:17.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: runs.h,v 1.2 2001/11/08 12:30:30 dick Exp $ + $Id: runs.h,v 1.3 2008/03/31 10:35:17 dick Exp $ */ /* Although all other segments of data in this program are described by @@ -26,7 +26,7 @@ }; #define AISO_TYPE struct run * -#define AISO_ITERATOR +#define AISO_ITER #define add_to_runs(r) InsertAiso(r) diff -Nru similarity-tester-2.21/sim.1 similarity-tester-2.26/sim.1 --- similarity-tester-2.21/sim.1 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sim.1 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ .\" This file is part of the software similarity tester SIM. .\" Written by Dick Grune, Vrije Universiteit, Amsterdam. -.\" $Id: sim.1,v 2.6 2004/08/05 09:49:49 dick Exp $ +.\" $Id: sim.1,v 2.7 2008/09/23 09:07:12 dick Exp $ .\" .TH SIM 1 2001/11/13 "Vrije Universiteit" .SH NAME @@ -8,9 +8,11 @@ .SH SYNOPSIS .B sim_c [ -.B \-[defFnpsS] +.B \-[defFinpsST] .B \-r .I N +.B \-t +.I N .B \-w .I N .B \-o @@ -84,6 +86,9 @@ Old files are never compared to each other. Since the similarity tester reads the files several times, it cannot read from standard input. 
+(See, however, the +.B \-i +option.) .PP There are the following options: .TP @@ -103,6 +108,15 @@ The names of functions in calls are required to match exactly (C, Java, Pascal, Modula-2 and Lisp only). .TP +.B \-i +The names of the files to be compared are read from standard input, including +a possible +.B / +; the file names need to be separated by layout. +This allows a very large number of file names to be specified; it differs from +the @ facility provided by some compilers in that it handles file names only, +and does not recognize option arguments. +.TP .B \-n Similarities found are only summarized, not displayed. .TP @@ -121,12 +135,22 @@ = 24). .TP .B \-s -The contents of a file are not compared to itself (\-s = not self). +The contents of a file are not compared to itself (\-s for "not self"). .TP .B \-S The contents of the new files are compared to the old files only \- not between themselves. .TP +.B "\-t N" +In combination with the +.B \-p +option, sets the threshold (in percents) below which similarities will not be +reported. +.TP +.B \-T +A more terse and uniform form of output is produced, which may be more +suitable for postprocessing. +.TP .B "\-w N" The page width used is set to .I N @@ -147,6 +171,9 @@ file to consist for 100 % of text from another file, while the other file consists for only 1 % of text of the first file, if their lengths differ enough. +A threshold can be set using the +.B \-T +option. Note also that the granularity of the recognized text is still governed by the .B \-r option or its default. diff -Nru similarity-tester-2.21/sim.c similarity-tester-2.26/sim.c --- similarity-tester-2.21/sim.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sim.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. 
- $Id: sim.c,v 2.12 2007/08/27 09:57:34 dick Exp $ + $Id: sim.c,v 2.14 2008/09/23 09:07:12 dick Exp $ */ #include @@ -9,6 +9,7 @@ #include "settings.par" #include "sim.h" #include "options.h" +#include "newargs.h" #include "language.h" #include "error.h" #include "hash.h" @@ -24,29 +25,33 @@ FILE *OutputFile; FILE *DebugFile; -struct text *Text; /* to be filled in by malloc */ +struct text *Text; /* to be filled in by Malloc() */ int NumberOfTexts; /* number of text records */ int NumberOfNewTexts; /* number of new text records */ +int ThresholdPerc = 0; /* threshold percentage to show */ -char *progname; /* for error reporting */ +const char *progname; /* for error reporting */ -static const char *outputname; /* for reporting */ -static const char *minrunstring; -static const char *pagewidthstring; +static const char *output_name; /* for reporting */ +static const char *min_run_string; +static const char *page_width_string; +static const char *threshold_string; static const struct option optlist[] = { - {'r', "minimum run size", 'N', &minrunstring}, - {'w', "page width", 'N', &pagewidthstring}, + {'r', "minimum run size", 'N', &min_run_string}, + {'w', "page width", 'N', &page_width_string}, {'f', "function-like forms only", ' ', 0}, {'d', "use diff format for output", ' ', 0}, + {'T', "terse output", ' ', 0}, + {'n', "display headings only", ' ', 0}, {'p', "use percentage format for output", ' ', 0}, + {'t', "threshold level of percentage to show", 'N', &threshold_string}, {'e', "compare each file to each file separately", ' ', 0}, {'s', "do not compare a file to itself", ' ', 0}, {'S', "compare new files to old files only", ' ', 0}, {'F', "keep function identifiers in tact", ' ', 0}, - {'n', "display headings only", ' ', 0}, - {'x', "no pass2 nl_buff allocation", ' ', 0}, - {'o', "write output to file F", 'F', &outputname}, + {'i', "read arguments (file names) from standard input", ' ', 0}, + {'o', "write output to file F", 'F', &output_name}, {'-', "lexical 
scan output only", ' ', 0}, {0, 0, 0, 0} }; @@ -54,7 +59,7 @@ static void print_stream(const char *fname); int -main(int argc, char *argv[]) { +main(int argc, const char *argv[]) { progname = argv[0]; /* save program name */ argv++, argc--; /* and skip it */ @@ -68,25 +73,39 @@ } /* Treat the value options */ - if (minrunstring) { - MinRunSize = strtoul(minrunstring, NULL, 10); - if (MinRunSize == 0) fatal("bad or zero run size; form is: -r N"); - } - if (pagewidthstring) { - PageWidth = atoi(pagewidthstring); - if (PageWidth == 0) fatal("bad or zero page width; form is: -w N"); + if (min_run_string) { + MinRunSize = strtoul(min_run_string, NULL, 10); + if (MinRunSize == 0) + fatal("bad or zero run size; form is: -r N"); + } + if (page_width_string) { + PageWidth = atoi(page_width_string); + if (PageWidth == 0) + fatal("bad or zero page width; form is: -w N"); + } + if (threshold_string) { + ThresholdPerc = atoi(threshold_string); + if ((ThresholdPerc > 100) || (ThresholdPerc < 0)) + fatal("threshold must be between 0 and 100"); } - if (outputname) { - OutputFile = fopen(outputname, "w"); + if (output_name) { + OutputFile = fopen(output_name, "w"); if (OutputFile == 0) { char msg[500]; - sprintf(msg, "cannot open output file %s", outputname); + sprintf(msg, "cannot open output file %s", + output_name); fatal(msg); /*NOTREACHED*/ } } + if (option_set('i')) { + if (argc != 0) + fatal("-i option conflicts with file arguments"); + get_new_args(&argc, &argv); + } + if (option_set('-')) { /* it is the lexical scan only */ while (argv[0]) { @@ -128,22 +147,24 @@ return; } - fprintf(OutputFile, " showing token stream:\nnl_cnt, tk_cnt: tokens"); - - lex_token = EOL; - do { - if (TOKEN_EQ(lex_token, EOL)) { - fprintf(OutputFile, "\n%u,%u:", - lex_nl_cnt, lex_tk_cnt - ); - } - else { - print_token(OutputFile, lex_token); - } - } while (NextStreamTokenObtained()); - - fprintf(OutputFile, "\n"); + if (!option_set('T')) { + fprintf(OutputFile, + " showing token stream:\nnl_cnt, 
tk_cnt: tokens"); + + lex_token = EOL; + do { + if (TOKEN_EQ(lex_token, EOL)) { + fprintf(OutputFile, "\n%u,%u:", + lex_nl_cnt, lex_tk_cnt + ); + } + else { + print_token (OutputFile, lex_token); + } + } while (NextStreamTokenObtained()); + fprintf(OutputFile, "\n"); + } + CloseStream(); - } diff -Nru similarity-tester-2.21/sim.h similarity-tester-2.26/sim.h --- similarity-tester-2.21/sim.h 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sim.h 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: sim.h,v 2.7 2005/02/20 17:03:03 dick Exp $ + $Id: sim.h,v 2.9 2008/09/23 09:07:12 dick Exp $ */ #include @@ -16,7 +16,7 @@ }; struct text { - char *tx_fname; /* the file name */ + const char *tx_fname; /* the file name */ struct position *tx_pos;/* list of positions in this file that are part of a chunk; sorted and updated by Pass 2 @@ -35,5 +35,5 @@ extern struct text *Text; /* Text[], one for each input file */ extern int NumberOfTexts; /* number of text records */ extern int NumberOfNewTexts; /* number of new text records */ - -extern char *progname; /* for error reporting */ +extern int ThresholdPerc; /* threshold percentage */ +extern const char *progname; /* for error reporting */ diff -Nru similarity-tester-2.21/sim.html similarity-tester-2.26/sim.html --- similarity-tester-2.21/sim.html 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sim.html 2008-09-23 09:07:12.000000000 +0000 @@ -1,5 +1,5 @@ - + The software and text similarity tester SIM @@ -29,15 +29,12 @@

-SIM 2.19 is available as - +SIM 2.26 is available as + C sources and as - -MSDOS binaries. -It is also available through ftp; the directory is -ftp.cs.vu.nl:/pub/dick/similarity_tester. +MSDOS binaries. There is a Unix-style manual page. @@ -70,7 +67,7 @@

-Since this piece of handicraft does not qualify as research, there are no +Since this piece of handicraft did not qualify as research, there are no international papers on it. The work was described in Dutch in Dick Grune, Binary files /tmp/spsq_q_IIk/similarity-tester-2.21/sim.pdf and /tmp/Iu5HALkzqP/similarity-tester-2.26/sim.pdf differ diff -Nru similarity-tester-2.21/sim.txt similarity-tester-2.26/sim.txt --- similarity-tester-2.21/sim.txt 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sim.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,198 +0,0 @@ - - - -User Commands SIM(1) - - - -NAME - sim - find similarities in C, Java, Pascal, Modula-2, Lisp, - Miranda or text files - -SYNOPSIS - sim_c [ -[defFnpsS] -r N -w N -o F ] file ... [ / [ file ... - ] ] - sim_c ... - sim_java ... - sim_pasc ... - sim_m2 ... - sim_lisp ... - sim_mira ... - sim_text ... - -DESCRIPTION - Sim_c reads the C files file ... and looks for pieces of - text that are similar; two pieces of program text are simi- - lar if they only differ in layout, comment, identifiers and - the contents of numbers, strings and characters. If any - runs of sufficient length are found, they are reported on - standard output; the number of significant tokens in the run - is given between square brackets. - - Sim_java does the same for Java, sim_pasc for Pascal, sim_m2 - for Modula-2, sim_lisp for Lisp, and sim_mira for Miranda. - Sim_text works on arbitrary text; it is occasionally useful - on shell scripts. - - The program can be used for finding copied pieces of code in - purportedly unrelated programs (with -s or -S), or for find- - ing accidentally duplicated code in larger projects (with - -f). - - If a / is present between the input files, the latter are - divided into a group of "new" files (before the /) and a - group of "old" files; if there is no /, all files are "new". - Old files are never compared to each other. 
Since the simi- - larity tester reads the files several times, it cannot read - from standard input. - - There are the following options: - - -d The output is in a diff(1)-like format instead of the - default 2-column format. - - -e Each file is compared to each file in isolation; this - will find all similarities between all texts involved, - regardless of duplicates. - - -f Runs are restricted to pieces with balancing - parentheses, to isolate potential functions (C, Java, - - - -Vrije Universiteit Last change: 2001/11/13 1 - - - - - - -User Commands SIM(1) - - - - Pascal, Modula-2 and Lisp only). - - -F The names of functions in calls are required to match - exactly (C, Java, Pascal, Modula-2 and Lisp only). - - -n Similarities found are only summarized, not displayed. - - -o F The output is written to the file named F. - - -p The output is given in similarity percentages; see - below. - - -r N The minimum run length is set to N (default is N = 24). - - -s The contents of a file are not compared to itself (-s = - not self). - - -S The contents of the new files are compared to the old - files only - not between themselves. - - -w N The page width used is set to N columns (default is N = - 80). - - The -p option results in lines of the form F consists for x - % of G material meaning that x % of F's text can also be - found in G. Note that this relation is not symmetric; it is - in fact quite possible for one file to consist for 100 % of - text from another file, while the other file consists for - only 1 % of text of the first file, if their lengths differ - enough. Note also that the granularity of the recognized - text is still governed by the -r option or its default. - - Care has been taken to keep all internal processes linear in - the length of the input, with the exception of the matching - process which is almost linear, using a hash table; various - other tables are used for speed-up. 
If, however, there is - not enough memory for the tables, they are discarded in - order of unimportance, under which conditions the algorithms - revert to their quadratic nature. - -AUTHOR - Dick Grune, Vrije Universiteit, Amsterdam. - -BUGS - Strong periodicity in the input text (like a table of N - almost identical lines) causes problems. Sim tries to cope - with this but cannot avoid giving appr. log N messages about - it. The best advice is still to take the offending files - out of the game. - - Since it uses lex(1) on some systems, it may dump core on - any weird construction that overflows lex's internal - - - -Vrije Universiteit Last change: 2001/11/13 2 - - - - - - -User Commands SIM(1) - - - - buffers. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Vrije Universiteit Last change: 2001/11/13 3 - - - diff -Nru similarity-tester-2.21/sysidf.mk similarity-tester-2.26/sysidf.mk --- similarity-tester-2.21/sysidf.mk 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sysidf.mk 2008-09-23 08:28:42.000000000 +0000 @@ -3,6 +3,7 @@ BINDIR = /home/dick/bin.`arch` MANDIR = /home/dick/man/man1 FTPDIR = /usr/local/ftpd/pub/dick/similarity_tester +FTPFILES = README.1st READ_ME TechnReport # C O M P I L A T I O N P A R A M E T E R S diff -Nru similarity-tester-2.21/sysidf.unix similarity-tester-2.26/sysidf.unix --- similarity-tester-2.21/sysidf.unix 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/sysidf.unix 2008-09-23 08:28:57.000000000 +0000 @@ -4,7 +4,6 @@ MANDIR = /home/dick/man/man1 FTPDIR = /usr/local/ftpd/pub/dick/similarity_tester FTPFILES = README.1st READ_ME TechnReport -VERSION = 2_19 # C O M P I L A T I O N P A R A M E T E R S diff -Nru similarity-tester-2.21/text.c similarity-tester-2.26/text.c --- similarity-tester-2.21/text.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/text.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,16 +1,16 @@ /* This file is part of 
the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: text.c,v 1.2 2001/11/13 12:55:58 dick Exp $ + $Id: text.c,v 1.4 2008/09/23 09:07:12 dick Exp $ */ #include -#include #include "debug.par" #include "sim.h" #include "token.h" #include "stream.h" #include "lex.h" +#include "Malloc.h" #include "options.h" #include "error.h" #include "text.h" @@ -21,7 +21,7 @@ #define NL_INCR 1000 /* increment of newline buffer size */ -static struct newline *nl_buff; /* to be filled by malloc */ +static struct newline *nl_buff; /* to be filled by Malloc() */ static unsigned int nl_size; /* size of nl_buff[] */ static unsigned int nl_free; /* next free position in nl_buff[] */ @@ -40,8 +40,7 @@ /* allocate the array of text descriptors */ NumberOfTexts = nfiles; Text = (struct text *) - malloc((unsigned int)(NumberOfTexts*sizeof (struct text))); - if (!Text) fatal("out of memory"); + Malloc((unsigned int)(NumberOfTexts*sizeof (struct text))); init_nl_buff(); } @@ -74,7 +73,7 @@ int NextTextTokenObtained(enum Pass pass) { - register int ok = 0; /* gcc does not understand enum Pass */ + int ok = 0; /* gcc does not understand enum Pass */ switch (pass) { case First: @@ -153,9 +152,7 @@ init_nl_buff(void) { /* Allocate the newline buffer, if possible */ nl_size = 0 + NL_INCR; - nl_buff = (option_set('x') ? 
0 : - (struct newline *)malloc(sizeof (struct newline) * nl_size) - ); + nl_buff = (struct newline *)TryMalloc(sizeof (struct newline)*nl_size); } static void @@ -165,7 +162,7 @@ if (nl_free == nl_size) { /* allocated array is full; try to increase its size */ unsigned int new_size = nl_size + NL_INCR; - struct newline *new_buff = (struct newline *)realloc( + struct newline *new_buff = (struct newline *)TryRealloc( (char *)nl_buff, sizeof (struct newline) * new_size ); @@ -180,8 +177,8 @@ /* now we are sure there is room enough */ { - register struct newline *nl = &nl_buff[nl_free++]; - register unsigned int tk_diff = lex_tk_cnt - last_tk_cnt; + struct newline *nl = &nl_buff[nl_free++]; + unsigned int tk_diff = lex_tk_cnt - last_tk_cnt; nl->nl_tk_diff = tk_diff; if (nl->nl_tk_diff != tk_diff) { @@ -194,7 +191,7 @@ static void abandon_nl_buff(void) { if (nl_buff) { - free((char *)nl_buff); + Free((char *)nl_buff); nl_buff = 0; } } diff -Nru similarity-tester-2.21/textlang.l similarity-tester-2.26/textlang.l --- similarity-tester-2.21/textlang.l 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/textlang.l 2008-03-31 10:35:17.000000000 +0000 @@ -1,7 +1,7 @@ %{ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: textlang.l,v 1.3 2007/08/29 09:10:36 dick Exp $ + $Id: textlang.l,v 1.4 2008/03/31 10:35:17 dick Exp $ */ /* @@ -36,9 +36,6 @@ %} -%option nounput -%option never-interactive - Layout ([ \t\r\f]) %% diff -Nru similarity-tester-2.21/ToDo similarity-tester-2.26/ToDo --- similarity-tester-2.21/ToDo 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/ToDo 2008-09-22 19:29:26.000000000 +0000 @@ -0,0 +1,11 @@ +- remove language.h from algollike.h + +X Mon Apr 11 13:23:41 1994: sim_orca +X Thu May 13 23:02:46 1993: sim ook voor C++ en Ada + +Are these still useful?: +- unsigned int ????; changing to int may change the semantics! ++ register - removed + +? DB_HASH +? 
How good is the hashing? Need better statistics. diff -Nru similarity-tester-2.21/tokenarray.c similarity-tester-2.26/tokenarray.c --- similarity-tester-2.21/tokenarray.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/tokenarray.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,25 +1,23 @@ /* This file is part of the software similarity tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: tokenarray.c,v 1.2 2001/11/13 12:55:59 dick Exp $ + $Id: tokenarray.c,v 1.5 2008/09/23 09:07:12 dick Exp $ */ -#include - #include "error.h" #include "lex.h" +#include "Malloc.h" #include "tokenarray.h" #define TK_INCR 10000 /* increment of token array size */ -TOKEN *TokenArray; /* to be filled by malloc */ +TOKEN *TokenArray; /* to be filled by Malloc() */ static unsigned int tk_size; /* size of TokenArray[] */ static unsigned int tk_free; /* next free position in TokenArray[] */ void InitTokenArray(void) { tk_size = TK_INCR; - TokenArray = (TOKEN *)malloc(sizeof (TOKEN) * tk_size); - if (!TokenArray) fatal("out of memory"); + TokenArray = (TOKEN *)Malloc(sizeof (TOKEN) * tk_size); tk_free = 1; /* don't use position 0 */ } @@ -28,17 +26,17 @@ if (tk_free == tk_size) { /* allocated array is full; try to increase its size */ unsigned int new_size = tk_size + TK_INCR; - register TOKEN *new_array = (TOKEN *)realloc( + TOKEN *new_array = (TOKEN *)TryRealloc( (char *)TokenArray, sizeof (TOKEN) * new_size ); - if (new_size < tk_free) - fatal("internal error: TK_INCR causes numeric overflow"); if (!new_array) { /* we failed */ fatal("out of memory"); } + if (new_size < tk_free) + fatal("internal error: TK_INCR causes numeric overflow"); TokenArray = new_array, tk_size = new_size; } diff -Nru similarity-tester-2.21/token.c similarity-tester-2.26/token.c --- similarity-tester-2.21/token.c 2007-08-30 10:39:53.000000000 +0000 +++ similarity-tester-2.26/token.c 2008-09-23 09:07:12.000000000 +0000 @@ -1,6 +1,6 @@ /* This file is part of the software similarity 
tester SIM. Written by Dick Grune, Vrije Universiteit, Amsterdam. - $Id: token.c,v 2.4 2001/11/13 12:55:58 dick Exp $ + $Id: token.c,v 2.5 2008/09/23 09:07:12 dick Exp $ */ /* @@ -19,8 +19,8 @@ A printable #A meta ^? DEL $? meta-DEL */ - register int ch = TOKEN2int(tk) & 0177; - register int meta = TOKEN2int(tk) & 0200; + int ch = TOKEN2int(tk) & 0177; + int meta = TOKEN2int(tk) & 0200; if (' ' <= ch && ch <= '~') { fprintf(ofile, "%c%c", (meta ? '#' : ' '), ch); diff -Nru similarity-tester-2.21/VERSION similarity-tester-2.26/VERSION --- similarity-tester-2.21/VERSION 1970-01-01 00:00:00.000000000 +0000 +++ similarity-tester-2.26/VERSION 1970-01-01 00:00:01.000000000 +0000 @@ -0,0 +1 @@ +2.26