BUGS

diff -Nru gmp-ecm-7.0.4+ds/acinclude.m4 gmp-ecm-7.0.5+ds/acinclude.m4 --- gmp-ecm-7.0.4+ds/acinclude.m4 2016-08-23 12:25:18.000000000 +0000 +++ gmp-ecm-7.0.5+ds/acinclude.m4 2022-06-06 14:16:49.000000000 +0000 @@ -292,8 +292,10 @@ [cat >conftes1.c <conftes2.s < compiles with $NVCC $flags + +m4_define([NVCC_CHECK_COMPILE], +[ + echo "$1" > conftest.cu + $NVCC -c conftest.cu -o conftest.o $2 &> /dev/null + ret=$? + rm conftest.cu + AS_IF([test "$ret" -eq "0"], [$3], [$4]) +]) + dnl CU_CHECK_CUDA dnl Check if a GPU version is asked, for which GPU and where CUDA is install. dnl Includes are put in CUDA_INC_FLAGS @@ -376,12 +391,20 @@ AC_DEFUN([CU_CHECK_CUDA], [ -# Is the GPU version is requested? +# Is the GPU version requested? AC_ARG_ENABLE(gpu, - AS_HELP_STRING([--enable-gpu=GPU_ARCH], - [Enable the cuda version [default=no]]), - [ AS_IF([test "x$enableval" = "xno"], [ enable_gpu="no" ], - [ enable_gpu="yes" ]) ] ) + AS_HELP_STRING([--enable-gpu@<:@=GPU_ARCH@:>@], + [Build with support for CUDA stage 1, by default builds with all possible compute capabilities + to build with a single compute capability pass use --enable-gpu=XX [default=no]]), + [ AS_IF([test "x$enableval" = "xno"], + [ enable_gpu="no" ], + [ enable_gpu="yes" + AS_CASE(["x$enableval"], + [ xyes ], [], + [ x[[2-9]][[0-9]] ], [ WANTED_GPU_ARCH="$enableval" ], + [ AC_MSG_ERROR([Didn't recognize GPU_ARCH="$enableval"]) ]) + ]) ]) + AC_ARG_WITH(cuda, AS_HELP_STRING([--with-cuda=DIR], @@ -522,67 +545,98 @@ [NVCCFLAGS=" --compiler-bindir $cuda_compiler NVCCFLAGS"]) dnl check that gcc version is compatible with nvcc version - touch conftest.cu + dnl (seth) How is this checking if gcc and nvcc are compatible? AC_MSG_CHECKING([for compatibility between gcc and nvcc]) - $NVCC -c conftest.cu -o conftest.o $NVCCFLAGS > /dev/null 2>&1 - AS_IF([test "$?" 
-eq "0"], + NVCC_CHECK_COMPILE([], [$NVCCFLAGS], + [AC_MSG_RESULT([yes])], [ - AC_MSG_RESULT([yes]) - ], [ AC_MSG_RESULT([no]) AC_MSG_ERROR(gcc version is not compatible with nvcc) ]) - dnl Check which GPU architecture nvcc know - NVCCTEST="$NVCC -c conftest.cu -o conftest.o $NVCCFLAGS --dryrun" + dnl Check which GPU architecture nvcc knows GPU_ARCH="" - m4_foreach_w([compute_compatibility], [20 21 30 32 35 37 50 52 53], + m4_foreach_w([compute_capability], [30 32 35 37 50 52 53 60 61 62 70 72 75 80 86 87 90], [ - testcc=compute_compatibility - AC_MSG_CHECKING([that nvcc know compute capability $testcc]) - AS_IF([test "$testcc" -eq "21"], - [ - NEW="--generate-code arch=compute_20,code=sm_21" - ], + testcc=compute_capability + AS_IF([test -z "$WANTED_GPU_ARCH" -o "$WANTED_GPU_ARCH" = "$testcc"], [ + AC_MSG_CHECKING([that nvcc know compute capability $testcc]) NEW="--generate-code arch=compute_$testcc,code=sm_$testcc" + NVCC_CHECK_COMPILE([], [$NVCCFLAGS --dryrun $NEW], + [ + AC_MSG_RESULT([yes]) + GPU_ARCH="$GPU_ARCH $NEW" + MIN_CC=${MIN_CC:-$testcc} + ], [ + AC_MSG_RESULT([no]) + ]) ]) - $NVCCTEST $NEW > /dev/null 2>&1 - AS_IF([test "$?" -eq "0"], - [ - AC_MSG_RESULT([yes]) - GPU_ARCH="$GPU_ARCH $NEW" - ], [ - AC_MSG_RESULT([no]) - ]) - ] ) + ]) + # Use JIT compilation of GPU code for forward compatibility - GPU_ARCH="--generate-code arch=compute_20,code=compute_20 $GPU_ARCH" + AC_MSG_NOTICE([Setting MIN_CC=$MIN_CC GPU_ARCH=$GPU_ARCH]) + + AS_IF([test -z "$GPU_ARCH"], + [AC_MSG_ERROR([No supported compute capabilities found])]) dnl check that nvcc know ptx instruction madc - echo "__global__ void test (int *a, int b) { - asm(\"mad.lo.cc.u32 %0, %0, %1, %1;\": - \"+r\"(*a) : \"r\"(b));} " > conftest.cu - AC_MSG_CHECKING([if nvcc know ptx instruction madc]) - $NVCC -c conftest.cu -o conftest.o $NVCCFLAGS --generate-code arch=compute_20,code=compute_20 > /dev/null 2>&1 - AS_IF([test "$?" 
-eq "0"], + AC_MSG_CHECKING([if nvcc knows ptx instruction madc]) + NVCC_CHECK_COMPILE( + [ + __global__ void test (int *a, int b) { + asm(\"mad.lo.cc.u32 %0, %0, %1, %1;\": + \"+r\"(*a) : \"r\"(b));} + ], + [$NVCCFLAGS --generate-code arch=compute_${MIN_CC},code=compute_${MIN_CC}], + [AC_MSG_RESULT([yes])], [ - AC_MSG_RESULT([yes]) - ], [ AC_MSG_RESULT([no]) AC_MSG_ERROR([nvcc does not recognize ptx instruction madc, you should upgrade it]) ]) + AC_ARG_WITH(cgbn_include, + AS_HELP_STRING([--with-cgbn-include=DIR], [CGBN include directory]), + [ + cgbn_include=$withval + AC_MSG_NOTICE([Using CGBN from $cgbn_include]) + AS_IF([test "x$with_cgbn_include" != "xno"], + [ + AS_IF([test -d "$cgbn_include"], + [], + [AC_MSG_ERROR([Specified CGBN include directory "$cgbn_include" does not exist])]) + + AC_MSG_CHECKING([if CGBN is present]) + + dnl AC_CHECK_HEADER can't verify NVCC compilability hence NVCC_CHECK_COMPILE + NVCC_CHECK_COMPILE( + [ + #include + #include + ], + [-I$cgbn_include $GMPLIB], + [AC_MSG_RESULT([yes])], + [ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([cgbn.h not found (check if /cgbn needed after /include)]) + ] + ) + AC_DEFINE([HAVE_CGBN_H], [1], [Define to 1 if cgbn.h exists]) + NVCCFLAGS="-I$with_cgbn_include $GMPLIB $NVCCFLAGS" + want_cgbn="yes" + ]) + ]) + LIBS="$LIBS_BACKUP" LDFLAGS="$LDFLAGS_BACKUP" - - NVCCFLAGS="$NVCCFLAGS -DWITH_GPU $GPU_ARCH" + + NVCCFLAGS="$NVCCFLAGS $GPU_ARCH" CFLAGS="$CFLAGS -DWITH_GPU" CPPFLAGS="$CPPFLAGS -DWITH_GPU" NVCCFLAGS="$NVCCFLAGS --ptxas-options=-v" NVCCFLAGS="$NVCCFLAGS --compiler-options -fno-strict-aliasing" - # If debug flag is set apply debugging compilation flags, + # If debug flag is set apply debugging compilation flags, # otherwise build compilation flags AS_IF([test "x$DEBUG" = "xtrue"], [ @@ -596,6 +650,8 @@ ]) #Set this conditional if cuda is wanted AM_CONDITIONAL([WANT_GPU], [test "x$enable_gpu" = "xyes" ]) +#Set this conditional if cuda & cgbn_include +AM_CONDITIONAL([WANT_CGBN], [test "x$want_cgbn" = 
"xyes" ]) AC_SUBST(NVCC) AC_SUBST(NVCCFLAGS) @@ -604,32 +660,3 @@ AC_SUBST(CUDARPATH) ]) - -dnl Checks whether the stack can be marked nonexecutable by passing an option -dnl to the C-compiler when acting on .s files. Appends that option to ASMFLAGS. -dnl This macro is adapted from one found in GMP 6.1.1. -dnl FIXME: This test looks broken. It tests that a file with .note.GNU-stack... -dnl can be compiled/assembled with -Wa,--noexecstack. It does not determine -dnl if that command-line option has any effect on general asm code. -AC_DEFUN([CL_AS_NOEXECSTACK],[ -dnl AC_REQUIRE([AC_PROG_CC]) GMP uses something else -AC_CACHE_CHECK([whether assembler supports --noexecstack option], -cl_cv_as_noexecstack, [dnl - cat > conftest.c </dev/null]) \ - && grep .note.GNU-stack conftest.s >/dev/null \ - && AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -Wa,--noexecstack - -c -o conftest.o conftest.s >/dev/null]) - then - cl_cv_as_noexecstack=yes - else - cl_cv_as_noexecstack=no - fi - rm -f conftest*]) - if test "$cl_cv_as_noexecstack" = yes; then - LIBECM_LDFLAGS="$LIBECM_LDFLAGS -Wl,-znoexecstack" - fi -]) diff -Nru gmp-ecm-7.0.4+ds/addlaws.c gmp-ecm-7.0.5+ds/addlaws.c --- gmp-ecm-7.0.4+ds/addlaws.c 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/addlaws.c 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,1301 @@ +/* addlaws.c - various addition laws for ECM + Author: F. 
Morain +*/ + +#include +#include +#include +#include + +#include /* GMP header file */ + +#include "ecm.h" /* ecm header file */ +#include "ecm-impl.h" +#include "ecm-ecm.h" +#include "mpmod.h" + +#include "addlaws.h" + +#if DEBUG_ADD_LAWS >= 1 +void +print_mpz_from_mpres(mpres_t x, mpmod_t n) +{ + mpz_t tmp; + + mpz_init(tmp); + mpres_get_z(tmp, x, n); + gmp_printf("%Zd", tmp); + mpz_clear(tmp); +} +#endif + +/******************** Weierstrass section ********************/ + +void +pt_w_set_to_zero(ell_point_t P, mpmod_t n) +{ + mpres_set_ui(P->x, 0, n); + mpres_set_ui(P->y, 1, n); + mpres_set_ui(P->z, 0, n); +} + +int +pt_w_is_zero(mpres_t z, mpmod_t n) +{ + return mpres_is_zero(z, n); +} + +void +pt_w_set(mpres_t x0, mpres_t y0, mpres_t z0, + mpres_t x, mpres_t y, mpres_t z, + ATTRIBUTE_UNUSED mpmod_t n) +{ + mpres_set(x0, x, n); + mpres_set(y0, y, n); + mpres_set(z0, z, n); +} + +#if DEBUG_ADD_LAWS >= 1 +void +pt_w_print(mpres_t x, mpres_t y, mpres_t z, ell_curve_t E, mpmod_t n) +{ + printf("["); + print_mpz_from_mpres(x, n); + printf(", "); + print_mpz_from_mpres(y, n); + printf(", "); + if(E->type == ECM_EC_TYPE_WEIERSTRASS && E->law == ECM_LAW_AFFINE) + gmp_printf("%Zd", z); + else + print_mpz_from_mpres(z, n); + printf("]"); +} +#endif + +/* [x0, y0, z0] <- [x1, y1, z1] + [x2, y2, z2] using lambda=num/den + with buffer inv. + + (lambda*x+mu)^2+a1*x*(lambda*x+mu)+a3*(lambda*x+mu)=x^3+a2*x^2+... + x^3+(a2-lambda^2-a1*lambda)*x^2+... = 0 + x1+x2+x3 = lambda^2+a1*lambda-a2. 
+ y3 = lambda*(x1-x3)-y1-a1*x3-a3 + */ +static int +pt_w_common_aff(mpz_t f, mpres_t x0, mpres_t y0, mpres_t z0, + mpres_t x1, mpres_t y1, + mpres_t x2, mpres_t a1, mpres_t a3, mpres_t a2, + mpmod_t n, mpres_t num, mpres_t den, mpres_t lambda) +{ + if(mpres_invert(lambda, den, n) == 0){ + mpres_gcd(f, den, n); + return 0; + } + /** lambda = num/den **/ + mpres_mul(lambda, lambda, num, n); + /** num <- (lambda+a1)*lambda **/ + mpres_add(num, lambda, a1, n); + mpres_mul(num, num, lambda, n); + mpres_sub(num, num, a2, n); + /** x0 = den <- num-x1-x2 **/ + mpres_sub(den, num, x1, n); + mpres_sub(den, den, x2, n); + /** y0 = num <- lambda*(x1-x0)-(y1+a1*x0+a3) **/ + mpres_sub(num, x1, den, n); + mpres_mul(num, num, lambda, n); + mpres_sub(y0, num, y1, n); + mpres_sub(y0, y0, a3, n); + mpres_mul(x0, a1, den, n); + mpres_sub(y0, y0, x0, n); + /** finish **/ + mpres_set(x0, den, n); + mpz_set_ui(z0, 1); /* just in case */ + return 1; +} + +/* [x3, y3, z3] <- [2] * [x1, y1, z1] */ +int +pt_w_duplicate(mpz_t f, mpres_t x3, mpres_t y3, mpres_t z3, + mpres_t x1, mpres_t y1, mpres_t z1, + mpmod_t n, ell_curve_t E) +{ + if(pt_w_is_zero(z1, n) == 1){ + pt_w_set(x3, y3, z3, x1, y1, z1, n); + return 1; + } + if(E->type == ECM_EC_TYPE_WEIERSTRASS && E->law == ECM_LAW_AFFINE){ + /* buf[1] <- 2*y1+a1*x1+a3 */ + mpres_mul(E->buf[1], E->a1, x1, n); + mpres_add(E->buf[1], E->buf[1], E->a3, n); + mpres_add(E->buf[1], E->buf[1], y1, n); + mpres_add(E->buf[1], E->buf[1], y1, n); + if(mpres_is_zero(E->buf[1], n)){ + /* buf1 = 0 <=> P is a [2]-torsion point */ + mpres_set_ui(x3, 0, n); + mpres_set_ui(y3, 1, n); + mpres_set_ui(z3, 0, n); + return 1; + } + /* buf[0] <- 3*x^2+2*a2*x+a4-a1*y = (3*x+2*a2)*x+a4-a1*y */ + mpres_mul_ui(E->buf[0], x1, 3, n); + mpres_add(E->buf[0], E->buf[0], E->a2, n); + mpres_add(E->buf[0], E->buf[0], E->a2, n); + mpres_mul(E->buf[0], E->buf[0], x1, n); + mpres_add(E->buf[0], E->buf[0], E->a4, n); + mpres_mul(E->buf[2], E->a1, y1, n); + mpres_sub(E->buf[0], 
E->buf[0], E->buf[2], n); + return pt_w_common_aff(f, x3, y3, z3, x1, y1, x1, + E->a1, E->a3, E->a2, n, + E->buf[0], E->buf[1], E->buf[2]); + } + else if(E->type == ECM_EC_TYPE_WEIERSTRASS + && E->law == ECM_LAW_HOMOGENEOUS){ + /* source is dbl-2007-bl: 5M + 6S + 1*a + 7add + 3*2 + 1*3 */ + /* mapping: h = buf[0], w = buf[1], s = buf[2], RR = buf[3], B = buf[4];*/ + /* h:=X1^2 mod p; # S*/ + mpres_sqr(E->buf[0], x1, n); + /* w:=Z1^2 mod p;*/ + mpres_sqr(E->buf[1], z1, n); + /* w:=a*w mod p;*/ + mpres_mul(E->buf[1], E->buf[1], E->a4, n); + /* s:=3*h mod p; # *3*/ + mpres_mul_ui(E->buf[2], E->buf[0], 3, n); + /* w:=w+s mod p;*/ + mpres_add(E->buf[1], E->buf[1], E->buf[2], n); + /* s:=Y1*Z1 mod p;*/ + mpres_mul(E->buf[2], y1, z1, n); + /* s:=2*s mod p;*/ + mpres_mul_ui(E->buf[2], E->buf[2], 2, n); + /* Z3:=s^2 mod p;*/ + mpres_sqr(z3, E->buf[2], n); + /* Z3:=s*Z3 mod p;*/ + mpres_mul(z3, z3, E->buf[2], n); + /* RR:=Y1*s mod p; # M*/ + mpres_mul(E->buf[3], y1, E->buf[2], n); + /* B:=X1+RR mod p; # add*/ + mpres_add(E->buf[4], x1, E->buf[3], n); + /* B:=B^2 mod p;*/ + mpres_sqr(E->buf[4], E->buf[4], n); + /* RR:=RR^2 mod p; # S*/ + mpres_sqr(E->buf[3], E->buf[3], n); + /* B:=B-h mod p;*/ + mpres_sub(E->buf[4], E->buf[4], E->buf[0], n); + /* B:=B-RR mod p;*/ + mpres_sub(E->buf[4], E->buf[4], E->buf[3], n); + /* h:=w^2 mod p;*/ + mpres_sqr(E->buf[0], E->buf[1], n); + /* X3:=2*B mod p;*/ + mpres_mul_ui(x3, E->buf[4], 2, n); + /* h:=h-X3 mod p;*/ + mpres_sub(E->buf[0], E->buf[0], x3, n); + /* X3:=h*s mod p; # M*/ + mpres_mul(x3, E->buf[0], E->buf[2], n); + /* s:=B-h mod p;*/ + mpres_sub(E->buf[2], E->buf[4], E->buf[0], n); + /* s:=w*s mod p;*/ + mpres_mul(E->buf[2], E->buf[2], E->buf[1], n); + /* Y3:=2*RR mod p;*/ + mpres_mul_ui(y3, E->buf[3], 2, n); + /* Y3:=s-Y3 mod p;*/ + mpres_sub(y3, E->buf[2], y3, n); + return 1; + } + return 0; +} + +/* [x3, y3, z3] <- [x1, y1, z1] + [x2, y2, z2]; P3 can be either P1 or P2. 
*/ +int +pt_w_add(mpz_t f, mpres_t x3, mpres_t y3, mpres_t z3, + mpres_t x1, mpres_t y1, mpres_t z1, + mpres_t x2, mpres_t y2, mpres_t z2, + mpmod_t n, ell_curve_t E) +{ + if(pt_w_is_zero(z1, n)){ + pt_w_set(x3, y3, z3, x2, y2, z2, n); + return 1; + } + else if(pt_w_is_zero(z2, n)){ + pt_w_set(x3, y3, z3, x1, y1, z1, n); + return 1; + } + if(E->type == ECM_EC_TYPE_WEIERSTRASS && E->law == ECM_LAW_AFFINE) + if(mpres_equal(x1, x2, n) && mpres_equal(y1, y2, n)) + return pt_w_duplicate(f, x3, y3, z3, x1, y1, z1, n, E); + else{ + mpres_sub(E->buf[0], y1, y2, n); + mpres_sub(E->buf[1], x1, x2, n); + return pt_w_common_aff(f, x3, y3, z3, x1, y1, x2, + E->a1, E->a3, E->a2, + n, E->buf[0], E->buf[1], E->buf[2]); + } + else if(E->type == ECM_EC_TYPE_WEIERSTRASS + && E->law == ECM_LAW_HOMOGENEOUS){ + /* Cohen-Miyaji-Ono: 12M+2S+6add+1*2 */ + /* mapping: y1z2 = buf, AA = buf+1, u = buf+2, v = buf+3, R = buf+4, */ + /* vvv = buf+5; */ +#if DEBUG_ADD_LAWS >= 2 + printf("y1="); print_mpz_from_mpres(y1, n); printf("\n"); + printf("y2="); print_mpz_from_mpres(y2, n); printf("\n"); + printf("z1="); print_mpz_from_mpres(z1, n); printf("\n"); + printf("z2="); print_mpz_from_mpres(z2, n); printf("\n"); +#endif + /* Y1Z2:=Y1*Z2 mod p; # M*/ + mpres_mul(E->buf[0], y1, z2, n); + /* A:=X1*Z2 mod p; # M*/ + mpres_mul(E->buf[1], x1, z2, n); + /* u:=Y2*Z1 mod p;*/ + mpres_mul(E->buf[2], y2, z1, n); + /* u:=u-Y1Z2 mod p;*/ + mpres_sub(E->buf[2], E->buf[2], E->buf[0], n); + /* v:=X2*Z1 mod p;*/ + mpres_mul(E->buf[3], x2, z1, n); + /* v:=v-A mod p;*/ + mpres_sub(E->buf[3], E->buf[3], E->buf[1], n); + if(mpz_sgn(E->buf[2]) == 0 && mpz_sgn(E->buf[3]) == 0){ + /* u = 0 <=> Y2*Z1 = Y1*Z2 <=> Y2/Z2 = Y1/Z1*/ + /* v = 0 <=> X2*Z1 = X1*Z2 <=> X2/Z2 = X1/Z1*/ + return pt_w_duplicate(f, x3, y3, z3, x1, y1, z1, n, E); + } + /* Z3:=Z1*Z2 mod p; # M*/ + mpres_mul(z3, z1, z2, n); + /* X3:=u^2 mod p;*/ + mpres_sqr(x3, E->buf[2], n); + /* X3:=X3*Z3 mod p;*/ + mpres_mul(x3, x3, z3, n); + /* R:=v^2 mod p;*/ + 
mpres_sqr(E->buf[4], E->buf[3], n); + /* vvv:=v*R mod p;*/ + mpres_mul(E->buf[5], E->buf[3], E->buf[4], n); + /* R:=R*A mod p;*/ + mpres_mul(E->buf[4], E->buf[4], E->buf[1], n); + /* Y3:=2*R mod p; # *2*/ + mpres_mul_ui(y3, E->buf[4], 2, n); + /* A:=X3-vvv mod p;*/ + mpres_sub(E->buf[1], x3, E->buf[5], n); + /* A:=A-Y3 mod p;*/ + mpres_sub(E->buf[1], E->buf[1], y3, n); + /* X3:=v*A mod p; # M*/ + mpres_mul(x3, E->buf[3], E->buf[1], n); + /* Y3:=R-A mod p;*/ + mpres_sub(y3, E->buf[4], E->buf[1], n); + /* Y3:=u*Y3 mod p;*/ + mpres_mul(y3, y3, E->buf[2], n); + /* A:=vvv*Y1Z2 mod p;*/ + mpres_mul(E->buf[1], E->buf[5], E->buf[0], n); + /* Y3:=Y3-A mod p;*/ + mpres_sub(y3, y3, E->buf[1], n); + /* Z3:=vvv*Z3 mod p; # M*/ + mpres_mul(z3, z3, E->buf[5], n); + return 1; + } + return 0; +} + +#if USE_ADD_SUB_CHAINS > 0 +/* [x3, y3, z3] <- [x1, y1, z1] - [x2, y2, z2]; P3 != P1, P3 != P2. + -P2 ~ -(x2/z2, y2/z2, 1) = (x2/z2, -y2/z2-a1*x/z2-a3, 1) + ~ (x2, -y2-a1*x2-a3*z2, z2). +*/ +int +pt_w_sub(mpz_t f, mpres_t x3, mpres_t y3, mpres_t z3, + mpres_t x1, mpres_t y1, mpres_t z1, + mpres_t x2, mpres_t y2, mpres_t z2, + mpmod_t n, ell_curve_t E) +{ + int res = 1; + + if(E->law == ECM_LAW_HOMOGENEOUS){ + /* FIXME: does not work for complete equation! */ + mpres_neg(y2, y2, n); + res = pt_w_add(f, x3, y3, z3, x1, y1, z1, x2, y2, z2, n, E); + mpres_neg(y2, y2, n); + } + else if(E->law == ECM_LAW_AFFINE){ + /* buf[3] not used in law, so use it */ + mpres_mul(E->buf[3], E->a1, x2, n); + mpres_add(E->buf[3], E->buf[3], E->a3, n); + mpres_add(E->buf[3], E->buf[3], y2, n); + mpres_neg(E->buf[3], E->buf[3], n); + res = pt_w_add(f, x3, y3, z3, x1, y1, z1, x2, E->buf[3], z2, n, E); + } + return res; +} +#endif + +/******************** projective Hessian form ********************/ + +/* U^3+V^3+W^3 = 3*D*U*V*W, D^3 <> 1. + O_H = [1:-1:0] + -[u:v:w] = [v:u:w] + Warning: there can exist two other points at infinity, namely + [1:-omega:0] and [1:-omega^2:0] where omega^3 = 1. 
+*/ +int +hessian_is_zero(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + mpres_t tmp; + int ret; + + if(mpz_sgn(P->z) != 0) + return 0; + mpres_init(tmp, n); + mpres_add(tmp, P->x, P->y, n); + ret = mpz_sgn(tmp) == 0; +#if 0 + if(ret) + gmp_printf("found a third root of unity? %Zd/%Zd\n", P->x, P->y); +#endif + mpres_clear(tmp, n); + return ret; +} + +void +hessian_set_to_zero(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + mpres_set_si(P->x, 1, n); + mpres_set_si(P->y, -1, n); + mpres_set_si(P->z, 0, n); +} + +#if DEBUG_ADD_LAWS >= 1 +void +hessian_print(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + pt_w_print(P->x, P->y, P->z, E, n); +} +#endif + +#if USE_ADD_SUB_CHAINS > 0 +/* -[u:v:w] = [v:u:w] */ +void +hessian_negate(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, ATTRIBUTE_UNUSED mpmod_t n) +{ + mpz_swap(P->x, P->y); /* humf */ +} +#endif + +/* TODO: decrease the number of buffers? */ +int +hessian_duplicate(ell_point_t R, ell_point_t P, + ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + /* A = buf[0], ..., G = buf[6], H = buf[7], J = buf[8] */ + /* A:=P[1]^2 mod N; */ + mpres_mul(E->buf[0], P->x, P->x, n); + /* B:=P[2]^2 mod N; */ + mpres_mul(E->buf[1], P->y, P->y, n); + /* C:=P[3]^2 mod N; */ + mpres_mul(E->buf[2], P->z, P->z, n); + /* D:=(A+B) mod N; */ + mpres_add(E->buf[3], E->buf[0], E->buf[1], n); + /* E:=(A+C) mod N; */ + mpres_add(E->buf[4], E->buf[0], E->buf[2], n); + /* F:=(B+C) mod N; */ + mpres_add(E->buf[5], E->buf[1], E->buf[2], n); + /* G:=((P[1]+P[2])^2-D) mod N; */ + mpres_add(E->buf[6], P->x, P->y, n); + mpres_mul(E->buf[6], E->buf[6], E->buf[6], n); + mpres_sub(E->buf[6], E->buf[6], E->buf[3], n); + /* H:=((P[1]+P[3])^2-E) mod N; */ + mpres_add(E->buf[7], P->x, P->z, n); + mpres_mul(E->buf[7], E->buf[7], E->buf[7], n); + mpres_sub(E->buf[7], E->buf[7], E->buf[4], n); + /* J:=((P[2]+P[3])^2-F) mod N; */ + mpres_add(E->buf[8], P->y, P->z, n); + mpres_mul(E->buf[8], E->buf[8], E->buf[8], n); + 
mpres_sub(E->buf[8], E->buf[8], E->buf[5], n); + /* R->x = ((J-G)*(H+2*E)) mod N */ + mpres_sub(E->buf[0], E->buf[8], E->buf[6], n); + mpres_add(E->buf[1], E->buf[7], E->buf[4], n); + mpres_add(E->buf[1], E->buf[1], E->buf[4], n); + mpres_mul(R->x, E->buf[0], E->buf[1], n); + /* R->y = ((G-H)*(J+2*F)) mod N */ + mpres_sub(E->buf[0], E->buf[6], E->buf[7], n); + mpres_add(E->buf[1], E->buf[8], E->buf[5], n); + mpres_add(E->buf[1], E->buf[1], E->buf[5], n); + mpres_mul(R->y, E->buf[0], E->buf[1], n); + /* R->z = ((H-J)*(G+2*D)) mod N */ + mpres_sub(E->buf[0], E->buf[7], E->buf[8], n); + mpres_add(E->buf[1], E->buf[6], E->buf[3], n); + mpres_add(E->buf[1], E->buf[1], E->buf[3], n); + mpres_mul(R->z, E->buf[0], E->buf[1], n); + return 1; +} + +/* TODO: reduce the number of buffers? */ +int +hessian_plus(ell_point_t R, ell_point_t P, ell_point_t Q, + ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + /* P = [T1,T2,T3], Q = [T4,T5,T6] */ + /* P = Q <=> T1/T3=T4/T6 and T2/T3=T5/T6 + <=> T1*T6=T3*T4 and T2*T6=T3*T5 + */ + /* T1 = buf[0], ..., T7 = buf[6] */ + /* T7:=(T1*T6) mod N; */ + mpres_mul(E->buf[6], P->x, Q->z, n); + /* T1:=(T1*T5) mod N; */ + mpres_mul(E->buf[0], P->x, Q->y, n); + /* T5:=(T3*T5) mod N; */ + mpres_mul(E->buf[4], P->z, Q->y, n); + /* T3:=(T3*T4) mod N; */ + mpres_mul(E->buf[2], P->z, Q->x, n); + /* T4:=(T2*T4) mod N; */ + mpres_mul(E->buf[3], P->y, Q->x, n); + /* T2:=(T2*T6) mod N; */ + mpres_mul(E->buf[1], P->y, Q->z, n); + + if(mpres_equal(E->buf[6], E->buf[2], n) + && mpres_equal(E->buf[4], E->buf[1], n)) + /* as a matter of that, P = Q and we need duplicate */ + return hessian_duplicate(R, P, E, n); + + /* T6:=(T2*T7) mod N; */ + mpres_mul(E->buf[5], E->buf[1], E->buf[6], n); + /* T2:=(T2*T4) mod N; */ + mpres_mul(E->buf[1], E->buf[1], E->buf[3], n); + /* T4:=(T3*T4) mod N; */ + mpres_mul(E->buf[3], E->buf[2], E->buf[3], n); + /* T3:=(T3*T5) mod N; */ + mpres_mul(E->buf[2], E->buf[2], E->buf[4], n); + /* T5:=(T1*T5) mod N; */ + mpres_mul(E->buf[4], 
E->buf[0], E->buf[4], n); + /* T1:=(T1*T7) mod N; */ + mpres_mul(E->buf[0], E->buf[0], E->buf[6], n); + /* T1:=(T1-T4) mod N; */ + mpres_sub(R->y, E->buf[0], E->buf[3], n); + /* T2:=(T2-T5) mod N; */ + mpres_sub(R->x, E->buf[1], E->buf[4], n); + /* T3:=(T3-T6) mod N; */ + mpres_sub(R->z, E->buf[2], E->buf[5], n); + /* return [T2, T1, T3]; */ + return 1; +} + +int +hessian_add(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + if(hessian_is_zero(P, E, n)){ + ell_point_set(R, Q, E, n); + return 1; + } + else if(hessian_is_zero(Q, E, n)){ + ell_point_set(R, P, E, n); + return 1; + } + else + return hessian_plus(R, P, Q, E, n); +} + +#if USE_ADD_SUB_CHAINS > 0 +int +hessian_sub(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + int ret; + + hessian_negate(Q, E, n); + ret = hessian_add(R, P, Q, E, n); + hessian_negate(Q, E, n); + return ret; +} +#endif + +/* switch from X^3+Y^3+1=3*D*X*Y to Y^2=X^3+A*X+B + A:=-27*D*(D^3+8); + B:=54*(D^6-20*D^3-8); + xi:=12*(D^3-1)/(D*u+v+1); + x:=-9*D^2+xi*u; + y:=3*xi*(v-1); + OUTPUT: If a factor is found during the inversion, it is put in f and + ECM_FACTOR_FOUND_STEP1 is returned. Otherwise, ECM_NO_FACTOR_FOUND is + returned. 
+ SIDE-EFFECT: (x, y, D) <- (x_on_W, y_on_W, A_of_W) + */ +int +hessian_to_weierstrass(mpz_t f, mpres_t x, mpres_t y, mpres_t D, mpmod_t n) +{ + mpres_t D3, A, xi, tmp1, tmp2; + int ret = ECM_NO_FACTOR_FOUND; + +#if DEBUG_ADD_LAWS >= 1 + printf("P:=["); + print_mpz_from_mpres(x, n); + printf(", "); + print_mpz_from_mpres(y, n); + printf(", 1];\n"); + printf("D:="); + print_mpz_from_mpres(D, n); + printf(";\n"); +#endif + /* D3 <- D^3 */ + mpres_init(D3, n); + mpres_mul(D3, D, D, n); + mpres_mul(D3, D3, D, n); + /* finish A */ + mpres_init(A, n); + mpres_add_ui(A, D3, 8, n); + mpres_mul(A, A, D, n); + mpres_mul_ui(A, A, 27, n); + mpres_neg(A, A, n); + /* compute xi */ + mpres_init(xi, n); + mpres_init(tmp1, n); + mpres_mul(tmp1, D, x, n); + mpres_add(tmp1, tmp1, y, n); + mpres_add_ui(tmp1, tmp1, 1, n); + mpres_init(tmp2, n); + mpres_sub_ui(tmp2, D3, 1, n); + mpres_mul_ui(tmp2, tmp2, 12, n); + if(mpres_invert(xi, tmp1, n) == 0){ + mpres_gcd(f, tmp1, n); + ret = ECM_FACTOR_FOUND_STEP1; + } + else{ + mpres_mul(xi, xi, tmp2, n); + /* compute x */ + mpres_mul(tmp1, D, D, n); + mpres_mul_ui(tmp1, tmp1, 9, n); + mpres_mul(tmp2, xi, x, n); + mpres_sub(x, tmp2, tmp1, n); + /* compute y */ + mpres_sub_ui(tmp1, y, 1, n); + mpres_mul(tmp1, tmp1, xi, n); + mpres_mul_ui(y, tmp1, 3, n); + mpres_set(D, A, n); +#if DEBUG_ADD_LAWS >= 1 + printf("WP:=["); + print_mpz_from_mpres(x, n); + printf(", "); + print_mpz_from_mpres(y, n); + printf(", 1];\n"); + printf("WA:="); + print_mpz_from_mpres(D, n); + printf(";\nWB:=(WP[2]^2-WP[1]^3-WA*WP[1]) mod N;WE:=[WA, WB];\n"); +#endif + } + mpres_clear(A, n); + mpres_clear(D3, n); + mpres_clear(xi, n); + mpres_clear(tmp1, n); + mpres_clear(tmp2, n); + return ret; +} + +int +mult_by_3(mpz_t f, mpres_t x, mpres_t y, mpres_t A, mpmod_t n) +{ + ell_curve_t E; + ell_point_t P, Q; + int ret = ECM_NO_FACTOR_FOUND; + mpz_t e; + + ell_curve_init_set(E, ECM_EC_TYPE_WEIERSTRASS, ECM_LAW_AFFINE, A, n); + ell_point_init(P, E, n); + mpres_set(P->x, x, n); + 
mpres_set(P->y, y, n); + mpres_set_ui(P->z, 1, n); + ell_point_init(Q, E, n); + mpz_init_set_ui(e, 3); + if(ell_point_mul(f, Q, e, P, E, n) != 0){ + mpres_set(x, Q->x, n); + mpres_set(y, Q->y, n); + } + mpz_clear(e); + ell_point_clear(Q, E, n); + ell_point_clear(P, E, n); + ell_curve_clear(E, n); + return ret; +} + +/******************** projective twisted Hessian form ********************/ + +/* a*U^3+V^3+W^3 = d*U*V*W + O_E = [0:-1:1] + -[U:V:W]=[U:W:V] +*/ +int +twisted_hessian_is_zero(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + mpres_t tmp; + int ret; + + if(mpz_sgn(P->x) != 0) + return 0; + mpres_init(tmp, n); + mpres_add(tmp, P->y, P->z, n); + ret = mpz_sgn(tmp) == 0; +#if 0 + if(ret) + gmp_printf("found a third root of unity? %Zd/%Zd\n", P->x, P->y); +#endif + mpres_clear(tmp, n); + return ret; +} + +void +twisted_hessian_set_to_zero(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + mpres_set_si(P->x, 0, n); + mpres_set_si(P->y, -1, n); + mpres_set_si(P->z, 1, n); +} + +#if DEBUG_ADD_LAWS >= 1 +void +twisted_hessian_print(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + pt_w_print(P->x, P->y, P->z, E, n); +} +#endif + +#if USE_ADD_SUB_CHAINS > 0 +/* -[u:v:w] = [u:w:v] */ +void +twisted_hessian_negate(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, ATTRIBUTE_UNUSED mpmod_t n) +{ + mpz_swap(P->y, P->z); /* humf */ +} +#endif + +/* TODO: decrease the number of buffers? 
*/ +/* 6M+2S+1M_d: better when d is small */ +int +twisted_hessian_duplicate(ell_point_t R, ell_point_t P, + ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + /* R = buf[0], ..., W = buf[5], C = buf[6], D = buf[7], E = buf[8] */ + /* R:=Y1+Z1;*/ + mpres_add(E->buf[0], P->y, P->z, n); + /* S:=Y1-Z1;*/ + mpres_sub(E->buf[1], P->y, P->z, n); + /* T:=R^2 mod N;*/ + mpres_sqr(E->buf[2], E->buf[0], n); + /* U:=S^2 mod N;*/ + mpres_sqr(E->buf[3], E->buf[1], n); + /* V:=T+3*U;*/ + mpres_add(E->buf[4], E->buf[2], E->buf[3], n); + mpres_add(E->buf[4], E->buf[4], E->buf[3], n); + mpres_add(E->buf[4], E->buf[4], E->buf[3], n); + /* W:=3*T+U;*/ + mpres_add(E->buf[5], E->buf[3], E->buf[2], n); + mpres_add(E->buf[5], E->buf[5], E->buf[2], n); + mpres_add(E->buf[5], E->buf[5], E->buf[2], n); + /* C:=(R*V) mod N;*/ + mpres_mul(E->buf[6], E->buf[0], E->buf[4], n); + /* D:=(S*W) mod N;*/ + mpres_mul(E->buf[7], E->buf[1], E->buf[5], n); + /* E:=(3*C-E0[2]*X1*(W-V)) mod N;*/ + mpres_sub(E->buf[8], E->buf[5], E->buf[4], n); + mpres_mul(E->buf[8], E->buf[8], P->x, n); + mpres_mul(E->buf[8], E->buf[8], E->a6, n); + mpres_sub(E->buf[8], E->buf[6], E->buf[8], n); + mpres_add(E->buf[8], E->buf[8], E->buf[6], n); + mpres_add(E->buf[8], E->buf[8], E->buf[6], n); + /* X3:=(-2*X1*D) mod N;*/ + mpres_mul(R->x, P->x, E->buf[7], n); + mpres_add(R->x, R->x, R->x, n); + mpres_neg(R->x, R->x, n); + /* Y3:=((D+E)*Z1) mod N;*/ + mpres_add(E->buf[0], E->buf[7], E->buf[8], n); + mpres_mul(E->buf[1], E->buf[0], P->z, n); + /* Z3:=((D-E)*Y1) mod N;*/ + mpres_sub(E->buf[0], E->buf[7], E->buf[8], n); + mpres_mul(R->z, E->buf[0], P->y, n); + mpres_set(R->y, E->buf[1], n); + return 1; +} + +/* TODO: reduce the number of buffers? */ +int +twisted_hessian_plus(ell_point_t R, ell_point_t P, ell_point_t Q, + ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + /* A = buf[0], ... 
F = buf[5], G = [6], H = [7], J = [8] */ + // A:=X1*Z2 mod N; + mpres_mul(E->buf[0], P->x, Q->z, n); + // B:=Z1*Z2 mod N; + mpres_mul(E->buf[1], P->z, Q->z, n); + // C:=Y1*X2 mod N; + mpres_mul(E->buf[2], P->y, Q->x, n); + // D:=Y1*Y2 mod N; + mpres_mul(E->buf[3], P->y, Q->y, n); + // E:=Z1*Y2 mod N; + mpres_mul(E->buf[4], P->z, Q->y, n); + // F:=E0[1]*X1*X2 mod N; + mpres_mul(E->buf[5], P->x, Q->x, n); + mpres_mul(E->buf[5], E->buf[5], E->a4, n); + // Hisil + // G := (D+B)*(A-C) mod N; + mpres_add(E->buf[9], E->buf[3], E->buf[1], n); + mpres_sub(E->buf[6], E->buf[0], E->buf[2], n); + mpres_mul(E->buf[6], E->buf[6], E->buf[9], n); + // H := (D-B)*(A+C) mod N; + mpres_sub(E->buf[9], E->buf[3], E->buf[1], n); + mpres_add(E->buf[7], E->buf[0], E->buf[2], n); + mpres_mul(E->buf[7], E->buf[7], E->buf[9], n); + // J := (D+F)*(A-E) mod N; + mpres_add(E->buf[9], E->buf[3], E->buf[5], n); + mpres_sub(E->buf[8], E->buf[0], E->buf[4], n); + mpres_mul(E->buf[8], E->buf[8], E->buf[9], n); + // K := (D-F)*(A+E) mod N; + // this is the last use of A, so that K -> buf[0] + mpres_sub(E->buf[9], E->buf[3], E->buf[5], n); + mpres_add(E->buf[0], E->buf[0], E->buf[4], n); + mpres_mul(E->buf[0], E->buf[0], E->buf[9], n); + // X3 := G-H + mpres_sub(R->x, E->buf[6], E->buf[7], n); + // Y3 := K-J + mpres_sub(R->y, E->buf[0], E->buf[8], n); + // Z3 := (J+K-G-H-2*(B-F)*(C+E)) mod N; + mpres_sub(E->buf[9], E->buf[1], E->buf[5], n); + mpres_add(R->z, E->buf[2], E->buf[4], n); + mpres_mul(R->z, R->z, E->buf[9], n); + mpres_add(R->z, R->z, R->z, n); + mpres_add(R->z, R->z, E->buf[7], n); + mpres_add(R->z, R->z, E->buf[6], n); + mpres_sub(R->z, E->buf[0], R->z, n); + mpres_add(R->z, R->z, E->buf[8], n); + if(mpz_sgn(R->x) == 0 && mpz_sgn(R->y) == 0 && mpz_sgn(R->z) == 0){ + // iff (X2:Y2:Z2)=(Z1:gamma^2*X1:gamma*Y1), gamma^3 = a + fprintf(stderr, "GASP: X3, Y3 and Z3 are 0\n"); + exit(-1); +#if 0 + // TODO: rewrite with above quantities! 
+ X3:=(X1^2*Y2*Z2-X2^2*Y1*Z1) mod N; + // A*X1*Y2-C*X2*Z1 = A*U-C*V + Y3:=(Z1^2*X2*Y2-Z2^2*X1*Y1) mod N; + // E*Z1*X2-A*Z2*Y1 = E*V-A*W + Z3:=(Y1^2*X2*Z2-Y2^2*X1*Z1) mod N; + // C*Y1*Z2-E*Y2*X1 = C*W-E*U + + // X3 = Y1*(a*X1^3-Z1^3) + // Y3 = g^2*X1*(Z1^3-Y1^3) + // Z3 = g*Z1*(Y1^3-Z1^3) + + // can be made faster with a = aa^3, since then g = aa and we + // can share many things + +#endif + } + return 1; +} + +int +twisted_hessian_add(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + if(twisted_hessian_is_zero(P, E, n)){ + ell_point_set(R, Q, E, n); + return 1; + } + else if(twisted_hessian_is_zero(Q, E, n)){ + ell_point_set(R, P, E, n); + return 1; + } + else + return twisted_hessian_plus(R, P, Q, E, n); +} + +#if USE_ADD_SUB_CHAINS > 0 +int +twisted_hessian_sub(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + int ret; + + twisted_hessian_negate(Q, E, n); + ret = twisted_hessian_add(R, P, Q, E, n); + twisted_hessian_negate(Q, E, n); + return ret; +} +#endif + +/* INPUT: a*x^3+y^3+1 = d*x*y + OUTPUT: Y^2 = X^3+A*X+B + If a=c^3, then curve isom to Hessian (c*x)^3+y^3+1=3*(d/(3*c))*(c*x)*y + SIDE EFFECT: (x, y, c) <- (x_on_W, y_on_W, A_of_W) + */ +int +twisted_hessian_to_weierstrass(mpz_t f, mpres_t x, mpres_t y, mpres_t c, mpres_t d, mpmod_t n) +{ + int ret = ECM_NO_FACTOR_FOUND; + mpres_t tmp; + +#if DEBUG_ADD_LAWS >= 2 + printf("x_tH="); print_mpz_from_mpres(x, n); printf("\n"); + printf("y_tH="); print_mpz_from_mpres(y, n); printf("\n"); + printf("c_tH="); print_mpz_from_mpres(c, n); printf("\n"); + printf("d_tH="); print_mpz_from_mpres(d, n); printf("\n"); +#endif + mpres_init(tmp, n); + mpres_mul_ui(tmp, c, 3, n); + if(mpres_invert(tmp, tmp, n) == 0){ + mpres_gcd(f, tmp, n); + ret = ECM_FACTOR_FOUND_STEP1; + } + else{ + mpres_mul(x, x, c, n); + mpres_mul(c, tmp, d, n); + /* from x^3+y^3+1=3*c*x*y to Weierstrass stuff */ + ret = hessian_to_weierstrass(f, x, y, c, n); +#if DEBUG_ADD_LAWS >= 2 + printf("A_W="); 
print_mpz_from_mpres(c, n); printf("\n"); + printf("x_W="); print_mpz_from_mpres(x, n); printf("\n"); + printf("y_W="); print_mpz_from_mpres(y, n); printf("\n"); +#endif + } + mpres_clear(tmp, n); + return ret; +} + +/******************** generic ec's ********************/ + +void +ell_point_init(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + mpres_init(P->x, n); + mpres_init(P->y, n); + mpres_init(P->z, n); + if(E->type == ECM_EC_TYPE_WEIERSTRASS){ + if(E->law == ECM_LAW_AFFINE) + mpz_set_ui(P->z, 1); /* humf */ + else if(E->law == ECM_LAW_HOMOGENEOUS) + mpres_set_ui(P->z, 1, n); + } + else if(E->type == ECM_EC_TYPE_HESSIAN + || E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + mpres_set_ui(P->z, 1, n); +} + +/* TODO: change this according to E->type */ +void +ell_point_clear(ell_point_t P, ATTRIBUTE_UNUSED ell_curve_t E, mpmod_t n) +{ + mpres_clear(P->x, n); + mpres_clear(P->y, n); + mpres_clear(P->z, n); +} + +#if DEBUG_ADD_LAWS >= 1 +void +ell_point_print(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + pt_w_print(P->x, P->y, P->z, E, n); + else if(E->type == ECM_EC_TYPE_HESSIAN) + hessian_print(P, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + twisted_hessian_print(P, E, n); +} +#endif + +/* TODO: should depend on E->type... 
*/ +void +ell_point_set(ell_point_t Q, ell_point_t P, + ATTRIBUTE_UNUSED ell_curve_t E, ATTRIBUTE_UNUSED mpmod_t n) +{ + mpres_set(Q->x, P->x, n); + mpres_set(Q->y, P->y, n); + mpres_set(Q->z, P->z, n); +} + +void +ell_curve_init(ell_curve_t E, int etype, int law, mpmod_t n) +{ + int i; + + E->type = etype; + E->law = law; + mpres_init(E->a1, n); + mpres_init(E->a3, n); + mpres_init(E->a2, n); + mpres_init(E->a4, n); + mpres_init(E->a6, n); + mpres_set_ui(E->a1, 0, n); + mpres_set_ui(E->a3, 0, n); + mpres_set_ui(E->a2, 0, n); + mpres_set_ui(E->a4, 0, n); + mpres_set_ui(E->a6, 0, n); + for(i = 0; i < EC_W_NBUFS; i++) + mpres_init (E->buf[i], n); +} + +void +ell_curve_init_set(ell_curve_t E, int etype, int law, mpres_t A, mpmod_t n) +{ + ell_curve_init(E, etype, law, n); + mpres_set(E->a4, A, n); +} + +void +ell_curve_set_z(ell_curve_t E, ell_curve_t zE, mpmod_t n) +{ + ell_curve_init(E, zE->type, zE->law, n); + mpres_set_z(E->a1, zE->a1, n); + mpres_set_z(E->a3, zE->a3, n); + mpres_set_z(E->a2, zE->a2, n); + mpres_set_z(E->a4, zE->a4, n); + mpres_set_z(E->a6, zE->a6, n); +#if 0 + E->disc = zE->disc; + if(E->disc != 0){ + mpres_init(E->sq[0], n); + mpres_set_z(E->sq[0], zE->sq[0], n); + } +#endif +} + +void +ell_curve_clear(ell_curve_t E, mpmod_t n) +{ + int i; + + mpres_clear(E->a1, n); mpres_clear(E->a3, n); mpres_clear(E->a2, n); mpres_clear(E->a4, n); mpres_clear(E->a6, n); + for(i = 0; i < EC_W_NBUFS; i++) + mpres_clear (E->buf[i], n); + /* TODO: case of sq */ +} + +#if DEBUG_ADD_LAWS >= 1 +void +ell_curve_print(ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS){ + printf("["); print_mpz_from_mpres(E->a1, n); + printf(", "); print_mpz_from_mpres(E->a3, n); + printf(", "); print_mpz_from_mpres(E->a2, n); + printf(", "); print_mpz_from_mpres(E->a4, n); + printf(", "); print_mpz_from_mpres(E->a6, n); printf("];\n"); + } + else if(E->type == ECM_EC_TYPE_HESSIAN){ + printf("D:="); print_mpz_from_mpres(E->a4, n); printf(";\n"); + printf("E:=[D];\n"); + } + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN){ + printf("a:="); 
print_mpz_from_mpres(E->a4, n); printf(";\n"); + printf("d:="); print_mpz_from_mpres(E->a6, n); printf(";\n"); + printf("E:=[a, d];\n"); + } +} +#endif + +/* OUTPUT: 1 if P = O_E, 0 otherwise. */ +int +ell_point_is_zero(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + return pt_w_is_zero(P->z, n); + else if(E->type == ECM_EC_TYPE_HESSIAN) + return hessian_is_zero(P, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + return twisted_hessian_is_zero(P, E, n); + return ECM_ERROR; +} + +void +ell_point_set_to_zero(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + pt_w_set_to_zero(P, n); + else if(E->type == ECM_EC_TYPE_HESSIAN) + hessian_set_to_zero(P, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + twisted_hessian_set_to_zero(P, E, n); +} + +int +ell_point_is_on_curve(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + int ok = 1; + + if(ell_point_is_zero(P, E, n)) + return 1; + if(E->type == ECM_EC_TYPE_WEIERSTRASS){ + mpres_t tmp1, tmp2; + + mpres_init(tmp1, n); + mpres_init(tmp2, n); + if(E->law == ECM_LAW_AFFINE){ + /* y^2+a1*x*y+a3*y = x^3+a2*x^2+a4*x+a6? */ + mpres_mul(tmp1, E->a1, P->x, n); + mpres_add(tmp1, tmp1, P->y, n); + mpres_add(tmp1, tmp1, E->a3, n); + mpres_mul(tmp1, tmp1, P->y, n); + + mpres_add(tmp2, E->a2, P->x, n); + mpres_mul(tmp2, tmp2, P->x, n); + mpres_add(tmp2, tmp2, E->a4, n); + mpres_mul(tmp2, tmp2, P->x, n); + mpres_add(tmp2, tmp2, E->a6, n); + } +#if 0 // useless for the time being + else{ + /* y^2*z+a1*x*y*z+a3*y*z^2 = x^3+a2*x^2*z+a4*x*z^2+a6*z^3? */ + /* y*z*(y+a1*x+a3*z) = ((x+a2*z)*x+a4*z^2)*x+a6*z^3? */ + mpres_t tmp3; + + mpres_mul(tmp1, E->a1, P->x, n); /* a1*x */ + mpres_add(tmp1, tmp1, P->y, n); /* a1*x+y */ + mpres_mul(tmp2, E->a3, P->z, n); /* a3*z */ + mpres_add(tmp1, tmp1, tmp2, n); /* y+a1*x+a3*z */ + mpres_mul(tmp1, tmp1, P->y, n); /* y*(...) 
*/ + mpres_mul(tmp1, tmp1, P->z, n); /* lhs */ + + mpres_init(tmp3, n); + mpres_mul(tmp2, E->a2, P->z, n); /* a2*z */ + mpres_add(tmp2, tmp2, P->x, n); /* x+a2*z */ + mpres_mul(tmp2, tmp2, P->x, n); /* (x+a2*z)*x */ + mpres_mul(tmp3, E->a4, P->z, n); /* a4*z */ + mpres_mul(tmp3, tmp3, P->z, n); /* a4*z^2 */ + mpres_add(tmp2, tmp2, tmp3, n); /* (x+a2*z)*x+a4*z^2 */ + mpres_mul(tmp2, tmp2, P->x, n); /* (...)*x */ + mpres_mul(tmp3, P->z, P->z, n); /* z^2 */ + mpres_mul(tmp3, tmp3, P->z, n); /* z^3 */ + mpres_mul(tmp3, tmp3, E->a6, n); /* a6*z^3 */ + mpres_add(tmp2, tmp2, tmp3, n); /* rhs */ + mpres_clear(tmp3, n); + } +#endif + ok = mpres_equal(tmp1, tmp2, n); + + mpres_clear(tmp1, n); + mpres_clear(tmp2, n); + } + else if(E->type == ECM_EC_TYPE_HESSIAN){ + /* TODO */ + } + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN){ + /* TODO */ + } + return ok; +} + +#if DEBUG_ADD_LAWS >= 1 +static void +ell_point_check(ell_point_t P, ell_curve_t E, mpmod_t n) +{ + if(ell_point_is_on_curve(P, E, n) == 0){ + printf("Point not on curve\n"); + printf("E:="); + ell_curve_print(E, n); + printf("P:="); + pt_print(E, P, n); + printf("\n"); + exit(-1); + } +} +#endif + +#if DEBUG_ADD_LAWS >= 1 +int +ell_point_equal(ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + int ret = 1; + + if(E->type == ECM_EC_TYPE_WEIERSTRASS){ + if(E->law == ECM_LAW_AFFINE) + return mpres_equal(P->x, Q->x, n) + && mpres_equal(P->y, Q->y, n) + && mpres_equal(P->z, Q->z, n); + else if(E->law == ECM_LAW_HOMOGENEOUS){ + mpres_t tmp1, tmp2; + + mpres_init(tmp1, n); + mpres_init(tmp2, n); + mpres_mul(tmp1, P->x, Q->z, n); + mpres_mul(tmp2, P->z, Q->x, n); + if(mpres_equal(tmp1, tmp2, n) == 0){ + printf("Px/Pz != Qx/Qz\n"); + ret = 0; + exit(-1); + } + else{ + mpres_mul(tmp1, P->y, Q->z, n); + mpres_mul(tmp2, P->z, Q->y, n); + if(mpres_equal(tmp1, tmp2, n) == 0){ + printf("Py/Pz != Qy/Qz\n"); + ret = 0; + exit(-1); + } + } + mpres_clear(tmp1, n); + mpres_clear(tmp2, n); + } + } + return ret; +} +#endif + 
+/* OUTPUT: 1 if everything ok, 0 otherwise */ +int +ell_point_add(mpz_t f, ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + return pt_w_add(f, R->x, R->y, R->z, P->x, P->y, P->z, + Q->x, Q->y, Q->z, n, E); + else if(E->type == ECM_EC_TYPE_HESSIAN) + return hessian_add(R, P, Q, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + return twisted_hessian_add(R, P, Q, E, n); + else + return ECM_ERROR; +} + +#if USE_ADD_SUB_CHAINS > 0 +/* R <- P-Q */ +int +ell_point_sub(mpz_t f, ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n) +{ + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + return pt_w_sub(f, R->x, R->y, R->z, P->x, P->y, P->z, + Q->x, Q->y, Q->z, n, E); + else if(E->type == ECM_EC_TYPE_HESSIAN) + return hessian_sub(R, P, Q, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + return twisted_hessian_sub(R, P, Q, E, n); + else + return ECM_ERROR; +} +#endif + +int +ell_point_duplicate(mpz_t f, ell_point_t R, ell_point_t P, ell_curve_t E, mpmod_t n) +{ +#if DEBUG_ADD_LAWS >= 2 + printf("E:="); + ell_curve_print(E, n); +#endif + if(E->type == ECM_EC_TYPE_WEIERSTRASS) + return pt_w_duplicate(f, R->x, R->y, R->z, P->x, P->y, P->z, n, E); + else if(E->type == ECM_EC_TYPE_HESSIAN) + return hessian_duplicate(R, P, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + return twisted_hessian_duplicate(R, P, E, n); + else + return ECM_ERROR; +} + +void +ell_point_negate(ell_point_t P, ell_curve_t E, mpmod_t n) +{ +#if DEBUG_ADD_LAWS >= 2 + printf("P:="); ell_point_print(P, E, n); printf(";\n"); +#endif + if(ell_point_is_zero(P, E, n) == 0){ + if(E->type == ECM_EC_TYPE_WEIERSTRASS){ + if(E->law == ECM_LAW_HOMOGENEOUS){ + /* FIXME: does not work for complete equation! 
*/ + mpres_neg(P->y, P->y, n); + } + else if(E->law == ECM_LAW_AFFINE){ + /* (-P).y = -P.y-a1*P.x-a3 */ + if(mpz_sgn(E->a1) != 0 + || mpz_sgn(E->a3) != 0 + || mpz_sgn(E->a2) != 0){ /* FIXME */ + printf("GROUMF\n"); + exit(-1); + } + mpres_neg(P->y, P->y, n); + } + } +#if USE_ADD_SUB_CHAINS > 0 + else if(E->type == ECM_EC_TYPE_HESSIAN) + hessian_negate(P, E, n); + else if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) + twisted_hessian_negate(P, E, n); +#endif + } +#if DEBUG_ADD_LAWS >= 2 + printf("neg(P):="); ell_point_print(P, E, n); printf(";\n"); +#endif +} + +/* Q <- [e]*P + Return value: 0 if a factor is found, and the factor is in Q->x, + 1 otherwise. +*/ +int +ell_point_mul_plain (mpz_t f, ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n) +{ + size_t l; + int negated = 0, status = 1; + ell_point_t P0; + + if(ell_point_is_zero(P, E, n) != 0){ + ell_point_set(Q, P, E, n); + return 1; + } + + if (mpz_sgn (e) == 0) + { + ell_point_set_to_zero(Q, E, n); + return 1; + } + + if (mpz_sgn (e) < 0) + { + negated = 1; + mpz_neg (e, e); + ell_point_negate(P, E, n); /* since the point is non-zero */ + } + + if (mpz_cmp_ui (e, 1) == 0){ + ell_point_set(Q, P, E, n); + goto ell_point_mul_plain_end; + } + + l = mpz_sizeinbase (e, 2) - 1; /* l >= 1 */ + + ell_point_init(P0, E, n); + ell_point_set(P0, P, E, n); + +#if DEBUG_ADD_LAWS >= 2 + printf("P:="); ell_point_print(P, E, n); printf(";\n"); +#endif + while (l-- > 0) + { +#if DEBUG_ADD_LAWS >= 2 + printf("P0:="); ell_point_print(P0, E, n); printf(";\n"); +#endif + if(ell_point_duplicate (f, P0, P0, E, n) == 0) + { + status = 0; + break; + } +#if DEBUG_ADD_LAWS >= 2 + printf("Rdup:="); ell_point_print(P0, E, n); printf(";\n"); + printf("dup:=ProjEcmDouble(P0, E, N); ProjEcmEqual(dup, Rdup, N);\n"); +#endif + if (mpz_tstbit (e, l)) + { + if(ell_point_add (f, P0, P0, P, E, n) == 0) + { + status = 0; + break; + } +#if DEBUG_ADD_LAWS >= 2 + printf("Radd:="); ell_point_print(P0, E, n); printf(";\n"); + 
printf("Padd:=ProjEcmAdd(P, Rdup, E, N); ProjEcmEqual(Padd, Radd, N);\n"); +#endif + } + } + + ell_point_set(Q, P0, E, n); + ell_point_clear(P0, E, n); +ell_point_mul_plain_end: + + /* Undo negation to avoid changing the caller's e value */ + if (negated){ + mpz_neg (e, e); + ell_point_negate(P, E, n); + } + return status; +} + +int +ell_point_mul(mpz_t f, ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n) +{ +#if 1 /* keeping it simple */ + return ell_point_mul_plain(f, Q, e, P, E, n); +#else + return ell_point_mul_add_sub(f, Q, e, P, E, n); +#endif +} + diff -Nru gmp-ecm-7.0.4+ds/addlaws.h gmp-ecm-7.0.5+ds/addlaws.h --- gmp-ecm-7.0.4+ds/addlaws.h 2016-03-15 09:59:19.000000000 +0000 +++ gmp-ecm-7.0.5+ds/addlaws.h 2022-06-06 14:16:49.000000000 +0000 @@ -1,4 +1,5 @@ -#define USE_ADD_SUB_CHAINS 1 +#define DEBUG_ADD_LAWS 0 +#define USE_ADD_SUB_CHAINS 0 #define pt_is_equal(P, Q) (mpz_cmp((P)->x, (Q)->x) == 0 \ && mpz_cmp((P)->y, (Q)->y) == 0 \ @@ -9,20 +10,13 @@ void pt_set_to_zero(ell_point_t P, mpmod_t n); void pt_assign(ell_point_t Q, ell_point_t P, ATTRIBUTE_UNUSED mpmod_t n); void pt_neg(ell_point_t P, mpmod_t n); -void pt_many_set_to_zero(ell_point_t *tP, int nE, mpmod_t n); -void pt_many_neg(ell_point_t *tP, int nE, mpmod_t n); -void pt_many_assign(ell_point_t *tQ, ell_point_t *tP, int nE, mpmod_t n); -void pt_many_print(ell_curve_t *tE, ell_point_t *tP, int nE, mpmod_t n); -void print_mpz_from_mpres(mpres_t x, mpmod_t n); -int pt_many_duplicate(ell_point_t *tQ, ell_point_t *tP, ell_curve_t *tE, int nE, mpmod_t n, mpres_t *num, mpres_t *den, mpres_t *inv, char *ok); -int pt_many_mul(ell_point_t *tQ, ell_point_t *tP, ell_curve_t *tE, int nE, - mpz_t e, mpmod_t n, - mpres_t *num, mpres_t *den, mpres_t *inv, char *ok); int hessian_to_weierstrass(mpz_t f, mpres_t x, mpres_t y, mpres_t D, mpmod_t n); +int +twisted_hessian_to_weierstrass(mpz_t f, mpres_t x, mpres_t y, mpres_t c, mpres_t d, mpmod_t n); -int build_MO_chain(short *S, size_t Slen, mpz_t e, 
int w); -int build_add_sub_chain(short *S, size_t Slen, mpz_t e, int w); +size_t build_MO_chain(short *S, size_t Slen, mpz_t e, int w); +size_t build_add_sub_chain(short *S, size_t Slen, mpz_t e, int w); int compute_s_4_add_sub(mpz_t s, ecm_uint B1, int disc); int mult_by_3(mpz_t f, mpres_t x, mpres_t y, mpres_t A, mpmod_t n); @@ -39,16 +33,15 @@ int ell_point_is_on_curve(ell_point_t P, ell_curve_t E, mpmod_t n); int ell_point_is_zero(ell_point_t P, ell_curve_t E, mpmod_t n); void ell_point_set_to_zero(ell_point_t P, ell_curve_t E, mpmod_t n); -int ell_point_add(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n); -int ell_point_sub(ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n); -int ell_point_duplicate(ell_point_t R, ell_point_t P, ell_curve_t E, mpmod_t n); +int ell_point_add(mpz_t f, ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n); +int ell_point_sub(mpz_t f, ell_point_t R, ell_point_t P, ell_point_t Q, ell_curve_t E, mpmod_t n); +int ell_point_duplicate(mpz_t f, ell_point_t R, ell_point_t P, ell_curve_t E, mpmod_t n); void ell_point_negate(ell_point_t P, ell_curve_t E, mpmod_t n); -int ell_point_mul_plain (ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n); +int ell_point_mul_plain (mpz_t f, ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n); int get_add_sub_w(mpz_t e); -void add_sub_pack(mpz_t s, int w, short *S, int iS); -void add_sub_unpack(int *w, short **S, int *iS, mpz_t s); -int ell_point_mul_add_sub_with_S(ell_point_t Q, ell_point_t P, ell_curve_t E, - mpmod_t n, int w, short *S, int iS); -int ell_point_mul_add_sub (ell_point_t Q, mpz_t e, ell_point_t P, +void add_sub_pack(mpz_t s, int w, short *S, size_t iS); +void add_sub_unpack(int *w, short **S, size_t *iS, mpz_t s); +int ell_point_mul_add_sub_with_S(mpz_t f, ell_point_t Q, ell_point_t P, ell_curve_t E,mpmod_t n, int w, short *S, int iS); +int ell_point_mul_add_sub (mpz_t f, ell_point_t Q, mpz_t e, 
ell_point_t P, ell_curve_t E, mpmod_t n); -int ell_point_mul(ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n); +int ell_point_mul(mpz_t f, ell_point_t Q, mpz_t e, ell_point_t P, ell_curve_t E, mpmod_t n); diff -Nru gmp-ecm-7.0.4+ds/aprtcle/mpz_aprcl.c gmp-ecm-7.0.5+ds/aprtcle/mpz_aprcl.c --- gmp-ecm-7.0.4+ds/aprtcle/mpz_aprcl.c 2015-03-16 07:01:51.000000000 +0000 +++ gmp-ecm-7.0.5+ds/aprtcle/mpz_aprcl.c 2022-06-06 14:16:49.000000000 +0000 @@ -609,7 +609,7 @@ if (verbose >= APRTCLE_VERBOSE1) { - printf("P = %2d, Q = %12d (%3.2f%%)\r", P, Q, (i * (TestingQs + 1) + j) * 100.0 / (NP * (TestingQs + 1))); + printf("APR primality test: P = %2d, Q = %12d (%3.2f%%)\r", P, Q, (i * (TestingQs + 1) + j) * 100.0 / (NP * (TestingQs + 1))); fflush(stdout); } diff -Nru gmp-ecm-7.0.4+ds/athlon/autogen.py gmp-ecm-7.0.5+ds/athlon/autogen.py --- gmp-ecm-7.0.4+ds/athlon/autogen.py 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/autogen.py 2022-06-06 14:16:49.000000000 +0000 @@ -3,6 +3,13 @@ import re import sys +# Final assembler statement to mark stack as not executable on linux elf platforms +# Single quotes are used around # to prevent M4 to discard them as comments. M4 will remove them. 
+noexecstack_statement = """ +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif +""" def offaddr(addr, offset): if offset == 0: @@ -199,7 +206,7 @@ k = int(sys.argv[1]) if k == 1: - print """# + print("""# # mp_limb_t mulredc1(mp_limb_t *z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # @@ -242,7 +249,7 @@ movl %edx, (%ecx) adcl $0, %eax ret -""" +""" + noexecstack_statement) else: - print mulredc_k_rolled(k) + print(mulredc_k_rolled(k) + noexecstack_statement) diff -Nru gmp-ecm-7.0.4+ds/athlon/generate_all gmp-ecm-7.0.5+ds/athlon/generate_all --- gmp-ecm-7.0.4+ds/athlon/generate_all 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/generate_all 2022-06-06 14:16:49.000000000 +0000 @@ -1,6 +1,6 @@ #!/bin/sh -for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do +for i in $(seq 1 20); do ./autogen.py $i > mulredc$i.asm done diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc10.asm gmp-ecm-7.0.5+ds/athlon/mulredc10.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc10.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc10.asm 2022-06-06 14:16:49.000000000 +0000 @@ -254,3 +254,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc11.asm gmp-ecm-7.0.5+ds/athlon/mulredc11.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc11.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc11.asm 2022-06-06 14:16:49.000000000 +0000 @@ -272,3 +272,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc12.asm gmp-ecm-7.0.5+ds/athlon/mulredc12.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc12.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc12.asm 2022-06-06 14:16:49.000000000 +0000 @@ -290,3 +290,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) 
+.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc13.asm gmp-ecm-7.0.5+ds/athlon/mulredc13.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc13.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc13.asm 2022-06-06 14:16:49.000000000 +0000 @@ -308,3 +308,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc14.asm gmp-ecm-7.0.5+ds/athlon/mulredc14.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc14.asm 2006-03-07 15:57:35.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc14.asm 2022-06-06 14:16:49.000000000 +0000 @@ -326,3 +326,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc15.asm gmp-ecm-7.0.5+ds/athlon/mulredc15.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc15.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc15.asm 2022-06-06 14:16:49.000000000 +0000 @@ -344,3 +344,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc16.asm gmp-ecm-7.0.5+ds/athlon/mulredc16.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc16.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc16.asm 2022-06-06 14:16:49.000000000 +0000 @@ -362,3 +362,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc17.asm gmp-ecm-7.0.5+ds/athlon/mulredc17.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc17.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc17.asm 2022-06-06 14:16:49.000000000 +0000 @@ -380,3 +380,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc18.asm 
gmp-ecm-7.0.5+ds/athlon/mulredc18.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc18.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc18.asm 2022-06-06 14:16:49.000000000 +0000 @@ -398,3 +398,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc19.asm gmp-ecm-7.0.5+ds/athlon/mulredc19.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc19.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc19.asm 2022-06-06 14:16:49.000000000 +0000 @@ -416,3 +416,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc1.asm gmp-ecm-7.0.5+ds/athlon/mulredc1.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc1.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc1.asm 2022-06-06 14:16:49.000000000 +0000 @@ -42,3 +42,7 @@ adcl $0, %eax ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc20.asm gmp-ecm-7.0.5+ds/athlon/mulredc20.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc20.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc20.asm 2022-06-06 14:16:49.000000000 +0000 @@ -434,3 +434,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc2.asm gmp-ecm-7.0.5+ds/athlon/mulredc2.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc2.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc2.asm 2022-06-06 14:16:49.000000000 +0000 @@ -110,3 +110,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc3.asm gmp-ecm-7.0.5+ds/athlon/mulredc3.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc3.asm 2006-03-07 15:57:36.000000000 +0000 +++ 
gmp-ecm-7.0.5+ds/athlon/mulredc3.asm 2022-06-06 14:16:49.000000000 +0000 @@ -128,3 +128,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc4.asm gmp-ecm-7.0.5+ds/athlon/mulredc4.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc4.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc4.asm 2022-06-06 14:16:49.000000000 +0000 @@ -146,3 +146,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc5.asm gmp-ecm-7.0.5+ds/athlon/mulredc5.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc5.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc5.asm 2022-06-06 14:16:49.000000000 +0000 @@ -164,3 +164,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc6.asm gmp-ecm-7.0.5+ds/athlon/mulredc6.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc6.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc6.asm 2022-06-06 14:16:49.000000000 +0000 @@ -182,3 +182,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc7.asm gmp-ecm-7.0.5+ds/athlon/mulredc7.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc7.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc7.asm 2022-06-06 14:16:49.000000000 +0000 @@ -200,3 +200,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc8.asm gmp-ecm-7.0.5+ds/athlon/mulredc8.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc8.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc8.asm 2022-06-06 14:16:49.000000000 +0000 @@ -218,3 +218,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) 
+.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/athlon/mulredc9.asm gmp-ecm-7.0.5+ds/athlon/mulredc9.asm --- gmp-ecm-7.0.4+ds/athlon/mulredc9.asm 2006-03-07 15:57:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/athlon/mulredc9.asm 2022-06-06 14:16:49.000000000 +0000 @@ -236,3 +236,7 @@ popl %ebp ret +`#'if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +`#'endif + diff -Nru gmp-ecm-7.0.4+ds/AUTHORS gmp-ecm-7.0.5+ds/AUTHORS --- gmp-ecm-7.0.4+ds/AUTHORS 2014-04-08 07:01:43.000000000 +0000 +++ gmp-ecm-7.0.5+ds/AUTHORS 2022-06-06 14:16:49.000000000 +0000 @@ -26,6 +26,8 @@ and the NTT code. Jason S. Papadopoulos contributed optimizations to the NTT code. +Seth Troisi + contributed to GPU code Paul Zimmermann author of the first version of the program. @@ -34,4 +36,4 @@ Japke Rosink, Bruce Dodson. If you want to contribute to GMP-ECM, you are welcome; the development -version is available on . +version is available on . diff -Nru gmp-ecm-7.0.4+ds/auxlib.c gmp-ecm-7.0.5+ds/auxlib.c --- gmp-ecm-7.0.4+ds/auxlib.c 2015-02-25 14:50:40.000000000 +0000 +++ gmp-ecm-7.0.5+ds/auxlib.c 2022-06-06 14:16:49.000000000 +0000 @@ -283,3 +283,35 @@ return fseek (f, (long) offset, whence); } #endif + +int +ecm_tstbit (mpz_srcptr u, ecm_uint bit_index) +{ + mp_srcptr u_ptr = PTR(u); + ecm_int size = SIZ(u); + ecm_uint abs_size = ABS(size); + ecm_uint limb_index = bit_index / GMP_NUMB_BITS; + mp_srcptr p = u_ptr + limb_index; + mp_limb_t limb; + + if (limb_index >= abs_size) + return (size < 0); + + limb = *p; + if (size < 0) + { + limb = -limb; /* twos complement */ + + while (p != u_ptr) + { + p--; + if (*p != 0) + { + limb--; /* make it a ones complement instead */ + break; + } + } + } + + return (limb >> (bit_index % GMP_NUMB_BITS)) & 1; +} diff -Nru gmp-ecm-7.0.4+ds/batch.c gmp-ecm-7.0.5+ds/batch.c --- gmp-ecm-7.0.4+ds/batch.c 2016-04-08 12:47:36.000000000 +0000 +++ gmp-ecm-7.0.5+ds/batch.c 2022-06-06 14:16:49.000000000 +0000 @@ 
-41,14 +41,14 @@ is = (2^32-1). Multiplying all primes up to the following will result in a product that has (2^32-1) bits. */ #define MAX_B1_BATCH 2977044736UL -#elif defined(_WIN32) +#elif defined(_WIN32) && __GNU_MP_VERSION <= 6 && !defined(__MPIR_VERSION) /* Due to a limitation in GMP on 64-bit Windows, should also affect 32-bit Windows, sufficient memory cannot be allocated for the batch product s when using primes larger than the following */ #define MAX_B1_BATCH 3124253146UL #else -/* nth_prime(2^(MAX_HEIGHT-1)) */ -#define MAX_B1_BATCH 50685770167ULL +/* nth_prime(2^(MAX_HEIGHT-1))-1 */ +#define MAX_B1_BATCH 50685770166ULL #endif /* If forbiddenres != NULL, forbiddenres = "m r_1 ... r_k -1" indicating that @@ -67,7 +67,7 @@ prime_info_init (prime_info); - ASSERT_ALWAYS (B1 < MAX_B1_BATCH); + ASSERT_ALWAYS (B1 <= MAX_B1_BATCH); for (j = 0; j < MAX_HEIGHT; j++) mpz_init (acc[j]); /* sets acc[j] to 0 */ @@ -79,40 +79,44 @@ pp = qi = pi; maxpp = B1 / qi; #ifdef HAVE_ADDLAWS - if(forbiddenres != NULL && pi > 2){ - /* non splitting primes can occur in even powers only */ - int rp = (int)(pi % forbiddenres[0]); - for(j = 1; forbiddenres[j] >= 0; j++) - if(rp >= forbiddenres[j]) - break; - if(rp == forbiddenres[j]){ - /* printf("p=%lu is forbidden\n", pi);*/ - if(qi <= maxpp){ - /* qi <= B1/qi => qi^2 <= B1, let it go */ - qi *= qi; - } - else{ - /* qi is too large, do not increment i */ - pi = getprime_mt (prime_info); - continue; - } - } - } + if (forbiddenres != NULL && pi > 2) + { + /* non splitting primes can occur in even powers only */ + int rp = (int)(pi % forbiddenres[0]); + for (j = 1; forbiddenres[j] >= 0; j++) + if (rp >= forbiddenres[j]) + break; + if (rp == forbiddenres[j]) + { + /* printf("p=%lu is forbidden\n", pi); */ + if (qi <= maxpp) + { + /* qi <= B1/qi => qi^2 <= B1, let it go */ + qi *= qi; + } + else + { + /* qi is too large, do not increment i */ + pi = getprime_mt (prime_info); + continue; + } + } + } #endif while (pp <= maxpp) pp *= qi; 
#if ECM_UINT_MAX == 4294967295 - mpz_set_ui (ppz, pp); + mpz_set_ui (ppz, pp); #else - mpz_set_uint64 (ppz, pp); + mpz_set_uint64 (ppz, pp); #endif if ((i & 1) == 0) - mpz_set (acc[0], ppz); + mpz_set (acc[0], ppz); else - mpz_mul (acc[0], acc[0], ppz); - + mpz_mul (acc[0], acc[0], ppz); + j = 0; /* We have accumulated i+1 products so far. If bits 0..j of i are all set, then i+1 is a multiple of 2^(j+1). */ @@ -140,7 +144,7 @@ prime_info_clear (prime_info); /* free the prime tables */ for (i = 0; i < MAX_HEIGHT; i++) - mpz_clear (acc[i]); + mpz_clear (acc[i]); mpz_clear (ppz); } @@ -264,13 +268,13 @@ A is curve parameter in Montgomery's form: g*y^2*z = x^3 + a*x^2*z + x*z^2 n is the number to factor - B1 is the stage 1 bound + B1 is the stage 1 bound Output: If a factor is found, it is returned in x. Otherwise, x contains the x-coordinate of the point computed in stage 1 (with z coordinate normalized to 1). - B1done is set to B1 if stage 1 completed normally, - or to the largest prime processed if interrupted, but never - to a smaller value than B1done was upon function entry. + B1done is set to B1 if stage 1 completed normally, + or to the largest prime processed if interrupted, but never + to a smaller value than B1done was upon function entry. 
Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise ECM_NO_FACTOR_FOUND */ @@ -304,7 +308,7 @@ /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */ if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D) - { + { mpres_get_z (u, A, n); mpz_add_ui (u, u, 2); mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2); @@ -358,7 +362,7 @@ { for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;) { - if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ + if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ /* P2 <- P1+P2 P1 <- 2*P1 */ dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n); else /* (j,j+1) -> (2j+1,2j+2) */ @@ -371,7 +375,7 @@ mpresn_pad (d_2, n); for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;) { - if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ + if (ecm_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ /* P2 <- P1+P2 P1 <- 2*P1 */ dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n); else /* (j,j+1) -> (2j+1,2j+2) */ @@ -398,10 +402,8 @@ mpz_clear (z2); mpz_clear (t); mpz_clear (u); - if (batch == 2) - { + if (batch == ECM_PARAM_BATCH_2) mpz_clear (d_2); - } return ret; } diff -Nru gmp-ecm-7.0.4+ds/build.vc12/config.h gmp-ecm-7.0.5+ds/build.vc12/config.h --- gmp-ecm-7.0.4+ds/build.vc12/config.h 2014-04-08 07:01:43.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc12/config.h 2022-06-06 14:16:49.000000000 +0000 @@ -4,7 +4,7 @@ #define VERSION_GPU "gpu_ecm-win" -#define PACKAGE_BUGREPORT "ecm-discuss@lists.gforge.inria.fr" +#define PACKAGE_BUGREPORT "ecm-discuss@inria.fr" /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP systems. This function is required for `alloca.c' support on those systems. 
diff -Nru gmp-ecm-7.0.4+ds/build.vc12/gen_ecm_h.bat gmp-ecm-7.0.5+ds/build.vc12/gen_ecm_h.bat --- gmp-ecm-7.0.4+ds/build.vc12/gen_ecm_h.bat 2016-10-11 09:22:42.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc12/gen_ecm_h.bat 2022-06-06 14:16:49.000000000 +0000 @@ -5,7 +5,7 @@ for /f "tokens=1,2*" %%a in (..\ecm.h.in) do ( if "%%a" EQU "#undef" ( if "%%b" EQU "ECM_VERSION" ( - echo #define ECM_VERSION "7.0.4">>tmp.h + echo #define ECM_VERSION "7.0.5">>tmp.h ) ) else echo %%a %%b %%c>>tmp.h ) diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32a_redc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32a_redc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32a_redc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32a_redc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,133 @@ +; +; Part of GMP-ECM +; +; void ecm_redc3( +; mp_limb_t *z, [esp+4] on entry -> edi +; const mp_limb_t *x, [esp+8] on entry -> esi +; size_t n, [esp+12] on entry -> ecx +; mp_limb_t m [esp+16] on entry -> ebp +; ) + +%macro seq 3 + mov eax, [byte esi+4*%3] + mul ebp + add [byte edi+4*%3], %2 + adc %1, eax + mov %2, edx + adc %2, 0 +%endmacro + + text + global _ecm_redc3 + +_ecm_redc3: + push ebp + push edi + push esi + push ebx + sub esp, 16 + mov ecx, [esp+44] + mov edi, [esp+36] + mov [esp], ecx + cmp ecx, 5 + jae .3 + +.1: mov ebp, [esp+48] + mov esi, [esp+40] + imul ebp, [edi] + mov [esp+36], edi + mov ecx, [esp+44] + xor ebx, ebx + +.2: mov eax, [esi] + add edi, 4 + mul ebp + add esi, 4 + add eax, ebx + adc edx, 0 + add [edi-4], eax + adc edx, 0 + dec ecx + mov ebx, edx + jnz .2 + mov edi, [esp+36] + mov [edi], ebx + dec dword [esp] + lea edi, [edi+4] + jnz .1 + + add esp, 16 + pop ebx + pop esi + pop edi + pop ebp + ret + +.3: mov edx, ecx + dec ecx + sub edx, 2 + neg ecx + shr edx, 4 + and ecx, 15 + mov [esp+8], edx + mov edx, ecx + shl edx, 4 + neg ecx + lea edx, [edx+ecx+.6] + mov [esp+44], ecx + mov [esp+12], edx + +.4: mov ebp, [esp+48] + mov esi, [esp+40] + imul ebp, [edi] + mov 
[esp+36], edi + mov ecx, [esp+44] + mov edx, [esp+8] + mov [esp+4], edx + mov eax, [esi] + lea esi, [esi+ecx*4+4] + mul ebp + lea edi, [edi+ecx*4] + mov ebx, edx + mov edx, [esp+12] + test ecx, 1 + mov ecx, eax + cmovnz ecx, ebx + cmovnz ebx, eax + jmp edx + + align 32 +.5: add edi, 64 +.6: + +%assign i 0 +%rep 16 + %if (i & 1) + seq ecx, ebx, i + %else + seq ebx, ecx, i + %endif + %assign i i + 1 +%endrep + + dec dword [esp+4] + lea esi, [esi+64] + jns .5 + + add [edi+64], ecx + mov edi, [esp+36] + adc ebx, 0 + mov [edi], ebx + dec dword [esp] + lea edi, [edi+4] + jnz .4 + + add esp, 16 + pop ebx + pop esi + pop edi + pop ebp + ret + + end + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_mulredc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_mulredc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_mulredc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_mulredc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,148 @@ + +; Part of GMP-ECM +; +; mp_limb_t mulredc1( 1 limb +; mp_limb_t *z, +; const mp_limb_t x, +; const mp_limb_t y, +; const mp_limb_t m, +; mp_limb_t inv_m +; ) +; +; mp_limb_t mulredc( > 1 limb +; mp_limb_t *z, +; const mp_limb_t *x, +; const mp_limb_t *y, +; const mp_limb_t *m, +; mp_limb_t inv_m +; ) + +%macro mseq 1 + movd mm1, [esi+4*%1] + movd mm2, [edi+4*%1] + pmuludq mm1, mm7 + paddq mm2, mm1 + paddq mm0, mm2 + movd [edi+4*%1], mm0 + psrlq mm0, 32 +%endmacro + +%macro mulredc 1 +%assign limbs %1 +%define f_name(x) _mulredc %+ x + + global f_name(limbs) +%ifdef DLL + export f_name(limbs) +%endif + +f_name(limbs): + push ebp + push edi + push esi + push ebx + sub esp, 8*(limbs+1) + mov edi, esp + +%assign i 0 +%rep 2 * limbs + 1 + mov dword [edi+4*i], 0 + %assign i i + 1 +%endrep + + mov dword [esp+8*limbs+4], limbs + + align 32 + +.1: mov eax, [esp+8*limbs+32] + mov esi, [esp+8*limbs+36] + mov eax, [eax] + mul dword [esi] + add eax, [edi] + mul dword [esp+8*limbs+44] + mov ebp, eax + mov esi, [esp+8*limbs+40] + + pxor mm0, mm0 + movd mm7, ebp + +%assign i 0 +%rep limbs + mseq i + %assign i i + 1 +%endrep + + movd ecx, mm0 + + add [edi+4*limbs], ecx + adc dword [edi+4*limbs+4], 0 + mov eax, [esp+8*limbs+32] + mov ebp, [eax] + mov esi, [esp+8*limbs+36] + + pxor mm0, mm0 + movd mm7, ebp + +%assign i 0 +%rep limbs + mseq i + %assign i i + 1 +%endrep + + movd ecx, mm0 + add [edi+4*limbs], ecx + adc dword [edi+4*limbs+4], 0 + add dword [esp+8*limbs+32], 4 + add edi, 4 + dec dword [esp+8*limbs+4] + jnz .1 + + mov ebx, [esp+8*limbs+28] + +%assign i 0 +%rep limbs + mov eax, [edi+4*i] + mov [ebx+4*i], eax + %assign i i + 1 +%endrep + mov eax, [edi+4*limbs] + add esp, 8*(limbs+1) + + pop ebx + pop esi + pop edi + pop ebp + emms + ret +%endmacro + + bits 32 + section .text + + global _mulredc1 +%ifdef DLL + export _mulredc1 +%endif + +_mulredc1: + mov eax, [esp+12] + mul dword [esp+8] + mov [esp+12], edx + mov [esp+8], eax + mul dword [esp+20] + mul dword [esp+16] + add eax, [esp+8] + adc edx, [esp+12] + mov ecx, [esp+4] + mov [ecx], edx + adc eax, 0 + ret + +%assign i 2 +%rep 19 ; 2..20 inclusive + mulredc i + %assign i i + 1 +%endrep + + end + diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_redc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_redc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_redc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_redc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,145 @@ +; +; Part of GMP-ECM +; +; void ecm_redc3( +; mp_limb_t *z, rdi r8 <- rcx +; const mp_limb_t *x, rsi r9 <- rdx +; size_t n, rdx r10 <- r8 +; mp_limb_t m rcx r11 <- r9 +; ) + +%macro rloop 3 + mov eax, [byte esi+4*%3] + mul ebp + add [byte edi+4*%3], %2 + adc %1, eax + mov %2, edx + adc %2, 0 +%endmacro + + bits 32 + section .text + + global _ecm_redc3 +%ifdef DLL + export _ecm_redc3 +%endif + +_ecm_redc3: + push ebp + push edi + push esi + push ebx + sub 
[esp+8*limbs+44] + mov ebp, eax + mov esi, [esp+8*limbs+40] + + pxor mm0, mm0 + movd mm7, ebp + +%assign i 0 +%rep limbs + mseq i + %assign i i + 1 +%endrep + + movd ecx, mm0 + + add [edi+4*limbs], ecx + adc dword [edi+4*limbs+4], 0 + mov eax, [esp+8*limbs+32] + mov ebp, [eax] + mov esi, [esp+8*limbs+36] + + pxor mm0, mm0 + movd mm7, ebp + +%assign i 0 +%rep limbs + mseq i + %assign i i + 1 +%endrep + + movd ecx, mm0 + add [edi+4*limbs], ecx + adc dword [edi+4*limbs+4], 0 + add dword [esp+8*limbs+32], 4 + add edi, 4 + dec dword [esp+8*limbs+4] + jnz .1 + + mov ebx, [esp+8*limbs+28] + +%assign i 0 +%rep limbs + mov eax, [edi+4*i] + mov [ebx+4*i], eax + %assign i i + 1 +%endrep + mov eax, [edi+4*limbs] + add esp, 8*(limbs+1) + + pop ebx + pop esi + pop edi + pop ebp + emms + ret +%endmacro + + bits 32 + section .text + + global _mulredc1 +%ifdef DLL + export _mulredc1 +%endif + +_mulredc1: + mov eax, [esp+12] + mul dword [esp+8] + mov [esp+12], edx + mov [esp+8], eax + mul dword [esp+20] + mul dword [esp+16] + add eax, [esp+8] + adc edx, [esp+12] + mov ecx, [esp+4] + mov [ecx], edx + adc eax, 0 + ret + +%assign i 2 +%rep 19 ; 2..20 inclusive + mulredc i + %assign i i + 1 +%endrep + + end + diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_redc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_redc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_win32p_redc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_win32p_redc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,145 @@ +; +; Part of GMP-ECM +; +; void ecm_redc3( +; mp_limb_t *z, rdi r8 <- rcx +; const mp_limb_t *x, rsi r9 <- rdx +; size_t n, rdx r10 <- r8 +; mp_limb_t m rcx r11 <- r9 +; ) + +%macro rloop 3 + mov eax, [byte esi+4*%3] + mul ebp + add [byte edi+4*%3], %2 + adc %1, eax + mov %2, edx + adc %2, 0 +%endmacro + + bits 32 + section .text + + global _ecm_redc3 +%ifdef DLL + export _ecm_redc3 +%endif + +_ecm_redc3: + push ebp + push edi + push esi + push ebx + sub 
esp, 16 + + mov ecx, [esp+44] + mov edi, [esp+36] + mov [esp], ecx + cmp ecx, 5 + jae .unroll + +.1: mov ebp, [esp+48] + mov esi, [esp+40] + imul ebp, [edi] + mov [esp+36], edi + mov ecx, [esp+44] + xor ebx, ebx + +.2: mov eax, [esi] + add edi, 4 + mul ebp + add esi, 4 + add eax, ebx + adc edx, 0 + add [edi-4], eax + adc edx, 0 + dec ecx + mov ebx, edx + jnz .2 + mov edi, [esp+36] + mov [edi], ebx + dec dword [esp] + lea edi, [edi+4] + jnz .1 + + add esp, 16 + pop ebx + pop esi + pop edi + pop ebp + ret + +.unroll: + mov edx, ecx + dec ecx + sub edx, 2 + neg ecx + shr edx, 4 + and ecx, 15 + mov [esp+8], edx + mov edx, ecx + shl edx, 4 + neg ecx + lea edx, [edx+ecx*1+.loop_base] + mov [esp+44], ecx + mov [esp+12], edx + +.4: mov ebp, [esp+48] + mov esi, [esp+40] + imul ebp, [edi] + mov [esp+36], edi + mov ecx, [esp+44] + mov edx, [esp+8] + mov [esp+4], edx + mov eax, [esi] + lea esi, [esi+ecx*4+4] + mul ebp + lea edi, [edi+ecx*4] + mov ebx, edx + mov edx, [esp+12] + test ecx, 1 + mov ecx, eax + cmovnz ecx, ebx + cmovnz ebx, eax + jmp edx + + align 32 +.5: add edi, 64 +.loop_base: + rloop ebx, ecx, 0 + rloop ecx, ebx, 1 + rloop ebx, ecx, 2 + rloop ecx, ebx, 3 + rloop ebx, ecx, 4 + rloop ecx, ebx, 5 + rloop ebx, ecx, 6 + rloop ecx, ebx, 7 + rloop ebx, ecx, 8 + rloop ecx, ebx, 9 + rloop ebx, ecx, 10 + rloop ecx, ebx, 11 + rloop ebx, ecx, 12 + rloop ecx, ebx, 13 + rloop ebx, ecx, 14 + rloop ecx, ebx, 15 + + dec dword [esp+4] + lea esi, [esi+64] + jns .5 + + add [edi+64], ecx + mov edi, [esp+36] + adc ebx, 0 + mov [edi], ebx + dec dword [esp] + lea edi, [edi+4] + jnz .4 + + add esp, 16 + pop ebx + pop esi + pop edi + pop ebp + ret + + end diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_x64_mulredc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_x64_mulredc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_x64_mulredc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_x64_mulredc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,237 @@ +; +; 
Part of GMP-ECM +; +; mp_limb_t mulredc1( MSVC 1 limb +; mp_limb_t *z, rcx +; const mp_limb_t x, rdx +; const mp_limb_t y, r8 +; const mp_limb_t m, r9 +; mp_limb_t inv_m [rsp+0x28] +; ) +; +; mp_limb_t mulredc( MSVC > 1 limb +; mp_limb_t *z, rcx +; const mp_limb_t *x, rdx +; const mp_limb_t *y, r8 +; const mp_limb_t *m, r9 +; mp_limb_t inv_m [rsp+0x28] +; ) + +%macro mseq_1 4 + mov %2, rcx + mul r14 + add %1, rax + mov rax, [r9+8*%3] + adc %2, rdx + mul r11 +%if %3 < %4 - 1 + add rax, %1 + mov [rbp+8*(%3-1)], rax + mov rax, [r8+8*(%3+1)] + adc %2, rdx + setc cl +%else + add %1, rax + mov [rbp+8*(%3-1)], %1 + adc %2, rdx + mov [rbp+8*%3], %2 + setc cl + mov [rbp+8*(%3+1)], rcx +%endif +%endmacro + +%macro mseq_20 2 + mov r14, [r13+r12*8] + mov rax, [r8] + mov %1, [rbp] + mov %2, [rbp+8] + mul r14 + add r12, 1 + add rax, %1 + adc %2, rdx + setc cl + mov %1, rax + imul rax, r10 + mov r11, rax + mul qword [r9] + add %1, rax + adc %2, rdx + mov rax, [r8+8] +%endmacro + +%macro mseq_2 4 + mov %2, [rbp+8*(%3+1)] + adc %2, rcx +%if %3 < %4 - 1 + setc cl +%endif + mul r14 + add %1, rax + mov rax, [r9+8*%3] + adc %2, rdx +%if %3 < %4 - 1 + adc cl, 0 +%else + setc cl +%endif + mul r11 +%if %3 < %4 - 1 + add rax, %1 + mov [rbp+8*(%3-1)], rax + adc %2, rdx + mov rax, [r8+8*(%3+1)] +%else + add %1, rax + mov [rbp+8*(%3-1)], %1 + adc %2, rdx + mov [rbp+8*%3],%2 + adc cl, 0 + mov [rbp+8*(%3+1)], rcx +%endif +%endmacro + +%macro store 1 +%assign i 0 +%rep %1 + %if i == %1 - 1 && (%1 & 1) + mov rax, [rbp+8*i] + mov [rdi+8*i], rax + %elif (i & 1) + mov [rdi+8*(i-1)], rax + mov [rdi+8*i], rdx + %else + mov rax, [rbp+8*i] + mov rdx, [rbp+8*(i+1)] + %endif + %assign i i + 1 +%endrep +%endmacro + +%macro mulredc 1 + +%assign limbs %1 +%define f_name(x) mulredc %+ x +%define stack_space 8 * (limbs + 1 + (limbs & 1)) + + global f_name(limbs) +%ifdef DLL + export f_name(limbs) +%endif + + align 64 + +PROC_FRAME f_name(limbs) ; SEH Frame + push_reg rbp + push_reg rbx + push_reg rsi + 
push_reg rdi + push_reg r12 + push_reg r13 + push_reg r14 + alloc_stack stack_space +END_PROLOGUE + ; *y in r8 + mov rdi, rcx ; *z -> rdi + mov r13, rdx ; *x -> r13 + mov r10, [rsp+8*12+stack_space] ; invm -> r10 + ; *m in r9 + mov r14, [r13] + mov rax, [r8] + xor rcx, rcx + lea rbp, [rsp] + mov r12, rcx + mul qword r14 + add r12, 1 + mov rsi, rax + mov rbx, rdx + imul rax, r10 + mov r11, rax + mul qword [r9] + add rsi, rax + mov rax, [r8+8] + adc rbx, rdx + setc cl + +%assign j 1 +%rep limbs - 1 +%if (j & 1) + mseq_1 rbx, rsi, j, limbs +%else + mseq_1 rsi, rbx, j, limbs +%endif + %assign j j + 1 +%endrep + + align 32 +.1: + +%assign j 1 +%if (limbs & 1) + mseq_20 rsi, rbx + %rep limbs - 1 + %if (j & 1) + mseq_2 rbx, rsi, j, limbs + %else + mseq_2 rsi, rbx, j, limbs + %endif + %assign j j + 1 + %endrep +%else + mseq_20 rbx, rsi + %rep limbs - 1 + %if (j & 1) + mseq_2 rsi, rbx, j, limbs + %else + mseq_2 rbx, rsi, j, limbs + %endif + %assign j j + 1 + %endrep +%endif + + cmp r12, limbs + jb .1 + + store limbs + + mov rax, rcx + add rsp, stack_space + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbx + pop rbp + ret +ENDPROC_FRAME +%endmacro + + bits 64 + section .text + + global mulredc1 +%ifdef DLL + export mulredc1 +%endif + + align 64 +mulredc1: + mov rax, r8 + mul rdx + mov r10, rax + mov r11, rdx + mul qword [rsp+0x28] + mul r9 + add rax, r10 + adc rdx, r11 + mov [rcx], rdx + adc rax, 0 + ret + +%assign i 2 +%rep 19 ; 2..20 inclusive + mulredc i + %assign i i + 1 +%endrep + + end diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/a_x64_redc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/a_x64_redc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/a_x64_redc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/a_x64_redc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,161 @@ +; +; Part of GMP-ECM +; +; void ecm_redc3( +; mp_limb_t *z, rdi r8 <- rcx +; const mp_limb_t *x, rsi r9 <- rdx +; size_t n, rdx r10 <- r8 +; mp_limb_t m rcx r11 
<- r9 +; ) + +%macro rloop 3 + mov rax,[byte rsi+8*%3] + mul rbp + add [byte rdi+8*%3], %1 + adc %2, rax + mov %1, rdx + adc %1, 0 +%endmacro + + bits 64 + section .text + + global ecm_redc3 +%ifdef DLL + export ecm_redc3 +%endif + +PROC_FRAME ecm_redc3 + push_reg rbp + push_reg rbx + push_reg rsi + push_reg rdi + alloc_stack 5*8 +END_PROLOGUE + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + + mov r8, rdi + mov r9, rsi + mov r10, rdx + mov r11, rcx + + mov rcx, r10 + mov [rsp], rcx + cmp rcx, 3 + jae .unroll + +.1: mov rbp, r11 + mov rsi, r9 + imul rbp, [rdi] + mov r8, rdi + mov rcx, r10 + xor rbx, rbx + +.2: mov rax, [rsi] + add rdi, 8 + mul rbp + add rsi, 8 + add rax, rbx + adc rdx, 0 + add [rdi-8], rax + adc rdx, 0 + dec rcx + mov rbx, rdx + jnz .2 + mov rdi, r8 + mov [rdi], rbx + dec qword [rsp] + lea rdi, [rdi+8] + jnz .1 + + add rsp, 5*8 + pop rdi + pop rsi + pop rbx + pop rbp + ret + +.unroll: + mov rdx, rcx + dec rcx + sub rdx, 2 + neg rcx + shr rdx, 4 + and rcx, 15 + mov [rsp+16], rdx + mov rdx, rcx + shl rdx, 4 + lea r10, [.loop_base wrt rip] + add rdx, r10 + lea rdx, [rdx+rcx*4] + add rdx, rcx + neg rcx + mov r10, rcx + mov [rsp+24], rdx + +.4: mov rbp, r11 + mov rsi, r9 + imul rbp, [rdi] + mov r8, rdi + mov rcx, r10 + mov rdx, [rsp+16] + mov [rsp+8], rdx + + mov rax, [rsi] + lea rsi, [rsi+rcx*8+8] + mul rbp + lea rdi, [rdi+rcx*8] + mov rbx, rdx + + mov rdx, [rsp+24] + test rcx, 1 + mov rcx, rax + cmovnz rcx, rbx + cmovnz rbx, rax + jmp rdx + + align 64 + +.5: add rdi, 128 +.loop_base: + rloop rcx, rbx, 0 + rloop rbx, rcx, 1 + rloop rcx, rbx, 2 + rloop rbx, rcx, 3 + rloop rcx, rbx, 4 + rloop rbx, rcx, 5 + rloop rcx, rbx, 6 + rloop rbx, rcx, 7 + rloop rcx, rbx, 8 + rloop rbx, rcx, 9 + rloop rcx, rbx, 10 + rloop rbx, rcx, 11 + rloop rcx, rbx, 12 + rloop rbx, rcx, 13 + rloop rcx, rbx, 14 + rloop rbx, rcx, 15 + + dec qword [rsp+8] + lea rsi, [rsi+128] + jns .5 + + add [rdi+128], rcx + mov rdi, r8 + adc rbx, 0 + mov [rdi], rbx + dec qword [rsp] + 
lea rdi, [rdi+8] + jnz .4 + + add rsp, 5*8 + pop rdi + pop rsi + pop rbx + pop rbp + ret +ENDPROC_FRAME + + end diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/Makefile.am gmp-ecm-7.0.5+ds/build.vc14/assembler/Makefile.am --- gmp-ecm-7.0.4+ds/build.vc14/assembler/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/Makefile.am 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,3 @@ +EXTRA_DIST = a_win32a_mulredc.asm a_win32a_redc.asm a_win32p_mulredc.asm \ + a_win32p_redc.asm a_x64_mulredc.asm a_x64_redc.asm \ + test_mulredc.c mulredc.h mulredc.asm redc.asm diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/mulredc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/mulredc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/mulredc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/mulredc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,8 @@ + +%ifdef _WIN64 +%include "a_x64_mulredc.asm" +%elifdef AMD_ASM +%include "a_win32a_mulredc.asm" +%else +%include "a_win32p_mulredc.asm" +%endif diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/mulredc.h gmp-ecm-7.0.5+ds/build.vc14/assembler/mulredc.h --- gmp-ecm-7.0.4+ds/build.vc14/assembler/mulredc.h 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/mulredc.h 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,32 @@ +#ifndef __ASM_REDC_H__ +#define __ASM_REDC_H__ + +#include + +extern void ecm_redc3(mp_limb_t *cp, const mp_limb_t *np, mp_size_t nn, mp_limb_t Nprim); + + +/* WARNING: the size-1 version doesn't take pointers in input */ +extern mp_limb_t mulredc1(mp_limb_t *z, mp_limb_t x, mp_limb_t y, mp_limb_t m, mp_limb_t inv_m); + +extern mp_limb_t mulredc2(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc3(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc4(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const 
mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc5(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc6(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc7(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc8(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc9(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc10(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc11(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc12(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc13(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc14(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc15(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc16(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc17(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc18(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc19(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); +extern mp_limb_t mulredc20(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); + +#endif diff -Nru 
gmp-ecm-7.0.4+ds/build.vc14/assembler/redc.asm gmp-ecm-7.0.5+ds/build.vc14/assembler/redc.asm --- gmp-ecm-7.0.4+ds/build.vc14/assembler/redc.asm 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/redc.asm 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,7 @@ +%ifdef _WIN64 +%include "a_x64_redc.asm" +%elifdef AMD_ASM +%include "a_win32a_redc.asm" +%else +%include "a_win32p_redc.asm" +%endif diff -Nru gmp-ecm-7.0.4+ds/build.vc14/assembler/test_mulredc.c gmp-ecm-7.0.5+ds/build.vc14/assembler/test_mulredc.c --- gmp-ecm-7.0.4+ds/build.vc14/assembler/test_mulredc.c 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/assembler/test_mulredc.c 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,303 @@ +#include +#include +#include + +#include + +#include "asmredc.h" + +void mp_print(mp_limb_t *x, int N) { + int i; + for (i = 0; i < N-1; ++i) + printf("%lu + W*(", x[i]); + printf("%lu", x[N-1]); + for (i = 0; i < N-1; ++i) + printf(")"); + printf("\n"); +} + +static mp_limb_t +call_mulredc (int N, mp_limb_t *z, mp_limb_t *x, mp_limb_t *y, mp_limb_t *m, + mp_limb_t invm) +{ + mp_limb_t cy; + + switch (N) + { + case 1: + cy = mulredc1(z, x[0], y[0], m[0], invm); + break; + case 2: + cy = mulredc2(z, x, y, m, invm); + break; + case 3: + cy = mulredc3(z, x, y, m, invm); + break; + case 4: + cy = mulredc4(z, x, y, m, invm); + break; + case 5: + cy = mulredc5(z, x, y, m, invm); + break; + case 6: + cy = mulredc6(z, x, y, m, invm); + break; + case 7: + cy = mulredc7(z, x, y, m, invm); + break; + case 8: + cy = mulredc8(z, x, y, m, invm); + break; + case 9: + cy = mulredc9(z, x, y, m, invm); + break; + case 10: + cy = mulredc10(z, x, y, m, invm); + break; + case 11: + cy = mulredc11(z, x, y, m, invm); + break; + case 12: + cy = mulredc12(z, x, y, m, invm); + break; + case 13: + cy = mulredc13(z, x, y, m, invm); + break; + case 14: + cy = mulredc14(z, x, y, m, invm); + break; + case 15: + cy = mulredc15(z, x, y, m, invm); + break; + case 16: + cy = 
mulredc16(z, x, y, m, invm); + break; + case 17: + cy = mulredc17(z, x, y, m, invm); + break; + case 18: + cy = mulredc18(z, x, y, m, invm); + break; + case 19: + cy = mulredc19(z, x, y, m, invm); + break; + case 20: + cy = mulredc20(z, x, y, m, invm); + break; + default: + cy = mulredc20(z, x, y, m, invm); + } + return cy; +} + +void test(mp_size_t N, int k) +{ + mp_limb_t *x, *y, *yp, *z, *m, invm, cy, cy2, *tmp, *tmp2, *tmp3; + int i, j; + + x = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); + y = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); + z = (mp_limb_t *) malloc((N+1)*sizeof(mp_limb_t)); + m = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); + tmp = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); + tmp2 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); + tmp3 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); + + if (x == NULL || y == NULL || z == NULL || m == NULL || tmp == NULL || + tmp2 == NULL || tmp3 == NULL) + { + fprintf (stderr, "Cannot allocate memory in test_mulredc\n"); + exit (1); + } + + mpn_random2(m, N); + m[0] |= 1UL; + if (m[N-1] == 0) + m[N-1] = 1UL; + + invm = 1UL; + for (i = 0; i < 10; ++i) + invm = (2*invm-m[0]*invm*invm); + invm = -invm; + + assert( (invm*m[0] +1UL) == 0UL); + + yp = y; + for (i=0; i < k; ++i) { + /* Try a few special cases */ + if (i == 0) + { + /* Try all 0, product should be 0 */ + for (j = 0; j < N; j++) + x[j] = y[j] = 0; + } + else if (i == 1) + { + /* Try all 1 */ + for (j = 0; j < N; j++) + x[j] = y[j] = 1; + } + else if (i == 2) + { + /* Try all 2^wordsize - 1 (cast so every limb bit is set even where unsigned long is narrower than mp_limb_t) */ + for (j = 0; j < N; j++) + x[j] = y[j] = ~(mp_limb_t) 0; + } + else + { + /* In the other cases, try random data */ + if (i % 2 == 0) + { + /* Try squaring */ + mpn_random2(x, N); + yp = x; + } + else + { + /* Try multiplication; reset yp, which an earlier squaring pass left aliased to x */ + mpn_random2(x, N); + mpn_random2(y, N); yp = y; + } + } + + // Mul followed by ecm_redc3 + mpn_mul_n(tmp, x, yp, N); + ecm_redc3(tmp, m, N, invm); + cy2 = mpn_add_n (tmp2, tmp + N, tmp, N); + + // Mixed mul and redc + cy = call_mulredc (N, z, x, 
yp, m, invm); + + if (cy != cy2) + printf ("i = %d: mulredc cy = %ld, mpn_mul_n/ecm_redc3 cy = %ld\n", + i, (long) cy, (long) cy2); + assert (cy == cy2); + if (mpn_cmp(z,tmp2, N) != 0) + { + printf ("i = %d\nmulredc = ", i); + for (j = N - 1; j >= 0; j--) + printf ("%lx ", z[j]); + printf ("\nmpn_mul_n/ecm_redc3 = "); + for (j = N - 1; j >= 0; j--) + printf ("%lx ", tmp2[j]); + printf ("\n"); + assert (mpn_cmp(z,tmp2, N) == 0); + } + + if (cy) + printf("!"); + z[N] = cy; + // Check with pure gmp : multiply by 2^(N*GMP_NUMB_BITS) and compare. + for (j=0; j < N; ++j) { + tmp[j] = 0; + tmp[j+N] = z[j]; + } + tmp[2*N] = z[N]; + mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N+1, m, N); + for (j=0; j < N; ++j) + z[j] = tmp3[j]; + + mpn_mul_n(tmp, x, yp, N); + mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N, m, N); + + assert(mpn_cmp(z, tmp3, N) == 0); + } + + free(tmp); free(tmp2); free(tmp3); + free(x); free(y); free(z); free(m); +} + + + +int main(int argc, char** argv) +{ + int i, len; + + if (argc > 1) /* Test a specific length */ + { + len = atoi (argv[1]); + for (i = 0; i < 1; i++) + test (len, 1000000); + return 0; + } + + for (;;) { + for (i = 1; i <= 20; ++i) { + test(i, 1000); + } +#if 0 + test(1, 1000); + test(2, 1000); + test(3, 1000); + test(4, 1000); + test(5, 1000); + test(6, 1000); + test(7, 1000); + test(8, 1000); + test(9, 1000); + test(10, 1000); + test(11, 1000); + test(12, 1000); + test(13, 100); + test(14, 100); + test(15, 100); + test(16, 100); + test(17, 100); + test(18, 100); + test(44, 10); + test(45, 10); + test(46, 10); + test(47, 10); + test(48, 10); + test(49, 10); +#endif + printf("."); fflush(stdout); + } +#if 0 + x[0] = 12580274668139321508UL; + x[1] = 9205793975152560417UL; + x[2] = 7857372727033793057UL; + + y[0] = 13688385828267279103UL; + y[1] = 10575011835742767258UL; + y[2] = 8802048318027595690UL; + + + m[0] = 2981542467342508025UL; + m[1] = 5964669706257742025UL; + m[2] = 18446744073678090270UL; + + invm = 9419286575570128311UL; + + carry = mulredc(z, x, 
y, m, 3, invm); + + printf("%lu + 2^64*(%lu + 2^64*%lu), carry=%lu\n", z[0], z[1], z[2], carry); +#endif + return 0; +} + + +#if 0 + +W := 2^64; + +x0:= 12580274668139321508; +x1:= 9205793975152560417; +x2:= 7857372727033793057; +x := x0 + W*(x1 + W*x2); + +y0:= 13688385828267279103; +y1:= 10575011835742767258; +y2:= 8802048318027595690; +y := y0 + W*(y1 + W*y2); + +m0:= 2981542467342508025; +m1:= 5964669706257742025; +m2:= 18446744073678090270; +m := m0 + W*(m1 + W*m2); + +invm := 9419286575570128311; + + + +#endif diff -Nru gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj --- gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,170 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {4727DE12-787D-432D-B166-BF103B0C3C87} + Win32Proj + bench_mulredc + + + + Application + true + v140 + + + Application + true + v140 + + + Application + false + true + v140 + + + Application + false + true + v140 + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)..bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + true + $(SolutionDir)..bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ + MultiThreadedDebug + + + Console + true + psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _WIN64;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + 
..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ + MultiThreadedDebug + + + Console + true + psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ + MultiThreaded + + + Console + true + true + true + psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + _WIN64;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ + MultiThreaded + + + Console + true + true + true + psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) + + + + + + + + + + + + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj.filters gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj.filters --- gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj.filters 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/bench_mulredc.vcxproj.filters 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,23 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/Makefile.am gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/Makefile.am --- gmp-ecm-7.0.4+ds/build.vc14/bench_mulredc/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/bench_mulredc/Makefile.am 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1 @@ +EXTRA_DIST = bench_mulredc.vcxproj bench_mulredc.vcxproj.filters diff -Nru gmp-ecm-7.0.4+ds/build.vc14/config.h 
gmp-ecm-7.0.5+ds/build.vc14/config.h --- gmp-ecm-7.0.4+ds/build.vc14/config.h 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/config.h 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,246 @@ +/* config.h.in. Generated from configure.in by autoheader. */ + +#define VERSION ECM_VERSION + +#define VERSION_GPU "gpu_ecm-win" + +#define PACKAGE_BUGREPORT "ecm-discuss@inria.fr" + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#define C_ALLOCA 1 + +/* Define to 1 if you have the `access' function. */ +#undef HAVE_ACCESS + +/* Define to 1 if you have `alloca', as a function or macro. */ +#define HAVE_ALLOCA 1 + +/* Define to 1 if you have and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the `ctime' function. */ +#define HAVE_CTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the `floor' function. */ +#define HAVE_FLOOR 1 + +/* Define to 1 if you have the `fmod' function. */ +#define HAVE_FMOD 1 + +/* Define to 1 if you have the `gethostname' function. */ +#define HAVE_GETHOSTNAME 1 + +/* Define to 1 if you have the `getrusage' function. */ +#define HAVE_GETRUSAGE 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* Define to 1 if you have the header file. */ +#define HAVE_GMP_H 1 + +/* Define to 1 if gwnum.a or gwnum.lib exist */ +#undef HAVE_GWNUM + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_IO_H + +/* Define to 1 if you have the `isascii' function. */ +#undef HAVE_ISASCII + +/* Define to 1 if you have the `isdigit' function. */ +#define HAVE_ISDIGIT 1 + +/* Define to 1 if you have the `isspace' function. 
*/ +#define HAVE_ISSPACE 1 + +/* Define to 1 if you have the `isxdigit' function. */ +#define HAVE_ISXDIGIT 1 + +/* Define to 1 if you have the `m' library (-lm). */ +#undef HAVE_LIBM + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the `malloc_usable_size' function. */ +#undef HAVE_MALLOC_USABLE_SIZE + +/* Define to 1 if you have the header file. */ +#define HAVE_MATH_H 1 + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `memset' function. */ +#define HAVE_MEMSET 1 + +/* Define to 1 if you have the `nice' function. */ +#undef HAVE_NICE + +/* Define to 1 if you have the `pow' function. */ +#define HAVE_POW 1 + +/* Define to 1 if you have the `signal' function. */ +#define HAVE_SIGNAL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the `sqrt' function. */ +#define HAVE_SQRT 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strlen' function. */ +#define HAVE_STRLEN 1 + +/* Define to 1 if you have the `strncasecmp' function. */ +#undef HAVE_STRNCASECMP + +/* Define to 1 if you have the `strstr' function. */ +#undef HAVE_STRSTR + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the `time' function. */ +#undef HAVE_TIME + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `unlink' function. */ +#define HAVE_UNLINK 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WINDOWS_H 1 + +/* Define to 1 if you have the `__gmpn_add_nc' function. */ +#if defined( _WIN64 ) +# define HAVE___GMPN_ADD_NC 1 +#endif + +/* Define to 1 if you have the `__gmpn_mod_34lsub1' function. */ +#define HAVE___GMPN_MOD_34LSUB1 1 + +/* Define to 1 if you have the `__gmpn_mul_fft' function. */ +#define HAVE___GMPN_MUL_FFT 1 + +/* Define to 1 if you want memory debugging */ +#undef MEMORY_DEBUG + +/* Define if the system has the type `long long'. */ +#define HAVE_LONG_LONG 1 +#define HAVE_LONG_LONG_INT 1 + +/* Define to 1 to use asm redc on x86 or x86_64 */ +# define NATIVE_REDC 1 + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define to 1 if you can safely include both and . */ +#undef TIME_WITH_SYS_TIME + +/* Define to 1 if you want assertions enabled */ +#undef WANT_ASSERT + +/* Define to 1 if you want shell command execution */ +#undef WANT_SHELLCMD + +/* Define to empty if `const' does not conform to ANSI C. 
*/ +#undef const + +/* How to specify hot-spot attribute, if available */ +#define ATTRIBUTE_HOT + +#define HAVE___GMPN_REDC_1 1 + +#define HAVE___GMPN_REDC_2 1 + +#define HAVE_ASM_REDC3 1 + +#define WINDOWS64_ABI 1 + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#define inline __inline +#endif + +/* Define to `unsigned int' if does not define. */ +#undef size_t + +#define PRIdSIZE "Id" +#define PRIuSIZE "Iu" + +#ifdef _MSC_VER + +#define __func__ __FUNCTION__ + +/* define Windows tuning here */ +# define __tune_corei7__ + +# if _MSC_VER < 1600 +# define int64_t __int64 +# define uint64_t unsigned __int64 +# endif +# define strncasecmp strnicmp +# define access _access +# define alloca _alloca +# define fseek64 _fseek64 +# define ftell64 _ftell64 +# define omp_get_thread_limit omp_get_max_threads +#endif diff -Nru gmp-ecm-7.0.4+ds/build.vc14/ecm/ecm.vcxproj gmp-ecm-7.0.5+ds/build.vc14/ecm/ecm.vcxproj --- gmp-ecm-7.0.4+ds/build.vc14/ecm/ecm.vcxproj 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/ecm/ecm.vcxproj 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,239 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {C0E2EA85-996A-4B5F-AD30-590FAF5B7187} + ecm + Win32Proj + 8.1 + + + + Application + v110 + + + Application + v120 + + + Application + v140 + + + Application + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30128.1 + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + + Full + true + Speed + 
..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib + false + Console + true + true + false + + + MachineX86 + + + + + X64 + + + Full + true + Speed + ..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;_WIN64;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + Default + true + + + ws2_32.lib;..\..\..\$(mp_dir)lib\$(Platform)\release\$(mp_lib);%(AdditionalDependencies) + Console + true + true + false + + + MachineX64 + 8388608 + 65536 + + + + + Disabled + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + Level3 + EditAndContinue + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib + true + Console + false + + + MachineX86 + + + + + X64 + + + Disabled + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;_WIN64;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + Level3 + ProgramDatabase + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib + true + Console + false + + + MachineX64 + 8388608 + 65536 + + + + + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + {cd555681-d65b-4173-a29c-b8bf06a4871b} + false + + + + + + + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/ecm/ecm.vcxproj.filters gmp-ecm-7.0.5+ds/build.vc14/ecm/ecm.vcxproj.filters --- 
gmp-ecm-7.0.4+ds/build.vc14/ecm/ecm.vcxproj.filters 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/ecm/ecm.vcxproj.filters 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,74 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/ecm/Makefile.am gmp-ecm-7.0.5+ds/build.vc14/ecm/Makefile.am --- gmp-ecm-7.0.4+ds/build.vc14/ecm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/ecm/Makefile.am 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1 @@ +EXTRA_DIST = ecm.vcxproj ecm.vcxproj.filters diff -Nru gmp-ecm-7.0.4+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj gmp-ecm-7.0.5+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj --- gmp-ecm-7.0.4+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,286 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {1B353D8B-9808-4EB3-A5E7-075D751757AD} + ecm_gpu + Win32Proj + 8.1 + + + + Application + v140 + + + Application + v140 + + + Application + v140 + + + Application + v140 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.30128.1 + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + 
$(Platform)\$(Configuration)\ + true + $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + + Full + true + Speed + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;WITH_GPU;GPU_CC50;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);..\..\lib\$(IntDir)libecm_gpu.lib;advapi32.lib;ws2_32.lib;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\$(Platform)\cudart.lib + false + Console + true + true + false + + + MachineX86 + + + compute_50,sm_50 + + + 32 + ..\;..\..\..\mpir\lib\$(IntDir) + true + + + + + + + X64 + + + Full + true + Speed + ..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;WITH_GPU;GPU_CC50;_WIN64;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + MultiThreaded + + + Level3 + ProgramDatabase + Default + true + + + ws2_32.lib;..\..\..\$(mp_dir)lib\$(Platform)\release\$(mp_lib);%(AdditionalDependencies) + Console + true + true + false + + + MachineX64 + 8388608 + 65536 + + + + + Disabled + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;WITH_GPU;GPU_CC50;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + Level3 + EditAndContinue + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);..\..\lib\$(IntDir)libecm_gpu.lib;advapi32.lib;ws2_32.lib;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\$(Platform)\cudart.lib + true + Console + false + + + MachineX86 + + + compute_50,sm_50 + + + 32 + ..\;..\..\..\mpir\lib\$(IntDir) + true + + + + + + + X64 + + + Disabled + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + WIN32;WITH_GPU;GPU_CC50;_WIN64;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + + + 
Level3 + ProgramDatabase + Default + true + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);..\..\lib\$(IntDir)libecm_gpu.lib;advapi32.lib;ws2_32.lib;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\$(Platform)\cudart.lib + true + Console + false + + + MachineX64 + 8388608 + 65536 + + + compute_50,sm_50 + + + 64 + ..\;..\..\..\mpir\lib\$(IntDir) + true + + + + + + + ..\;..\..\;..\assembler;..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) + + + ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);..\..\lib\$(IntDir)libecm_gpu.lib;advapi32.lib;ws2_32.lib;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\$(Platform)\cudart.lib + true + + + compute_50,sm_50 + + + 64 + ..\;..\..\..\mpir\lib\$(IntDir) + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ + + + + + + \ No newline at end of file diff -Nru gmp-ecm-7.0.4+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj.filters gmp-ecm-7.0.5+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj.filters --- gmp-ecm-7.0.4+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj.filters 1970-01-01 00:00:00.000000000 +0000 +++ gmp-ecm-7.0.5+ds/build.vc14/ecm_gpu/ecm_gpu.vcxproj.filters 2022-06-06 14:16:49.000000000 +0000 @@ -0,0 +1,78 @@ + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {2a13feaf-0c0e-469a-8047-82c647322da9} + + + {163547c7-89d7-4ddc-b0ad-02b4cfd722b4} + + + +