/*
 * Copyright 1996 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: mpqs.c,v 1.15 1996/09/14 09:44:14 bousch Exp $
 *
 * An implementation of the Multiple Polynomial Quadratic Sieve.
 * Reference: Robert D. Silverman, "The Multiple Polynomial Quadratic
 * Sieve", Mathematics of Computation 48, January 1987.
 */

#include <assert.h>
#include <gdbm.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/times.h>
#include <unistd.h>
#include "saml.h"
#include "saml-util.h"
#include "mp-arch.h"
#include "factorint.h"

/*
 * One relation found by the sieve:
 *
 *     A^2 == B^2 * product of primes[i] over the bits set in e[]   (mod kN)
 *
 * (primes[0] is -1, so bit 0 records the sign).  Only the parity of each
 * exponent is kept in the bitmap; the even part is folded into B.
 */
struct cfact {
	mref_t A, B;
	/*
	 * Exponent-parity bitmap, nprimes/32 words long.  Declared as a C99
	 * flexible array member; the storage is added by new_cfact().
	 */
	__u32 e[];
};

/*
 * Sieve parameters and per-prime tables (all tables have nprimes entries):
 *   k        multiplier, set to N mod 8 by compute_k()
 *   M        half-width of the sieving interval; sieve() scans [-M, M)
 *   primes   the factor base; primes[0] is -1 (the sign)
 *   sqrt_kN  sqrt_kN[i]^2 == kN (mod primes[i]), filled for i >= 2
 *   rt1/rt2  roots of the current polynomial modulo primes[i], recomputed
 *            by sieve() for every new polynomial
 */
static int k, M, *primes, *sqrt_kN, *rt1, *rt2;
/*
 * Multiplies log(largest factor-base prime) in the sieve threshold.
 * NOTE(review): this is the only file-scope object without `static';
 * confirm that no other file refers to it before changing its linkage.
 */
double T;
/*
 * nprimes:    size of the factor base (always a multiple of 32)
 * complete:   number of relations inserted into the elimination matrix
 * incomplete: number of partial relations stored in the database
 */
static int nprimes, complete, incomplete;
/*
 * N is the number being factored and ffound receives the factor (zero
 * until one is found); kN = k*N.  A, B, C, D describe the current
 * polynomial (A = D^2) and i2D is the inverse of 2D modulo kN.
 */
static mref_t N, ffound, kN, A, B, C, D, i2D;
/* Database of incomplete factorizations, keyed by their "big prime" */
static GDBM_FILE dbf;
/* arfact[i] is the relation occupying slot i of the matrix, or NULL */
static struct cfact **arfact;
/*
 * reduced[i] is the reduced exponent-parity row stored in slot i, and
 * depend[i] records which slots were combined to obtain it.
 */
static __u32 **reduced, **depend;

/*
 * Return (x1 * x2) mod p without overflowing 32 bits.  The two macros
 * come from mp-arch.h; presumably they are the usual double-word
 * multiply / divide primitives (TODO confirm), which would require the
 * high word of the product to be smaller than p.
 */
static inline __u32 product_mod (__u32 x1, __u32 x2, __u32 p)
{
	__u32 hi, lo, q, r;

	umul_ppmm(hi, lo, x1, x2);	/* hi:lo = x1 * x2 */
	udiv_qrnnd(q, r, hi, lo, p);	/* r = hi:lo mod p; q is not needed */
	return r;
}

/*
 * Modular exponentiation by repeated squaring: returns x^e mod p.
 * For e == 0 the result is 1, whatever the value of p.
 */
static __u32 power_mod (__u32 x, __u32 e, __u32 p)
{
	__u32 acc = 1;

	/* Invariant: acc * x^e (mod p) is the value we are looking for */
	for (;;) {
		if (e & 1)
			acc = product_mod(acc, x, p);
		e >>= 1;
		if (e == 0)
			break;
		x = product_mod(x, x, p);
	}
	return acc;
}

/*
 * Allocate a relation with an all-zero exponent bitmap of nprimes bits
 * and two freshly created mrefs for A and B.
 *
 * Allocation failure is reported through panic_out_of_memory(), as in
 * build_factor_base(), instead of an assert() that disappears when the
 * file is compiled with NDEBUG.
 */
static struct cfact* new_cfact (void)
{
	struct cfact *cf;

	cf = calloc(1, sizeof(struct cfact) + (nprimes>>5)*sizeof(__u32));
	if (!cf)
		panic_out_of_memory();
	cf->A = mref_new();
	cf->B = mref_new();
	return cf;
}

/*
 * Return non-zero if bit number `position' is set in the bitmap.
 * The result is the masked word, not a normalized 0/1 value.
 */
static inline int test_bit (__u32 *bitmap, unsigned position)
{
	const unsigned long mask = 1UL << (position & 31);

	return bitmap[position >> 5] & mask;
}

/* Reset bit number `position' of the bitmap to zero. */
static inline void clear_bit (__u32 *bitmap, unsigned position)
{
	const unsigned long mask = 1UL << (position & 31);

	bitmap[position >> 5] &= ~mask;
}

/* Set bit number `position' of the bitmap to one. */
static inline void set_bit (__u32 *bitmap, unsigned position)
{
	const unsigned long mask = 1UL << (position & 31);

	bitmap[position >> 5] |= mask;
}

static void verify_relation (struct cfact* cf)
{
#if 0
	int i, p;
	mref_t t0, t1;

	t0 = mref_new(); t1 = mref_new();
	mref_mul(t0, cf->B, cf->B);
	mref_mod(t0, t0, kN);
	for (i = 0; i < nprimes; i++) {
		p = primes[i];
		if (!test_bit(cf->e, i))
			continue;
		if (p < 0)
			mref_sub(t0, kN, t0);
		else {
			mref_build(t1, ST_INTEGER, u32toa(p));
			mref_mul(t0, t1, t0);
			mref_mod(t0, t0, kN);
		}
	}
	mref_mul(t1, cf->A, cf->A);
	mref_mod(t1, t1, kN);
	if (!mref_differ(t0,t1))
		printf("Relation is ok.\n");
	else {
		printf("Bad relation: %s^2 = %s^2",
			mref_string(cf->A), mref_string(cf->B));
		for (i = 0; i < nprimes; i++)
			if (test_bit(cf->e, i))
				printf(" %d", primes[i]);
		putchar('\n');
	}
	mref_free(t0); mref_free(t1);
#endif
}

/* XOR the nprimes-bit row q into the row p, one 32-bit word at a time. */
static inline void xorline (__u32 *p, __u32 *q)
{
	int words = (nprimes >> 5);
	int w;

	for (w = 0; w < words; w++)
		p[w] ^= q[w];
}

/*
 * Feed one relation into the incremental elimination over GF(2).
 *
 * The exponent-parity row of cf is reduced against the rows already
 * stored.  If a set bit remains whose slot is free, the relation is
 * stored there (arfact[] then owns cf) and `complete' is incremented.
 * Otherwise the row is a combination of stored rows: the relations
 * involved are multiplied together, giving A^2 == B^2 (mod kN), and
 * gcd(A+B, N) is tried as a factor; it is copied into ffound when it is
 * neither 1 nor composite according to is_pseudo_prime().
 *
 * Ownership: this function takes ownership of cf.  Whenever cf is not
 * stored in arfact[] it is released here (the callers never touch it
 * again), which the previous version forgot to do.
 */
static void process_cfact (struct cfact* cf)
{
	int i, j, v;
	__u32 bitmap[nprimes/32], dep[nprimes/32];
	mref_t t0, t1;

	verify_relation(cf);
	if (mref_notzero(ffound))
		goto discard;	/* a factor is already known */
	memcpy(bitmap, cf->e, nprimes/8);
	bzero(dep, nprimes/8);
	for (i = 0; i < nprimes; i++) {
		if (!test_bit(bitmap,i))
			continue;
		if (!arfact[i])
			break;
		/* Compose with line i */
		xorline(bitmap, reduced[i]);
		xorline(dep, depend[i]);
	}
	if (i < nprimes) {
		/* Insert the relation in slot i; arfact[] now owns cf */
		set_bit(dep, i);
		arfact[i] = cf;
		memcpy(reduced[i], bitmap, nprimes/8);
		memcpy(depend[i], dep, nprimes/8);
		++complete;
		return;
	}
	/*
	 * We should have A^2 = B^2 now.
	 * Let's verify that all went well...
	 */
	if (!quiet)
		fputs("*\b", stderr);

	for (i = 0; i < nprimes/32; i++)
		if (bitmap[i]) {
			fprintf(stderr, "Eek! bitmap should be zero\n");
			abort();
		}
	/* Same check, this time using the original (unreduced) rows */
	memcpy(bitmap, cf->e, nprimes/8);
	for (i = 0; i < nprimes; i++)
		if (test_bit(dep,i))
			xorline(bitmap, arfact[i]->e);
	for (i = 0; i < nprimes/32; i++)
		if (bitmap[i]) {
			fprintf(stderr, "Eek! bitmap2 should be zero\n");
			abort();
		}
	/*
	 * Good, now really combine the relations
	 */
	t0 = mref_new();
	t1 = mref_new();
	for (i = 0; i < nprimes; i++)
		if (test_bit(dep,i)) {
			mref_mul(cf->A, cf->A, arfact[i]->A);
			mref_mod(cf->A, cf->A, kN);
			mref_mul(cf->B, cf->B, arfact[i]->B);
			mref_mod(cf->B, cf->B, kN);
		}
	/*
	 * Compensate for the exponents greater than one: prime j occurs
	 * v times in total (v is even), so B gets primes[j]^(v/2).
	 * Index 0 is the sign and is skipped.
	 */
	for (j = 0; j < nprimes; j++) {
		v = !!test_bit(cf->e,j);
		for (i = 0; i < nprimes; i++) {
			if (test_bit(dep,i))
				v += !!test_bit(arfact[i]->e,j);
		}
		assert(v%2 == 0);
		if (j && v) {
			mref_build(t0, ST_INTEGER, u32toa(primes[j]));
			mref_build(t1, ST_INTEGER, u32toa(v/2));
			raise_pmod(t0, kN, t1);
			mref_mul(t0, t0, cf->B);
			mref_mod(cf->B, t0, kN);
		}
	}
#if 0
	printf("A = %s\n", mref_string(cf->A));
	printf("B = %s\n", mref_string(cf->B));
#endif
	mref_add(t0, cf->A, cf->B);
	mref_gcd(t0, t0, N);
	mref_one(t1, t0);
	if (mref_differ(t0, t1) && is_pseudo_prime(t0)) {
		/* Prime factor found */
		mref_copy(ffound, t0);
	}
	mref_free(t0); mref_free(t1);
discard:
	/* cf was not stored anywhere: release it */
	mref_free(cf->A); mref_free(cf->B);
	free(cf);
}

/*
 * bezout(A,B,x,y): extended Euclidean algorithm.  Stores in x and y two
 * integers such that A*x + B*y == gcd(A,B).  The caller is expected to
 * pass relatively prime A and B; x and y must be distinct mrefs.
 */

static void bezout (mref_t A, mref_t B, mref_t x, mref_t y)
{
	mref_t quot, rest, prod;

	if (!mref_notzero(B)) {
		/* gcd(A,0) is abs(A): take x = sign(A) and y = 0 */
		mref_one(x, A);
		if (mref_isneg(A))
			mref_negate(x, x);
		mref_zero(y, B);
		return;
	}
	/*
	 * Write A = quot*B + rest and solve B*y' + rest*x' == gcd
	 * recursively (note the swapped outputs).  Substituting
	 * rest = A - quot*B gives A*x' + B*(y' - quot*x') == gcd.
	 */
	quot = mref_new();
	rest = mref_new();
	prod = mref_new();
	mref_div(quot, A, B);
	mref_mul(prod, quot, B);
	mref_sub(rest, A, prod);
	bezout(B, rest, y, x);
	mref_mul(prod, quot, x);
	mref_sub(y, y, prod);
	mref_free(quot);
	mref_free(rest);
	mref_free(prod);
}

/*
 * dest = src^2 modulo kN, using the representative of smallest absolute
 * value: kN is subtracted whenever the residue exceeds kN/2.
 */
static void square_modulo_kN (mref_t dest, mref_t src)
{
	mref_t half = mref_new();

	/* half = kN / 2 */
	mref_build(half, ST_INTEGER, "2");
	mref_div(half, kN, half);

	mref_mul(dest, src, src);
	mref_mod(dest, dest, kN);
	if (mref_lessthan(half, dest)) {
		mref_sub(dest, dest, kN);
	}
	mref_free(half);
}

/*
 * Compute r[i] = N mod p[i] for i = 0 .. n-1.
 *
 * The disabled variant does one big-number reduction per prime.  The
 * active one converts N to base 2^32 once and then reduces the limb
 * array modulo each p[i] with word-sized divisions.
 *
 * The callers in this file pass a non-negative N and positive p[i];
 * NOTE(review): nothing here checks either assumption.
 */
static void compute_residues (mref_t N, int n, const int *p, int *r)
{
#if 0
	int i;
	mref_t t0 = mref_new();

	for (i = 0; i < n; i++) {
		mref_build(t0, ST_INTEGER, u32toa(p[i]));
		mref_mod(t0, N, t0);
		r[i] = atoi(mref_string(t0));
	}
	mref_free(t0);
#else
	int i, j, digits, nlimbs;
	__u32 *limbs, nb, quot, rem;
	mref_t t0, t1, t2, base;

	/*
	 * A 32-bit limb holds more than 9 decimal digits, so digits/9+1
	 * limbs are always enough.
	 */
	digits = strlen(mref_string(N));
	limbs = alloca((digits/9+1) * sizeof(__u32));
	base = mref_new(); t0 = mref_new();
	t1 = mref_new(); t2 = mref_new();
	mref_build(base, ST_INTEGER, "4294967296");  /* 2^32 */
	mref_copy(t0, N);
	/* Peel off the limbs, least significant first */
	for (j = 0; mref_notzero(t0); j++) {
		mref_div(t1, t0, base);	/* quotient */
		mref_mul(t2, t1, base);
		mref_sub(t0, t0, t2);	/* remainder */
		limbs[j] = strtoul(mref_string(t0), NULL, 10);
		mref_copy(t0, t1);
	}
	nlimbs = j;
	mref_free(base); mref_free(t0);
	mref_free(t1); mref_free(t2);

	/* The number is now expressed in base 2^32 */
	for (i = 0; i < n; i++) {
		nb = p[i];
		rem = 0;
		/*
		 * Schoolbook division from the most significant limb down;
		 * only the running remainder matters, quot is discarded.
		 */
		for (j = nlimbs-1; j >= 0; j--)
			udiv_qrnnd(quot, rem, rem, limbs[j], nb);
		r[i] = rem;
	}
#endif
}

/*
 * Trial-divide src by the factor base.
 *
 * On return expo[0] is 1 if src was negative (0 otherwise), expo[i] for
 * i >= 1 is the exponent of primes[i] in abs(src), and dest holds what
 * is left of abs(src) after removing those primes.  dest and src may be
 * the same mref.  expo must have room for nprimes ints.
 */
static void reduce (mref_t dest, mref_t src, int *expo)
{
	mref_t prime, remainder;
	int idx, mult, residue[nprimes];

	prime = mref_new(); remainder = mref_new();
	bzero(expo, nprimes * sizeof(int));
	mref_copy(dest, src);
	if (mref_isneg(dest)) {
		mref_negate(dest, dest);
		expo[0] = 1;
	}
	/* One pass over the factor base tells which primes divide dest */
	compute_residues(dest, nprimes-1, primes+1, residue+1);
	for (idx = 1; idx < nprimes; idx++) {
		if (residue[idx] != 0)
			continue;
		/* primes[idx] divides dest: divide it out completely */
		mult = 0;
		mref_build(prime, ST_INTEGER, u32toa(primes[idx]));
		for (;;) {
			mref_mod(remainder, dest, prime);
			if (mref_notzero(remainder))
				break;
			mref_div(dest, dest, prime);
			mult++;
		}
		expo[idx] = mult;
	}
	mref_free(prime); mref_free(remainder);
}

/*
 * Select the multiplier: k = N mod 8 (N must be odd), and set kN = k*N.
 */
static void compute_k (void)
{
	mref_t scratch = mref_new();

	/* k = N mod 8 */
	mref_build(scratch, ST_INTEGER, "8");
	mref_mod(scratch, N, scratch);
	k = atoi(mref_string(scratch));
	assert(k & 1);

	/* kN = k * N */
	mref_build(scratch, ST_INTEGER, u32toa(k));
	mref_mul(kN, scratch, N);

	mref_free(scratch);
}

static int is_in_FB (int p)
{
	mref_t tmp;
	int rem;

	if (p == 2)
		return 1;
	tmp = mref_new();
	mref_build(tmp, ST_INTEGER, u32toa(p));
	mref_mod(tmp, kN, tmp);
	rem = atoi(mref_string(tmp));
	mref_free(tmp);
	return (power_mod(rem,(p-1)/2,p) <= 1);
}

/*
 * Allocate the per-prime tables and fill the factor base: primes[0] is
 * -1, followed by the first nprimes-1 primes accepted by is_in_FB().
 * For every entry from index 2 on, a square root of kN modulo the prime
 * is found by exhaustive search and stored in sqrt_kN[].
 */
static void build_factor_base (void)
{
	int count, idx, cand, p, target;
	mref_t tmp;

	primes	= calloc(nprimes, sizeof(int));
	sqrt_kN	= calloc(nprimes, sizeof(int));
	rt1	= calloc(nprimes, sizeof(int));
	rt2	= calloc(nprimes, sizeof(int));
	if (!primes || !sqrt_kN || !rt1 || !rt2)
		panic_out_of_memory();
	if (!quiet)
		fputs("#\b", stderr);

	/* Collect the primes, starting the search at 2 */
	primes[0] = -1;
	count = 1;
	for (p = 2; ; ) {
		if (is_in_FB(p)) {
			primes[count++] = p;
			if (count == nprimes)
				break;
		}
		/* Advance to the next small prime */
		do p++; while (!is_small_prime(p));
	}

	/* Square roots of kN, looking downwards from p/2 */
	tmp = mref_new();
	for (idx = 2; idx < nprimes; idx++) {
		p = primes[idx];
		mref_build(tmp, ST_INTEGER, u32toa(p));
		mref_mod(tmp, kN, tmp);
		target = atoi(mref_string(tmp));
		cand = p/2;
		while (cand >= 0 && product_mod(cand,cand,p) != target)
			cand--;
		/* is_in_FB() guarantees that a root exists */
		assert(cand >= 0);
		if (cand >= 0)
			sqrt_kN[idx] = cand;
	}
	mref_free(tmp);
}

/*
 * Choose the next sieving polynomial and store its parameters in the
 * globals D, A, B, C and i2D.
 *
 * D runs through the integers congruent to 3 modulo 4, starting near
 * sqrt(sqrt(kN/2)/M), and is accepted when is_pseudo_prime() holds and
 * kN^((D-1)/2) is 1 modulo D.  Then A = D^2, B is an odd square root of
 * kN modulo A, C = (B^2 - kN)/(4A) and i2D is the inverse of 2D
 * modulo kN.
 *
 * NOTE(review): raise_pmod(x, m, e) is assumed to replace x by x^e mod m
 * and mref_one(x, y) to set x to 1 -- confirm against their definitions.
 */
static void generate_coefficients (void)
{
	mref_t h0, h1, h2, t0, t1;

	h0 = mref_new(); h1 = mref_new(); h2 = mref_new();
	t0 = mref_new(); t1 = mref_new();
	if (mref_type(D) == ST_VOID) {
		/* D has no value yet: compute the starting point */
		mref_build(t0, ST_INTEGER, "2");
		mref_div(t0, kN, t0);
		mref_sqrt(A, t0);
		mref_build(t0, ST_INTEGER, u32toa(M));
		mref_div(A, A, t0);
		mref_sqrt(D, A);
		/*
		 * Round it down to a multiple of 4, and subtract 1
		 */
		mref_build(t0, ST_INTEGER, "4");
		mref_div(D, D, t0);
		mref_mul(D, D, t0);
		mref_one(t0, t0);
		mref_sub(D, D, t0);
	} else {
		/*
		 * Add 4 to the previous value of D.  Despite its name, the
		 * label below is the "try the next candidate" entry point
		 * and is also reached by the gotos further down.
		 */
sub4:
		mref_build(t0, ST_INTEGER, "4");
		mref_add(D, D, t0);
	}
	if (!is_pseudo_prime(D)) {
		/* not prime -- try again */
		goto sub4;
	}
	mref_build(t0, ST_INTEGER, "2");
	mref_div(t0, D, t0);	/* t0 = (D-1)/2 */
	mref_copy(t1, kN);
	raise_pmod(t1, D, t0);
	mref_one(t0, t1);
	if (mref_differ(t0,t1)) {
		/* kN not a quadratic residue modulo D -- try again */
		goto sub4;
	}
	mref_mul(A, D, D);
	/* h0 = kN^((D-3)/4) mod D; the division by 4 truncates */
	mref_copy(h0, kN);
	mref_build(t0, ST_INTEGER, "4");
	mref_div(t0, D, t0);
	raise_pmod(h0, D, t0);
	/* h1 = kN * h0 = kN^((D+1)/4) mod D, a square root of kN mod D */
	mref_mul(h1, kN, h0); mref_mod(h1, h1, D);
	mref_one(t0, D);
	mref_add(t1, t0, t0);
	mref_add(t0, t0, D);
	mref_div(t0, t0, t1);	/* (D+1)/2 */
	mref_mul(h2, t0, h0);	/* (2h1)^(-1) */
	mref_mod(h2, h2, D);
	mref_mul(t0, h1, h1);
	mref_sub(t0, kN, t0);
	mref_div(t0, t0, D);	/* (kN-h1^2)/D */
	mref_mul(h2, h2, t0);
	mref_mod(h2, h2, D);
	/* Lift the root from modulo D to modulo A = D^2: B = h1 + h2*D */
	mref_mul(B, h2, D);
	mref_add(B, h1, B);
	mref_mod(B, B, A);
	/* Is B even? */
	mref_build(t0, ST_INTEGER, "2");
	mref_mod(t0, B, t0);
	if (!mref_notzero(t0)) {
		/* Replace B by abs(B - A), which is odd since A is odd */
		mref_sub(B, B, A);
		if (mref_isneg(B))
			mref_negate(B, B);
	}
	/* C = (B^2 - kN) / (4A) */
	mref_mul(C, B, B);
	mref_sub(C, C, kN);
	mref_build(t0, ST_INTEGER, "4");
	mref_mul(t0, t0, A);
	mref_div(C, C, t0);
	/* i2D = (2D)^(-1) modulo kN, normalized to be non-negative */
	mref_add(h0, D, D);
	bezout(h0, kN, i2D, t1);
	mref_mod(i2D, i2D, kN);
	if (mref_isneg(i2D))
		mref_add(i2D, i2D, kN);
	mref_free(t0); mref_free(t1);
	mref_free(h0); mref_free(h1); mref_free(h2);
}

/*
 * Build a relation from a radical whose square modulo kN factors
 * completely over the factor base, and hand it to process_cfact().
 *
 * A is the radical; B collects each prime raised to half its exponent
 * (rounded down), and the bitmap records the odd exponents.
 */
static void insert_factorization (mref_t rad)
{
	struct cfact *rel = new_cfact();
	int *expo, idx, prime, mult;
	mref_t tmp;

	tmp = mref_new();
	mref_copy(rel->A, rad);
	square_modulo_kN(tmp, rad);
	expo = alloca(nprimes * sizeof(int));
	/* After a complete factorization the cofactor left in B is 1 */
	reduce(rel->B, tmp, expo);
	for (idx = 0; idx < nprimes; idx++) {
		prime = primes[idx];
		/* Move the pairs of identical factors into B */
		for (mult = expo[idx]; mult > 1; mult -= 2) {
			if (prime <= 0) {
				mref_sub(rel->B, kN, rel->B);
			} else {
				mref_build(tmp, ST_INTEGER, u32toa(prime));
				mref_mul(tmp, rel->B, tmp);
				mref_mod(rel->B, tmp, kN);
			}
		}
		/* A leftover factor means an odd exponent */
		if (mult)
			set_bit(rel->e, idx);
	}
	process_cfact(rel);
	mref_free(tmp);
}

/*
 * Pack a string of decimal digits into BCD, two digits per byte with
 * the more significant digit in the high nibble.  When the number of
 * digits is odd, the first byte holds the leading digit alone.
 *
 * pdat->dptr receives a malloc'ed buffer of pdat->dsize bytes which the
 * caller must free.
 */
static void bcd_encode (datum *pdat, const char *dstring)
{
	int ndigits = strlen(dstring);
	int nbytes = (ndigits+1)/2;
	char *out = malloc(nbytes);
	const char *in = dstring;

	if (out == NULL)
		panic_out_of_memory();
	pdat->dsize = nbytes;
	pdat->dptr = out;
	if (ndigits % 2 != 0) {
		/* Odd length: the leading digit gets a byte of its own */
		*out++ = *in++ - '0';
	}
	for (; *in != '\0'; in += 2)
		*out++ = (in[0] - '0') * 16 + (in[1] - '0');
}

/*
 * Inverse of bcd_encode(): expand every byte of dat into two decimal
 * digits.  *pstr receives a malloc'ed, NUL-terminated string of
 * 2*dat.dsize characters which the caller must free.  A number that was
 * encoded from an odd count of digits comes back with a leading '0'.
 */
static void bcd_decode (char **pstr, datum dat)
{
	int remaining = dat.dsize;
	char *digits = malloc(2*remaining + 1);
	const char *bcd = dat.dptr;
	char *out = digits;

	if (digits == NULL)
		panic_out_of_memory();
	*pstr = digits;
	for (; remaining != 0; remaining--, bcd++) {
		/* High nibble first; the cast avoids sign extension */
		*out++ = ((unsigned char)(*bcd) >> 4) + '0';
		*out++ = (*bcd & 15) + '0';
	}
	*out = '\0';
}

/*
 * Handle a partial relation: the square of `rad' modulo kN factors over
 * the factor base except for one leftover "big prime" `rem'.
 *
 * The radical is stored in the database under the key `rem'.  If that
 * key is already present, the stored radical and the new one share the
 * same big prime; their product is a complete relation, which is built
 * and passed to process_cfact().
 *
 * Fix: the buffer returned by gdbm_fetch() is allocated with malloc()
 * and belongs to the caller; it is now freed once decoded.
 */
static void insert_incomplete_factorization (mref_t rad, int rem)
{
	datum key, content;
	char *srad = mref_string(rad);
	char *stored;
	int retval, *expo1, *expo2, i, p, vp;
	mref_t t0, t1;
	struct cfact *cf;

	/*
	 * The key is the "big prime" appearing at the end of the
	 * factorization.
	 */
	key.dptr = (char*) &rem;
	key.dsize = sizeof(int);
	/*
	 * The "srad" is simply a sequence of digits; to save space
	 * on the disk, we squeeze it into BCD.
	 */
	bcd_encode(&content, srad);
	retval = gdbm_store(dbf, key, content, GDBM_INSERT);
	free(content.dptr);

	if (retval < 0) {
		perror("gdbm_store");
		return;
	}
	if (retval == 0) {
		/* An incomplete factorization has been saved */
		incomplete++;
		return;
	}
	/*
	 * The database already contains an incomplete factorization with
	 * the same "big prime". We can combine them to get a complete one.
	 * But first, decode the radical from BCD.
	 */
	t0 = mref_new(); t1 = mref_new();

	content = gdbm_fetch(dbf, key);
	assert(content.dptr != NULL);
	bcd_decode(&stored, content);
	free(content.dptr);	/* malloc'ed by gdbm_fetch, ours to release */
	mref_build(t1, ST_INTEGER, stored);
	free(stored);

	/* Allocate a "struct cfact" to store the combined factorization */
	cf = new_cfact();
	expo1 = alloca(nprimes * sizeof(int));
	expo2 = alloca(nprimes * sizeof(int));
	square_modulo_kN(t0, t1);
	reduce(cf->B, t0, expo1);	/* This should be the "big prime" */
	square_modulo_kN(t0, rad);
	reduce(t0, t0, expo2);		/* And so should this */
	/* A is the product of the two radicals */
	mref_mul(t0, t1, rad);
	mref_mod(cf->A, t0, kN);
	for (i = 0; i < nprimes; i++) {
		p = primes[i];
		vp = expo1[i] + expo2[i];
		/* Pairs of identical factors go into B ... */
		while (vp > 1) {
			if (p > 0) {
				mref_build(t0, ST_INTEGER, u32toa(p));
				mref_mul(t0, cf->B, t0);
				mref_mod(cf->B, t0, kN);
			} else
				mref_sub(cf->B, kN, cf->B);
			vp -= 2;
		}
		/* ... and an odd exponent is recorded in the bitmap */
		if (vp)
			set_bit(cf->e, i);
	}
	process_cfact(cf);
	mref_free(t0); mref_free(t1);
}

/*
 * Examine the sieve position x, which sieve() reported as promising.
 *
 * The radical (2*A*x + B) * i2D mod kN is computed, its square modulo
 * kN is taken in absolute value and trial-divided by the factor base.
 * If nothing is left the relation is complete; if a cofactor of fewer
 * than 10 digits is left, it is recorded as an incomplete relation;
 * anything else is dropped.
 *
 * Fixes: the early exit taken when the residue is zero used to leak the
 * four temporaries; the exponent counter that was incremented but never
 * read has been removed.
 */
static void display_factorization (int x)
{
	mref_t t0, t1, t2, radical;
	char buffer[32];
	int i, p;

	t0 = mref_new(); t1 = mref_new(); t2 = mref_new();
	radical = mref_new();
	sprintf(buffer, "%d", x);
	mref_build(t0, ST_INTEGER, buffer);
	mref_add(t0, t0, t0);
	mref_mul(t0, t0, A);
	mref_add(t0, t0, B);
	mref_mul(t0, t0, i2D);
	mref_mod(radical, t0, kN);
	if (mref_isneg(radical))
		mref_add(radical, radical, kN);
	square_modulo_kN(t1, radical);
	if (mref_isneg(t1))
		mref_negate(t1, t1);
	if (!mref_notzero(t1))
		goto done;	/* zero cannot be factored */
	for (i = 1; i < nprimes; i++) {
		p = primes[i];
		if (p >= 5) {
			/*
			 * The roots modulo p_i are stored in rt1[i]
			 * and rt2[i]. This is much faster than reducing
			 * a big integer modulo p_i.
			 */
			int rem = x % p;
			if (rem < 0)
				rem += p;
			if (rem != rt1[i] && rem != rt2[i])
				continue;
		}
		/* Remove every factor p from t1 */
		mref_build(t0, ST_INTEGER, u32toa(p));
		for (;;) {
			mref_mod(t2, t1, t0);
			if (mref_notzero(t2))
				break;
			mref_div(t1, t1, t0);
		}
	}
	mref_one(t2, t1);
	if (mref_differ(t1,t2)) {
		/*
		 * Incomplete factorization found (unless t1 is too big)
		 */
		char *t1str = mref_string(t1);
		if (strlen(t1str) < 10)
			insert_incomplete_factorization(radical,atoi(t1str));
	} else {
		/*
		 * Complete factorization
		 */
		insert_factorization(radical);
	}
done:
	mref_free(t0); mref_free(t1); mref_free(t2); mref_free(radical);
}

/*
 * Sieve the interval [-M, M) for the current polynomial, in windows of
 * WSIZE positions.  Every window cell accumulates 128*log(p) for each
 * factor-base prime p >= 5 having a root at that position; cells that
 * reach the threshold are passed to display_factorization().
 *
 * Side effect: rt1[] and rt2[] are overwritten with the roots of the
 * polynomial modulo each prime (display_factorization reads them).
 */
static void sieve (void)
{
#define WSIZE 16000
	/* One extra cell at the end of the window holds the sentry */
	int window[WSIZE+1], log_p[nprimes], *sptr, *barrier;
	int aa, i2a, b, i, p, rem, amount, sieved, thres, offset;

	/*
	 * First, fill the rt1[] and rt2[] arrays with the values of
	 * A and B modulo p[i], then compute the roots of Q(x)=0 mod p[i].
	 * Carefully avoid p[0] and p[1].
	 */
	compute_residues(A, nprimes-2, primes+2, rt1+2);
	compute_residues(B, nprimes-2, primes+2, rt2+2);
	for (i = 2; i < nprimes; i++) {
		p = primes[i];
		/* Logarithms are kept as integers, scaled by 128 */
		log_p[i] = 128.0 * log(p);
		/* Compute 1/2A modulo p_i */
		aa = (2 * rt1[i]) % p;
		i2a = power_mod(aa, p-2, p);
		b = rt2[i];
		/* First root */
		rem = (-b + sqrt_kN[i]) % p;
		if (rem < 0)
			rem += p;
		rt1[i] = product_mod(rem, i2a, p);
		/* Second root */
		rem = (-b - sqrt_kN[i]) % p;
		if (rem < 0)
			rem += p;
		rt2[i] = product_mod(rem, i2a, p);
	}
	barrier = &window[WSIZE];
	*barrier = INT_MAX;  /* sentry */
	/*
	 * Threshold, on the same 128*log scale:
	 * log(kN)/2 + log(M) - T*log(largest prime of the factor base)
	 */
	thres = 128.0 * (log(atof(mref_string(kN)))/2.0 + log(M)
		- T*log(primes[nprimes-1]));
	sieved = 0;	/* counts the candidates; never read afterwards */
	for (offset = -M; offset < M; offset += WSIZE) {
		if (offset+WSIZE > M) {
			/* Be careful, the last fragment is shorter */
			barrier = &window[M-offset];
			*barrier = INT_MAX;
		}
		bzero(window, (barrier-window)*sizeof(int));
		for (i = 2; i < nprimes; i++) {
			p = primes[i];
			if (p < 5)
				continue;
			amount = log_p[i];
			/* First pass */
			/* rem = first window index congruent to rt1[i] */
			rem = (rt1[i] - offset) % p;
			if (rem < 0)
				rem += p;
			for (sptr = &window[rem]; sptr < barrier; sptr += p)
				*sptr += amount;
			/* Second pass, unless p divides kN */
			if (rt2[i] == rt1[i])
				continue;
			rem = (rt2[i] - offset) % p;
			if (rem < 0)
				rem += p;
			for (sptr = &window[rem]; sptr < barrier; sptr += p)
				*sptr += amount;
		}
		/* Now we can sieve */
		for (sptr = window; ; sptr++) {
			/* Jump to the 1st entry above the threshold */
			/* (the INT_MAX sentry guarantees termination) */
			while (*sptr < thres)
				++sptr;
			/* Have we met the sentry? */
			if (sptr == barrier)
				break;
			display_factorization((sptr-window)+offset);
			++sieved;
		}
	}
}

/*
 * Rewrite the debug file, from its beginning, with the current sieve
 * parameters, the relation counters and the CPU time used so far.
 * Does nothing when fp is NULL.
 *
 * The number of clock ticks per second is obtained from
 * sysconf(_SC_CLK_TCK); the obsolete CLK_TCK macro used previously is
 * no longer provided by current C libraries.
 */
static void write_debug_info (FILE *fp)
{
	struct tms t;
	double ticks_per_sec;

	if (!fp) return;
	times(&t);
	ticks_per_sec = (double) sysconf(_SC_CLK_TCK);
	fseek(fp, 0, SEEK_SET);

	fprintf(fp, "primes=%d, M=%dk, T=%.3f\n", nprimes, M/1000, T);
	fprintf(fp, "relations=%d, incomplete=%d\n", complete, incomplete);
	fprintf(fp, "user=%.3fs, system=%.3fs\n",
	  t.tms_utime/ticks_per_sec, t.tms_stime/ticks_per_sec);
	fflush(fp);
}

/*
 * Find a factor of N0 with the Multiple Polynomial Quadratic Sieve and
 * store it in `result'.  Always returns 1.
 *
 * The parameters come from mpqs_primes, mpqs_width and mpqs_exponent;
 * a value of zero selects a default derived from the size of kN.
 * Progress marks go to stderr unless `quiet' is set, and statistics are
 * written to mpqs_debug_file when it is set.
 *
 * Changes with respect to the previous version:
 *  - the perfect-square case keeps the square root in a local variable;
 *    it used to live in the global N, which the recursive call
 *    overwrites (double free plus leak when N0 is a fourth power);
 *  - when that square root is a pseudo-prime it is returned directly;
 *  - the temporary database file is created with mkstemp() instead of
 *    the race-prone mktemp(), and the database is closed at the end;
 *  - rt1[] and rt2[] are freed together with the other tables;
 *  - allocation failures call panic_out_of_memory() instead of relying
 *    on assert().
 */
int find_factor_mpqs (mref_t result, mref_t N0)
{
	char dbname[] = "/tmp/ifact.gdbm.XXXXXX";
	double log_kN;
	int i, fd, loops = 0;
	FILE *debug_fp = NULL;

	/* Verify that N0 is not a square -- this is not enough, we should
	 * really check if it's some prime power. */
	mref_sqrt(result, N0);
	mref_mul(result, result, result);
	mref_sub(result, N0, result);
	if (!mref_notzero(result)) {
		mref_t root = mref_new();

		mref_sqrt(root, N0);
		if (is_pseudo_prime(root))
			mref_copy(result, root);
		else
			find_factor_mpqs(result, root);
		mref_free(root);
		return 1;
	}

	N = N0; ffound = result; kN = mref_new();
	A = mref_new(); B = mref_new(); C = mref_new();
	D = mref_new(); i2D = mref_new();

	compute_k();
	log_kN = log10(atof(mref_string(kN)));
	nprimes = mpqs_primes;
	if (!nprimes)
		nprimes = 0.31 * exp(1.18 * sqrt(log_kN));
	/* The bitmaps need a whole number of 32-bit words */
	nprimes = (nprimes + 31) & ~31;
	M = mpqs_width / 2;
	if (!mpqs_width)
		M = 200 * nprimes;
	T = mpqs_exponent * 0.001;
	if (!mpqs_exponent)
		T = log_kN/35.0 + 0.8;
#if 0
	fprintf(stderr, "nprimes=%d M=%d T=%f\n", nprimes, M, T);
#endif
	/*
	 * Open a database for incomplete factorizations.  mkstemp()
	 * creates the file atomically; GDBM_NEWDB then starts a fresh
	 * database in it.  The name is removed at once, so the file
	 * disappears when the database is closed.
	 */
	fd = mkstemp(dbname);
	assert(fd >= 0);
	if (fd >= 0)
		close(fd);
	dbf = gdbm_open(dbname, 0, GDBM_NEWDB|GDBM_FAST, 0666, 0);
	assert(dbf != NULL);
	unlink(dbname);

	/*
	 * reduced[] and depend[] are each a single allocation: nprimes
	 * row pointers followed by nprimes rows of nprimes bits.
	 */
	arfact = calloc(nprimes, sizeof(struct cfact*));
	reduced = malloc(nprimes*sizeof(__u32*)+nprimes*nprimes/8);
	depend  = malloc(nprimes*sizeof(__u32*)+nprimes*nprimes/8);
	if (!arfact || !reduced || !depend)
		panic_out_of_memory();
	for (i = 0; i < nprimes; i++) {
		reduced[i] = (__u32*)(reduced+nprimes) + i*nprimes/32;
		depend[i]  = (__u32*)(depend +nprimes) + i*nprimes/32;
	}
	build_factor_base();
	complete = incomplete = 0;
	mref_zero(ffound, N0);
	if (mpqs_debug_file)
		debug_fp = fopen(mpqs_debug_file, "w");
	/* One polynomial per iteration, until process_cfact() sets ffound */
	while(!mref_notzero(ffound)) {
		if (!quiet) {
			char rotbuff[4];
			/* Display a cute rotating cursor */
			rotbuff[0] = "-\\|/"[loops&3];
			rotbuff[1] = '\b';
			rotbuff[2] = '\0';
			fputs(rotbuff, stderr);
			++loops;
		}
		write_debug_info(debug_fp);
		generate_coefficients();
		sieve();
	}
	write_debug_info(debug_fp);
	if (debug_fp)
		fclose(debug_fp);
	gdbm_close(dbf);
	mref_free(kN);
	mref_free(A); mref_free(B); mref_free(C);
	mref_free(D); mref_free(i2D);

	for (i = 0; i < nprimes; i++) {
		struct cfact *cf = arfact[i];
		if (!cf) continue;
		mref_free(cf->A); mref_free(cf->B);
		free(cf);
	}
	free(primes); free(sqrt_kN);
	free(rt1); free(rt2);
	free(arfact); free(reduced); free(depend);

	return 1;
}
