/*
 * 32 bit multiplication to produce phase number for a DDS
 *
 * Dana Browne, AD5VC, 26 Jan 2014
 *
 * Ver 2: replaced floating pt arithmetic to find the integer part with
 *        integer arithmetic.
 *
 * Ver 3: replaced floating division to find multiplier with integer long
 *        division
 *
 * phase constant = Fwanted * (2^32/Fclock)
 *
 * Assumes desired frequency is stored as a 32 bit unsigned integer
 *
 * 2^32/Fclock is stored in two 32 bit unsigned integers.
 * The first integer is the integer part of the factor
 * The fractional part is stored in the low 24 bits of the other integer.
 *
 * Yes, I could pack them into 1 integer.  Just in case I run into a situation
 * where the integer part is too big, this allows me to detect that.
 *
 * The integer part is small, typically 20-40 because the clock frequency
 * is much larger than needed frequencies for this DDS
 *
 * For this method to work without overflows,
 * Fclock must be bigger than 2^24 = 16777216 Hz (about 16.78 MHz)
 *
 * This code does multiplication on these two using 32 bit multiplication
 * and keeps the integer part.  It relies on the fact that the product
 * is a 32 bit integer, not a 64 bit integer, in this application.
 *
 */

/* Long-hand binary division: dividend/divisor (< 1) as a 32 bit fraction */
unsigned long find_fraction(unsigned long numerator, unsigned long denominator);
/* Set up the multiplier 2^32/f_clock; returns 1 on success, 0 on failure */
int gen_multiplier(unsigned long f_clock);
/* Phase constant for f_wanted, using the multiplier set by gen_multiplier() */
unsigned long gen_phase_constant(unsigned long f_wanted);

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* 2^32 and 2^24 as floating point values (used by debug/test code only) */
#define PWR_2_32 4294967296.0
#define PWR_2_24 16777216.0
/* 2^32 - 1: 2^32 itself does not fit in a 32 bit unsigned integer */
#define TWO32M1  0XFFFFFFFFUL
/* Make sure the DEBUG-only printf() code below is compiled out */
#undef DEBUG

/*
 * Multiplier state.  Written by gen_multiplier(), read by
 * gen_phase_constant().  All zero until gen_multiplier() has run.
 */
static unsigned long f_mult_int = 0;	/* Integer part of multiplier    */
static unsigned long f_mult_frac = 0;   /* Fractional part of multiplier */
					/* stored as integer/2^24        */
static unsigned long f_max1 = 0; 	/* Max freq to use with DDS      */

unsigned long find_fraction(unsigned long dividend, unsigned long divisor)
{
/*
 * Returns dividend/divisor as a 32 bit unsigned binary fraction, i.e.
 * 2^32 * dividend / divisor rounded to the nearest integer (an exact
 * half is rounded down).
 *
 * dividend must be smaller than divisor.  If it is not (this includes
 * divisor == 0) a message is printed and 0 is returned.
 *
 * The algorithm is long hand binary division, one result bit per pass.
 * The doubling step is written as a comparison against
 * (divisor - dividend) so that it never computes 2*dividend when that
 * could overflow; the original "dividend <<= 1" lost the top bit for
 * dividend >= 2^31 when unsigned long is 32 bits wide.
 */
	unsigned long ibit, fraction;

	if (dividend >= divisor) { /* ERROR, WE SHOULD FLAG THIS */
		printf("find_fraction(): %lu / %lu >= 1\n", dividend, divisor);
		return 0;
	}

	fraction = 0;
	for (ibit = 0x80000000UL; ibit != 0; ibit >>= 1) {
#ifdef DEBUG
		printf("%lx %lu %lu\n", ibit, fraction, dividend);
#endif
		/* Invariant: dividend < divisor, so divisor - dividend > 0 */
		if (dividend >= divisor - dividend) {
			/* 2*dividend >= divisor: emit a 1 bit, keep 2*dividend - divisor */
			fraction += ibit;
			dividend -= divisor - dividend;
		} else {
			/* 2*dividend < divisor: emit a 0 bit, doubling cannot overflow */
			dividend += dividend;
		}
	}
/*
 * Round the last bit: bump the result if the remainder is more than
 * half of the divisor (again without forming 2*dividend).
 */
	if (dividend > divisor - dividend) {
		++fraction;
	}
#ifdef DEBUG
	printf("fraction: %lu  fraction/256 : %lu\n", fraction, fraction>>8);
#endif
	return fraction;
}

int gen_multiplier(unsigned long Fclock)
{
/*
 * Compute the multiplier 2^32/Fclock used by gen_phase_constant() and
 * store it in f_mult_int (integer part) and f_mult_frac (fractional
 * part, in units of 2^-24).  Also sets f_max1, the largest frequency
 * that gen_phase_constant() will accept.
 *
 * Returns 1 on success.  Returns 0 if Fclock is out of range; in that
 * case f_mult_int, f_mult_frac and f_max1 are all 0, so a later call to
 * gen_phase_constant() rejects every nonzero frequency instead of
 * working from stale or half-updated values.
 */
	unsigned long dividend, left_over;

	f_mult_int = 0;
	f_mult_frac = 0;
	f_max1 = 0;
/*
 * The integer part of 2^32/Fclock must fit in 8 bits, which requires
 * Fclock > 2^24 = 16777216.  Fclock must also fit in 32 bits; that
 * second test can only fail where unsigned long is wider than 32 bits.
 */
	if (Fclock <= 16777216UL || Fclock > TWO32M1) {
		/* Fclock out of range, abort! */
		return 0;
	}
/*
 * Get integer part and remainder of 2^32/Fclock.
 *
 * 2^32 does not fit in a 32 bit integer, so divide 2^32 - Fclock instead:
 *
 *  2^32          = quotient*Fclock + remainder
 *  2^32 - Fclock = (quotient - 1)*Fclock + remainder
 *
 * 2^32 - Fclock is formed as ((2^32 - 1) - Fclock) + 1, which cannot
 * overflow.  Both divisions share the same remainder, so it is taken
 * directly with % and is always smaller than Fclock.
 */
	dividend = (TWO32M1 - Fclock) + 1;
	f_mult_int = 1 + (dividend / Fclock);
	left_over = dividend % Fclock;
#ifdef DEBUG
	printf("leftover: %lu  f_mult_int: %lu\n", left_over, f_mult_int);
#endif
/*
 * If the integer part is bigger than 255, this whole algorithm will fail.
 * The range check above already rules this out; kept as a safeguard.
 */
	if (f_mult_int > 255) {
		f_mult_int = 0;
		return 0;
	}

/*
 * Conservative upper bound for frequencies that we will generate phase
 * constants for: floor(2^32 / (f_mult_int + 1)), written so that 2^32
 * itself is never formed.
 */
	f_max1 = 1 + ((TWO32M1 - f_mult_int) / (f_mult_int + 1));

/*
 * left_over/Fclock is the fractional part.
 *
 * The integer part is known perfectly.  If we use it as the first
 * 8 bits of our multiplier, and we use 24 bits of the fractional part
 * we will have a multiplier accurate to 32 bits.
 *
 * find_fraction() returns 32 fraction bits; the top 24 are kept and the
 * low 8 are truncated (not rounded).
 */
	if (left_over != 0) {
		f_mult_frac = (find_fraction(left_over, Fclock) >> 8) & 0x00FFFFFFUL;
	}

#ifdef DEBUG
	printf("integer and fraction: %lu %lu\n", f_mult_int, f_mult_frac);
	printf("Expect fraction to be : %lu\n", 
	(unsigned long) floor(0.5 + PWR_2_24 * (float) left_over / (float) Fclock));
#endif

	return 1;
}


#define LOW16 0x0000FFFFUL
#define LOW8 0x000000FFUL
#define LOW24 0x00FFFFFFUL

unsigned long gen_phase_constant(unsigned long f_wanted)
{
/*
 * Multiply the wanted frequency by the multiplier set up by
 * gen_multiplier() and return the rounded product (the phase constant).
 *
 * If f_wanted is larger than f_max1 a message is printed and 0 is
 * returned.  f_max1 is 0 until gen_multiplier() has succeeded, so every
 * nonzero frequency is rejected before that.
 *
 * Every intermediate value below stays under 2^32, so the result does
 * not depend on unsigned long being exactly 32 bits wide.
 */
	unsigned long phase_const, ah, al, bh, bl, mid, low, frac;

	if (f_wanted > f_max1) {
		printf("Phase constant will overflow for freq %lu\n", f_wanted);
		return 0;
	}

	phase_const = f_wanted * f_mult_int; /* multiply by integer part */

/*
 * Multiply by the fractional part using 16 bit pieces.
 * f_wanted = (ah, al),  f_mult_frac = (bh, bl)/2^24 (bh < 256)
 *
 * Product w/ fractional part is (ah*bh)*2^8 + (ah*bl+bh*al)/2^8 + (al*bl)/2^24
 */
	ah = (f_wanted >> 16) & LOW16;
	al = (f_wanted & LOW16);

	bh = (f_mult_frac >> 16);
	bl = (f_mult_frac & LOW16);

	phase_const += (ah*bh) << 8;	/* ah*bh < 2^24, shifted < 2^32 */

/*
 * Middle terms, weight 2^-8.  Whole parts go into phase_const; the
 * remainder (< 256, in units of 2^-8) is carried along in mid.
 */
	mid = ah*bl;			/* < 2^32 */
	phase_const += (mid >> 8);
	mid = (mid & LOW8) + bh*al;	/* < 2^8 + 2^24 */
	phase_const += (mid >> 8);
	mid &= LOW8;

/*
 * Low term, weight 2^-24.  The remainder from the middle terms is in
 * units of 2^-8, so it must be scaled by 2^16 before being combined
 * with the low 24 bits of al*bl.  (The original added it unscaled,
 * which lost up to one count in the result.)
 */
	low = al*bl;			/* < 2^32 */
	phase_const += (low >> 24);
	frac = (low & LOW24) + (mid << 16);	/* < 2^25 */
	phase_const += (frac >> 24);
	frac &= LOW24;

	if (frac > 0x00800000UL) {	/* Round up if more than half */
		++phase_const;
	}

	return phase_const;
}

/*
 * Testbed for the generator, comparing to operations using (double)
 */

/*  COMMENT OUT MAIN() AND TEST ROUTINE FOR TRIAL WITH PHSNA -- NRK 1/27/14

int main(int argc , char ** argv)
{
	unsigned long fclock, fdesired;
	unsigned long phase_val;
	double exact_mult, exact;

	printf(" Input clock frequency: ");
	if (scanf(" %lu", &fclock) != 1) exit(1);
	exact_mult = PWR_2_32/(double) fclock;
	if (gen_multiplier(fclock) == 0) {
		exit(1);
	}
	fdesired = 10000;
	printf("Freq       Phase    Exact ph   Df ppm\n");
	do {
		phase_val = gen_phase_constant(fdesired);
		exact = floor(exact_mult*(double) fdesired);
		printf("%10lu  %10lu  %11.0f  %6.3f\n",
			fdesired, phase_val, exact,
			1E6*((double) phase_val - exact)/exact);
		if (fdesired < 1000000) {
			fdesired += fdesired/15;
		} else {
			fdesired += fdesired/50;
		}
	} while (fdesired < fclock/3);
/*
 * Do manual testing

	printf("Freq       Phase    Exact ph   Df ppm\n");
	do {
		printf(" Input desired frequency, 0 to quit: ");
		if (scanf(" %lu", &fdesired) != 1) exit(1);
		if (fdesired == 0) exit(0);
		phase_val = gen_phase_constant(fdesired);
		exact = floor(exact_mult*(double) fdesired);
		printf("%10lu  %10lu  %11.0f  %6.3f\n",
			fdesired, phase_val, exact,
			1E6*((double) phase_val - exact)/exact);
	} while (fdesired != 0) ;
	exit(0);
}
*/