! 32-bit arithmetic library.
! (C) 2001 David Given dg@cowlark.com
! This code may be used by anyone, for anything, unconditionally. Be warned
! that the code is not guaranteed to do anything, not even to work. I am not
! responsible for any problems caused by the use of this code.
!
! If you do want to use it, I'd appreciate being dropped a line to let me know
! people are interested.
!
! Usage: all numbers are stored in memory as four-byte arrays, big-endian.
! Pass pointers to the numbers into the functions. q paramters are inputs,
! z are outputs. For example:
!
! array in1 -> 4;
! array in2 -> 4;
! array out -> 4;
!
! [
!    __long_fromint(in1, 30000);
!    __long_fromint(in2, 1000);
!    __long_mul(in1, in2, out);
! ]
!
! Input and output parameters may be the same array, unless mentioned otherwise
! in the comments.
!
! Credits: loads of people, including the translator group at Tao for working
! out a number of the algorithms for me. The division routine is taken from
! code on the net; see the comment for the URL.
!
! Have fun.

! There's an Inform bug that stops the @not opcode working, so we have to use a
! conventional assignment.

[ __not q1;
	return ~q1;
];

! And the Z-machine doesn't have xor. How bizarre.

[ __xor q1 q2  t;
	return (q1 & (~q2)) | ((~q1) & q2);
];

! Unsigned arithmetic is hard. These helper functions do it for us.
! Thanks to Jay Foad for the algorithm for this.

[ __unsigned_div q1 q2  t;
	if (q1 == 0)
		return q1;
	else if (q2 == 0)
	{
		print "[error: division by zero in __unsigned_div]";
		return 0;
	}
	else if (q2 == 1)
		return q1;
	else if ((q1 > 0) && (q2 > 0))
		return q1/q2;
		
	! Optimisation query: given that when executing Z-machine instructions,
	! the decoding is by far the slowest part of the process, is it
	! useful to have these special cases?

	!else if ((q1-32768) < (q2-32768))
	!	return 0;

	else if (q1 == q2)
		return 1;
	else if (q2 < 0)
	{
		! If q2 is high, then the result can only be 0 or 1.
		! The only way it can be 1 is if q1 > q2.
		return ((q1-32768) > (q2-32768));
	}

	! Lose one bit of precision, and do the calculation.

	@log_shift q1 0-1 -> t;
	t = t / q2;
	@log_shift t 1 -> t;
	
	! Now multiply back out and calculate the remainder. This tells
	! us how much to modify the result by, to restore lost precision.

	!print "{", t, "}";
	!print "[", (q1 - t*q2), ">", q2, "]";

	if (((q1 - t*q2)-32768) >= (q2-32768))
		t++;
	return t;
];
	
[ __unsigned_mod q1 q2  t;
	t = __unsigned_div(q1, q2) * q2;
	return q1 - t;
];

! These wrapper functions do all the long arithmetic.

Array __long_temp1 -> 4;
Array __long_temp2 -> 4;
Array __long_temp3 -> 4;

[ __long_copy q1 z; ! z = q1
	z-->0 = q1-->0;
	z-->1 = q1-->1;
];

[ __long_loadconst z hi lo; ! z = hi.lo
	z-->0 = hi;
	z-->1 = lo;
];

[ __long_fromint z q1; ! z = (long)q1, with sign extension
	z-->1 = q1;
	if (q1 < 0)
		z-->0 = -1;
	else
		z-->0 = 0;
];

[ __long_and q1 q2 z;
	z-->0 = q1-->0 & q2-->0;
	z-->1 = q1-->1 & q2-->1;
];

[ __long_or q1 q2 z;
	z-->0 = q1-->0 | q2-->0;
	z-->1 = q1-->1 | q2-->1;
];

[ __long_asr q1 q2 z  s t;
	if ((q2-->0 ~= 0) || (((q2-->1)-32768) > (31-32768)))
	{
		! All bits shifted off; so we just propagate the sign.

		if (q2-->0 & $8000)
		{
			z-->0 = -1;
			z-->1 = -1;
		}
		else
		{
			z-->0 = 0;
			z-->1 = 0;
		}

		return;
	}
	
	q2 = -(q2-->1 & $1F);
	s = 16 + q2;

	t = q1-->1;
	@log_shift t q2 -> t;
	z-->1 = t;

	t = q1-->0;
	@log_shift t s -> t;
	z-->1 = z-->1 | t;

	t = q1-->0;
	@art_shift t q2 -> t;
	z-->0 = t;
];
	
[ __long_lsr q1 q2 z  s t;
	if ((q2-->0 ~= 0) || ((q2-->1-32768) > (31-32768)))
	{
		! All bits shifted off; the result is zero.

		z-->0 = 0;
		z-->1 = 0;
		return;
	}
	
	q2 = -(q2-->1 & $1F);
	s = 16 + q2;

	t = q1-->1;
	@log_shift t q2 -> t;
	z-->1 = t;

	t = q1-->0;
	@log_shift t s -> t;
	z-->1 = z-->1 | t;

	t = q1-->0;
	@log_shift t q2 -> t;
	z-->0 = t;
];
	
[ __long_neg q1 z  a b;
	a = ~(q1-->0);
	b = ~(q1-->1) + 1;
	if (~~b)
		a++;
	z-->0 = a;
	z-->1 = b;
];

[ __long_xor q1 q2 z  a b;
	a = q1-->0;
	b = q2-->0;
	z-->0 = (a & (~b)) | ((~a) & b);

	a = q1-->1;
	b = q2-->1;
	z-->1 = (a & (~b)) | ((~a) & b);
];

[ __long_add q1 q2 z  a b c cc;
	! Add the low word, and detect overflow.

	a = q1-->1;
	b = q2-->1;
	c = a + b;
	z-->1 = c;
	@log_shift a 0-15 -> a;
	@log_shift b 0-15 -> b;
	@log_shift c 0-15 -> c;
	cc = a;
	if ((~~a) && b && (~~c))
		cc = 1;
	else if (a && b && (~~c))
		cc = 0;

	! Add the high word, plus one if the low word overflowed.

	z-->0 = q1-->0 + q2-->0 + cc;
];

[ __long_sub q1 q2 z  a b c cc;
	! Subtract the low word, and detect overflow.

	a = q1-->1;
	b = q2-->1;
	c = a - b;
	z-->1 = c;
	@log_shift a 0-15 -> a;
	@log_shift b 0-15 -> b;
	@log_shift c 0-15 -> c;

	! Carry table:
	! a b c  c
	! 0 0 0  0
	! 0 0 1	 1
	! 0 1 0	 1
	! 0 1 1	 1
	! 1 0 0	 0
	! 1 0 1	 0
	! 1 1 0	 0
	! 1 1 1	 1

	cc = ~~a;
	if ((~~a) && (~~b) && (~~c))
		cc = 0;
	else if (a && b && c)
		cc = 1;

	! Subtract the high word, plus one if the low word overflowed.

	z-->0 = q1-->0 - q2-->0 - cc;
];

! Algorithm converted from MIPS assembly, at:
! http://www.cz3.nus.edu.sg/~wangjs/CZ101/assembly-examples/divide.s
[ __long_unsigned_divmod q1 q2 d r  r1 r2 r3 r4 count t1 t2;
	! Check for division by zero.

	if ((~~(q2-->0)) && (~~(q2-->1)))
	{
		print "[error: division by zero in __long_unsigned_divmod]";
		return 0;
	}

	count = 0;
	r1 = 0;
	r2 = 0;
	r3 = q1-->0;
	r4 = q1-->1;
	!print "[q1=", r3, " ", r4, " q2=", q2-->0, " ", q2-->1, " ";

	do {
		count++;

		! Shift r1..r4 left by one bit.

		@log_shift r4 0-15 -> t1;
		@log_shift r4 1 -> r4;

		@log_shift r3 0-15 -> t2;
		@log_shift r3 1 -> r3;
		@or r3 t1 -> r3;

		@log_shift r2 0-15 -> t1;
		@log_shift r2 1 -> r2;
		@or r2 t2 -> r2;

		@log_shift r1 1 -> r1;
		@or r1 t1 -> r1;
		
		! Subtract divisor from r1..r2.

		__long_temp3-->0 = r1;
		__long_temp3-->1 = r2;
		__long_sub(__long_temp3, q2, __long_temp3);

		! Is the remainder non-negative?

		if (__long_temp3-->0 >= 0)
		{
			! Yes. Quotient gets a one; commit subtraction.

			r1 = __long_temp3-->0;
			r2 = __long_temp3-->1;

			r4 = r4 | 1;
			!print "1";
		}
		!else print "0";
		! Otherwise the quotient gets a zero and the subtraction is not
		! committed. No operation.
	} until (count == 32);

	! Save results.

	!print " r=", r1, " ", r2;
	if (r)
	{
		r-->0 = r1;
		r-->1 = r2;
	}
	!print " d=", r3, " ", r4, "]";
	if (d)
	{
		d-->0 = r3;
		d-->1 = r4;
	}
];

[ __long_div q1 q2 z  t sign;
	! Calculate final sign, and convert parameters to unsigned.

	t = q1-->0;
	if (t < 0)
		sign = 1;
	__long_temp1-->0 = t & $7FFF;
	__long_temp1-->1 = q1-->1;

	t = q2-->0;
	if (t < 0)
		sign = ~~sign;
	__long_temp2-->0 = t & $7FFF;
	__long_temp2-->1 = q2-->1;

	! Do the actual divide.

	__long_unsigned_divmod(__long_temp1, __long_temp2, z, 0);

	! Adjust sign.

	if (sign)
		__long_neg(z);
];

[ __long_mod q1 q2 z  t sign;
	! Calculate final sign, and convert parameters to unsigned.

	t = q1-->0;
	if (t < 0)
		sign = 1;
	__long_temp1-->0 = t & $7FFF;
	__long_temp1-->1 = q1-->1;

	t = q2-->0;
	if (t < 0)
		sign = ~~sign;
	__long_temp2-->0 = t & $7FFF;
	__long_temp2-->1 = q2-->1;

	! Do the actual modulo.

	__long_unsigned_divmod(__long_temp1, __long_temp2, 0, z);

	! Adjust sign.

	if (sign)
		__long_neg(z);
];

[ __long_unsigned_div q1 q2 z;
	__long_unsigned_divmod(q1, q2, z, 0);
];

[ __long_unsigned_mod q1 q2 z;
	__long_unsigned_divmod(q1, q2, 0, z);
];

! Algorithm my own. Probably buggy (although it passes every test I've
! thrown at it).
[ __long_mul q1 q2 z  a b c d aa bb cc dd sign t;
	! What we're doing here is long multiplication in base 256; so each
	! digit is a byte.
	!
	!    A   B   C   D
	! *  A'  B'  C'  D'
	! = ---------------
	!   AD' BD' CD' DD'
	!   BC' CC' DC'
	!   CB' DB'
	!   DA'
	!
	! We need to add up the columns, remembering to overflow into the
	! next column. (Don't forget to make everything positive.)

	if (q1->0 >= $80)
	{
		! q1 is negative.
		__long_neg(q1, __long_temp1);
		q1 = __long_temp1;
		sign = 1;
	}

	a  = q1->0;
	b  = q1->1;
	c  = q1->2;
	d  = q1->3;

	if (q2->0 >= $80)
	{
		! q2 is negative.
		__long_neg(q2, __long_temp1);
		q2 = __long_temp1;
		sign = ~~sign;
	}
	
	aa = q2->0;
	bb = q2->1;
	cc = q2->2;
	dd = q2->3;

	! D column.

	t = d*dd;
	z->3 = t;
	@log_shift t 0-8 -> t;

	! C column.

	t = t + c*dd + d*cc;
	z->2 = t;
	@log_shift t 0-8 -> t;

	! B column.
	t = t + b*dd + c*cc + d*bb;
	z->1 = t;
	@log_shift t 0-8 -> t;

	! A column.
	t = t + a*dd + b*cc + c*bb + d*aa;
	!t = t & $7F;
	z->0 = t;

	! Apply sign bit.

	if (sign)
		__long_neg(z, z);

	! LongMul can't use the same output as one of its inputs.
	!LongMul(__long_temp1, q1, q2);
	!@copy_table __long_temp1 z 4;
];

[ __long_compare q1 q2  a b;
	a = q1-->0;
	b = q2-->0;
	if (a == b)
	{
		a = q1-->1 - 32768;
		b = q2-->1 - 32768;
	}

	if (a > b)
		return 1;
	if (a < b)
		return -1;
	return 0;
];

[ __long_unsigned_compare q1 q2  a b;
	a = q1-->0 - 32768;
	b = q2-->0 - 32768;
	if (a == b)
	{
		a = q1-->1 - 32768;
		b = q2-->1 - 32768;
	}

	if (a > b)
		return 1;
	if (a < b)
		return -1;
	return 0;
];

