Much investigation of bignum problems

Bignum multiply is still not working; because bignum read and bignum divide both depend on it, it is the problem to tackle first.
2019-01-19 16:24:59 +00:00 · 2019-01-19 16:24:59 +00:00 · 0f8bc990f2
parent 000ae3c392
commit 0f8bc990f2
9 changed files with 372 additions and 173 deletions
--- a/lisp/expt.lisp
+++ b/lisp/expt.lisp
@ -5,4 +5,4 @@
              ((= x 1) n)
              (t (* n (expt n (- x 1)))))))

-(expt 2 65)
+(expt 2 60)
--- a/lisp/scratchpad.lisp
+++ b/lisp/scratchpad.lisp
@ -0,0 +1,48 @@
+(set! i
+  (+
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000
+    10000000000000000000))
+
+(set! j (+ i i i i i i i i i i))
+
+(set! k (+ j j j j j j j j j j))
+
+(set! l (+ k k k k k k k k k k))
+
+(set! m (+ l l l l l l l l l l))
+
+(set! n (+ m m m m m m m m m m))
+
+(set! o (+ n n n n n n n n n n))
+
+(set! p (+ o o o o o o o o o o))
+
+(set! q (+ p p p p p p p p p p))
+
+(set! r (+ q q q q q q q q q q))
+
+(set! s (+ r r r r r r r r r r))
+
+(set! t (+ s s s s s s s s s s))
+
+(set! u (+ t t t t t t t t t t))
+
+(set! v (+ u u u u u u u u u u))
+
+(set! x (+ v v v v v v v v v v))
+
+(set! y (+ x x x x x x x x x x))
+
+"we're OK to here: 10^36, which is below the 2^120 barrier so represented as two cells"
+(inspect (set! z (+ y y y y y y y y y y)))
+
+"This blows up: 10^37, which is a three cell bignum."
+(inspect (+ z z z z z z z z z z))
--- a/lisp/scratchpad2.lisp
+++ b/lisp/scratchpad2.lisp
@ -0,0 +1,84 @@
+"This demonstrates that although the print representation of three cell bignums blows up, the internal representation is sane"
+
+"We start by adding 8 copies of 2^60 - i.e. the first two-cell integer"
+
+(set! a
+  (+
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976
+    1152921504606846976))
+
+"Then repeatedly add eight copies of the previous generation"
+
+(set! b (+ a a a a a a a a))
+
+(set! c (+ b b b b b b b b))
+
+(set! d (+ c c c c c c c c))
+
+(set! e (+ d d d d d d d d))
+
+(set! f (+ e e e e e e e e))
+
+(set! g (+ f f f f f f f f))
+
+(set! h (+ g g g g g g g g))
+
+(set! i (+ h h h h h h h h))
+
+(set! j (+ i i i i i i i i))
+
+(set! k (+ j j j j j j j j))
+
+(set! l (+ k k k k k k k k))
+
+(set! m (+ l l l l l l l l))
+
+(set! n (+ m m m m m m m m))
+
+(set! o (+ n n n n n n n n))
+
+"p"
+(set! p (+ o o o o o o o o))
+
+"q"
+(set! q (+ p p p p p p p p))
+
+"r"
+(set! r (+ q q q q q q q q))
+
+"s"
+(inspect
+ (set! s (+ r r r r r r r r)))
+
+"t - first three cell integer. Printing blows up here"
+(inspect
+ (set! t (+ s s s s s s s s)))
+
+"u"
+(inspect
+ (set! u (+ t t t t t t t t)))
+
+"v"
+(inspect
+ (set! v (+ u u u u u u u u)))
+
+"w"
+(inspect
+ (set! w (+ v v v v v v v v)))
+
+(inspect
+ (set! x (+ w w w w w w w w)))
+
+(inspect
+ (set! y (+ x x x x x x x x)))
+
+(inspect
+ (set! z (+ y y y y y y y y)))
+
+(inspect (+ z z z z z z z z))
--- a/src/arith/integer.c
+++ b/src/arith/integer.c
@ -95,6 +95,21 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) {
 }


+__int128_t cell_value( struct cons_pointer c, char op, bool is_first_cell ) {
+    long int val = nilp( c ) ? 0 : pointer2cell( c ).payload.integer.value;
+    long int carry = is_first_cell ? 0 : ( MAX_INTEGER + 1 );
+
+    __int128_t result = ( __int128_t ) integerp( c ) ?
+        ( val == 0 ) ? carry : val : op == '*' ? 1 : 0;
+    debug_printf( DEBUG_ARITH,
+                  L"cell_value: raw value is %ld, op = '%c', is_first_cell = %s; returning ",
+                  val, op, is_first_cell ? "true" : "false" );
+    debug_print_128bit( result, DEBUG_ARITH );
+    debug_println( DEBUG_ARITH );
+
+    return result;
+}
+
 /**
 * internal workings of both `add_integers` and `multiply_integers` (and
 * possibly, later, other operations. Apply the operator `op` to the
@ -106,11 +121,11 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) {
 * up significantly WRONG, but the value in the more significant cell
 * ends up correct. */
 struct cons_pointer operate_on_integers( struct cons_pointer a,
-                                       struct cons_pointer b,
-                                        char op) {
+                                         struct cons_pointer b, char op ) {
    struct cons_pointer result = NIL;
    struct cons_pointer cursor = NIL;
    __int128_t carry = 0;
+    bool is_first_cell = true;

    if ( integerp( a ) && integerp( b ) ) {
        debug_print( L"operate_on_integers: \n", DEBUG_ARITH );
@ -120,12 +135,8 @@ struct cons_pointer operate_on_integers( struct cons_pointer a,
        debug_println( DEBUG_ARITH );

        while ( !nilp( a ) || !nilp( b ) || carry != 0 ) {
-            __int128_t av =
-                ( __int128_t ) integerp( a ) ? pointer2cell( a ).
-                payload.integer.value : op == '*' ? 1 : 0;
-            __int128_t bv =
-                ( __int128_t ) integerp( b ) ? pointer2cell( b ).
-                payload.integer.value : op == '*' ? 1 : 0;
+            __int128_t av = cell_value( a, op, is_first_cell );
+            __int128_t bv = cell_value( b, op, is_first_cell );

            /* slightly dodgy. `MAX_INTEGER` is substantially smaller than `LONG_MAX`, and
             * `LONG_MAX * LONG_MAX` =~ the maximum value for `__int128_t`. So if the carry
@ -144,7 +155,8 @@ struct cons_pointer operate_on_integers( struct cons_pointer a,
                    break;
            }

-          debug_printf( DEBUG_ARITH, L"operate_on_integers: op = '%c'; av = ", op);
+            debug_printf( DEBUG_ARITH,
+                          L"operate_on_integers: op = '%c'; av = ", op );
            debug_print_128bit( av, DEBUG_ARITH );
            debug_print( L"; bv = ", DEBUG_ARITH );
            debug_print_128bit( bv, DEBUG_ARITH );
@ -186,6 +198,7 @@ struct cons_pointer operate_on_integers( struct cons_pointer a,

            a = pointer2cell( a ).payload.integer.more;
            b = pointer2cell( b ).payload.integer.more;
+            is_first_cell = false;
        }
    }

@ -239,6 +252,11 @@ struct cons_pointer integer_to_string_add_digit( int digit, int digits,
 * when we get to the last digit from one integer cell, we have potentially
 * to be looking to the next. H'mmmm.
 */
+/*
+ * TODO: this blows up when printing three-cell integers, but works fine
+ * for two-cell. What's happening is that when we cross the barrier we
+ * SHOULD print 2^120, but what we actually print is 2^117. H'mmm.
+ */
 struct cons_pointer integer_to_string( struct cons_pointer int_pointer,
                                       int base ) {
    struct cons_pointer result = NIL;
@ -253,24 +271,27 @@ struct cons_pointer integer_to_string( struct cons_pointer int_pointer,
        while ( accumulator > 0 || !nilp( integer.payload.integer.more ) ) {
            if ( !nilp( integer.payload.integer.more ) ) {
                integer = pointer2cell( integer.payload.integer.more );
-                accumulator +=
+                accumulator += integer.payload.integer.value == 0 ?
+                    MAX_INTEGER :
                    ( llabs( integer.payload.integer.value ) *
                      ( MAX_INTEGER + 1 ) );
+                debug_print
+                    ( L"integer_to_string: crossing cell boundary, accumulator is: ",
+                      DEBUG_IO );
+                debug_print_128bit( accumulator, DEBUG_IO );
+                debug_println( DEBUG_IO );
            }

-            debug_printf( DEBUG_IO,
-                          L"integer_to_string: accumulator is %ld\n:",
-                          accumulator );
            do {
                int offset = ( int ) ( accumulator % base );
                debug_printf( DEBUG_IO,
-                              L"integer_to_string: digit is %ld, hexadecimal is %c\n:",
-                              offset,
-                              hex_digits[offset] );
+                              L"integer_to_string: digit is %ld, hexadecimal is %c, accumulator is: ",
+                              offset, hex_digits[offset] );
+                debug_print_128bit( accumulator, DEBUG_IO );
+                debug_println( DEBUG_IO );

                result =
-                    integer_to_string_add_digit( offset, digits++,
-                                                 result );
+                    integer_to_string_add_digit( offset, digits++, result );
                accumulator = accumulator / base;
            } while ( accumulator > base );
        }
--- a/src/debug.c
+++ b/src/debug.c
@ -54,7 +54,8 @@ void debug_print_128bit( __int128_t n, int level ) {
            char str[40] = { 0 }; // log10(1 << 128) + '\0'
            char *s = str + sizeof( str ) - 1;  // start at the end
            while ( n != 0 ) {
-        if (s == str) return; // never happens
+                if ( s == str )
+                    return;     // never happens

                *--s = "0123456789"[n % 10];  // save last digit
                n /= 10;        // drop it
--- a/src/ops/read.c
+++ b/src/ops/read.c
@ -187,7 +187,8 @@ struct cons_pointer read_number( struct stack_frame *frame,
                                            ( L"Malformed number: too many periods" ),
                                            frame_pointer );
                } else {
-        debug_print(L"read_number: decimal point seen\n", DEBUG_IO);
+                    debug_print( L"read_number: decimal point seen\n",
+                                 DEBUG_IO );
                    seen_period = true;
                }
                break;
@ -197,7 +198,8 @@ struct cons_pointer read_number( struct stack_frame *frame,
                                            ( L"Malformed number: dividend of rational must be integer" ),
                                            frame_pointer );
                } else {
-        debug_print(L"read_number: ratio slash seen\n", DEBUG_IO);
+                    debug_print( L"read_number: ratio slash seen\n",
+                                 DEBUG_IO );
                    dividend = result;

                    result = make_integer( 0, NIL );
@ -212,7 +214,8 @@ struct cons_pointer read_number( struct stack_frame *frame,
                                                     NIL ) );

                debug_printf( DEBUG_IO,
-                   L"read_number: added character %c, result now ", c );
+                              L"read_number: added character %c, result now ",
+                              c );
                debug_print_object( result, DEBUG_IO );
                debug_print( L"\n", DEBUG_IO );

@ -246,7 +249,8 @@ struct cons_pointer read_number( struct stack_frame *frame,
    }

    if ( neg ) {
-    debug_print(L"read_number: converting result to negative\n", DEBUG_IO);
+        debug_print( L"read_number: converting result to negative\n",
+                     DEBUG_IO );

        result = negative( frame_pointer, result );
    }
--- a/unit-tests/bignum-add.sh
+++ b/unit-tests/bignum-add.sh
@ -5,12 +5,12 @@
 # (right on the boundary)
 a=1152921504606846975
 b=1
-expected='1152921504606846976'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
-  tail -1 |\
-  sed 's/\,//g'`
+  tail -1`

 echo -n "adding $a to $b: "
 if [ "${expected}" = "${actual}" ]
@ -36,8 +36,9 @@ fi
 # (just over the boundary)
 a='1152921504606846976'
 b=1
-expected='1152921504606846977'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
  tail -1 |\
@ -62,13 +63,15 @@ else
    exit 1
 fi

+
 #####################################################################
 # add a bignum and a smallnum to produce a bignum
 # (just over the boundary)
 a='1152921504606846977'
 b=1
-expected='1152921504606846978'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
  tail -1 |\
@ -98,8 +101,9 @@ fi
 # (just over the boundary)
 a=1
 b=1152921504606846977
-expected='1152921504606846978'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
  tail -1 |\
@ -124,12 +128,14 @@ else
    exit 1
 fi

+
 #####################################################################
 # add two bignums to produce a bignum
 a=10000000000000000000
 b=10000000000000000000
-expected='20000000000000000000'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
  tail -1 |\
@ -154,13 +160,15 @@ else
    exit 1
 fi

+
 #####################################################################
 # add a smallnum and a two-cell bignum to produce a three-cell bignum
 # (just over the boundary)
 a=1
 b=1329227995784915872903807060280344576
-expected='1329227995784915872903807060280344577'
-output=`echo "(+ $a $b)" | target/psse -v 2 2>psse.log`
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`

 actual=`echo $output |\
  tail -1 |\
@ -185,3 +193,36 @@ else
    exit 1
 fi

+
+#####################################################################
+# This currently fails:
+# (= (+ 1 3064991081731777716716694054300618367237478244367204352)
+#         3064991081731777716716694054300618367237478244367204353)
+a=1
+b=3064991081731777716716694054300618367237478244367204352
+c=`echo "$a + $b" | bc`
+expected='t'
+output=`echo "(= (+ $a $b) $c)" | target/psse -v 2 2>psse.log`
+
+actual=`echo $output |\
+  tail -1 |\
+  sed 's/\,//g'`
+
+echo -n "adding $a to $b: "
+if [ "${expected}" = "${actual}" ]
+then
+    echo "OK"
+else
+    echo "Fail: expected '${expected}', got '${actual}'"
+    exit 1
+fi
+
+echo -n "checking a bignum was created: "
+grep 'BIGNUM!' psse.log > /dev/null
+if [ $? -eq "0" ]
+then
+    echo "OK"
+else
+    echo "Fail"
+    exit 1
+fi
--- a/unit-tests/where-does-it-break.sh
+++ b/unit-tests/where-does-it-break.sh
@ -3,9 +3,9 @@
 # Not really a unit test, but a check to see where bignum addition breaks

 broken=0
-i=1152921506900200000
+i=11529215046068469750
 # we've already proven we can successfully get up to here
-increment=10000
+increment=1

 while [ $broken -eq "0" ]
 do