No one seems to be interested in this issue so I’ll have to reply to
myself…
No one has pointed out that a clean solution for this is currently
available, so I went ahead and implemented it. Below are the unit test I
used, one patch for bignum.c and one for pack.c. The patches are taken
against the latest nightly snapshot (note that the diffs were generated
with the patched file first, so apply them with `patch -R`):
$ ruby -v
ruby 1.8.0 (2003-05-27) [i386-mingw32]
It should work on both big- and little-endian architectures, but I've only
tried it on little endian. It would be great if someone could try it on a
big-endian machine.
This patch adds a ‘W’ template character to pack and unpack for
packing/unpacking an unsigned integer (Fixnum OR Bignum). The packing
is from MSB to LSB so that
[0xff00].pack("W") == "\377\000"
regardless of the endianness of the machine. Leading zeroes are trimmed
from the string (except for negative numbers; see below). If you pack a
negative number you lose information about the sign, i.e.
[-1].pack("W").unpack("W").first == 1
which is the same as for template ‘I’ but in contrast to template ‘Q’.
I chose W as in "raW binary representation of number", but it's hard to
find a good template char since most are taken.
I didn’t implement ‘w’ for dumping negative numbers since I don’t see
the need. However, the implementation hints at one possible way for how
to do ‘w’ (by only allowing negative numbers to have leading zeroes).
If someone finds this worthy/useful, it's in the public domain, so use it
any way you want. I tried to stay close to the style of the Ruby source, but
I'm sure the code can be made even cleaner/nicer/faster.
Regards,
Robert Feldt
Ps. This post is probably too long; I’m sorry… Maybe ruby-core list is
better for these things? Or just to matz? I’m not fully up-to-date with
community procedures.
----------utest_bignum_pack_unpack.rb------------------------------------
require 'test/unit'
# Unit tests for the proposed 'W' pack/unpack template character, which
# packs/unpacks an unsigned integer (Fixnum or Bignum) as raw bytes, most
# significant byte first, with leading zero bytes trimmed for nonnegative
# numbers.
#
# Written for Ruby 1.8: String#[] with an Integer index is expected to
# return the byte value as a Fixnum.
class TestBignumPackAndUnpack < Test::Unit::TestCase
  # Every one-byte value packs to the single character with that code.
  # The range is inclusive so that 255, the largest one-byte value, is
  # covered too.
  def test_01_pack_W_one_byte
    (0..255).each do |i|
      assert_equal(i.chr, [i].pack("W"))
    end
  end

  # Returns a random integer that needs exactly +bytes+ bytes, i.e. one in
  # the interval [2**(8*(bytes-1)), 2**(8*bytes)).
  def num_with_bytes(bytes)
    low_limit = 2**(8 * (bytes-1))
    low_limit + rand(-low_limit + 2**(8*bytes))
  end

  # Packs +numSamples+ random numbers of +numBytes+ bytes each and checks
  # both the packed length and every byte of the result.
  def assert_pack_W_sampled(numBytes, numSamples = 100)
    numSamples.times do
      num = num_with_bytes(numBytes)
      packed = [num].pack("W")
      assert_equal(numBytes, packed.length, "num = #{num}")
      # The packed string is MSB first; reverse it so that index i holds
      # the i-th least significant byte of num.
      lsb_first = packed.reverse
      numBytes.times do |i|
        assert_equal(lsb_first[i], num & 0xff)
        num >>= 8
      end
    end
  end

  # Samples numbers that are 2 to 9 bytes long.
  def test_02_pack_W_sampled_positive_multi_bytes
    (2...10).each do |num_bytes|
      assert_pack_W_sampled(num_bytes, 25)
    end
  end

  # 2**n packs to a 1 byte followed by n/8 zero bytes; 2**n - 1 packs to
  # n/8 bytes of 0xff.
  def test_03_pack_W_large
    p1024 = [2**1024].pack("W")
    assert_equal(1.chr + (0.chr*(1024/8)), p1024)
    p1024_ones = [2**1024-1].pack("W")
    assert_equal(0xff.chr * (1024/8), p1024_ones)
    p2048 = [2**2048].pack("W")
    assert_equal(1.chr + (0.chr*(2048/8)), p2048)
    p2048_ones = [2**2048-1].pack("W")
    assert_equal(0xff.chr * (2048/8), p2048_ones)
  end

  # This might not be what one wants but I think main use is in
  # converting positive nums so lets leave it as is...
  # To do 'w' we could make sure that negative numbers always
  # start with leading 0. This way we could later unpack them without
  # losing the sign.
  def test_04_pack_W_negative_numbers
    assert_equal("\000\000\000\001", [-1].pack("W"))
    assert_equal("\000\000\000\002", [-2].pack("W"))
    assert_equal("\000\000\000\377", [-255].pack("W"))
    assert_equal("\000\000\377\377", [-2**16+1].pack("W"))
    assert_equal("\000\377\377\377", [-2**24+1].pack("W"))
    assert_equal("\377\377\377\377", [-2**32+1].pack("W"))
    assert_equal("\000\000\000\001\000\000\000\000", [-2**32].pack("W"))
  end

  # Every single character unpacks to its character code. The range is
  # inclusive so that 255 is covered too.
  def test_05_unpack_W_one_byte
    (0..255).each do |i|
      assert_equal(i, i.chr.unpack("W").first)
    end
  end

  # Returns a string of +bytes+ random bytes.
  def str_with_bytes(bytes)
    s = ""
    bytes.times {s << rand(256).chr}
    s
  end

  # Unpacks random strings of 2 to 9 bytes and checks every byte of the
  # resulting number against the string.
  def test_06_unpack_W_sampled_positive_multi_bytes
    (2...10).each do |num_bytes|
      25.times do
        s = str_with_bytes(num_bytes)
        num = s.unpack("W").first
        lsb_first = s.reverse
        num_bytes.times do |i|
          assert_equal(lsb_first[i], num & 0xff,
                       "s = #{s.unpack('H*')}, num = #{num}")
          num >>= 8
        end
      end
    end
  end

  # Mirror image of test_03_pack_W_large.
  def test_07_unpack_W_large
    u1024 = (1.chr + (0.chr*(1024/8))).unpack("W").first
    assert_equal(2**1024, u1024)
    u1024_ones = (0xff.chr * (1024/8)).unpack("W").first
    assert_equal(2**1024-1, u1024_ones)
    u2048 = (1.chr + (0.chr*(2048/8))).unpack("W").first
    assert_equal(2**2048, u2048)
    u2048_ones = (0xff.chr * (2048/8)).unpack("W").first
    assert_equal(2**2048-1, u2048_ones)
  end

  # Packing drops the sign, so a packed negative number unpacks to its
  # absolute value.
  def test_08_unpack_W_packed_negative_numbers
    assert_equal(1, [-1].pack("W").unpack("W").first)
    assert_equal(2, [-2].pack("W").unpack("W").first)
    assert_equal(255, [-255].pack("W").unpack("W").first)
    assert_equal(2**16-1, [-2**16+1].pack("W").unpack("W").first)
    assert_equal(2**24-1, [-2**24+1].pack("W").unpack("W").first)
    assert_equal(2**32-1, [-2**32+1].pack("W").unpack("W").first)
    assert_equal(2**32, [-2**32].pack("W").unpack("W").first)
  end

  # Round trip: pack followed by unpack is the identity on nonnegative
  # numbers.
  def test_09_cycle_pack_then_unpack
    1000.times do
      num = rand(2**200)
      assert_equal(num, [num].pack("W").unpack("W").first)
    end
  end
end
---------upatch_bignum_c---------------------------------------------
--- bignum.c 2003-05-28 23:50:04.000000000 +0200
+++ bignum.c.old 2003-05-28 11:09:30.000000000 +0200
@@ -306,110 +306,6 @@
#endif
-/* We should probably use endian in pack.c instead but I had problems
- * doing so.
- *
- * Returns 1 on a big-endian machine and 0 on a little-endian one.  The
- * answer is computed on the first call and cached in a static variable.
- */
-static int
-big_endian()
-{
-    static int init = 0;
-    static int big_endian_value;
-    char *p;
-
-    if (init) return big_endian_value;
-    init = 1;
-    /* init now holds the value 1, so it doubles as the probe: if its
-       first byte in memory is 1, the low-order byte is stored first and
-       the machine is little endian. */
-    p = (char*)&init;
-    return big_endian_value = (p[0]==1)?0:1;
-}
-
-/* Pack a nonnegative bignum as raw binary data/bitstring starting from
- * the most significant byte, regardless of the machine's byte order.
- *
- * buf receives RBIGNUM(val)->len * SIZEOF_BDIGITS bytes, so the caller
- * must provide at least that much room.
- * Returned data will be multiple of SIZEOF_BDIGITS so there can be up to
- * SIZEOF_BDIGITS-1 leading zeroes.
- *
- * Assumes that val is really a bignum ie. fixnums
- * needs to be converted prior to calling this.
- */
-void
-rb_nonneg_bignum_pack(buf, val)
-    char *buf;
-    VALUE val;
-{
-    long len, i, j;
-    char *next_digit;
-
-    len = RBIGNUM(val)->len;
-    /* Cast to char* before offsetting, as rb_bignum_unpack does, so the
-       offset is counted in bytes and does not depend on the type of the
-       digits pointer.  next_digit starts one past the most significant
-       digit and walks backwards. */
-    next_digit = (char*)RBIGNUM(val)->digits + (len * SIZEOF_BDIGITS);
-    if (big_endian()) {
-        /* Bytes inside a digit are already MSB first: copy in order. */
-        for(i=0; i<len; i++) {
-            next_digit -= SIZEOF_BDIGITS;
-            for(j=0; j<SIZEOF_BDIGITS; j++) {
-                *buf++ = *(next_digit+j);
-            }
-        }
-    } else {
-        /* Bytes inside a digit are LSB first: copy them in reverse. */
-        for(i=0; i<len; i++) {
-            next_digit -= SIZEOF_BDIGITS;
-            for(j=SIZEOF_BDIGITS-1; j>=0; j--) {
-                *buf++ = *(next_digit+j);
-            }
-        }
-    }
-}
-
-/* Build an integer from len bytes of raw binary data in buf, most
- * significant byte first (the inverse of rb_nonneg_bignum_pack).
- *
- * sign is handed to bignew: nonzero creates a nonnegative number, zero a
- * negative one.  The 'W' unpack template always passes 1.
- *
- * If len is not a multiple of SIZEOF_BDIGITS, the first
- * len % SIZEOF_BDIGITS bytes form the most significant digit, which is
- * padded with zero high-order bytes.
- *
- * Returns the result of bignorm() on the new bignum.
- */
-VALUE
-rb_bignum_unpack(buf, sign, len)
-    const char *buf;
-    int sign;
-    long len;
-{
-    VALUE big;
-    long num_digits, i, j;
-    char *next_digit;
-    char *extra_digit;
-    long num_full_digits = len / SIZEOF_BDIGITS;
-    int extra_bytes = len % SIZEOF_BDIGITS;
-
-    num_digits = num_full_digits + (extra_bytes>0 ? 1 : 0);
-    big = bignew(num_digits, sign ? 1 : 0);
-    /* Both pointers start at the most significant digit slot: the
-       partial digit (if any) is stored there, and the full digits are
-       filled in below it, walking backwards. */
-    extra_digit = next_digit =
-        (char*)RBIGNUM(big)->digits + num_full_digits * SIZEOF_BDIGITS;
-
-    if (big_endian()) {
-        if (extra_bytes > 0) {
-            /* High-order padding comes first in memory on big endian. */
-            for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
-                *extra_digit++ = 0;
-            }
-            for(i = 0; i < extra_bytes; i++) {
-                *extra_digit++ = *buf++;
-            }
-        }
-        for(i = 0; i < num_full_digits; i++) {
-            next_digit -= SIZEOF_BDIGITS;
-            for(j = 0; j < SIZEOF_BDIGITS; j++) {
-                /* Index off next_digit instead of advancing it:
-                   advancing would undo the decrement above and make
-                   every digit overwrite the same slot. */
-                *(next_digit+j) = *buf++;
-            }
-        }
-    } else {
-        if (extra_bytes > 0) {
-            /* Input is MSB first, memory is LSB first: store reversed,
-               then zero the remaining high-order bytes. */
-            for(i = extra_bytes - 1; i >= 0 ; i--) {
-                *(extra_digit+i) = *buf++;
-            }
-            extra_digit += extra_bytes;
-            for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
-                *extra_digit++ = 0;
-            }
-        }
-        for(i = 0; i < num_full_digits; i++) {
-            next_digit -= SIZEOF_BDIGITS;
-            for(j = SIZEOF_BDIGITS - 1; j >= 0; j--) {
-                *(next_digit+j) = *buf++;
-            }
-        }
-    }
-
-    return bignorm(big);
-}
-
VALUE
rb_cstr_to_inum(str, base, badcheck)
const char *str;
--------upatch_pack_c-------------------------------------------------------
--- pack.c 2003-05-28 23:55:32.000000000 +0200
+++ pack.c.old 2003-05-28 11:19:21.000000000 +0200
@@ -376,21 +376,6 @@
static int uv_to_utf8 _((char*,unsigned long));
static unsigned long utf8_to_uv _((char*,long*));
-VALUE
-ensure_bignum(val)
static VALUE
pack_pack(ary, fmt)
VALUE ary, fmt;
@@ -683,33 +668,6 @@
}
break;
-
case 'W':
-
while (len-- > 0) {
-
VALUE from;
-
long len;
-
long num_bytes_to_skip = 0;
-
-
from = ensure_bignum(NEXTFROM);
-
len = RBIGNUM(from)->len * SIZEOF_BDIGITS;
-
{
-
char tmp[len];
-
-
rb_nonneg_bignum_pack(tmp, from);
-
// Skip leading zeroes if positive bignum. Extend
-
// this "strategy" for 'w' so that only negative
-
// bignums (and 0) can have leading zero?
-
if (RBIGNUM(from)->sign) {
-
while (num_bytes_to_skip < (len-1) &&
-
tmp[num_bytes_to_skip] == 0x00) {
-
num_bytes_to_skip++;
-
}
-
}
-
rb_str_buf_cat(res, ((char*)&tmp) + num_bytes_to_skip,
-
len - num_bytes_to_skip);
-
}
-
}
-
break;
-
case 'n':
while (len-- > 0) {
unsigned short s;
@@ -1456,11 +1414,6 @@
}
break;
- case 'W':
-
rb_ary_push(ary, rb_bignum_unpack(s, 1, send - s));
-
s = send;
-
break;
-
case 'n':
PACK_LENGTH_ADJUST(unsigned short,2);
while (len-- > 0) {