Marshaling with ARGF

Hi –

The Marshal.load function demands an object of type IO. Unfortunately,
ARGF and other IO-like objects (e.g. GzipReader from the Ruby/zlib
bindings) are not instances of IO, even though they export the same
interface. This means that you cannot use Marshal.load with ARGF or
GzipReader.

Would anyone on the Ruby dev team care to modify marshal.c so that objects
like ARGF can be used (i.e. use rb_funcall(obj, rb_intern("read"),
...) rather than hardcoded calls to rb_io_fread, etc.)?

Thanks,

Tom

Hi,

···

At Thu, 17 Oct 2002 03:24:27 +0900, Tom Payne wrote:

Anyone on the Ruby dev team care to modify marshal.c so that objects
like ARGF can be used (i.e. use rb_funcall(obj, rb_intern("read"),
...) rather than hardcoded calls to rb_io_fread, etc.)?

Dirty hack.

Index: marshal.c

RCS file: /cvs/ruby/src/ruby/marshal.c,v
retrieving revision 1.72
diff -u -2 -p -r1.72 marshal.c
--- marshal.c 25 Sep 2002 14:52:37 -0000 1.72
+++ marshal.c 17 Oct 2002 02:31:20 -0000
@@ -77,9 +77,10 @@ shortlen(len, ds)
static ID s_dump, s_load;
static ID s_dump_data, s_load_data, s_alloc;
+static ID s_getc, s_read;

struct dump_arg {
VALUE obj;
FILE *fp;

  • VALUE str;
  • VALUE str, dest;
    st_table *symbol;
    st_table *data;
    @@ -96,10 +97,28 @@ static void w_long _((long, struct dump_

static void
+w_byten(s, n, arg)

  • char *s;
  • int n;
  • struct dump_arg *arg;
    +{
  • if (arg->fp) {
  • fwrite(s, 1, n, arg->fp);
  • }
  • else {
  • VALUE buf = arg->str;
  • rb_str_buf_cat(buf, s, n);
  • if (arg->dest && RSTRING(buf)->len >= BUFSIZ) {
  •   rb_io_write(arg->dest, buf);
    
  •   rb_str_resize(buf, 0);
    
  • }
  • }
    +}

+static void
w_byte(c, arg)
char c;
struct dump_arg *arg;
{

  • if (arg->fp) putc(c, arg->fp);
  • else rb_str_buf_cat(arg->str, &c, 1);
  • w_byten(&c, 1, arg);
    }

@@ -111,10 +130,5 @@ w_bytes(s, n, arg)
{
w_long(n, arg);

  • if (arg->fp) {
  • fwrite(s, 1, n, arg->fp);
  • }
  • else {
  • rb_str_buf_cat(arg->str, s, n);
  • }
  • w_byten(s, n, arg);
    }

@@ -587,4 +601,5 @@ marshal_dump(argc, argv)
else port = a1;
}

  • arg.dest = 0;
    if (port) {
    if (rb_obj_is_kind_of(port, rb_cIO)) {
    @@ -597,5 +612,7 @@ marshal_dump(argc, argv)
    }
    else {
  •   rb_raise(rb_eTypeError, "instance of IO needed");
    
  •   arg.fp = 0;
    
  •   arg.str = rb_str_buf_new(0);
    
  •   arg.dest = port;
    
    }
    }
    @@ -642,4 +659,10 @@ r_byte(arg)
    if (c == EOF) rb_eof_error();
    }
  • else if (!arg->end) {
  • VALUE src = (VALUE)arg->ptr;
  • VALUE v = rb_funcall2(src, s_getc, 0, 0);
  • if (NIL_P(v)) rb_eof_error();
  • c = (unsigned char)FIX2INT(v);
  • }
    else if (arg->ptr < arg->end) {
    c = *(unsigned char*)arg->ptr++;
    @@ -651,16 +674,4 @@ r_byte(arg)
    }

-static unsigned short
-r_short(arg)

  • struct load_arg *arg;
    -{
  • unsigned short x;
  • x = r_byte(arg);
  • x |= r_byte(arg)<<8;
  • return x;
    -}

static void
long_toobig(size)
@@ -729,4 +740,12 @@ r_bytes0(len, arg)
}
}

  • else if (!arg->end) {
  • VALUE src = (VALUE)arg->ptr;
  • VALUE n = LONG2NUM(len);
  • str = rb_funcall2(src, s_read, 1, &n);
  • if (NIL_P(str)) goto too_short;
  • Check_Type(str, T_STRING);
  • if (RSTRING(str)->len != len) goto too_short;
  • }
    else {
    if (arg->ptr + len > arg->end) {
    @@ -934,4 +953,5 @@ r_object0(arg, proc)
    long len;
    BDIGIT *digits;
  •   VALUE data;
    
      NEWOBJ(big, struct RBignum);
    

@@ -939,4 +959,5 @@ r_object0(arg, proc)
big->sign = (r_byte(arg) == '+');
len = r_long(arg);

  •   data = r_bytes0(len * 2, arg);
    

#if SIZEOF_BDIGITS == SIZEOF_SHORT
big->len = len;
@@ -945,21 +966,26 @@ r_object0(arg, proc)
#endif
big->digits = digits = ALLOC_N(BDIGIT, big->len);

  •   while (len > 0) {
    
  •   MEMCPY(digits, RSTRING(data)->ptr, char, len * 2);
    

#if SIZEOF_BDIGITS > SIZEOF_SHORT

  •   MEMZERO((char *)digits + len * 2, char,
    
  •       big->len * sizeof(BDIGIT) - len * 2);
    

+#endif

  •   len = big->len;
    
  •   while (len > 0) {
    
  •   unsigned char *p = (unsigned char *)digits;
      BDIGIT num = 0;
    

+#if SIZEOF_BDIGITS > SIZEOF_SHORT
int shift = 0;
int i;

  •   for (i=0; i<SIZEOF_BDIGITS; i+=2) {
    
  •       int j = r_short(arg);
    
  •       num |= j << shift;
    
  •       shift += BITSPERSHORT;
    
  •       if (--len == 0) break;
    
  •   for (i=0; i<SIZEOF_BDIGITS; i++) {
    
  •       num |= (int)p[i] << shift;
    
  •       shift += 8;
      }
    
  •   *digits++ = num;
    

#else

  •   *digits++ = r_short(arg);
    
  •   len--;
    
  •   num = p[0] | (p[1] << 8);
    

#endif

  •   *digits++ = num;
    
  •   len--;
      }
      v = rb_big_norm((VALUE)big);
    

@@ -1191,5 +1217,8 @@ marshal_load(argc, argv)
}
else {

  • rb_raise(rb_eTypeError, "instance of IO needed");
  • arg.taint = OBJ_TAINTED(port); /* original taintedness */
  • arg.fp = 0;
  • arg.ptr = (char *)port;
  • arg.end = 0;
    }

@@ -1226,4 +1255,7 @@ Init_marshal()
s_load_data = rb_intern("_load_data");
s_alloc = rb_intern("_alloc");

  • s_getc = rb_intern("getc");
  • s_read = rb_intern("read");
  • rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
    rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);


Nobu Nakada

Hi,

Dirty hack.

Oops, I forgot to flush at the end and to propagate taintedness.

--- marshal.c~ Thu Oct 17 11:31:20 2002
+++ marshal.c Thu Oct 17 11:52:57 2002
@@ -109,4 +109,5 @@ w_byten(s, n, arg)
rb_str_buf_cat(buf, s, n);
if (arg->dest && RSTRING(buf)->len >= BUFSIZ) {

  •   if (arg->taint) OBJ_TAINT(buf);
      rb_io_write(arg->dest, buf);
      rb_str_resize(buf, 0);
    

@@ -566,4 +567,8 @@ dump(arg)
{
w_object(arg->obj, arg->arg, arg->limit);

  • if (arg->arg->dest) {
  • rb_io_write(arg->arg->dest, arg->arg->str);
  • rb_str_resize(arg->arg->str, 0);
  • }
    return 0;
    }
    @@ -747,4 +752,5 @@ r_bytes0(len, arg)
    Check_Type(str, T_STRING);
    if (RSTRING(str)->len != len) goto too_short;
  • if (OBJ_TAINTED(str)) arg->taint = Qtrue;
    }
    else {
    @@ -1217,5 +1223,5 @@ marshal_load(argc, argv)
    }
    else {
  • arg.taint = OBJ_TAINTED(port); /* original taintedness */
  • arg.taint = Qfalse;
    arg.fp = 0;
    arg.ptr = (char *)port;
···

At Thu, 17 Oct 2002 11:33:09 +0900, nobu.nokada@softhome.net wrote:


Nobu Nakada

Hi,

···

In message “Re: Marshaling with ARGF” on 02/10/17, nobu.nokada@softhome.net nobu.nokada@softhome.net writes:

At Thu, 17 Oct 2002 11:33:09 +0900, nobu.nokada@softhome.net wrote:

Dirty hack.

Oops, forgot to flush at the end and propagate taintedness.

Commit these fixes, please.

						matz.

Thanks Nobu & Matz.