For performance, write it in C - Part 3, Source code now available

Here's how it goes on my box:

[Latin]$ time ruby latin2.rb > r5

real 0m16.283s
user 0m15.380s
sys 0m0.498s

Which means that your computer is about as crap as mine :slight_smile:

Yes. I was really surprised to see that your timings were longer than
mine :slight_smile: But I am happy with the speed of my computer, though emacs
startup is a bit slow... :slight_smile:

I will add it
to the pages and make it a download. I think I need a table summarising
the timings.

I just realised that it is better to move the permutations on the
row string (the ones that use String#tr) outside the row permutations,
to avoid recalculating the rows each time. The only method I changed is
print_solution. The program runs almost twice as fast now! (0m7.8s on my
computer).

Regards,
Kristof

----------------- latin.rb, version 2 -----------------------

require 'permutation'

# Side length of the Latin square: the first command-line argument,
# or 5 when no argument is given.
$size = (ARGV.shift || 5).to_i
# Largest zero-based row/column index of the square.
MaxVal = $size-1

# Row orderings that print_solution applies to every solution it prints.
# NOTE(review): presumably each entry is an ordering of the row indices
# 0...$size as produced by the third-party `permutation` library --
# confirm against that library's Permutation#value.
RowPermutations = Permutation.new($size).map{|p| p.value}
# The digits 2..$size joined into one string (e.g. "2345" for size 5).
BaseStr = (2..$size).to_a.join
# Strings used as the String#tr replacement set in print_solution.
# NOTE(review): assumes Permutation#project returns a rearrangement of
# BaseStr's characters -- confirm against the library.
StrPermutations = Permutation.new($size-1).map{|p| p.project(BaseStr)}

# Column indices 1..MaxVal; column 0 is excluded. init_columns copies
# this array rather than modifying it.
StartColumns = (1..MaxVal).to_a
# Returns a new array holding the entries of StartColumns with the one at
# index el-1 removed. StartColumns itself is left unmodified.
def init_columns(el)
   remaining = Array.new(StartColumns)
   remaining.delete_at(el - 1)
   remaining
end

# Backtracking search step: tries to place the digit num+1 in +row+ of the
# board +sqr+, then recurses to the following rows and digits.
#
# sqr::     array of row strings; "." marks an empty cell. Cells are
#           written and then restored in place, so the board is unchanged
#           once the call returns.
# num::     zero-based digit index; the character written is (num+1).to_s.
# row::     index of the row to fill next.
# columns:: column indices still available for this digit.
#
# Calls print_solution(sqr) each time the last digit (num == MaxVal) has
# been placed in row MaxVal-1. Only single-character digits are handled,
# so board sizes above 9 are not supported.
def insert(sqr, num, row, columns)
   # Row +num+ already starts with this digit (column 0), so it is skipped.
   # The +return+ is essential: without it the call fell through and also
   # tried the digit in its own row, exploring placements that can never
   # be completed (one column short for the remaining rows).
   return insert(sqr, num, row+1, columns) if (row == num)
   # Built as a string so the cell write works whether character literals
   # are integers (Ruby 1.8) or one-character strings (Ruby 1.9+).
   digit = (num + 1).to_s
   columns.each do |col|
      next if sqr[row][col] != ?.
      sqr[row][col] = digit
      if (row == MaxVal)
         # Digit placed in every row: start the next digit from row 1.
         insert(sqr, num+1, 1, init_columns(num+1))
      elsif (num == MaxVal && row == MaxVal - 1)
         # The last digit skips its own row (MaxVal), so the square is
         # complete once row MaxVal-1 is filled.
         print_solution(sqr)
      else
         insert(sqr, num, row+1, columns - [col])
      end
      # Undo the placement before trying the next column.
      sqr[row][col] = ?.
   end
end

# Prints every variant of the solved board +sqr+: for each entry of
# StrPermutations the digits in BaseStr are substituted via String#tr, and
# for each entry of RowPermutations the substituted rows are written in
# that order on one line, each row followed by ":".
# The substitution is done once per StrPermutations entry, outside the
# row-ordering loop, so rows are not recomputed for every ordering.
def print_solution(sqr)
   StrPermutations.each do |symbols|
      relabelled = sqr.map { |line| line.tr(BaseStr, symbols) }
      RowPermutations.each do |order|
         order.each { |idx| print relabelled[idx], ":" }
         puts
      end
   end
end

# Seed board: the first row is "1" followed by BaseStr; every other row
# starts with its own digit (2..$size) and has "." in all remaining cells.
top_row = "1" + BaseStr
lower_rows = (2..$size).map { |lead| lead.to_s + "." * ($size - 1) }
$square = [top_row] + lower_rows

# Start the search with the first digit (index 0) at row 1.
insert($square, 0, 1, StartColumns)

···

On Thu, 03 Aug 2006 01:08:21 +0900, Peter Hickman wrote:

William James wrote:

Peter Hickman wrote:
> Time for another update.
>
> Isaac Gouy provided a Java implementation based on mine (ie still pre
> computes the tables in Perl) that brought the times down to sub 9 seconds.
>
> real 0m8.966s
> user 0m5.815s
> sys 0m1.488s
>
> But the big news is that William James' revision of his previous Ocaml
> version is now the fastest.
>
> real 0m3.660s
> user 0m1.958s
> sys 0m1.421s
>
> The source code for both are available on the web site for you to examine.

I stole code and ideas from Jon Harrop, so this should be called
the James/Harrop version.

I just noticed that your site doesn't have my last version (the 4th).
Here it is again:

(* Thanks to Jon Harrop for code and ideas. *)
(* compile with:
ocamlopt -unsafe -inline 100 latin-squares.ml -o latin-squares.exe
*)

(* permutation code by Eric C. Cooper *)

(* [distribute elt list] returns every list obtained by inserting [elt]
   at one position of [list], from the front through to the back.
   For the empty list the only result is [[elt]]. *)
let rec distribute elt = function
  | (hd :: tl) as list ->
      (elt :: list) :: List.map (fun x -> hd :: x) (distribute elt tl)
  | [] -> [ [elt] ]

(* [permute list] returns all permutations of [list].
   The empty list has exactly one permutation, the empty list itself, so
   the base case must be [[[]]] and not [[]]: with an empty base case
   every call would return no permutations at all. *)
let rec permute = function
  | x :: rest -> List.flatten (List.map (distribute x) (permute rest))
  | [] -> [ [] ]

(* The symbols that fill each row of the square. *)
let list = [ 1; 2; 3; 4; 5 ]

(* Side length of the square: one cell per symbol. *)
let size = List.length list

(* Every candidate row, i.e. every permutation of [list], as an array so
   that a row can be referred to by its index. *)
let perms = list |> permute |> Array.of_list

(* Number of candidate rows. *)
let n = Array.length perms

(* Boolean matrix indexed by two row indices: compatible.(x).(y) is true
   exactly when perms.(x) and perms.(y) differ at every position, so the
   two rows can appear together in one square. *)
let compatible =
  Array.map
    (fun ex -> Array.map (fun ey -> List.for_all2 (<>) ex ey) perms)
    perms

(* [join list] concatenates the decimal representations of the integers
   in [list], with no separator. *)
let join list = list |> List.map string_of_int |> String.concat ""
(* Printable form of each candidate row, at the same index as in perms. *)
let output_strings = Array.map (fun perm -> join perm) perms

(* For speed, preallocate one buffer the length of a printed line: it is
   filled with colon separators and ends in a newline, and each solution
   overwrites only the digit positions. The buffer is a Bytes value
   because strings are immutable in current OCaml, so String.blit can no
   longer write into a string. *)
let output_line =
  Bytes.of_string (String.make (size*(size+1)-1) ':' ^ "\n")

(* board.(r) is the index into perms of the row currently placed at
   position r of the partial square. *)
let board = Array.make size 0

(* [add_a_row row] extends the partial square held in board.(0 .. row-1)
   by every candidate row that is compatible with all rows placed so far,
   recursing until the square is full. Each completed square is printed
   as one line on standard output. *)
let rec add_a_row row =
  if row = size then begin
    (* Square complete: copy each row into its slot of the line buffer. *)
    for i = 0 to size - 1 do
      Bytes.blit_string
        output_strings.(board.(i)) 0   (* source *)
        output_line (i * (size + 1))   (* dest *)
        size
    done;
    print_bytes output_line
  end else
    for latest = 0 to n - 1 do
      let compatible_slice = compatible.(latest) in
      (* Count how many of the rows already placed, taken from the top,
         are compatible with the candidate; stop at the first clash. *)
      let prev_row = ref 0 in
      while !prev_row < row && compatible_slice.(board.(!prev_row)) do
        incr prev_row
      done;
      (* The candidate is accepted only if no placed row clashed. *)
      if !prev_row = row then begin
        board.(row) <- latest;
        add_a_row (row + 1)
      end
    done
;;

(* Entry point: enumerate and print every square, starting from row 0. *)
let () = add_a_row 0

Isaac Gouy wrote:

Peter Hickman wrote:
  

Time for another update.

Isaac Gouy provided a Java implementation based on mine (ie still pre
computes the tables in Perl) that brought the times down to sub 9 seconds.

real 0m8.966s
user 0m5.815s
sys 0m1.488s
    
sub 9 seconds?
7.3s
"real" is elapsed time, which includes all those other processes that
grabbed CPU after you gave the time command.

This is a good point but in all the posts where I have mentioned the
time taken I have used the real time (which is the best case from 10
runs - except for the first Perl version) all the way back to the first
and fourth versions in Perl (473 and 12 minutes). However it does not
affect the ordering except to push my improved C version ahead of
William James' revised Ocaml version by just 0.029 of a second.

This late in the proceedings it would only confuse the issue to start
saying that the first Perl version ran in 251 minutes when 473 minutes
has been mentioned several times. Besides now that we are getting into
such short times for the Ocaml and C versions the difference between
real and user + sys is sub second and it is becoming harder to say that
one is significantly faster than the other. One more run of William
James' program and it might make up that 0.029 of a second difference.

Perhaps stepping up to a 6 x 6 grid would allow more meaningful timings?

http://peterhi.dyndns.org/write_it_in_c/index.html

I'd be curious to see a python version, as long as we're so far off the ML
topic. This thread is about solving scripting language speed issues by
resorting to a different language, but python seems to benchmark
considerably better than ruby.

Anyone willing to take a crack at it?

···

--
Contribute to RubySpec! @ www.headius.com/rubyspec
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

William Grosso (wgrosso@wgrosso.com)
21/8/2006 18:26:41

-- 2.0 will support the same language as 1.8, but with a different
underlying implementation.

Wrong, AFAIK.
The 1.9 (experimental) line would eventually become 2.0 (stable).
The language WOULD differ from 1.8, hence the major version.

V.

The Java code was broken in many different ways, such that any numbers
generated using this code are badly skewed against Java. You can't run
benchmarks against Java, to prove Java's slow, and then write code that's
obviously crippling it. I'll only fix the blatant mistakes here...others may
do a deeper cleanup of the Java code if they wish. I'll post my code if
requested, but any Java programmer will understand the optimizations as I
list them below. They're pretty obvious.

And I don't doubt that C will probably be faster, even with those
optimizations...but it won't be faster by much and certainly not by orders
of magnitude. Algorithms where Java does especially well are any that
involve memory allocation, which doesn't come into play here but which is
applicable to almost all real-world code. The point of this thread is that
using the underlying platform code--C for Ruby, Java for JRuby--will often
help many algorithms...and this much is true. But don't venture into
comparing C against Java if you're going to make blanket statements using
flawed tests.

First, some notes on benchmarking:

- NEVER include IO when benchmarking a numeric algorithm; IO speeds vary
greatly from system to system and can vary from run to run depending on what
else is happening
- Do not include initialization code in benchmarks, especially in this case
where you're manually tweaking a gigantic array
- If you're building up a large chunk of strings, write to an internal
buffer and then write out at the end; don't write for every little tiny
operation. At the very least, use a buffer per-line, rather than a separate
write for every element on that line.
- Make sure you're actually testing the same thing on all systems; in this
case, the Java code was severely crippled on a number of levels
- I have not changed the algorithm in any way, but an iterative algorithm
would perform a lot better on all platforms.

So I made some mild optimizations:

- You do not need to compare boolean values to "true" or "false"; just use
them directly as the test condition.
- Write strings to an internal buffer or do not write them at all; to
support Unicode across platforms, Java normalizes text to the base
platform's preferred encoding, and so incurs extra overhead for this
benchmark than the other versions. If you want to make this a better test,
have the C version use wide char strings internally and normalize to ASCII
on write.
- I moved the initialization of the Compared array to a separate function
and excluded it from the test. I clear out and reinit the string buffer and
the compared array for each test run. The C code loads a static array into
memory in probably microseconds, so including this initialization for the
Java test totally skews results.
- I had the test benchmark just the call to addRow, since the Java platform
overhead is a fixed cost outside of this test. If you want to figure that
cost in, you're welcome to...I've left the timings as in the original.
- I ran the algorithm six times per test to allow the JVM to optimize it.
Note how quickly the speed improves once HotSpot gets to it.

And one caveat: I don't have perl set up properly on this machine, so I
wasn't able to generate the header or run the C code. When I do I'll post
those numbers for comparison.

Ubuntu Linux 6 (64-bit), current supported kernel (something 2.6.15ish)
Opteron 150, 2.6GHz, 2GB RAM

All Java versions are AMD64.

Java 5, client vm, no string creation/buffering:
headius@opteron:~/latin_in_java$ time java Latin
Took 1082 ms
Took 645 ms
Took 388 ms
Took 385 ms
Took 551 ms
Took 385 ms

real 0m3.667s
user 0m3.436s
sys 0m0.032s

Java 5, client vm, write strings to internal string buffer:
headius@opteron:~/latin_in_java$ time java Latin
Took 631 ms
Took 599 ms
Took 492 ms
Took 496 ms
Took 492 ms
Took 499 ms

real 0m3.340s
user 0m3.080s
sys 0m0.116s

Java 6, client vm, no strings:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 400 ms
Took 395 ms
Took 408 ms
Took 369 ms
Took 367 ms

real 0m2.459s
user 0m2.368s
sys 0m0.032s

Java 6, client vm, write strings to internal buffer:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 531 ms
Took 497 ms
Took 478 ms
Took 486 ms
Took 494 ms

real 0m3.172s
user 0m2.940s
sys 0m0.104s

···

On 8/1/06, Charles O Nutter <headius@headius.com> wrote:

Ok, so there's a bunch of problems with the Java version.

- In addition to the addRow run and the Java startup time you're also
benchmarking over 5200 array modifications to set Compared values to true
- Your variable naming is entirely contrary to every Java coding
convention
published (not a benchmark thing, but it sets off any Java devs warning
flags)
- Almost all of the time spent running is spent building and printing
strings

Benchmarking just the algorithm run itself with no gross string creation
and
printing, I'm getting in the neighborhood of 370ms per invocation once
HotSpot has optimized the code. I'll have more detailed numbers shortly.

--
Contribute to RubySpec! @ Welcome to headius.com
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

--
Contribute to RubySpec! @ Welcome to headius.com
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

Yes, that's one of the biggest problems with the code. The Java version uses
all 16-bit UTF-16 character strings internally and then normalizes to the
platform's preferred encoding (usually ISO-8859 or some variation of it). If
you really want the prints (which you SHOULDN'T because benchmarking a
numeric algorithm and including IO is bogus), then make the C version do the
same amount of work...wide char strings and normalize to ASCII on write.

···

On 8/1/06, Isaac Gouy <igouy@yahoo.com> wrote:

iirc the Java program is shuffling around double byte unicode chars and
the C program is handling single byte chars.

--
Contribute to RubySpec! @ Welcome to headius.com
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

Charles O Nutter wrote:

···

On 8/1/06, Alex Young <alex@blackkettle.org> wrote:

While I certainly appreciate the efforts that are going into this, I
can't help feeling it's all completely irrelevant.

My only purpose in battling these benchmarks is to help dispel the rumors
that "Java is slow," "VMs are slow," and so on. If Ruby does move to a real
optimizing VM, it will be a good thing...all those folks who continue to
think that VMs are inherently bad need to join the 21st century.

... Which is extremely funny, since Common Lisp has had wicked fast virtual machines for the last 15 years (on par with C in performance).

They should catch up with the 20th century first of all. =)

--
  Ola Bini (http://ola-bini.blogspot.com)
  JvYAML, RbYAML, JRuby and Jatha contributor
  System Developer, Karolinska Institutet (http://www.ki.se)
  OLogix Consulting (http://www.ologix.com)

  "Yields falsehood when quined" yields falsehood when quined.

. . . but VMs actually are slow (to start, all else being equal).
There's a trade-off, though, and VMs tend to be faster later on in
execution for extended operations (again, all else being equal). There
are other alternatives than VMs to consider, though, and the specifics
of what one wishes to accomplish should be examined before settling on
the VM (or any other implementation style) as "the answer".

I'm kinda just babbling at this point.

···

On Wed, Aug 02, 2006 at 05:04:26AM +0900, Charles O Nutter wrote:

On 8/1/06, Alex Young <alex@blackkettle.org> wrote:
>
>While I certainly appreciate the efforts that are going into this, I
>can't help feeling it's all completely irrelevant.

My only purpose in battling these benchmarks is to help dispel the rumors
that "Java is slow," "VMs are slow," and so on. If Ruby does move to a real
optimizing VM, it will be a good thing...all those folks who continue to
think that VMs are inherently bad need to join the 21st century.

--
CCD CopyWrite Chad Perrin [ http://ccd.apotheon.org ]
"The first rule of magic is simple. Don't waste your time waving your
hands and hopping when a rock or a club will do." - McCloctnick the Lucid

Charles O Nutter wrote:

Ok, so there's a bunch of problems with the Java version.

- In addition to the addRow run and the Java startup time you're also
benchmarking over 5200 array modifications to set Compared values to true

That was simply because I couldn't define the array when I declared it as I did in C.

- Your variable naming is entirely contrary to every Java coding convention
published (not a benchmark thing, but it sets off any Java devs warning
flags)

And this affects the performance?

···

- Almost all of the time spent running is spent building and printing
strings

Benchmarking just the algorithm run itself with no gross string creation and
printing, I'm getting in the neighborhood of 370ms per invocation once
HotSpot has optimized the code. I'll have more detailed numbers shortly.

William James wrote:

Here's an OCaml version that runs in about 1.5 seconds when
output is redirected to a file on my faster computer (3.2 GHz).
It uses the same algorithm as the C program.
The C version takes 1.961 seconds when writing to /dev/null on
the o.p.'s computer. Since this is my first OCaml program, I'm
certain an expert could improve it.
  
An expert did suggest replacing a couple of lists with arrays.
This should be a bit faster.
  

*This* expert suggests comparing performance between the C version and the Ocaml version on the *same* machine! :slight_smile:

I will add it to the page.

When I get home I'll time it and add it.

I'm beginning to wonder if my computer is slow enough to give meaningful timings as the Ocaml versions seem to be pushing for sub second timings :slight_smile:

showing how it performs on a 2, 4, and 8 grid would show how it scaled...

-a

···

On Sat, 5 Aug 2006, Peter Hickman wrote:

Perhaps stepping up to a 6 x 6 grid would allow more meaningful timings?

--
happiness is not something ready-made. it comes from your own actions.
- h.h. the 14th dali lama

Peter Hickman wrote:

Isaac Gouy wrote:
> Peter Hickman wrote:
>
>> Time for another update.
>>
>> Isaac Gouy provided a Java implementation based on mine (ie still pre
>> computes the tables in Perl) that brought the times down to sub 9 seconds.
>>
>> real 0m8.966s
>> user 0m5.815s
>> sys 0m1.488s
>>
>
> sub 9 seconds?
> 7.3s
> "real" is elapsed time, which includes all those other processes that
> grabbed CPU after you gave the time command.
>
>
>
>
This is a good point but in all the posts where I have mentioned the
time taken I have used the real time (which is the best case from 10
runs - except for the first Perl version) all the way back to the first
and fourth versions in Perl (473 and 12 minutes). However it does not
affect the ordering except to push my improved C version ahead of
William James' revised Ocaml version by just 0.029 of a second.

This late in the proceedings it would only confuse the issue to start
saying that the first Perl version ran in 251 minutes when 473 minutes
has been mentioned several times.

Having repeatedly made a mistake in the past is no reason to continue
making the same mistake in the future!

(Your timing for Perl includes 3 or 4 hours of you surfing the web or
installing OCaml or whatever!)

···

Besides now that we are getting into
such short times for the Ocaml and C versions the difference between
real and user + sys is sub second and it is becoming harder to say that
one is significantly faster than the other. One more run of William
James' program and it might make up that 0.029 of a second difference.

Perhaps stepping up to a 6 x 6 grid would allow more meaningful timings?

Oops. Sorry. Meant to type

-- 2.0 will support the same language as 1.9, but with a different
    underlying implementation.

Bill

vshepelev@imho.com.ua wrote:

···

William Grosso (wgrosso@wgrosso.com)
21/8/2006 18:26:41

-- 2.0 will support the same language as 1.8, but with a different underlying implementation.

Wrong, AFAIK. The 1.9 (experimental) line would eventually become 2.0 (stable).
The language WOULD differ from 1.8, hence the major version.

V.

And for the record, here are the single-run timings for Java 6 (rather than
the same test six times in process):

With internal string buffer:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 617 ms

real 0m0.754s
user 0m0.664s
sys 0m0.044s

Without strings at all:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 368 ms

real 0m0.494s
user 0m0.420s
sys 0m0.024s

···

On 8/1/06, Charles O Nutter <headius@headius.com> wrote:

The Java code was broken in many different ways, such that any numbers
generated using this code are badly skewed against Java. You can't run
benchmarks against Java, to prove Java's slow, and then write code that's
obviously crippling it. I'll only fix the blatant mistakes here...others
may
do a deeper cleanup of the Java code if they wish. I'll post my code if
requested, but any Java programmer will understand the optimizations as I
list them below. They're pretty obvious.

And I don't doubt that C will probably be faster, even with those
optimizations...but it won't be faster by much and certainly not by orders
of magnitude. Algorithms where Java does especially well are any that
involve memory allocation, which doesn't come into play here but which is
applicable to almost all real-world code. The point of this thread is that
using the underlying platform code--C for Ruby, Java for JRuby--will often
help many algorithms...and this much is true. But don't venture into
comparing C against Java if you're going to make blanket statements using
flawed tests.

First, some notes on benchmarking:

- NEVER include IO when benchmarking a numeric algorithm; IO speeds vary
greatly from system to system and can vary from run to run depending on
what
else is happening
- Do not include initialization code in benchmarks, especially in this
case
where you're manually tweaking a gigantic array
- If you're building up a large chunk of strings, write to an internal
buffer and then write out at the end; don't write for every little tiny
operation. At the very least, use a buffer per-line, rather than a
separate
write for every element on that line.
- Make sure you're actually testing the same thing on all systems; in this
case, the Java code was severely crippled on a number of levels
- I have not changed the algorithm in any way, but an iterative algorithm
would perform a lot better on all platforms.

So I made some mild optimizations:

- You do not need to compare boolean values to "true" or "false"; just use
them directly as the test condition.
- Write strings to an internal buffer or do not write them at all; to
support Unicode across platforms, Java normalizes text to the base
platform's preferred encoding, and so incurs extra overhead for this
benchmark than the other versions. If you want to make this a better test,
have the C version use wide char strings internally and normalize to ASCII
on write.
- I moved the initialization of the Compared array to a separate function
and excluded it from the test. I clear out and reinit the string buffer
and
the compared array for each test run. The C code loads a static array into
memory in probably microseconds, so including this initialization for the
Java test totally skews results.
- I had the test benchmark just the call to addRow, since the Java
platform
overhead is a fixed cost outside of this test. If you want to figure that
cost in, you're welcome to...I've left the timings as in the original.
- I ran the algorithm six times per test to allow the JVM to optimize it.
Note how quickly the speed improves once HotSpot gets to it.

And one caveat: I don't have perl set up properly on this machine, so I
wasn't able to generate the header or run the C code. When I do I'll post
those numbers for comparison.

Ubuntu Linux 6 (64-bit), current supported kernel (something 2.6.15ish)
Opteron 150, 2.6GHz, 2GB RAM

All Java versions are AMD64.

Java 5, client vm, no string creation/buffering:
headius@opteron:~/latin_in_java$ time java Latin
Took 1082 ms
Took 645 ms
Took 388 ms
Took 385 ms
Took 551 ms
Took 385 ms

real 0m3.667s
user 0m3.436s
sys 0m0.032s

Java 5, client vm, write strings to internal string buffer:
headius@opteron:~/latin_in_java$ time java Latin
Took 631 ms
Took 599 ms
Took 492 ms
Took 496 ms
Took 492 ms
Took 499 ms

real 0m3.340s
user 0m3.080s
sys 0m0.116s

Java 6, client vm, no strings:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 400 ms
Took 395 ms
Took 408 ms
Took 369 ms
Took 367 ms

real 0m2.459s
user 0m2.368s
sys 0m0.032s

Java 6, client vm, write strings to internal buffer:
headius@opteron:~/latin_in_java$ time /usr/lib/jvm/jdk1.6.0/jre/bin/java
Latin
Took 531 ms
Took 497 ms
Took 478 ms
Took 486 ms
Took 494 ms

real 0m3.172s
user 0m2.940s
sys 0m0.104s

On 8/1/06, Charles O Nutter <headius@headius.com> wrote:
>
> Ok, so there's a bunch of problems with the Java version.
>
> - In addition to the addRow run and the Java startup time you're also
> benchmarking over 5200 array modifications to set Compared values to
true
> - Your variable naming is entirely contrary to every Java coding
> convention
> published (not a benchmark thing, but it sets off any Java devs warning
> flags)
> - Almost all of the time spent running is spent building and printing
> strings
>
> Benchmarking just the algorithm run itself with no gross string creation
> and
> printing, I'm getting in the neighborhood of 370ms per invocation once
> HotSpot has optimized the code. I'll have more detailed numbers shortly.
>
> --
> Contribute to RubySpec! @ Welcome to headius.com
> Charles Oliver Nutter @ headius.blogspot.com
> Ruby User @ ruby.mn
> JRuby Developer @ www.jruby.org
> Application Architect @ www.ventera.com
>

--
Contribute to RubySpec! @ Welcome to headius.com
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

--
Contribute to RubySpec! @ Welcome to headius.com
Charles Oliver Nutter @ headius.blogspot.com
Ruby User @ ruby.mn
JRuby Developer @ www.jruby.org
Application Architect @ www.ventera.com

On Wed, 02 Aug 2006, Charles O Nutter defenestrated me:

First, some notes on benchmarking:

- NEVER include IO when benchmarking a numeric algorithm; IO speeds vary
greatly from system to system and can vary from run to run depending on what
else is happening

  IO can be noisy. I say avoid it for any benchmarking since it can
greatly influence timings. Usually the IO is not what you want to
measure so why add this variable into things?
  

- If you're building up a large chunk of strings, write to an internal
buffer and then write out at the end; don't write for every little tiny
operation. At the very least, use a buffer per-line, rather than a separate
write for every element on that line.

  I just informally thought I would measure a few things involving IO.
I only changed the printing and nothing else:

Unaltered test: ~3.8s
Use of StringBuffer to print out a single row: ~2.1s
Use of StringBuffer for entire run: ~1.5s
Preallocated StringBuffer for entire run: ~1.4s

  As you can see IO can have a large effect on clock time. I demonstrated
that in Java's case the IO in the benchmark accounted for over 2/3 of the
wall clock time (which is interesting because a decent chunk that is
left over is JVM startup overhead).

Some stack allocated space will likely improve the C run as well (and in
this case you can output it in a single write system call).

-Tom

···

--
+ http://www.tc.umn.edu/~enebo +---- mailto:enebo@acm.org ----+

Thomas E Enebo, Protagonist | "Luck favors the prepared |
                             | mind." -Louis Pasteur |

Peter Hickman wrote:

Charles O Nutter wrote:

- Your variable naming is entirely contrary to every Java coding convention
published (not a benchmark thing, but it sets off any Java devs warning
flags)

And this affects the performance?

The point Charles made with saying "but it sets off any Java devs warning flags" is that your Java coding conventions differ so much from regular conventions that your Java coding capacity is put into doubt. In plain speak; are you a good enough Java programmer to write an honest benchmark version for Java?

···

--
  Ola Bini (http://ola-bini.blogspot.com)
  JvYAML, RbYAML, JRuby and Jatha contributor
  System Developer, Karolinska Institutet (http://www.ki.se)
  OLogix Consulting (http://www.ologix.com)

  "Yields falsehood when quined" yields falsehood when quined.