B-C

 view release on metacpan or  search on metacpan

ramblings/blogs-optimizing-2.md  view on Meta::CPAN

          $distance = sqrt($dx * $dx + $dy * $dy + $dz * $dz);
          $mag = $dt / ($distance * $distance * $distance);
          $mm = $mass[$_] * $mag;
          $mm2 = $mass[$j] * $mag;
          $vxs[$_] -= $dx * $mm2;
          $vxs[$j] += $dx * $mm;
          $vys[$_] -= $dy * $mm2;
          $vys[$j] += $dy * $mm;
          $vzs[$_] -= $dz * $mm2;
          $vzs[$j] += $dz * $mm;
        }

        # We're done with planet $_ at this point
        # This could be done in a separate loop, but it's slower
        $xs[$_] += $dt * $vxs[$_];
        $ys[$_] += $dt * $vys[$_];
        $zs[$_] += $dt * $vzs[$_];
      }
    }

    # Compute and return a single scalar for the current state of the system:
    #   + for every body i:        0.5 * mass[i] * (vx^2 + vy^2 + vz^2)
    #   - for every pair i < j:    mass[i] * mass[j] / distance(i, j)
    # Takes no arguments. Reads the file-level arrays @xs/@ys/@zs, @vxs/@vys/@vzs
    # and @mass plus $last (highest body index); none of them are modified here.
    # NOTE(review): there is no explicit gravitational constant in the pair
    # term; presumably it is folded into the mass scaling -- confirm against
    # the definition of SOLAR_MASS.
    sub energy
    {
      # All scratch variables are declared once up front; $i then serves as
      # the outer foreach variable (Perl localizes it to the loop).
      my ($e, $i, $dx, $dy, $dz, $distance);

      $e = 0.0;
      for $i (0..$last) {
        # Velocity-squared term for body $i.
        $e += 0.5 * $mass[$i] *
              ($vxs[$i] * $vxs[$i] + $vys[$i] * $vys[$i] + $vzs[$i] * $vzs[$i]);
        # $_ is the index of the second body; starting at $i + 1 visits each
        # unordered pair exactly once and never pairs a body with itself.
        for ($i + 1..$last) {
          $dx = $xs[$i] - $xs[$_];
          $dy = $ys[$i] - $ys[$_];
          $dz = $zs[$i] - $zs[$_];
          # Euclidean distance between bodies $i and $_.
          $distance = sqrt($dx * $dx + $dy * $dy + $dz * $dz);
          # Pair term is subtracted from the running total.
          $e -= ($mass[$i] * $mass[$_]) / $distance;
        }
      }
      return $e;
    }

    # Sum mass * velocity over every body (indices 0..$last) on each axis, then
    # overwrite the velocity of body 0 with the negated sum divided by
    # SOLAR_MASS. Takes no arguments; works on the file-level arrays
    # @vxs/@vys/@vzs and @mass.
    # NOTE(review): dividing by SOLAR_MASS rather than $mass[0] presumably
    # relies on body 0 having exactly that mass -- confirm against the data.
    sub offset_momentum
    {
      # Per-axis momentum accumulators.
      my $px = 0.0;
      my $py = 0.0;
      my $pz = 0.0;

      for my $body (0..$last) {
        my $m = $mass[$body];
        $px += $vxs[$body] * $m;
        $py += $vys[$body] * $m;
        $pz += $vzs[$body] * $m;
      }

      # Body 0 receives the opposing velocity on each axis.
      $vxs[0] = - $px / SOLAR_MASS;
      $vys[0] = - $py / SOLAR_MASS;
      $vzs[0] = - $pz / SOLAR_MASS;
    }

    # Initial state, stored as parallel arrays: index k in every array below
    # refers to the same body.
    # @ns = ( sun, jupiter, saturn, uranus, neptune )
    # Positions, one array per axis.
    @xs = (0, 4.84143144246472090e+00, 8.34336671824457987e+00, 1.28943695621391310e+01, 1.53796971148509165e+01);
    @ys = (0, -1.16032004402742839e+00, 4.12479856412430479e+00, -1.51111514016986312e+01, -2.59193146099879641e+01);
    @zs = (0, -1.03622044471123109e-01, -4.03523417114321381e-01, -2.23307578892655734e-01, 1.79258772950371181e-01);
    # Velocities, one array per axis; each literal is scaled by DAYS_PER_YEAR
    # (presumably converting per-day values to per-year -- confirm).
    @vxs = map {$_ * DAYS_PER_YEAR}
      (0, 1.66007664274403694e-03, -2.76742510726862411e-03, 2.96460137564761618e-03, 2.68067772490389322e-03);
    @vys = map {$_ * DAYS_PER_YEAR}
      (0, 7.69901118419740425e-03, 4.99852801234917238e-03, 2.37847173959480950e-03, 1.62824170038242295e-03);
    @vzs = map {$_ * DAYS_PER_YEAR}
      (0, -6.90460016972063023e-05, 2.30417297573763929e-05, -2.96589568540237556e-05, -9.51592254519715870e-05);
    # Masses, given as multiples of SOLAR_MASS (the first body is exactly 1).
    @mass = map {$_ * SOLAR_MASS}
      (1, 9.54791938424326609e-04, 2.85885980666130812e-04, 4.36624404335156298e-05, 5.15138902046611451e-05);

    # Highest valid body index; used as the loop bound by the subs.
    $last = @xs - 1;

    # Adjust body 0's velocity from the summed momentum of all bodies, then
    # print the energy of the initial state.
    offset_momentum();
    printf ("%.9f\n", energy());

    # Number of simulation steps, taken from the first command-line argument.
    my $n = $ARGV[0];

    # This does not, in fact, consume N*4 bytes of memory
    # (foreach walks the range 1..$n without building the full list first).
    for (1..$n){
      advance(0.01);
    }

    # Print the energy again after all steps, for comparison with the first value.
    printf ("%.9f\n", energy());


The program is mostly arithmetic and has only three functions: `advance` is called 50,000 times, `energy` twice, and `offset_momentum` once.

The generated C code for some inlined arithmetic looks like:

`$ grep -A50 pp_sub_energy nbody.perl.c`


    static
    CCPP(pp_sub_energy)
    {
        double rnv0, lnv0, d1_e, d2_i, d3_dx, d4_dy, d5_dz, d6_distance, d11_tmp, d13_tmp,
               d15_tmp, d16_tmp, d18_tmp, d19_tmp, d20_tmp, d22_tmp, d31_tmp, d32_tmp, d33_tmp,
               d34_tmp, d35_tmp, d37_tmp, d38_tmp;
    	SV *sv, *src, *dst, *left, *right;
    	PERL_CONTEXT *cx;
    	MAGIC *mg;
    	I32 oldsave, gimme;
    	dSP;
    	/* init_pp: pp_sub_energy */
    	/* load_pad: 39 names, 39 values */
    	/* PL_curpad[1] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[1] iv=i1_e nv=d1_e */
    	/* PL_curpad[2] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[2] iv=i2_i nv=d2_i */
    	/* PL_curpad[3] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[3] iv=i3_dx nv=d3_dx */
    	/* PL_curpad[4] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[4] iv=i4_dy nv=d4_dy */
    	/* PL_curpad[5] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[5] iv=i5_dz nv=d5_dz */
    	/* PL_curpad[6] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[6] iv=i6_distance nv=d6_distance */
    	/* PL_curpad[7] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[7] iv=i7_last nv=d7_last */
    	/* PL_curpad[8] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[8] iv=i8_tmp nv=d8_tmp */
    	/* PL_curpad[9] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[9] iv=i9_tmp nv=d9_tmp */
    	/* PL_curpad[10] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[10] iv=i10_tmp nv=d10_tmp */
    	/* PL_curpad[11] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[11] iv=i11_tmp nv=d11_tmp */
    	/* PL_curpad[12] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[12] iv=i12_tmp nv=d12_tmp */
    	/* PL_curpad[13] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[13] iv=i13_tmp nv=d13_tmp */
    	/* PL_curpad[14] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[14] iv=i14_tmp nv=d14_tmp */
    	/* PL_curpad[15] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[15] iv=i15_tmp nv=d15_tmp */
    	/* PL_curpad[16] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[16] iv=i16_tmp nv=d16_tmp */
    	/* PL_curpad[17] = Padsv type=T_UNKNOWN flags=VALID_SV sv=PL_curpad[17] iv=i17_tmp nv=d17_tmp */
    	/* PL_curpad[18] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[18] iv=i18_tmp nv=d18_tmp */
    	/* PL_curpad[19] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[19] iv=i19_tmp nv=d19_tmp */
    	/* PL_curpad[20] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[20] iv=i20_tmp nv=d20_tmp */
    	/* PL_curpad[21] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[21] iv=i21_tmp nv=d21_tmp */
    	/* PL_curpad[22] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[22] iv=i22_tmp nv=d22_tmp */
    	/* PL_curpad[23] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[23] iv=i23_tmp nv=d23_tmp */
    	/* PL_curpad[24] = Padsv type=T_UNKNOWN flags=VALID_SV|REGISTER|TEMPORARY sv=PL_curpad[24] iv=i24_tmp nv=d24_tmp */



( run in 3.303 seconds using v1.01-cache-2.11-cpan-437f7b0c052 )