KappaCUDA
view release on metacpan or search on metacpan
cuda/matrixMul_kernel.ptx view on Meta::CPAN
// Loop set-up for the loop at $Lt_0_2818. This is a fragment: the kernel
// entry and the definitions of %r9, %r19, %r34, %r36 and %r39 are above
// this excerpt.
//
// %r41 = kernel parameter B + %r34. It is used inside the loop as the
// address of a global f32 load.
ld.param.u32 %r40, [__cudaparm_matrixMul_B];
add.u32 %r41, %r40, %r34;
// %r43 = kernel parameter A + %r36. It is used inside the loop as the
// address of a global f32 load.
ld.param.u32 %r42, [__cudaparm_matrixMul_A];
add.u32 %r43, %r36, %r42;
// %r46 = kernel parameter A + 4 * (%r39 + %r9).
// NOTE(review): the factor 4 matches the size of an f32 element, and %r46
// looks like an end address within A for the loop. It is not used in the
// visible lines, so confirm against the rest of the kernel.
add.s32 %r44, %r39, %r9;
mul.lo.u32 %r45, %r44, 4;
add.u32 %r46, %r45, %r42;
// %f1 starts at 0.0f.
// NOTE(review): presumably the running sum for the output element; its
// uses are below this excerpt, so confirm.
mov.f32 %f1, 0f00000000; // 0
// %r47 = copy of %r19. Its use is not visible in this excerpt.
mov.s32 %r47, %r19;
// Loop head. Only the first part of the body is visible in this excerpt;
// the remainder, including the back-branch, is below it.
// Each .loc directive names file index 28 and the source line that the
// following instructions were generated from.
$Lt_0_2818:
//<loop> Loop body line 72, nesting depth: 1, estimated iterations: unknown
.loc 28 87 0
// Copy one f32 from global memory at [%r43] (derived from parameter A before
// the loop) into shared memory at [%r37].
ld.global.f32 %f2, [%r43+0];
st.shared.f32 [%r37+0], %f2;
.loc 28 88 0
// Copy one f32 from global memory at [%r41] (derived from parameter B before
// the loop) into shared memory at [%r38].
ld.global.f32 %f3, [%r41+0];
st.shared.f32 [%r38+0], %f3;
.loc 28 91 0
// Barrier 0: every thread of the CTA waits here, so the shared-memory stores
// above are complete before any thread performs the shared loads that follow.
bar.sync 0;
.loc 28 97 0
// First shared-memory read after the barrier: f32 at [%r29] into %f4.
// %r29 is defined above this excerpt; the consumer of %f4 is below it.
ld.shared.f32 %f4, [%r29+0];
( run in 0.622 second using v1.01-cache-2.11-cpan-71847e10f99 )