KappaCUDA

 view release on metacpan or  search on metacpan

cuda/matrixMul_kernel.ptx  view on Meta::CPAN

	// Fragment of the matrixMul kernel body. Registers %r9, %r19, %r29, %r34
	// and %r36-%r39 are written before this point and are not visible here.
	//
	// %r41 = kernel parameter B + %r34; used as the global-memory load address
	// for B inside the loop below.
	ld.param.u32 	%r40, [__cudaparm_matrixMul_B];
	add.u32 	%r41, %r40, %r34;
	// %r43 = kernel parameter A + %r36; used as the global-memory load address
	// for A inside the loop below.
	ld.param.u32 	%r42, [__cudaparm_matrixMul_A];
	add.u32 	%r43, %r36, %r42;
	// %r46 = kernel parameter A + 4 * (%r39 + %r9). Not read within the visible
	// lines; presumably the end address for the walk over A (4 = size of one
	// f32 element) -- TODO confirm against the loop's exit test.
	add.s32 	%r44, %r39, %r9;
	mul.lo.u32 	%r45, %r44, 4;
	add.u32 	%r46, %r45, %r42;
	// %f1 = 0.0f. Presumably the running sum for the output element -- confirm
	// against the code after the barrier.
	mov.f32 	%f1, 0f00000000;     	// 0
	// %r47 = copy of %r19; its use is outside the visible lines.
	mov.s32 	%r47, %r19;
$Lt_0_2818:
 //<loop> Loop body line 72, nesting depth: 1, estimated iterations: unknown
	// Source line 87: copy one f32 from global memory at [%r43] to shared
	// memory at [%r37].
	.loc	28	87	0
	ld.global.f32 	%f2, [%r43+0];
	st.shared.f32 	[%r37+0], %f2;
	// Source line 88: copy one f32 from global memory at [%r41] to shared
	// memory at [%r38].
	.loc	28	88	0
	ld.global.f32 	%f3, [%r41+0];
	st.shared.f32 	[%r38+0], %f3;
	// Source line 91: barrier 0. Threads wait here, so the two shared-memory
	// stores above precede the shared-memory loads that follow.
	.loc	28	91	0
	bar.sync 	0;
	// Source line 97: first shared-memory read after the barrier, from [%r29].
	.loc	28	97	0
	ld.shared.f32 	%f4, [%r29+0];



( run in 0.622 second using v1.01-cache-2.11-cpan-71847e10f99 )