Navigation :

GPU Kernel Information Aggregated by Name

Search:

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams)	211	590.70	0.36	0	0.00	0.00	7.54	0.00	0.00	true
maxwell_scudnn_128x128_relu_interior_nn	8	560.98	0.34	0	0.00	0.00	12.91	0.00	0.00	true
maxwell_scudnn_128x32_relu_interior_nn	10	392.89	0.24	0	0.00	0.00	11.80	0.00	0.00	true
maxwell_scudnn_128x32_relu_small_nn	180	1471.09	0.90	0	0.00	0.00	8.10	0.00	0.00	true
maxwell_scudnn_128x64_relu_interior_nn	10	305.78	0.19	0	0.00	0.00	6.48	0.00	0.00	true
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148t_nt	4595	33600.52	20.63	0	0.00	0.00	12.96	0.00	0.00	true
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnTensorStruct, float const, float const, float const, float const, float)	52	438.72	0.27	0	0.00	0.00	58.62	0.00	0.00	true
void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const, int, float const, int, float, kernel_conv_params, int, int, float, float, int, float, float*)	576	3609.57	2.22	0	0.00	0.00	3.23	0.00	0.00	true
void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const, int, float, float, kernel_conv_params, int, float, float, int, float, float*, int, int)	3	146.55	0.09	0	0.00	0.00	6.07	0.00	0.00	true
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor)	0	16.00	0.01	0	0.00	0.00	12.20	0.00	0.00	true
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>)	4595	18384.00	11.29	0	0.00	0.00	6.20	0.00	0.00	true
void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const, float, int)	576	3093.00	1.90	0	0.00	0.00	8.35	0.00	0.00	true
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::clip, float, float, float, float>(int, float, float, float, float)	35	266.47	0.16	0	0.00	0.00	77.83	0.00	0.00	true
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float, float, float>(int, float, float, float)	9	32.00	0.02	0	0.00	0.00	39.27	0.00	0.00	true

Showing 1 to 14 of 14 entries

Download as CSV