Navigation :

GPU Kernel Information Aggregated by Name

Search:

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams)	232	492.69	0.31	0	0.00	0.00	0.00	0.00	0.00	true
maxwell_scudnn_128x128_relu_interior_nn	5	134.00	0.08	0	0.00	0.00	0.00	0.00	0.00	true
maxwell_scudnn_128x32_relu_interior_nn	10	431.17	0.27	0	0.00	0.00	0.00	0.00	0.00	true
maxwell_scudnn_128x32_relu_small_nn	204	1984.70	1.26	0	0.00	0.00	0.00	0.00	0.00	true
maxwell_scudnn_128x64_relu_interior_nn	10	416.50	0.26	0	0.00	0.00	0.00	0.00	0.00	true
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148t_nt	4571	45720.00	28.94	0	0.00	0.00	0.00	0.00	0.00	true
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnTensorStruct, float const, float const, float const, float const, float)	52	266.67	0.17	0	0.00	0.00	0.00	0.00	0.00	true
void cudnn::detail::explicit_convolve_sgemm<float, int, 1024, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const, int, float const, int, float, kernel_conv_params, int, int, float, float, int, float, float*)	578	3504.00	2.22	0	0.00	0.00	0.00	0.00	0.00	true
void cudnn::detail::implicit_convolve_sgemm<float, float, 128, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const, int, float, float, kernel_conv_params, int, float, float, int, float, float*, int, int)	4	658.00	0.42	0	0.00	0.00	0.00	0.00	0.00	true
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor)	0	0.00	0.00	0	0.00	0.00	NaN	0.00	NaN	true
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>)	4571	18290.00	11.58	0	0.00	0.00	0.00	0.00	0.00	true
void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const, float, int)	578	3033.00	1.92	0	0.00	0.00	0.00	0.00	0.00	true
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::clip, float, float, float, float>(int, float, float, float, float)	35	138.00	0.09	0	0.00	0.00	0.00	0.00	0.00	true
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float, float, float>(int, float, float, float)	9	21.00	0.01	0	0.00	0.00	0.00	0.00	0.00	true

Showing 1 to 14 of 14 entries

Download as CSV