Navigation :

GPU Kernel Information Aggregated by Name

Search:

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound
cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams)	13	48.66	0.35	0	1941.33	422656.00	5.97	0.00	0.00	true
void conv2d_c1_k1_nchw_hw_packed_kernel<float, float, 3>(cudnnTensorStruct, float const, cudnnFilterStruct, float const, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, int)	2	709.33	5.11	1011548160	193431264.00	193015253.33	69.99	2.62	1426.06	true
void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const, cudnnFilterStruct, float const, cudnnConvolutionStruct, cudnnTensorStruct, float, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const, float const*, cudnnActivationStruct)	9	1017.00	7.33	790235136	395113898.67	185685248.00	85.73	1.36	777.03	true
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnTensorStruct, float const, float const, float const, float const, float)	26	2802.34	20.19	1503977472	965409066.67	981786688.00	80.14	0.77	536.69	true
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor)	0	30.00	0.22	3136512	14867285.33	3142613.33	50.40	0.17	104.55	true
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>)	0	4.00	0.03	64000	4736.00	0.00	34.40	13.51	16.00	true
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float, cudnnTensorStruct, float const, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool)	26	2585.67	18.62	484098048	960852224.00	964906890.66	96.08	0.25	187.22	true
volta_scudnn_128x32_relu_interior_nn_v1	0	279.00	2.01	1888223232	115703850.67	77040053.33	23.90	9.80	6767.83	true
volta_scudnn_128x32_relu_small_nn_v1	0	201.00	1.45	1490026496	38772906.67	79432682.67	24.00	12.61	7413.07	true
volta_scudnn_128x64_relu_interior_nn_v1	11	3539.33	25.49	39217004544	987901994.67	456906698.67	22.80	27.14	11080.34	false
volta_sgemm_64x32_sliced1x4_tn	0	23.67	0.17	101711872	3072949.33	4010.67	12.30	33.06	4297.62	false

Showing 1 to 11 of 11 entries

Download as CSV