GPU Kernel Information Aggregated by Name
kernel_name | kernel_count | kernel_duration (us) | model_duration_percentage (%) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 70 | 248.00 | 0.94 | 0 | 98592.00 | 195285.33 | 6.21 | 0.00 | 0.00 | true |
maxwell_scudnn_128x128_relu_interior_nn | 66 | 9280.67 | 35.12 | 9953869824 | 65200565.33 | 31098421.33 | 12.99 | 103.36 | 1072.54 | false |
maxwell_scudnn_128x64_relu_interior_nn | 2 | 240.67 | 0.91 | 237158400 | 7983616.00 | 2720512.00 | 7.88 | 22.16 | 985.43 | true |
maxwell_scudnn_128x64_relu_medium_nn | 0 | 130.00 | 0.49 | 239239168 | 0.00 | 320.00 | 19.00 | 747622.40 | 1840.30 | false |
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 32 | 2759.67 | 10.44 | 4995989504 | 111178112.00 | 35070079.99 | 13.83 | 34.16 | 1810.36 | false |
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 103 | 1053.33 | 3.99 | 98780160 | 23755274.67 | 61402677.33 | 64.33 | 1.16 | 93.78 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 17.00 | 0.06 | 144148 | 12352.00 | 1824.00 | 11.90 | 10.17 | 8.48 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 52.00 | 0.20 | 200704 | 4046453.33 | 1630208.00 | 71.00 | 0.04 | 3.86 | true |
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 32 | 1943.00 | 7.35 | 137551872 | 75970026.66 | 112481760.00 | 58.63 | 0.73 | 70.79 | true |
void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float> >(cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float>, float, float) | 0 | 69.67 | 0.26 | 4495000 | 9712416.00 | 943957.33 | 39.00 | 0.42 | 64.52 | true |
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 0 | 3.00 | 0.01 | 1000 | 6176.00 | 0.00 | 12.40 | 0.16 | 0.33 | true |
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 32 | 589.00 | 2.23 | 8931328 | 57383274.67 | 23980949.33 | 82.84 | 0.11 | 15.16 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 65 | 916.00 | 3.47 | 55820800 | 10330506.67 | 18049898.66 | 74.89 | 1.97 | 60.94 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 66 | 325.33 | 1.23 | 10536960 | 3265909.33 | 5563968.00 | 82.19 | 1.19 | 32.39 | true |
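The derived columns follow directly from the raw counters: arithmetic intensity is kernel_flops divided by total DRAM traffic (read plus write bytes), and arithmetic throughput is kernel_flops divided by kernel_duration. The kernel_memory_bound flag is presumably a roofline test, marking kernels whose arithmetic intensity falls below the device's ridge point (peak FLOPS divided by peak DRAM bandwidth). The sketch below recomputes these values for one row of the table; the peak FLOPS and bandwidth figures are placeholder assumptions, not values taken from this report, so the exact memory-bound threshold may differ from the one used above.

```python
# Minimal sketch (not the profiler's actual code): recompute the derived
# columns from the raw counters and apply a roofline memory-bound test.
# PEAK_GFLOPS and PEAK_DRAM_GBPS are assumed device peaks, not report values.

PEAK_GFLOPS = 6_000.0     # assumed peak FP32 throughput of the GPU (GFlop/s)
PEAK_DRAM_GBPS = 300.0    # assumed peak DRAM bandwidth (GB/s)
RIDGE_POINT = PEAK_GFLOPS / PEAK_DRAM_GBPS  # flops/byte where the roofline turns

def derived_metrics(flops, dram_read_bytes, dram_write_bytes, duration_us):
    """Return (arithmetic_intensity, throughput_gflops, memory_bound)."""
    bytes_moved = dram_read_bytes + dram_write_bytes
    intensity = flops / bytes_moved if bytes_moved else float("inf")
    throughput_gflops = flops / duration_us / 1e3  # flops/us -> GFlop/s
    memory_bound = intensity < RIDGE_POINT
    return intensity, throughput_gflops, memory_bound

# Example: the maxwell_scudnn_128x128_relu_interior_nn row from the table
ai, gflops, bound = derived_metrics(9_953_869_824, 65_200_565.33,
                                    31_098_421.33, 9_280.67)
print(f"intensity={ai:.2f} flops/byte, throughput={gflops:.2f} GFlops, "
      f"memory_bound={bound}")
# -> intensity=103.36 flops/byte, throughput=1072.54 GFlops, memory_bound=False
```

With these placeholder peaks the example row comes out compute-bound, matching the table; rows with low arithmetic intensity (for example the batch-norm and pooling kernels) would be flagged memory-bound under the same test.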