GPU Kernel Information Aggregated by Name
kernel_name | kernel_count | kernel_duration (us) | model_duration_percentage | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 18 | 46.00 | 0.09 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
maxwell_scudnn_128x128_relu_interior_nn | 17 | 1541.33 | 2.86 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
maxwell_scudnn_128x64_relu_medium_nn | 0 | 62.00 | 0.11 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 767 | 9472.00 | 17.56 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148t_nt | 191 | 1920.00 | 3.56 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 103 | 659.00 | 1.22 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::explicit_convolve_sgemm<float, int, 1024, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 33 | 706.00 | 1.31 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 66 | 1777.03 | 3.29 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 46 | 5782.00 | 10.72 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 13.00 | 0.02 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 19.00 | 0.04 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 959 | 3840.00 | 7.12 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float> >(cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float>, float, float) | 0 | 27.00 | 0.05 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 100 | 4210.02 | 7.80 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 0 | 3.00 | 0.01 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 32 | 217.33 | 0.40 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 66 | 299.67 | 0.56 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
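The last three columns are derived from the raw counters rather than measured directly. The sketch below, which is a minimal illustration and not the profiler's actual implementation, shows how `kernel_arithmetic_intensity`, `kernel_arithmetic_throughput`, and `kernel_memory_bound` would conventionally be computed from `kernel_flops`, `kernel_dram_read_bytes`, `kernel_dram_write_bytes`, and `kernel_duration`, assuming a roofline-style classification against hypothetical device peak figures (the peak FLOPS and bandwidth values used here are placeholders, not the values used to produce the table).

```python
# Minimal sketch of the derived columns above (not the profiler's real code).
# PEAK_GFLOPS and PEAK_BANDWIDTH_GBS are hypothetical placeholders; substitute
# your GPU's datasheet values to get a meaningful ridge point.

from dataclasses import dataclass

PEAK_GFLOPS = 6_000.0        # assumed peak FP32 throughput, GFLOP/s (hypothetical)
PEAK_BANDWIDTH_GBS = 300.0   # assumed peak DRAM bandwidth, GB/s (hypothetical)
MACHINE_BALANCE = PEAK_GFLOPS / PEAK_BANDWIDTH_GBS  # ridge point, flops/byte


@dataclass
class KernelAggregate:
    name: str
    count: int
    duration_us: float        # summed kernel duration in microseconds
    flops: float              # summed floating-point operations
    dram_read_bytes: float
    dram_write_bytes: float

    @property
    def arithmetic_intensity(self) -> float:
        """flops per byte of DRAM traffic (kernel_arithmetic_intensity column)."""
        traffic = self.dram_read_bytes + self.dram_write_bytes
        return self.flops / traffic if traffic > 0 else 0.0

    @property
    def arithmetic_throughput_gflops(self) -> float:
        """GFLOP/s over the summed duration (kernel_arithmetic_throughput column)."""
        return self.flops / (self.duration_us * 1e3) if self.duration_us > 0 else 0.0

    @property
    def memory_bound(self) -> bool:
        """Roofline-style test: below the ridge point the kernel is memory bound."""
        return self.arithmetic_intensity < MACHINE_BALANCE


# Example using one row from the table above.
k = KernelAggregate("maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt",
                    count=767, duration_us=9472.0, flops=0.0,
                    dram_read_bytes=0.0, dram_write_bytes=0.0)
print(k.arithmetic_intensity, k.arithmetic_throughput_gflops, k.memory_bound)
```

With `kernel_flops` and both DRAM byte counters at zero, as in every row above, the derived intensity and throughput come out as 0.00 and the memory-bound test trivially reports true, which matches the table.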