GPU Kernel Information Aggregated by Name
kernel_name | kernel_count | kernel_duration (us) | model_duration_percentage | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 107 | 326.67 | 0.40 | 0 | 7296.00 | 219189.31 | 6.94 | 0.00 | 0.00 | true |
maxwell_scudnn_128x128_relu_interior_nn | 7 | 463.33 | 0.57 | 229834752 | 3034677.33 | 590101.33 | 12.50 | 63.41 | 496.05 | false |
maxwell_scudnn_128x32_relu_interior_nn | 0 | 25.00 | 0.03 | 13647872 | 13813.33 | 1721482.67 | 29.40 | 7.86 | 545.91 | true |
maxwell_scudnn_128x32_relu_small_nn | 96 | 1113.50 | 1.38 | 156188672 | 193866.66 | 933813.34 | 7.33 | 138.50 | 140.27 | false |
maxwell_scudnn_128x64_relu_interior_nn | 1 | 61.00 | 0.08 | 40140800 | 52426.67 | 1375584.00 | 7.85 | 28.11 | 658.05 | true |
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148t_nt | 1999 | 22592.00 | 27.99 | 2321022976 | 0.00 | 2001098.70 | 12.91 | 1159.87 | 102.74 | false |
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 26 | 182.67 | 0.23 | 15666432 | 961696.00 | 6563690.67 | 42.77 | 2.08 | 85.76 | true |
void cudnn::detail::explicit_convolve_sgemm<float, int, 1024, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 1 | 97.00 | 0.12 | 50381824 | 3160213.33 | 356245.33 | 5.00 | 14.33 | 519.40 | true |
void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 383 | 2688.00 | 3.33 | 46174976 | 24576.00 | 7936.00 | 3.10 | 1420.24 | 17.18 | false |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 18.00 | 0.02 | 32287 | 12096.00 | 1696.00 | 10.90 | 2.34 | 1.79 | true |
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 1999 | 10000.00 | 12.39 | 29696000 | 128000.00 | 17194.42 | 6.20 | 204.53 | 2.97 | false |
void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float> >(cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float>, float, float) | 0 | 23.33 | 0.03 | 1135000 | 2444256.00 | 376672.00 | 38.70 | 0.40 | 48.64 | true |
void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 385 | 2803.72 | 3.47 | 0 | 106.67 | 2538.62 | 8.71 | 0.00 | 0.00 | true |
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 0 | 3.00 | 0.00 | 1000 | 5877.33 | 554.67 | 12.40 | 0.16 | 0.33 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 26 | 126.67 | 0.16 | 5042688 | 809472.00 | 2755434.67 | 81.84 | 1.41 | 39.81 | true |
Showing 1 to 15 of 15 entries