Navigation :

GPU Kernel Information Aggregated by Name

Search:

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound

kernel_name	kernel_count	kernel_duration (us)	model_duration_percentage	kernel_flops	kernel_dram_read_bytes	kernel_dram_write_bytes	kernel_achieved_occupancy (%)	kernel_arithmetic_intensity (flops/byte)	kernel_arithmetic_throughput (GFlops)	kernel_memory_bound
cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams)	37	129.11	1.74	0	3562.67	1301610.66	6.32	0.00	0.00	true
void conv2d_c1_k1_nchw_hw_packed_kernel<float, float, 3>(cudnnTensorStruct, float const, cudnnFilterStruct, float const, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, int)	1	152.33	2.05	202309632	36479082.67	38658933.33	81.64	2.69	1328.07	true
void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const, cudnnFilterStruct, float const, cudnnConvolutionStruct, cudnnTensorStruct, float, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const, float const*, cudnnActivationStruct)	8	280.33	3.77	203083776	70890698.67	51358826.67	85.23	1.66	724.44	true
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnTensorStruct, float const, float const, float const, float const, float)	26	1016.33	13.68	501325824	298807925.33	326414496.00	73.52	0.80	493.27	true
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const, cudnnTensorStruct, float, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor)	0	9.00	0.12	1051904	1898.67	322634.67	16.90	3.24	116.88	true
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>)	0	4.00	0.05	64000	4501.33	310154.67	33.80	0.20	16.00	true
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float, cudnnTensorStruct, float const, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool)	26	868.33	11.69	161366016	280105141.33	313039477.33	92.91	0.27	185.83	true
volta_scudnn_128x128_relu_interior_nn_v1	0	37.67	0.51	208732160	278058.67	6857685.33	15.40	29.25	5541.51	false
volta_scudnn_128x32_relu_interior_nn_v1	2	260.00	3.50	1104674816	64465642.67	102264426.67	23.00	6.63	4248.75	true
volta_scudnn_128x32_relu_small_nn_v1	24	1741.05	23.43	9505341440	115711296.00	66878186.67	23.13	52.06	5459.55	false
volta_scudnn_128x64_relu_interior_nn_v1	8	455.66	6.13	3333095424	25059754.67	69077077.33	13.93	35.41	7314.79	false
volta_sgemm_64x32_sliced1x4_tn	0	15.67	0.21	34603008	1027242.67	369877.33	12.20	24.77	2208.66	false

Showing 1 to 12 of 12 entries

Download as CSV