GPU Kernel Information Aggregated by Name

kernel_name | kernel_count | kernel_duration (us) | model_duration_percentage | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|
cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 0 | 4.33 | 0.10 | 0 | 96.00 | 0.00 | 21.80 | 0.00 | 0.00 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 4 | 53.67 | 1.25 | 1530368 | 14496042.67 | 8016554.67 | 55.40 | 0.07 | 28.52 | true |
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11 | 237.67 | 5.53 | 94789632 | 59561578.67 | 90082645.33 | 49.93 | 0.63 | 398.83 | true |
void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 4, 4, false, cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float> >(cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>, float, float) | 0 | 25.00 | 0.58 | 8893000 | 16519978.67 | 18709.33 | 28.20 | 0.54 | 355.72 | true |
void gemv2T_kernel_val<long, long, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float> >(cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>, float, float) | 1 | 572.00 | 13.31 | 261611520 | 302736128.00 | 312544.00 | 39.84 | 0.86 | 457.36 | true |
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 2 | 11.00 | 0.26 | 9192 | 40298.67 | 0.00 | 12.20 | 0.23 | 0.84 | true |
void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::identity, 1>, float*, float*>(int, float*, float*) | 1 | 6.00 | 0.14 | 0 | 0.00 | 0.00 | 12.25 | 0.00 | 0.00 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 12 | 188.00 | 4.37 | 67737600 | 37582858.67 | 35104864.00 | 77.56 | 0.93 | 360.30 | true |
void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 14 | 160.00 | 3.72 | 27111424 | 28271456.00 | 43258336.00 | 80.52 | 0.38 | 169.45 | true |
void splitKreduce_kernel<float, float, float>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*) | 0 | 4.00 | 0.09 | 4000 | 160.00 | 0.00 | 6.20 | 25.00 | 1.00 | false |
volta_scudnn_128x64_relu_small_nn_v1 | 0 | 31.67 | 0.74 | 211943424 | 8181.33 | 10622112.00 | 20.50 | 19.94 | 6692.88 | false |
volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 11 | 1843.00 | 42.88 | 16521035776 | 197021045.33 | 67185248.00 | 20.21 | 62.53 | 8964.21 | false |
Showing 1 to 12 of 12 entries