GPU Kernel Information Aggregated by Name

kernel_name | kernel_count | kernel_duration (us) | model_duration_percentage | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|
cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 123 | 629.49 | 0.86 | 0 | 0.00 | 0.00 | 10.80 | 0.00 | 0.00 | true |
maxwell_scudnn_128x128_relu_interior_nn | 7 | 647.17 | 0.88 | 229834752 | 0.00 | 0.00 | 12.42 | 0.00 | 355.14 | true |
maxwell_scudnn_128x32_relu_interior_nn | 0 | 44.33 | 0.06 | 13647872 | 0.00 | 0.00 | 25.00 | 0.00 | 307.85 | true |
maxwell_scudnn_128x32_relu_small_nn | 112 | 1939.37 | 2.65 | 323174400 | 0.00 | 0.00 | 12.02 | 0.00 | 166.64 | true |
maxwell_scudnn_128x64_relu_interior_nn | 1 | 78.67 | 0.11 | 40140800 | 0.00 | 0.00 | 9.75 | 0.00 | 510.26 | true |
maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148t_nt | 1983 | 29286.61 | 39.98 | 1832910848 | 0.00 | 0.00 | 12.79 | 0.00 | 62.59 | true |
void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 26 | 252.17 | 0.34 | 2521344 | 0.00 | 0.00 | 39.10 | 0.00 | 10.00 | true |
void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 26 | 307.67 | 0.42 | 15666432 | 0.00 | 0.00 | 51.85 | 0.00 | 50.92 | true |
void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 383 | 4452.74 | 6.08 | 46174976 | 0.00 | 0.00 | 3.10 | 0.00 | 10.37 | true |
void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 1 | 226.17 | 0.31 | 50381824 | 0.00 | 0.00 | 6.20 | 0.00 | 222.76 | true |
void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 0 | 27.00 | 0.04 | 32287 | 0.00 | 0.00 | 10.90 | 0.00 | 1.20 | true |
void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 1983 | 15478.77 | 21.13 | 29458432 | 0.00 | 0.00 | 6.20 | 0.00 | 1.90 | true |
void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float> >(cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float>, float, float) | 0 | 33.83 | 0.05 | 1135000 | 0.00 | 0.00 | 48.10 | 0.00 | 33.55 | true |
void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 383 | 4068.74 | 5.55 | 0 | 0.00 | 0.00 | 9.20 | 0.00 | 0.00 | true |
void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 0 | 5.50 | 0.01 | 1000 | 0.00 | 0.00 | 12.40 | 0.00 | 0.18 | true |
Showing 1 to 15 of 15 entries