GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | resnetv13_conv0_fwd | Convolution | [1,3,224,224] | 21320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_medium_nn_v1 | 42.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
0 | resnetv13_conv0_fwd | Convolution | [1,3,224,224] | 21320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
1 | resnetv13_batchnorm0_fwd | BatchNorm | [1,64,112,112] | 207 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
2 | resnetv13_relu0_fwd | Activation | [1,64,112,112] | 154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 7.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
3 | resnetv13_pool0_fwd | Pooling | [1,64,112,112] | 2839.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 9.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | resnetv13_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1922.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | resnetv13_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1922.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
5 | resnetv13_stage1_batchnorm0_fwd | BatchNorm | [1,64,56,56] | 78 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | resnetv13_stage1_relu0_fwd | Activation | [1,64,56,56] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | resnetv13_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14769.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 26.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | resnetv13_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14769.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
8 | resnetv13_stage1_batchnorm1_fwd | BatchNorm | [1,64,56,56] | 110.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
9 | resnetv13_stage1_relu1_fwd | Activation | [1,64,56,56] | 40.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
11 | resnetv13_stage1_batchnorm2_fwd | BatchNorm | [1,256,56,56] | 455.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | resnetv13_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | resnetv13_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | resnetv13_stage1_batchnorm3_fwd | BatchNorm | [1,256,56,56] | 629.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | add_resnetv13_stage1_activation0 | add_relu | [1,256,56,56] | 189 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 15.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | resnetv13_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6343 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 27.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | resnetv13_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6343 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
16 | resnetv13_stage1_batchnorm4_fwd | BatchNorm | [1,64,56,56] | 29.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
17 | resnetv13_stage1_relu2_fwd | Activation | [1,64,56,56] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | resnetv13_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14156.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | resnetv13_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14156.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
19 | resnetv13_stage1_batchnorm5_fwd | BatchNorm | [1,64,56,56] | 98.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
20 | resnetv13_stage1_relu3_fwd | Activation | [1,64,56,56] | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
22 | resnetv13_stage1_batchnorm6_fwd | BatchNorm | [1,256,56,56] | 130.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
23 | add_resnetv13_stage1_activation1 | add_relu | [1,256,56,56] | 186 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv13_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6187.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 27.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv13_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6187.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
25 | resnetv13_stage1_batchnorm7_fwd | BatchNorm | [1,64,56,56] | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
26 | resnetv13_stage1_relu4_fwd | Activation | [1,64,56,56] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | resnetv13_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14295.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | resnetv13_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14295.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | resnetv13_stage1_batchnorm8_fwd | BatchNorm | [1,64,56,56] | 100.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | resnetv13_stage1_relu5_fwd | Activation | [1,64,56,56] | 38.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | resnetv13_stage1_batchnorm9_fwd | BatchNorm | [1,256,56,56] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
32 | add_resnetv13_stage1_activation2 | add_relu | [1,256,56,56] | 193.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 14.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | resnetv13_stage2_conv0_fwd | Convolution | [1,256,56,56] | 3757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 25.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | resnetv13_stage2_conv0_fwd | Convolution | [1,256,56,56] | 3757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
34 | resnetv13_stage2_batchnorm0_fwd | BatchNorm | [1,128,28,28] | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
35 | resnetv13_stage2_relu0_fwd | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | resnetv13_stage2_conv1_fwd | Convolution | [1,128,28,28] | 12862.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | resnetv13_stage2_conv1_fwd | Convolution | [1,128,28,28] | 12862.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
37 | resnetv13_stage2_batchnorm1_fwd | BatchNorm | [1,128,28,28] | 83.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
38 | resnetv13_stage2_relu1_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
40 | resnetv13_stage2_batchnorm2_fwd | BatchNorm | [1,512,28,28] | 82.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | resnetv13_stage2_conv3_fwd | Convolution | [1,256,56,56] | 10510.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 38.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | resnetv13_stage2_conv3_fwd | Convolution | [1,256,56,56] | 10510.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
42 | resnetv13_stage2_batchnorm3_fwd | BatchNorm | [1,512,28,28] | 146.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | add_resnetv13_stage2_activation0 | add_relu | [1,512,28,28] | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv13_stage2_conv4_fwd | Convolution | [1,512,28,28] | 5990 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv13_stage2_conv4_fwd | Convolution | [1,512,28,28] | 5990 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
45 | resnetv13_stage2_batchnorm4_fwd | BatchNorm | [1,128,28,28] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
46 | resnetv13_stage2_relu2_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | resnetv13_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13075 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | resnetv13_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13075 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
48 | resnetv13_stage2_batchnorm5_fwd | BatchNorm | [1,128,28,28] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
49 | resnetv13_stage2_relu3_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | resnetv13_stage2_batchnorm6_fwd | BatchNorm | [1,512,28,28] | 107.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
52 | add_resnetv13_stage2_activation1 | add_relu | [1,512,28,28] | 102.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | resnetv13_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6172.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | resnetv13_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6172.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
54 | resnetv13_stage2_batchnorm7_fwd | BatchNorm | [1,128,28,28] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
55 | resnetv13_stage2_relu4_fwd | Activation | [1,128,28,28] | 27.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | resnetv13_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | resnetv13_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | resnetv13_stage2_batchnorm8_fwd | BatchNorm | [1,128,28,28] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
58 | resnetv13_stage2_relu5_fwd | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
60 | resnetv13_stage2_batchnorm9_fwd | BatchNorm | [1,512,28,28] | 47.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
61 | add_resnetv13_stage2_activation2 | add_relu | [1,512,28,28] | 93.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv13_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6083 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv13_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6083 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
63 | resnetv13_stage2_batchnorm10_fwd | BatchNorm | [1,128,28,28] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
64 | resnetv13_stage2_relu6_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv13_stage2_conv11_fwd | Convolution | [1,128,28,28] | 12917.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 36.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv13_stage2_conv11_fwd | Convolution | [1,128,28,28] | 12917.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
66 | resnetv13_stage2_batchnorm11_fwd | BatchNorm | [1,128,28,28] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
67 | resnetv13_stage2_relu7_fwd | Activation | [1,128,28,28] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
69 | resnetv13_stage2_batchnorm12_fwd | BatchNorm | [1,512,28,28] | 54.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
70 | add_resnetv13_stage2_activation3 | add_relu | [1,512,28,28] | 104.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
71 | resnetv13_stage3_conv0_fwd | Convolution | [1,512,28,28] | 3516.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 32.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
71 | resnetv13_stage3_conv0_fwd | Convolution | [1,512,28,28] | 3516.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
72 | resnetv13_stage3_batchnorm0_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
73 | resnetv13_stage3_relu0_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
74 | resnetv13_stage3_conv1_fwd | Convolution | [1,256,14,14] | 12520.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
74 | resnetv13_stage3_conv1_fwd | Convolution | [1,256,14,14] | 12520.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
75 | resnetv13_stage3_batchnorm1_fwd | BatchNorm | [1,256,14,14] | 88.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
76 | resnetv13_stage3_relu1_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
77 | resnetv13_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5579.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
77 | resnetv13_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5579.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
78 | resnetv13_stage3_batchnorm2_fwd | BatchNorm | [1,1024,14,14] | 45.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
79 | resnetv13_stage3_conv3_fwd | Convolution | [1,512,28,28] | 10477 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 66.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
79 | resnetv13_stage3_conv3_fwd | Convolution | [1,512,28,28] | 10477 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
80 | resnetv13_stage3_batchnorm3_fwd | BatchNorm | [1,1024,14,14] | 136.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
81 | add_resnetv13_stage3_activation0 | add_relu | [1,1024,14,14] | 52 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv13_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5850 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv13_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5850 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
83 | resnetv13_stage3_batchnorm4_fwd | BatchNorm | [1,256,14,14] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
84 | resnetv13_stage3_relu2_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv13_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12447.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv13_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12447.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
86 | resnetv13_stage3_batchnorm5_fwd | BatchNorm | [1,256,14,14] | 82.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
87 | resnetv13_stage3_relu3_fwd | Activation | [1,256,14,14] | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
88 | resnetv13_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5849.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
88 | resnetv13_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5849.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
89 | resnetv13_stage3_batchnorm6_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
90 | add_resnetv13_stage3_activation1 | add_relu | [1,1024,14,14] | 50.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
91 | resnetv13_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5998.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
91 | resnetv13_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5998.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
92 | resnetv13_stage3_batchnorm7_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
93 | resnetv13_stage3_relu4_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
94 | resnetv13_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12426.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
94 | resnetv13_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12426.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
95 | resnetv13_stage3_batchnorm8_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
96 | resnetv13_stage3_relu5_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
97 | resnetv13_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5494 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
97 | resnetv13_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5494 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
98 | resnetv13_stage3_batchnorm9_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
99 | add_resnetv13_stage3_activation2 | add_relu | [1,1024,14,14] | 49.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
100 | resnetv13_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5860 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 60.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
100 | resnetv13_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5860 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
101 | resnetv13_stage3_batchnorm10_fwd | BatchNorm | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
102 | resnetv13_stage3_relu6_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
103 | resnetv13_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12286 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
103 | resnetv13_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12286 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
104 | resnetv13_stage3_batchnorm11_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
105 | resnetv13_stage3_relu7_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
106 | resnetv13_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5619 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
106 | resnetv13_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5619 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
107 | resnetv13_stage3_batchnorm12_fwd | BatchNorm | [1,1024,14,14] | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
108 | add_resnetv13_stage3_activation3 | add_relu | [1,1024,14,14] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
109 | resnetv13_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5883 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
109 | resnetv13_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5883 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
110 | resnetv13_stage3_batchnorm13_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
111 | resnetv13_stage3_relu8_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv13_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12371.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv13_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12371.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
113 | resnetv13_stage3_batchnorm14_fwd | BatchNorm | [1,256,14,14] | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
114 | resnetv13_stage3_relu9_fwd | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv13_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5585.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv13_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5585.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
116 | resnetv13_stage3_batchnorm15_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
117 | add_resnetv13_stage3_activation4 | add_relu | [1,1024,14,14] | 43.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
118 | resnetv13_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5832 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
118 | resnetv13_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5832 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
119 | resnetv13_stage3_batchnorm16_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
120 | resnetv13_stage3_relu10_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
121 | resnetv13_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12448.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
121 | resnetv13_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12448.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
122 | resnetv13_stage3_batchnorm17_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
123 | resnetv13_stage3_relu11_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
124 | resnetv13_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5658.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
124 | resnetv13_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5658.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
125 | resnetv13_stage3_batchnorm18_fwd | BatchNorm | [1,1024,14,14] | 32 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
126 | add_resnetv13_stage3_activation5 | add_relu | [1,1024,14,14] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
127 | resnetv13_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5920 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
127 | resnetv13_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5920 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
128 | resnetv13_stage3_batchnorm19_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
129 | resnetv13_stage3_relu12_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
130 | resnetv13_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12418 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
130 | resnetv13_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12418 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
131 | resnetv13_stage3_batchnorm20_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
132 | resnetv13_stage3_relu13_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv13_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5572 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv13_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5572 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
134 | resnetv13_stage3_batchnorm21_fwd | BatchNorm | [1,1024,14,14] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
135 | add_resnetv13_stage3_activation6 | add_relu | [1,1024,14,14] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
136 | resnetv13_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5906.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
136 | resnetv13_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5906.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
137 | resnetv13_stage3_batchnorm22_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
138 | resnetv13_stage3_relu14_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
139 | resnetv13_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12450.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
139 | resnetv13_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12450.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
140 | resnetv13_stage3_batchnorm23_fwd | BatchNorm | [1,256,14,14] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
141 | resnetv13_stage3_relu15_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
142 | resnetv13_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5611.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
142 | resnetv13_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5611.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
143 | resnetv13_stage3_batchnorm24_fwd | BatchNorm | [1,1024,14,14] | 33.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
144 | add_resnetv13_stage3_activation7 | add_relu | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
145 | resnetv13_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5901.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
145 | resnetv13_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5901.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
146 | resnetv13_stage3_batchnorm25_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
147 | resnetv13_stage3_relu16_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
148 | resnetv13_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12361 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
148 | resnetv13_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12361 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
149 | resnetv13_stage3_batchnorm26_fwd | BatchNorm | [1,256,14,14] | 89.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
150 | resnetv13_stage3_relu17_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
151 | resnetv13_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5575.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
151 | resnetv13_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5575.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
152 | resnetv13_stage3_batchnorm27_fwd | BatchNorm | [1,1024,14,14] | 30.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
153 | add_resnetv13_stage3_activation8 | add_relu | [1,1024,14,14] | 46.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
154 | resnetv13_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
154 | resnetv13_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
155 | resnetv13_stage3_batchnorm28_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
156 | resnetv13_stage3_relu18_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
157 | resnetv13_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
157 | resnetv13_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
158 | resnetv13_stage3_batchnorm29_fwd | BatchNorm | [1,256,14,14] | 92.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
159 | resnetv13_stage3_relu19_fwd | Activation | [1,256,14,14] | 17.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
160 | resnetv13_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5622.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
160 | resnetv13_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5622.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
161 | resnetv13_stage3_batchnorm30_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
162 | add_resnetv13_stage3_activation9 | add_relu | [1,1024,14,14] | 46.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv13_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5892.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv13_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5892.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
164 | resnetv13_stage3_batchnorm31_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
165 | resnetv13_stage3_relu20_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
166 | resnetv13_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12363 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
166 | resnetv13_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12363 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
167 | resnetv13_stage3_batchnorm32_fwd | BatchNorm | [1,256,14,14] | 90.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
168 | resnetv13_stage3_relu21_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
169 | resnetv13_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5640 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
169 | resnetv13_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5640 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
170 | resnetv13_stage3_batchnorm33_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
171 | add_resnetv13_stage3_activation10 | add_relu | [1,1024,14,14] | 48.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
172 | resnetv13_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5930.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
172 | resnetv13_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5930.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
173 | resnetv13_stage3_batchnorm34_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
174 | resnetv13_stage3_relu22_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
175 | resnetv13_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12603 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
175 | resnetv13_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12603 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
176 | resnetv13_stage3_batchnorm35_fwd | BatchNorm | [1,256,14,14] | 84.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
177 | resnetv13_stage3_relu23_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
178 | resnetv13_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5640.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
178 | resnetv13_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5640.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
179 | resnetv13_stage3_batchnorm36_fwd | BatchNorm | [1,1024,14,14] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
180 | add_resnetv13_stage3_activation11 | add_relu | [1,1024,14,14] | 48.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
181 | resnetv13_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5979.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
181 | resnetv13_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5979.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
182 | resnetv13_stage3_batchnorm37_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
183 | resnetv13_stage3_relu24_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
184 | resnetv13_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12564.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
184 | resnetv13_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12564.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
185 | resnetv13_stage3_batchnorm38_fwd | BatchNorm | [1,256,14,14] | 85.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
186 | resnetv13_stage3_relu25_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
187 | resnetv13_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5725 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
187 | resnetv13_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5725 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
188 | resnetv13_stage3_batchnorm39_fwd | BatchNorm | [1,1024,14,14] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
189 | add_resnetv13_stage3_activation12 | add_relu | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
190 | resnetv13_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5942.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
190 | resnetv13_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5942.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
191 | resnetv13_stage3_batchnorm40_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
192 | resnetv13_stage3_relu26_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv13_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12365.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv13_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12365.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
194 | resnetv13_stage3_batchnorm41_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
195 | resnetv13_stage3_relu27_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
196 | resnetv13_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
196 | resnetv13_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
197 | resnetv13_stage3_batchnorm42_fwd | BatchNorm | [1,1024,14,14] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
198 | add_resnetv13_stage3_activation13 | add_relu | [1,1024,14,14] | 44.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
199 | resnetv13_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5901 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
199 | resnetv13_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5901 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
200 | resnetv13_stage3_batchnorm43_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
201 | resnetv13_stage3_relu28_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
202 | resnetv13_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12379 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
202 | resnetv13_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12379 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
203 | resnetv13_stage3_batchnorm44_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
204 | resnetv13_stage3_relu29_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
205 | resnetv13_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5618.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
205 | resnetv13_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5618.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
206 | resnetv13_stage3_batchnorm45_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
207 | add_resnetv13_stage3_activation14 | add_relu | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
208 | resnetv13_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5907 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
208 | resnetv13_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5907 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
209 | resnetv13_stage3_batchnorm46_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
210 | resnetv13_stage3_relu30_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
211 | resnetv13_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
211 | resnetv13_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
212 | resnetv13_stage3_batchnorm47_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
213 | resnetv13_stage3_relu31_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
214 | resnetv13_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5609 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
214 | resnetv13_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5609 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
215 | resnetv13_stage3_batchnorm48_fwd | BatchNorm | [1,1024,14,14] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
216 | add_resnetv13_stage3_activation15 | add_relu | [1,1024,14,14] | 45.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
217 | resnetv13_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
217 | resnetv13_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
218 | resnetv13_stage3_batchnorm49_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
219 | resnetv13_stage3_relu32_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
220 | resnetv13_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
220 | resnetv13_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
221 | resnetv13_stage3_batchnorm50_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
222 | resnetv13_stage3_relu33_fwd | Activation | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv13_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5592 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv13_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5592 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
224 | resnetv13_stage3_batchnorm51_fwd | BatchNorm | [1,1024,14,14] | 37.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
225 | add_resnetv13_stage3_activation16 | add_relu | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
226 | resnetv13_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5963.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
226 | resnetv13_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5963.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
227 | resnetv13_stage3_batchnorm52_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
228 | resnetv13_stage3_relu34_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
229 | resnetv13_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12543 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
229 | resnetv13_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12543 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
230 | resnetv13_stage3_batchnorm53_fwd | BatchNorm | [1,256,14,14] | 94.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
231 | resnetv13_stage3_relu35_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
232 | resnetv13_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
232 | resnetv13_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
233 | resnetv13_stage3_batchnorm54_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
234 | add_resnetv13_stage3_activation17 | add_relu | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
235 | resnetv13_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 6065.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
235 | resnetv13_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 6065.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
236 | resnetv13_stage3_batchnorm55_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
237 | resnetv13_stage3_relu36_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
238 | resnetv13_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12673.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
238 | resnetv13_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12673.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
239 | resnetv13_stage3_batchnorm56_fwd | BatchNorm | [1,256,14,14] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
240 | resnetv13_stage3_relu37_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
241 | resnetv13_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5756.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
241 | resnetv13_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5756.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
242 | resnetv13_stage3_batchnorm57_fwd | BatchNorm | [1,1024,14,14] | 33.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
243 | add_resnetv13_stage3_activation18 | add_relu | [1,1024,14,14] | 60.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
244 | resnetv13_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5978 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
244 | resnetv13_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5978 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
245 | resnetv13_stage3_batchnorm58_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
246 | resnetv13_stage3_relu38_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
247 | resnetv13_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
247 | resnetv13_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
248 | resnetv13_stage3_batchnorm59_fwd | BatchNorm | [1,256,14,14] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
249 | resnetv13_stage3_relu39_fwd | Activation | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
250 | resnetv13_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
250 | resnetv13_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
251 | resnetv13_stage3_batchnorm60_fwd | BatchNorm | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
252 | add_resnetv13_stage3_activation19 | add_relu | [1,1024,14,14] | 69.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv13_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 6015.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv13_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 6015.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
254 | resnetv13_stage3_batchnorm61_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
255 | resnetv13_stage3_relu40_fwd | Activation | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
256 | resnetv13_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12655.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
256 | resnetv13_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12655.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
257 | resnetv13_stage3_batchnorm62_fwd | BatchNorm | [1,256,14,14] | 93 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
258 | resnetv13_stage3_relu41_fwd | Activation | [1,256,14,14] | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
259 | resnetv13_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5697.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
259 | resnetv13_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5697.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
260 | resnetv13_stage3_batchnorm63_fwd | BatchNorm | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
261 | add_resnetv13_stage3_activation20 | add_relu | [1,1024,14,14] | 68.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
262 | resnetv13_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5994.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
262 | resnetv13_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5994.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
263 | resnetv13_stage3_batchnorm64_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
264 | resnetv13_stage3_relu42_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
265 | resnetv13_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
265 | resnetv13_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
266 | resnetv13_stage3_batchnorm65_fwd | BatchNorm | [1,256,14,14] | 85 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
267 | resnetv13_stage3_relu43_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
268 | resnetv13_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5728.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
268 | resnetv13_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5728.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
269 | resnetv13_stage3_batchnorm66_fwd | BatchNorm | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
270 | add_resnetv13_stage3_activation21 | add_relu | [1,1024,14,14] | 56.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
271 | resnetv13_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5934.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
271 | resnetv13_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5934.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
272 | resnetv13_stage3_batchnorm67_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
273 | resnetv13_stage3_relu44_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
274 | resnetv13_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12433.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
274 | resnetv13_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12433.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
275 | resnetv13_stage3_batchnorm68_fwd | BatchNorm | [1,256,14,14] | 92.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
276 | resnetv13_stage3_relu45_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
277 | resnetv13_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5643.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
277 | resnetv13_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5643.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
278 | resnetv13_stage3_batchnorm69_fwd | BatchNorm | [1,1024,14,14] | 32.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
279 | add_resnetv13_stage3_activation22 | add_relu | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
280 | resnetv13_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 4019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
280 | resnetv13_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 4019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
281 | resnetv13_stage4_batchnorm0_fwd | BatchNorm | [1,512,7,7] | 15.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
282 | resnetv13_stage4_relu0_fwd | Activation | [1,512,7,7] | 9.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv13_stage4_conv1_fwd | Convolution | [1,512,7,7] | 15021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv13_stage4_conv1_fwd | Convolution | [1,512,7,7] | 15021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 53.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
284 | resnetv13_stage4_batchnorm1_fwd | BatchNorm | [1,512,7,7] | 86.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
285 | resnetv13_stage4_relu1_fwd | Activation | [1,512,7,7] | 7.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
286 | resnetv13_stage4_conv2_fwd | Convolution | [1,512,7,7] | 6763.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
286 | resnetv13_stage4_conv2_fwd | Convolution | [1,512,7,7] | 6763.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
287 | resnetv13_stage4_batchnorm2_fwd | BatchNorm | [1,2048,7,7] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
288 | resnetv13_stage4_conv3_fwd | Convolution | [1,1024,14,14] | 12869.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 87.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
289 | resnetv13_stage4_batchnorm3_fwd | BatchNorm | [1,2048,7,7] | 106 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
290 | add_resnetv13_stage4_activation0 | add_relu | [1,2048,7,7] | 25.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
291 | resnetv13_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 6971 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 111.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
291 | resnetv13_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 6971 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
292 | resnetv13_stage4_batchnorm4_fwd | BatchNorm | [1,512,7,7] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
293 | resnetv13_stage4_relu2_fwd | Activation | [1,512,7,7] | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
294 | resnetv13_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15201.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
294 | resnetv13_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15201.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 51.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
295 | resnetv13_stage4_batchnorm5_fwd | BatchNorm | [1,512,7,7] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
296 | resnetv13_stage4_relu3_fwd | Activation | [1,512,7,7] | 8.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
297 | resnetv13_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7021 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
297 | resnetv13_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7021 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
298 | resnetv13_stage4_batchnorm6_fwd | BatchNorm | [1,2048,7,7] | 28.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
299 | add_resnetv13_stage4_activation1 | add_relu | [1,2048,7,7] | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
300 | resnetv13_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7204 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 112.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
300 | resnetv13_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7204 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
301 | resnetv13_stage4_batchnorm7_fwd | BatchNorm | [1,512,7,7] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
302 | resnetv13_stage4_relu4_fwd | Activation | [1,512,7,7] | 9.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv13_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15224.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv13_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15224.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 50.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
304 | resnetv13_stage4_batchnorm8_fwd | BatchNorm | [1,512,7,7] | 91.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
305 | resnetv13_stage4_relu5_fwd | Activation | [1,512,7,7] | 8.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
306 | resnetv13_stage4_conv9_fwd | Convolution | [1,512,7,7] | 6906.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 46.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
306 | resnetv13_stage4_conv9_fwd | Convolution | [1,512,7,7] | 6906.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
307 | resnetv13_stage4_batchnorm9_fwd | BatchNorm | [1,2048,7,7] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
308 | add_resnetv13_stage4_activation2 | add_relu | [1,2048,7,7] | 27.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
309 | resnetv13_pool1_fwd | Pooling | [1,2048,7,7] | 155.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
310 | resnetv13_dense0_fwd | FullyConnected | [1,2048,1,1] | 2078.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 4, 4, false, cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float> >(cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>, float, float) | 20.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
310 | resnetv13_dense0_fwd | FullyConnected | [1,2048,1,1] | 2078.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
Showing 1 to 422 of 422 entries