GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp |
---|
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | resnetv23_batchnorm0_fwd | BatchNorm | [1,3,224,224] | 129.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 50.67 | 904704 | 0.00 | 0.00 | 24.60 | 0.00 | 17.86 | true | 0.245349;0.245883;0.247273;0.245683;0.245925 | 904704;904704;904704;904704;904704 | |
0 | resnetv23_batchnorm0_fwd | BatchNorm | [1,3,224,224] | 129.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 1, float>, float>, mshadow::expr::Plan<mshadow::expr::ScalarExp<float>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 1, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::ScalarExp<float>, float>) | 5.00 | 0 | 0.00 | 0.00 | 11.00 | 0.00 | 0.00 | true | 0.108659;0.110124;0.110186;0.110476;0.109647 | 0;0;0;0;0 | |
1 | resnetv23_conv0_fwd | Convolution | [1,3,224,224] | 16270 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x64_relu_medium_nn | 179.00 | 239239168 | 0.00 | 0.00 | 21.10 | 0.00 | 1336.53 | true | 0.215093;0.210246;0.209948;0.212226;0.211803 | 239239168;239239168;239239168;239239168;239239168 | |
1 | resnetv23_conv0_fwd | Convolution | [1,3,224,224] | 16270 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 34.80 | 0.00 | 0.00 | true | 0.347712;0.348708;0.347872;0.346895;0.347404 | 0;0;0;0;0 | |
2 | resnetv23_batchnorm1_fwd | BatchNorm | [1,64,112,112] | 571 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 70.33 | 4849664 | 0.00 | 0.00 | 94.20 | 0.00 | 68.95 | true | 0.936711;0.928591;0.950609;0.945193;0.942858 | 4849664;4849664;4849664;4849664;4849664 | |
3 | resnetv23_relu0_fwd | Activation | [1,64,112,112] | 167 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 67.00 | 802816 | 0.00 | 0.00 | 92.60 | 0.00 | 11.98 | true | 0.925275;0.935001;0.925332;0.928787;0.923531 | 802816;802816;802816;802816;802816 | |
4 | resnetv23_pool0_fwd | Pooling | [1,64,112,112] | 2571 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 61.33 | 200704 | 0.00 | 0.00 | 69.00 | 0.00 | 3.27 | true | 0.689749;0.690576;0.689795;0.689530;0.688970 | 200704;200704;200704;200704;200704 | |
5 | resnetv23_stage1_batchnorm0_fwd | BatchNorm | [1,64,56,56] | 199.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 14.00 | 1236992 | 0.00 | 0.00 | 89.90 | 0.00 | 88.36 | true | 0.901889;0.894029;0.903826;0.902082;0.889932 | 1236992;1236992;1236992;1236992;1236992 | |
6 | resnetv23_stage1_activation0 | Activation | [1,64,56,56] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.423819;0.423669;0.424233;0.422748;0.422018 | 200704;200704;200704;200704;200704 | |
7 | resnetv23_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1880.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x64_relu_interior_nn | 43.00 | 26624000 | 0.00 | 0.00 | 9.80 | 0.00 | 619.16 | true | 0.097771;0.097840;0.097807;0.097842;0.097764 | 26624000;26624000;26624000;26624000;26624000 | |
7 | resnetv23_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1880.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.094657;0.094644;0.094888;0.094582;0.094756 | 0;0;0;0;0 | |
8 | resnetv23_stage1_batchnorm1_fwd | BatchNorm | [1,64,56,56] | 86 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 12.00 | 1236992 | 0.00 | 0.00 | 90.30 | 0.00 | 103.08 | true | 0.905436;0.896514;0.906112;0.898397;0.904109 | 1236992;1236992;1236992;1236992;1236992 | |
9 | resnetv23_stage1_activation1 | Activation | [1,64,56,56] | 45.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.423549;0.421102;0.421704;0.424021;0.422023 | 200704;200704;200704;200704;200704 | |
10 | resnetv23_stage1_conv1_fwd | Convolution | [1,64,56,56] | 15601.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 98.33 | 124207104 | 0.00 | 0.00 | 22.80 | 0.00 | 1263.13 | true | 0.228255;0.228604;0.227052;0.225693;0.227693 | 124207104;124207104;124207104;124207104;124207104 | |
10 | resnetv23_stage1_conv1_fwd | Convolution | [1,64,56,56] | 15601.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 16.00 | 237568 | 0.00 | 0.00 | 12.40 | 0.00 | 14.85 | true | 0.124447;0.124460;0.124455;0.124452;0.124457 | 237568;237568;237568;237568;237568 | |
11 | resnetv23_stage1_batchnorm2_fwd | BatchNorm | [1,64,56,56] | 114 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 12.00 | 1236992 | 0.00 | 0.00 | 90.40 | 0.00 | 103.08 | true | 0.905232;0.897263;0.907347;0.905681;0.902162 | 1236992;1236992;1236992;1236992;1236992 | |
12 | resnetv23_stage1_activation2 | Activation | [1,64,56,56] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 41.80 | 0.00 | 18.25 | true | 0.416638;0.419808;0.417642;0.418689;0.416541 | 200704;200704;200704;200704;200704 | |
13 | resnetv23_stage1_conv2_fwd | Convolution | [1,64,56,56] | 5885.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 107.67 | 106496000 | 0.00 | 0.00 | 21.80 | 0.00 | 989.12 | true | 0.217033;0.219758;0.223723;0.217002;0.218140 | 106496000;106496000;106496000;106496000;106496000 | |
13 | resnetv23_stage1_conv2_fwd | Convolution | [1,64,56,56] | 5885.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.095124;0.095738;0.095223;0.095684;0.095464 | 0;0;0;0;0 | |
14 | resnetv23_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5942 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 99.00 | 106496000 | 0.00 | 0.00 | 21.50 | 0.00 | 1075.72 | true | 0.215595;0.214000;0.214378;0.216188;0.219436 | 106496000;106496000;106496000;106496000;106496000 | |
14 | resnetv23_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5942 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.094976;0.094978;0.094997;0.095055;0.094911 | 0;0;0;0;0 | |
15 | resnetv23_stage1__plus0 | elemwise_add | [1,256,56,56] | 176.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 84.33 | 802816 | 0.00 | 0.00 | 85.80 | 0.00 | 9.52 | true | 0.860031;0.856072;0.864612;0.856923;0.855067 | 802816;802816;802816;802816;802816 | |
16 | resnetv23_stage1_batchnorm3_fwd | BatchNorm | [1,256,56,56] | 118.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 65.67 | 4947968 | 0.00 | 0.00 | 87.20 | 0.00 | 75.35 | true | 0.865758;0.871396;0.877217;0.880119;0.868039 | 4947968;4947968;4947968;4947968;4947968 | |
17 | resnetv23_stage1_activation3 | Activation | [1,256,56,56] | 165 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 66.00 | 802816 | 0.00 | 0.00 | 92.90 | 0.00 | 12.16 | true | 0.928300;0.928544;0.926829;0.930315;0.931447 | 802816;802816;802816;802816;802816 | |
18 | resnetv23_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6260 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x64_relu_interior_nn | 111.33 | 105267200 | 0.00 | 0.00 | 9.70 | 0.00 | 945.52 | true | 0.096513;0.097709;0.096662;0.096635;0.095997 | 105267200;105267200;105267200;105267200;105267200 | |
18 | resnetv23_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6260 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.094942;0.095146;0.095094;0.094908;0.094919 | 0;0;0;0;0 | |
19 | resnetv23_stage1_batchnorm4_fwd | BatchNorm | [1,64,56,56] | 115 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 12.00 | 1236992 | 0.00 | 0.00 | 90.60 | 0.00 | 103.08 | true | 0.905552;0.903782;0.911671;0.908731;0.902277 | 1236992;1236992;1236992;1236992;1236992 | |
20 | resnetv23_stage1_activation4 | Activation | [1,64,56,56] | 46.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.422871;0.417774;0.422919;0.420061;0.418963 | 200704;200704;200704;200704;200704 | |
21 | resnetv23_stage1_conv5_fwd | Convolution | [1,64,56,56] | 15619.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 98.67 | 124207104 | 0.00 | 0.00 | 22.70 | 0.00 | 1258.85 | true | 0.227211;0.226896;0.227114;0.225706;0.227713 | 124207104;124207104;124207104;124207104;124207104 | |
21 | resnetv23_stage1_conv5_fwd | Convolution | [1,64,56,56] | 15619.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 16.00 | 237568 | 0.00 | 0.00 | 12.40 | 0.00 | 14.85 | true | 0.124447;0.124443;0.124450;0.124450;0.124459 | 237568;237568;237568;237568;237568 | |
22 | resnetv23_stage1_batchnorm5_fwd | BatchNorm | [1,64,56,56] | 113.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 13.00 | 1236992 | 0.00 | 0.00 | 90.30 | 0.00 | 95.15 | true | 0.904235;0.897492;0.904156;0.900321;0.905506 | 1236992;1236992;1236992;1236992;1236992 | |
23 | resnetv23_stage1_activation5 | Activation | [1,64,56,56] | 46.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 10.00 | 200704 | 0.00 | 0.00 | 41.90 | 0.00 | 20.07 | true | 0.418780;0.415877;0.420928;0.418243;0.419178 | 200704;200704;200704;200704;200704 | |
24 | resnetv23_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5929.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 108.33 | 106496000 | 0.00 | 0.00 | 21.70 | 0.00 | 983.04 | true | 0.214214;0.216438;0.231218;0.215565;0.218400 | 106496000;106496000;106496000;106496000;106496000 | |
24 | resnetv23_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5929.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.095121;0.095632;0.094982;0.095087;0.095473 | 0;0;0;0;0 | |
25 | resnetv23_stage1__plus1 | elemwise_add | [1,256,56,56] | 141.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 85.00 | 802816 | 0.00 | 0.00 | 85.50 | 0.00 | 9.44 | true | 0.859985;0.844037;0.859029;0.854642;0.850377 | 802816;802816;802816;802816;802816 | |
26 | resnetv23_stage1_batchnorm6_fwd | BatchNorm | [1,256,56,56] | 111.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 67.00 | 4947968 | 0.00 | 0.00 | 87.20 | 0.00 | 73.85 | true | 0.880108;0.871985;0.865397;0.870604;0.873100 | 4947968;4947968;4947968;4947968;4947968 | |
27 | resnetv23_stage1_activation6 | Activation | [1,256,56,56] | 119.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 66.67 | 802816 | 0.00 | 0.00 | 92.70 | 0.00 | 12.04 | true | 0.930913;0.919276;0.921869;0.933433;0.927929 | 802816;802816;802816;802816;802816 | |
28 | resnetv23_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6298.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x64_relu_interior_nn | 114.33 | 105267200 | 0.00 | 0.00 | 9.70 | 0.00 | 920.71 | true | 0.097024;0.096974;0.096624;0.096294;0.096516 | 105267200;105267200;105267200;105267200;105267200 | |
28 | resnetv23_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6298.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.095123;0.095143;0.095063;0.095092;0.094851 | 0;0;0;0;0 | |
29 | resnetv23_stage1_batchnorm7_fwd | BatchNorm | [1,64,56,56] | 113 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 14.00 | 1236992 | 0.00 | 0.00 | 92.30 | 0.00 | 88.36 | true | 0.919621;0.924752;0.921518;0.925809;0.923981 | 1236992;1236992;1236992;1236992;1236992 | |
30 | resnetv23_stage1_activation7 | Activation | [1,64,56,56] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.419045;0.423950;0.422102;0.419542;0.423419 | 200704;200704;200704;200704;200704 | |
31 | resnetv23_stage1_conv8_fwd | Convolution | [1,64,56,56] | 15601.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 98.67 | 124207104 | 0.00 | 0.00 | 22.80 | 0.00 | 1258.85 | true | 0.228628;0.227330;0.232447;0.226597;0.226990 | 124207104;124207104;124207104;124207104;124207104 | |
31 | resnetv23_stage1_conv8_fwd | Convolution | [1,64,56,56] | 15601.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 16.00 | 237568 | 0.00 | 0.00 | 12.40 | 0.00 | 14.85 | true | 0.124453;0.124448;0.124452;0.124446;0.124448 | 237568;237568;237568;237568;237568 | |
32 | resnetv23_stage1_batchnorm8_fwd | BatchNorm | [1,64,56,56] | 105.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 13.00 | 1236992 | 0.00 | 0.00 | 91.20 | 0.00 | 95.15 | true | 0.907901;0.913727;0.909596;0.912152;0.913651 | 1236992;1236992;1236992;1236992;1236992 | |
33 | resnetv23_stage1_activation8 | Activation | [1,64,56,56] | 33.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 10.33 | 200704 | 0.00 | 0.00 | 41.80 | 0.00 | 19.42 | true | 0.418635;0.417088;0.417932;0.417562;0.418177 | 200704;200704;200704;200704;200704 | |
34 | resnetv23_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5919 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 106.33 | 106496000 | 0.00 | 0.00 | 21.70 | 0.00 | 1001.53 | true | 0.224179;0.217593;0.215375;0.216259;0.216948 | 106496000;106496000;106496000;106496000;106496000 | |
34 | resnetv23_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5919 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.095525;0.095050;0.095993;0.095499;0.094997 | 0;0;0;0;0 | |
35 | resnetv23_stage1__plus2 | elemwise_add | [1,256,56,56] | 138.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 85.33 | 802816 | 0.00 | 0.00 | 86.00 | 0.00 | 9.41 | true | 0.862299;0.855865;0.858955;0.864610;0.858560 | 802816;802816;802816;802816;802816 | |
36 | resnetv23_stage2_batchnorm0_fwd | BatchNorm | [1,256,56,56] | 108.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 65.00 | 4947968 | 0.00 | 0.00 | 86.60 | 0.00 | 76.12 | true | 0.870100;0.865504;0.855593;0.862935;0.869126 | 4947968;4947968;4947968;4947968;4947968 | |
37 | resnetv23_stage2_activation0 | Activation | [1,256,56,56] | 112.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 66.00 | 802816 | 0.00 | 0.00 | 92.90 | 0.00 | 12.16 | true | 0.930325;0.931409;0.924837;0.927270;0.928655 | 802816;802816;802816;802816;802816 | |
38 | resnetv23_stage2_conv0_fwd | Convolution | [1,256,56,56] | 11899 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 141.67 | 210534400 | 0.00 | 0.00 | 19.50 | 0.00 | 1486.12 | true | 0.191449;0.196510;0.195838;0.190838;0.201796 | 210534400;210534400;210534400;210534400;210534400 | |
38 | resnetv23_stage2_conv0_fwd | Convolution | [1,256,56,56] | 11899 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 9.50 | 0.00 | 0.00 | true | 0.094647;0.095274;0.095099;0.095019;0.094855 | 0;0;0;0;0 | |
39 | resnetv23_stage2_batchnorm1_fwd | BatchNorm | [1,128,56,56] | 130 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 32.67 | 2473984 | 0.00 | 0.00 | 88.80 | 0.00 | 75.73 | true | 0.889985;0.890145;0.889960;0.873290;0.884821 | 2473984;2473984;2473984;2473984;2473984 | |
40 | resnetv23_stage2_activation1 | Activation | [1,128,56,56] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 19.67 | 401408 | 0.00 | 0.00 | 42.90 | 0.00 | 20.41 | true | 0.428470;0.427849;0.428367;0.432687;0.430162 | 401408;401408;401408;401408;401408 | |
41 | resnetv23_stage2_conv1_fwd | Convolution | [1,128,56,56] | 14098 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_small_nn | 317.33 | 264470528 | 0.00 | 0.00 | 12.50 | 0.00 | 833.42 | true | 0.124554;0.124550;0.124554;0.124354;0.124548 | 264470528;264470528;264470528;264470528;264470528 | |
41 | resnetv23_stage2_conv1_fwd | Convolution | [1,128,56,56] | 14098 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.058303;0.058143;0.058485;0.058535;0.058282 | 0;0;0;0;0 | |
42 | resnetv23_stage2_batchnorm2_fwd | BatchNorm | [1,128,28,28] | 98.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 10.00 | 634880 | 0.00 | 0.00 | 86.60 | 0.00 | 63.49 | true | 0.865917;0.868659;0.865671;0.863340;0.866088 | 634880;634880;634880;634880;634880 | |
43 | resnetv23_stage2_activation2 | Activation | [1,128,28,28] | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.10 | 0.00 | 12.54 | true | 0.421237;0.419600;0.422064;0.418719;0.421837 | 100352;100352;100352;100352;100352 | |
44 | resnetv23_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5847 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 83.67 | 118358016 | 0.00 | 0.00 | 20.60 | 0.00 | 1414.63 | true | 0.205716;0.201066;0.204278;0.206797;0.206659 | 118358016;118358016;118358016;118358016;118358016 | |
44 | resnetv23_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5847 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.057601;0.058095;0.057850;0.057709;0.057737 | 0;0;0;0;0 | |
45 | resnetv23_stage2_conv3_fwd | Convolution | [1,256,56,56] | 11297.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 145.00 | 235798528 | 0.00 | 0.00 | 19.60 | 0.00 | 1626.20 | true | 0.203712;0.193382;0.199517;0.193849;0.195830 | 235798528;235798528;235798528;235798528;235798528 | |
45 | resnetv23_stage2_conv3_fwd | Convolution | [1,256,56,56] | 11297.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.058526;0.058351;0.058442;0.058260;0.058293 | 0;0;0;0;0 | |
46 | resnetv23_stage2__plus0 | elemwise_add | [1,512,28,28] | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 38.00 | 401408 | 0.00 | 0.00 | 83.40 | 0.00 | 10.56 | true | 0.824154;0.834934;0.833689;0.835730;0.833738 | 401408;401408;401408;401408;401408 | |
47 | resnetv23_stage2_batchnorm3_fwd | BatchNorm | [1,512,28,28] | 70.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 35.00 | 2539520 | 0.00 | 0.00 | 83.20 | 0.00 | 72.56 | true | 0.822818;0.847688;0.831039;0.815735;0.841744 | 2539520;2539520;2539520;2539520;2539520 | |
48 | resnetv23_stage2_activation3 | Activation | [1,512,28,28] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 19.00 | 401408 | 0.00 | 0.00 | 42.90 | 0.00 | 21.13 | true | 0.427432;0.427830;0.427301;0.431914;0.430268 | 401408;401408;401408;401408;401408 | |
49 | resnetv23_stage2_conv4_fwd | Convolution | [1,512,28,28] | 6121 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 178.00 | 104957952 | 0.00 | 0.00 | 18.30 | 0.00 | 589.65 | true | 0.182112;0.185624;0.182278;0.184587;0.181929 | 104957952;104957952;104957952;104957952;104957952 | |
50 | resnetv23_stage2_batchnorm4_fwd | BatchNorm | [1,128,28,28] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 10.00 | 634880 | 0.00 | 0.00 | 86.60 | 0.00 | 63.49 | true | 0.867684;0.870783;0.864370;0.865753;0.865891 | 634880;634880;634880;634880;634880 | |
51 | resnetv23_stage2_activation4 | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.30 | 0.00 | 12.54 | true | 0.422678;0.423398;0.421515;0.422814;0.423179 | 100352;100352;100352;100352;100352 | |
52 | resnetv23_stage2_conv5_fwd | Convolution | [1,128,28,28] | 14016.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 90.00 | 140705792 | 0.00 | 0.00 | 23.70 | 0.00 | 1563.40 | true | 0.237390;0.236927;0.236862;0.231894;0.239319 | 140705792;140705792;140705792;140705792;140705792 | |
52 | resnetv23_stage2_conv5_fwd | Convolution | [1,128,28,28] | 14016.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 23.00 | 950272 | 0.00 | 0.00 | 46.50 | 0.00 | 41.32 | true | 0.464325;0.465527;0.462435;0.465413;0.467509 | 950272;950272;950272;950272;950272 | |
53 | resnetv23_stage2_batchnorm5_fwd | BatchNorm | [1,128,28,28] | 98 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 10.00 | 634880 | 0.00 | 0.00 | 86.00 | 0.00 | 63.49 | true | 0.856393;0.861134;0.861757;0.857534;0.863228 | 634880;634880;634880;634880;634880 | |
54 | resnetv23_stage2_activation5 | Activation | [1,128,28,28] | 17.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 41.70 | 0.00 | 12.54 | true | 0.418041;0.417090;0.415365;0.418024;0.415680 | 100352;100352;100352;100352;100352 | |
55 | resnetv23_stage2_conv6_fwd | Convolution | [1,128,28,28] | 5889.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 91.67 | 118358016 | 0.00 | 0.00 | 20.10 | 0.00 | 1291.17 | true | 0.203830;0.201902;0.202977;0.199544;0.199122 | 118358016;118358016;118358016;118358016;118358016 | |
55 | resnetv23_stage2_conv6_fwd | Convolution | [1,128,28,28] | 5889.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.057912;0.057918;0.057801;0.057900;0.058198 | 0;0;0;0;0 | |
56 | resnetv23_stage2__plus1 | elemwise_add | [1,512,28,28] | 99.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 30.33 | 401408 | 0.00 | 0.00 | 80.50 | 0.00 | 13.23 | true | 0.800307;0.815709;0.801261;0.809366;0.803528 | 401408;401408;401408;401408;401408 | |
57 | resnetv23_stage2_batchnorm6_fwd | BatchNorm | [1,512,28,28] | 66 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 34.00 | 2539520 | 0.00 | 0.00 | 83.70 | 0.00 | 74.69 | true | 0.838789;0.839134;0.839805;0.834177;0.832302 | 2539520;2539520;2539520;2539520;2539520 | |
58 | resnetv23_stage2_activation6 | Activation | [1,512,28,28] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 19.00 | 401408 | 0.00 | 0.00 | 42.80 | 0.00 | 21.13 | true | 0.425902;0.428060;0.433564;0.427207;0.427404 | 401408;401408;401408;401408;401408 | |
59 | resnetv23_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6087.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 178.33 | 104957952 | 0.00 | 0.00 | 18.30 | 0.00 | 588.55 | true | 0.185024;0.182944;0.181386;0.183316;0.182670 | 104957952;104957952;104957952;104957952;104957952 | |
60 | resnetv23_stage2_batchnorm7_fwd | BatchNorm | [1,128,28,28] | 104.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.67 | 634880 | 0.00 | 0.00 | 85.20 | 0.00 | 65.67 | true | 0.850374;0.850403;0.846421;0.854635;0.860434 | 634880;634880;634880;634880;634880 | |
61 | resnetv23_stage2_activation7 | Activation | [1,128,28,28] | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.20 | 0.00 | 12.54 | true | 0.423366;0.419574;0.423588;0.422113;0.421212 | 100352;100352;100352;100352;100352 | |
62 | resnetv23_stage2_conv8_fwd | Convolution | [1,128,28,28] | 14014.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 89.67 | 140705792 | 0.00 | 0.00 | 23.50 | 0.00 | 1569.20 | true | 0.234176;0.236376;0.234007;0.233760;0.238536 | 140705792;140705792;140705792;140705792;140705792 | |
62 | resnetv23_stage2_conv8_fwd | Convolution | [1,128,28,28] | 14014.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 23.00 | 950272 | 0.00 | 0.00 | 46.50 | 0.00 | 41.32 | true | 0.466719;0.467592;0.464815;0.463057;0.458100 | 950272;950272;950272;950272;950272 | |
63 | resnetv23_stage2_batchnorm8_fwd | BatchNorm | [1,128,28,28] | 98 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 634880 | 0.00 | 0.00 | 84.50 | 0.00 | 70.54 | true | 0.845295;0.843528;0.845979;0.844227;0.855228 | 634880;634880;634880;634880;634880 | |
64 | resnetv23_stage2_activation8 | Activation | [1,128,28,28] | 17.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 41.80 | 0.00 | 12.54 | true | 0.417905;0.417703;0.417201;0.416224;0.417669 | 100352;100352;100352;100352;100352 | |
65 | resnetv23_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5851.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 91.67 | 118358016 | 0.00 | 0.00 | 20.30 | 0.00 | 1291.17 | true | 0.203487;0.202007;0.203109;0.218148;0.198334 | 118358016;118358016;118358016;118358016;118358016 | |
65 | resnetv23_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5851.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.057835;0.058246;0.057862;0.057856;0.057699 | 0;0;0;0;0 | |
66 | resnetv23_stage2__plus2 | elemwise_add | [1,512,28,28] | 97 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 30.33 | 401408 | 0.00 | 0.00 | 81.80 | 0.00 | 13.23 | true | 0.822508;0.823038;0.815279;0.815836;0.817026 | 401408;401408;401408;401408;401408 | |
67 | resnetv23_stage2_batchnorm9_fwd | BatchNorm | [1,512,28,28] | 63.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 33.67 | 2539520 | 0.00 | 0.00 | 83.50 | 0.00 | 75.43 | true | 0.821660;0.832371;0.836179;0.838146;0.836971 | 2539520;2539520;2539520;2539520;2539520 | |
68 | resnetv23_stage2_activation9 | Activation | [1,512,28,28] | 50.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 19.00 | 401408 | 0.00 | 0.00 | 42.80 | 0.00 | 21.13 | true | 0.428288;0.421959;0.429025;0.427743;0.430643 | 401408;401408;401408;401408;401408 | |
69 | resnetv23_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6096.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 179.00 | 104957952 | 0.00 | 0.00 | 18.20 | 0.00 | 586.36 | true | 0.182223;0.182009;0.181679;0.183221;0.183532 | 104957952;104957952;104957952;104957952;104957952 | |
70 | resnetv23_stage2_batchnorm10_fwd | BatchNorm | [1,128,28,28] | 92 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 634880 | 0.00 | 0.00 | 85.00 | 0.00 | 70.54 | true | 0.848203;0.860428;0.843485;0.850181;0.851616 | 634880;634880;634880;634880;634880 | |
71 | resnetv23_stage2_activation10 | Activation | [1,128,28,28] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.20 | 0.00 | 12.54 | true | 0.421049;0.422251;0.421441;0.421795;0.422301 | 100352;100352;100352;100352;100352 | |
72 | resnetv23_stage2_conv11_fwd | Convolution | [1,128,28,28] | 14012.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 90.00 | 140705792 | 0.00 | 0.00 | 23.80 | 0.00 | 1563.40 | true | 0.238946;0.235475;0.242344;0.236326;0.238972 | 140705792;140705792;140705792;140705792;140705792 | |
72 | resnetv23_stage2_conv11_fwd | Convolution | [1,128,28,28] | 14012.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 23.00 | 950272 | 0.00 | 0.00 | 46.40 | 0.00 | 41.32 | true | 0.460509;0.463369;0.467315;0.460806;0.466677 | 950272;950272;950272;950272;950272 | |
73 | resnetv23_stage2_batchnorm11_fwd | BatchNorm | [1,128,28,28] | 98.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 634880 | 0.00 | 0.00 | 84.70 | 0.00 | 70.54 | true | 0.844160;0.847922;0.844628;0.847722;0.847829 | 634880;634880;634880;634880;634880 | |
74 | resnetv23_stage2_activation11 | Activation | [1,128,28,28] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 41.70 | 0.00 | 12.54 | true | 0.417210;0.416939;0.417293;0.417037;0.417792 | 100352;100352;100352;100352;100352 | |
75 | resnetv23_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 91.33 | 118358016 | 0.00 | 0.00 | 20.20 | 0.00 | 1295.90 | true | 0.200120;0.200206;0.205292;0.205415;0.197614 | 118358016;118358016;118358016;118358016;118358016 | |
75 | resnetv23_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.058097;0.057924;0.058135;0.057927;0.058108 | 0;0;0;0;0 | |
76 | resnetv23_stage2__plus3 | elemwise_add | [1,512,28,28] | 96.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 30.00 | 401408 | 0.00 | 0.00 | 81.10 | 0.00 | 13.38 | true | 0.803796;0.801626;0.819651;0.808102;0.820860 | 401408;401408;401408;401408;401408 | |
77 | resnetv23_stage3_batchnorm0_fwd | BatchNorm | [1,512,28,28] | 60.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 33.00 | 2539520 | 0.00 | 0.00 | 82.70 | 0.00 | 76.96 | true | 0.825749;0.830132;0.829531;0.819888;0.825413 | 2539520;2539520;2539520;2539520;2539520 | |
78 | resnetv23_stage3_activation0 | Activation | [1,512,28,28] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 19.00 | 401408 | 0.00 | 0.00 | 43.10 | 0.00 | 21.13 | true | 0.433685;0.430932;0.430389;0.431899;0.429729 | 401408;401408;401408;401408;401408 | |
79 | resnetv23_stage3_conv0_fwd | Convolution | [1,512,28,28] | 11247 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 147.00 | 235339776 | 0.00 | 0.00 | 12.50 | 0.00 | 1600.95 | true | 0.124698;0.124697;0.124701;0.124700;0.124698 | 235339776;235339776;235339776;235339776;235339776 | |
79 | resnetv23_stage3_conv0_fwd | Convolution | [1,512,28,28] | 11247 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.80 | 0.00 | 0.00 | true | 0.058286;0.058041;0.058337;0.058413;0.058333 | 0;0;0;0;0 | |
80 | resnetv23_stage3_batchnorm1_fwd | BatchNorm | [1,256,28,28] | 110.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 87.10 | 0.00 | 84.65 | true | 0.873399;0.869263;0.872157;0.869030;0.870762 | 1269760;1269760;1269760;1269760;1269760 | |
81 | resnetv23_stage3_activation1 | Activation | [1,256,28,28] | 26.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.424028;0.422482;0.422030;0.423274;0.422271 | 200704;200704;200704;200704;200704 | |
82 | resnetv23_stage3_conv1_fwd | Convolution | [1,256,28,28] | 13638.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 332.00 | 264291328 | 0.00 | 0.00 | 10.80 | 0.00 | 796.06 | true | 0.107347;0.108119;0.108151;0.106919;0.107519 | 264291328;264291328;264291328;264291328;264291328 | |
82 | resnetv23_stage3_conv1_fwd | Convolution | [1,256,28,28] | 13638.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 145.00 | 0 | 0.00 | 0.00 | 11.00 | 0.00 | 0.00 | true | 0.110229;0.110202;0.110273;0.110322;0.110329 | 0;0;0;0;0 | |
83 | resnetv23_stage3_batchnorm2_fwd | BatchNorm | [1,256,14,14] | 89.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.90 | 0.00 | 35.27 | true | 0.420845;0.415738;0.418165;0.418385;0.420641 | 317440;317440;317440;317440;317440 | |
84 | resnetv23_stage3_activation2 | Activation | [1,256,14,14] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.33 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 6.02 | true | 0.364829;0.364465;0.363392;0.365570;0.364973 | 50176;50176;50176;50176;50176 | |
85 | resnetv23_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5682 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 88.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1531.16 | true | 0.124743;0.124745;0.124730;0.124737;0.124746 | 134742016;134742016;134742016;134742016;134742016 | |
85 | resnetv23_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5682 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.30 | 0.00 | 0.00 | true | 0.051728;0.054265;0.055239;0.053394;0.049884 | 0;0;0;0;0 | |
86 | resnetv23_stage3_conv3_fwd | Convolution | [1,512,28,28] | 11232.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 152.67 | 268959744 | 0.00 | 0.00 | 12.50 | 0.00 | 1761.74 | true | 0.124699;0.124625;0.124734;0.124674;0.124623 | 268959744;268959744;268959744;268959744;268959744 | |
86 | resnetv23_stage3_conv3_fwd | Convolution | [1,512,28,28] | 11232.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055843;0.056642;0.056170;0.057152;0.056518 | 0;0;0;0;0 | |
87 | resnetv23_stage3__plus0 | elemwise_add | [1,1024,14,14] | 76 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 14.67 | 200704 | 0.00 | 0.00 | 81.10 | 0.00 | 13.68 | true | 0.806248;0.820223;0.811525;0.791259;0.816619 | 200704;200704;200704;200704;200704 | |
88 | resnetv23_stage3_batchnorm3_fwd | BatchNorm | [1,1024,14,14] | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.30 | 0.00 | 79.36 | true | 0.751609;0.749713;0.749853;0.758230;0.757308 | 1269760;1269760;1269760;1269760;1269760 | |
89 | resnetv23_stage3_activation3 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.423126;0.421490;0.421028;0.421160;0.421576 | 200704;200704;200704;200704;200704 | |
90 | resnetv23_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5847.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 499.44 | true | 0.124502;0.124687;0.124379;0.124372;0.124386 | 134348800;134348800;134348800;134348800;134348800 | |
90 | resnetv23_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5847.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056400;0.056192;0.056166;0.056247;0.056004 | 0;0;0;0;0 | |
91 | resnetv23_stage3_batchnorm4_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.80 | 0.00 | 39.68 | true | 0.407069;0.410763;0.409111;0.408892;0.403958 | 317440;317440;317440;317440;317440 | |
92 | resnetv23_stage3_activation4 | Activation | [1,256,14,14] | 19.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.60 | 0.00 | 6.27 | true | 0.353794;0.357961;0.352019;0.356223;0.358808 | 50176;50176;50176;50176;50176 | |
93 | resnetv23_stage3_conv5_fwd | Convolution | [1,256,14,14] | 13494.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 105.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1325.70 | true | 0.124738;0.124632;0.124738;0.124707;0.124633 | 140083200;140083200;140083200;140083200;140083200 | |
93 | resnetv23_stage3_conv5_fwd | Convolution | [1,256,14,14] | 13494.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 83.67 | 3801088 | 0.00 | 0.00 | 50.10 | 0.00 | 45.43 | true | 0.507404;0.495985;0.502046;0.505423;0.493886 | 3801088;3801088;3801088;3801088;3801088 | |
94 | resnetv23_stage3_batchnorm5_fwd | BatchNorm | [1,256,14,14] | 93.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.60 | 0.00 | 35.27 | true | 0.416043;0.417181;0.416525;0.415573;0.416338 | 317440;317440;317440;317440;317440 | |
95 | resnetv23_stage3_activation5 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.364682;0.365465;0.365498;0.364484;0.362451 | 50176;50176;50176;50176;50176 | |
96 | resnetv23_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5702 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124759;0.124764;0.124763;0.124762;0.124758 | 134742016;134742016;134742016;134742016;134742016 | |
96 | resnetv23_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5702 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055376;0.055672;0.054667;0.054799;0.054601 | 0;0;0;0;0 | |
97 | resnetv23_stage3__plus1 | elemwise_add | [1,1024,14,14] | 78.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.60 | 0.00 | 15.44 | true | 0.804485;0.779296;0.787354;0.785741;0.784051 | 200704;200704;200704;200704;200704 | |
98 | resnetv23_stage3_batchnorm6_fwd | BatchNorm | [1,1024,14,14] | 47.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 74.10 | 0.00 | 84.65 | true | 0.742828;0.738442;0.742243;0.737306;0.742730 | 1269760;1269760;1269760;1269760;1269760 | |
99 | resnetv23_stage3_activation6 | Activation | [1,1024,14,14] | 29.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.422464;0.421228;0.423328;0.422139;0.419609 | 200704;200704;200704;200704;200704 | |
100 | resnetv23_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5855.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 499.44 | true | 0.124382;0.124384;0.124369;0.124575;0.124374 | 134348800;134348800;134348800;134348800;134348800 | |
100 | resnetv23_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5855.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056437;0.055995;0.056421;0.056495;0.055735 | 0;0;0;0;0 | |
101 | resnetv23_stage3_batchnorm7_fwd | BatchNorm | [1,256,14,14] | 92.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.60 | 0.00 | 39.68 | true | 0.404172;0.406444;0.405687;0.407489;0.405649 | 317440;317440;317440;317440;317440 | |
102 | resnetv23_stage3_activation7 | Activation | [1,256,14,14] | 15.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.70 | 0.00 | 6.27 | true | 0.352464;0.361585;0.351968;0.364917;0.357244 | 50176;50176;50176;50176;50176 | |
103 | resnetv23_stage3_conv8_fwd | Convolution | [1,256,14,14] | 13492.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.33 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1355.65 | true | 0.124787;0.124783;0.124786;0.124788;0.124783 | 140083200;140083200;140083200;140083200;140083200 | |
103 | resnetv23_stage3_conv8_fwd | Convolution | [1,256,14,14] | 13492.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.67 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 46.54 | true | 0.500660;0.500214;0.498219;0.496970;0.499003 | 3801088;3801088;3801088;3801088;3801088 | |
104 | resnetv23_stage3_batchnorm8_fwd | BatchNorm | [1,256,14,14] | 89.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 39.68 | true | 0.412238;0.412595;0.412719;0.412442;0.411183 | 317440;317440;317440;317440;317440 | |
105 | resnetv23_stage3_activation8 | Activation | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.364589;0.366787;0.363014;0.364964;0.365996 | 50176;50176;50176;50176;50176 | |
106 | resnetv23_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5676.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 95.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1418.34 | true | 0.124759;0.124761;0.124768;0.124765;0.124766 | 134742016;134742016;134742016;134742016;134742016 | |
106 | resnetv23_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5676.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055461;0.055228;0.055257;0.055142;0.056239 | 0;0;0;0;0 | |
107 | resnetv23_stage3__plus2 | elemwise_add | [1,1024,14,14] | 83.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.70 | 0.00 | 15.44 | true | 0.779779;0.801482;0.785207;0.790048;0.785087 | 200704;200704;200704;200704;200704 | |
108 | resnetv23_stage3_batchnorm9_fwd | BatchNorm | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.33 | 1269760 | 0.00 | 0.00 | 74.80 | 0.00 | 82.81 | true | 0.742735;0.747460;0.742836;0.754275;0.752359 | 1269760;1269760;1269760;1269760;1269760 | |
109 | resnetv23_stage3_activation9 | Activation | [1,1024,14,14] | 32.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.420835;0.421222;0.422530;0.422458;0.422374 | 200704;200704;200704;200704;200704 | |
110 | resnetv23_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5857.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 268.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 500.06 | true | 0.124386;0.124375;0.124380;0.124379;0.124384 | 134348800;134348800;134348800;134348800;134348800 | |
110 | resnetv23_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5857.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056015;0.056338;0.055983;0.055837;0.056127 | 0;0;0;0;0 | |
111 | resnetv23_stage3_batchnorm10_fwd | BatchNorm | [1,256,14,14] | 90.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 38.09 | true | 0.412200;0.412999;0.411924;0.410308;0.414588 | 317440;317440;317440;317440;317440 | |
112 | resnetv23_stage3_activation10 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.70 | 0.00 | 6.27 | true | 0.351243;0.363748;0.352963;0.360131;0.358414 | 50176;50176;50176;50176;50176 | |
113 | resnetv23_stage3_conv11_fwd | Convolution | [1,256,14,14] | 13489.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124787;0.124785;0.124757;0.124635;0.124773 | 140083200;140083200;140083200;140083200;140083200 | |
113 | resnetv23_stage3_conv11_fwd | Convolution | [1,256,14,14] | 13489.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.00 | 3801088 | 0.00 | 0.00 | 50.00 | 0.00 | 46.35 | true | 0.507150;0.498118;0.498865;0.499714;0.502535 | 3801088;3801088;3801088;3801088;3801088 | |
114 | resnetv23_stage3_batchnorm11_fwd | BatchNorm | [1,256,14,14] | 96 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.50 | 0.00 | 38.09 | true | 0.414275;0.416593;0.416366;0.414927;0.414513 | 317440;317440;317440;317440;317440 | |
115 | resnetv23_stage3_activation11 | Activation | [1,256,14,14] | 15.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.367411;0.363161;0.365218;0.364547;0.365470 | 50176;50176;50176;50176;50176 | |
116 | resnetv23_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5700.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1433.43 | true | 0.124764;0.124763;0.124759;0.124694;0.124762 | 134742016;134742016;134742016;134742016;134742016 | |
116 | resnetv23_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5700.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055448;0.055159;0.054487;0.055530;0.055622 | 0;0;0;0;0 | |
117 | resnetv23_stage3__plus3 | elemwise_add | [1,1024,14,14] | 80.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.30 | 0.00 | 15.44 | true | 0.778468;0.780400;0.789024;0.778073;0.788827 | 200704;200704;200704;200704;200704 | |
118 | resnetv23_stage3_batchnorm12_fwd | BatchNorm | [1,1024,14,14] | 53 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 74.20 | 0.00 | 84.65 | true | 0.742252;0.741032;0.747151;0.742562;0.741532 | 1269760;1269760;1269760;1269760;1269760 | |
119 | resnetv23_stage3_activation12 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.423477;0.422107;0.422093;0.420393;0.423721 | 200704;200704;200704;200704;200704 | |
120 | resnetv23_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5853 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 502.55 | true | 0.124682;0.124373;0.124387;0.124683;0.124374 | 134348800;134348800;134348800;134348800;134348800 | |
120 | resnetv23_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5853 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056115;0.055850;0.056205;0.055599;0.055859 | 0;0;0;0;0 | |
121 | resnetv23_stage3_batchnorm13_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.10 | 0.00 | 39.68 | true | 0.409708;0.412727;0.408446;0.413371;0.409673 | 317440;317440;317440;317440;317440 | |
122 | resnetv23_stage3_activation13 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 36.00 | 0.00 | 6.27 | true | 0.357267;0.361704;0.351731;0.362688;0.359613 | 50176;50176;50176;50176;50176 | |
123 | resnetv23_stage3_conv14_fwd | Convolution | [1,256,14,14] | 13513.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124788;0.124950;0.124785;0.124732;0.124710 | 140083200;140083200;140083200;140083200;140083200 | |
123 | resnetv23_stage3_conv14_fwd | Convolution | [1,256,14,14] | 13513.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 80.67 | 3801088 | 0.00 | 0.00 | 49.80 | 0.00 | 47.12 | true | 0.503682;0.497470;0.500479;0.494354;0.495664 | 3801088;3801088;3801088;3801088;3801088 | |
124 | resnetv23_stage3_batchnorm14_fwd | BatchNorm | [1,256,14,14] | 92.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 39.68 | true | 0.414198;0.416575;0.411823;0.412817;0.413049 | 317440;317440;317440;317440;317440 | |
125 | resnetv23_stage3_activation14 | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.60 | 0.00 | 5.58 | true | 0.367272;0.365296;0.365785;0.364864;0.365545 | 50176;50176;50176;50176;50176 | |
126 | resnetv23_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5657.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124764;0.124765;0.124762;0.124762;0.124762 | 134742016;134742016;134742016;134742016;134742016 | |
126 | resnetv23_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5657.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055592;0.055276;0.055183;0.055932;0.055376 | 0;0;0;0;0 | |
127 | resnetv23_stage3__plus4 | elemwise_add | [1,1024,14,14] | 74.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.90 | 0.00 | 15.44 | true | 0.798204;0.791570;0.785210;0.787423;0.786896 | 200704;200704;200704;200704;200704 | |
128 | resnetv23_stage3_batchnorm15_fwd | BatchNorm | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.67 | 1269760 | 0.00 | 0.00 | 74.30 | 0.00 | 81.05 | true | 0.741114;0.744515;0.739820;0.743558;0.747343 | 1269760;1269760;1269760;1269760;1269760 | |
129 | resnetv23_stage3_activation15 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.420833;0.419583;0.421481;0.421205;0.422386 | 200704;200704;200704;200704;200704 | |
130 | resnetv23_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5873.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 498.20 | true | 0.124376;0.124703;0.124371;0.124557;0.124371 | 134348800;134348800;134348800;134348800;134348800 | |
130 | resnetv23_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5873.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056089;0.056390;0.055667;0.056205;0.056346 | 0;0;0;0;0 | |
131 | resnetv23_stage3_batchnorm16_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 39.68 | true | 0.413320;0.412046;0.409533;0.414231;0.409138 | 317440;317440;317440;317440;317440 | |
132 | resnetv23_stage3_activation16 | Activation | [1,256,14,14] | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.50 | 0.00 | 6.27 | true | 0.353001;0.355241;0.351929;0.357236;0.360580 | 50176;50176;50176;50176;50176 | |
133 | resnetv23_stage3_conv17_fwd | Convolution | [1,256,14,14] | 13489 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.33 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1368.90 | true | 0.124783;0.124786;0.124779;0.124719;0.124782 | 140083200;140083200;140083200;140083200;140083200 | |
133 | resnetv23_stage3_conv17_fwd | Convolution | [1,256,14,14] | 13489 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.67 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 45.98 | true | 0.496426;0.502256;0.497549;0.506303;0.495792 | 3801088;3801088;3801088;3801088;3801088 | |
134 | resnetv23_stage3_batchnorm17_fwd | BatchNorm | [1,256,14,14] | 90.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.90 | 0.00 | 35.27 | true | 0.418603;0.418617;0.416880;0.419443;0.419051 | 317440;317440;317440;317440;317440 | |
135 | resnetv23_stage3_activation17 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.364248;0.363720;0.364073;0.364034;0.366689 | 50176;50176;50176;50176;50176 | |
136 | resnetv23_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5716.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124760;0.124766;0.124761;0.124757;0.124756 | 134742016;134742016;134742016;134742016;134742016 | |
136 | resnetv23_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5716.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055450;0.055373;0.055288;0.055719;0.055226 | 0;0;0;0;0 | |
137 | resnetv23_stage3__plus5 | elemwise_add | [1,1024,14,14] | 77 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.20 | 0.00 | 15.44 | true | 0.778864;0.779830;0.788805;0.778154;0.792906 | 200704;200704;200704;200704;200704 | |
138 | resnetv23_stage3_batchnorm18_fwd | BatchNorm | [1,1024,14,14] | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 74.90 | 0.00 | 79.36 | true | 0.755370;0.744012;0.749509;0.752854;0.741473 | 1269760;1269760;1269760;1269760;1269760 | |
139 | resnetv23_stage3_activation18 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.421302;0.422733;0.424162;0.423177;0.422799 | 200704;200704;200704;200704;200704 | |
140 | resnetv23_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5857.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 499.44 | true | 0.124675;0.124371;0.124377;0.124380;0.124379 | 134348800;134348800;134348800;134348800;134348800 | |
140 | resnetv23_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5857.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056763;0.056215;0.056370;0.056038;0.055965 | 0;0;0;0;0 | |
141 | resnetv23_stage3_batchnorm19_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.50 | 0.00 | 39.68 | true | 0.405814;0.403012;0.403387;0.411352;0.405610 | 317440;317440;317440;317440;317440 | |
142 | resnetv23_stage3_activation19 | Activation | [1,256,14,14] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.90 | 0.00 | 6.27 | true | 0.351944;0.363071;0.357518;0.362244;0.357894 | 50176;50176;50176;50176;50176 | |
143 | resnetv23_stage3_conv20_fwd | Convolution | [1,256,14,14] | 13473 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.123988;0.124783;0.124783;0.124656;0.124781 | 140083200;140083200;140083200;140083200;140083200 | |
143 | resnetv23_stage3_conv20_fwd | Convolution | [1,256,14,14] | 13473 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.00 | 3801088 | 0.00 | 0.00 | 49.70 | 0.00 | 46.93 | true | 0.495289;0.497325;0.498146;0.496015;0.498373 | 3801088;3801088;3801088;3801088;3801088 | |
144 | resnetv23_stage3_batchnorm20_fwd | BatchNorm | [1,256,14,14] | 87 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 39.68 | true | 0.412793;0.411482;0.412795;0.418422;0.411366 | 317440;317440;317440;317440;317440 | |
145 | resnetv23_stage3_activation20 | Activation | [1,256,14,14] | 21.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.364712;0.365286;0.361845;0.363102;0.364985 | 50176;50176;50176;50176;50176 | |
146 | resnetv23_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5682.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124767;0.124764;0.124763;0.124707;0.124759 | 134742016;134742016;134742016;134742016;134742016 | |
146 | resnetv23_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5682.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055577;0.055449;0.055138;0.055239;0.055350 | 0;0;0;0;0 | |
147 | resnetv23_stage3__plus6 | elemwise_add | [1,1024,14,14] | 73.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 79.30 | 0.00 | 15.44 | true | 0.785555;0.793772;0.800425;0.799906;0.780470 | 200704;200704;200704;200704;200704 | |
148 | resnetv23_stage3_batchnorm21_fwd | BatchNorm | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.60 | 0.00 | 79.36 | true | 0.762003;0.752558;0.755587;0.757652;0.753694 | 1269760;1269760;1269760;1269760;1269760 | |
149 | resnetv23_stage3_activation21 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.421610;0.421511;0.423838;0.419943;0.423769 | 200704;200704;200704;200704;200704 | |
150 | resnetv23_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5846.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 268.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 501.30 | true | 0.124692;0.124392;0.124373;0.124373;0.124378 | 134348800;134348800;134348800;134348800;134348800 | |
150 | resnetv23_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5846.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056043;0.056638;0.056396;0.055903;0.055849 | 0;0;0;0;0 | |
151 | resnetv23_stage3_batchnorm22_fwd | BatchNorm | [1,256,14,14] | 90.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.90 | 0.00 | 39.68 | true | 0.414363;0.406316;0.411574;0.407977;0.403914 | 317440;317440;317440;317440;317440 | |
152 | resnetv23_stage3_activation22 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.60 | 0.00 | 6.27 | true | 0.351339;0.355931;0.354684;0.358425;0.360772 | 50176;50176;50176;50176;50176 | |
153 | resnetv23_stage3_conv23_fwd | Convolution | [1,256,14,14] | 13455 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124786;0.124787;0.124787;0.124790;0.124786 | 140083200;140083200;140083200;140083200;140083200 | |
153 | resnetv23_stage3_conv23_fwd | Convolution | [1,256,14,14] | 13455 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.33 | 3801088 | 0.00 | 0.00 | 49.80 | 0.00 | 46.17 | true | 0.497748;0.499615;0.501707;0.496162;0.494728 | 3801088;3801088;3801088;3801088;3801088 | |
154 | resnetv23_stage3_batchnorm23_fwd | BatchNorm | [1,256,14,14] | 79.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 36.63 | true | 0.413758;0.413651;0.416581;0.412630;0.412495 | 317440;317440;317440;317440;317440 | |
155 | resnetv23_stage3_activation23 | Activation | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.362432;0.363387;0.364147;0.366065;0.365406 | 50176;50176;50176;50176;50176 | |
156 | resnetv23_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 95.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1418.34 | true | 0.124764;0.124760;0.124760;0.124767;0.124770 | 134742016;134742016;134742016;134742016;134742016 | |
156 | resnetv23_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055727;0.055502;0.055166;0.055281;0.055227 | 0;0;0;0;0 | |
157 | resnetv23_stage3__plus7 | elemwise_add | [1,1024,14,14] | 77.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.80 | 0.00 | 15.44 | true | 0.786435;0.784186;0.790747;0.793319;0.785490 | 200704;200704;200704;200704;200704 | |
158 | resnetv23_stage3_batchnorm24_fwd | BatchNorm | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.33 | 1269760 | 0.00 | 0.00 | 74.50 | 0.00 | 82.81 | true | 0.742350;0.747310;0.741044;0.744080;0.750895 | 1269760;1269760;1269760;1269760;1269760 | |
159 | resnetv23_stage3_activation24 | Activation | [1,1024,14,14] | 26.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.423114;0.422835;0.422870;0.423115;0.423023 | 200704;200704;200704;200704;200704 | |
160 | resnetv23_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5868 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 268.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 500.68 | true | 0.124379;0.124380;0.124375;0.124281;0.124380 | 134348800;134348800;134348800;134348800;134348800 | |
160 | resnetv23_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5868 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055609;0.056183;0.055938;0.056186;0.056730 | 0;0;0;0;0 | |
161 | resnetv23_stage3_batchnorm25_fwd | BatchNorm | [1,256,14,14] | 87.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.90 | 0.00 | 39.68 | true | 0.409447;0.415177;0.406587;0.404506;0.410782 | 317440;317440;317440;317440;317440 | |
162 | resnetv23_stage3_activation25 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.80 | 0.00 | 6.27 | true | 0.353055;0.363092;0.350433;0.362467;0.358229 | 50176;50176;50176;50176;50176 | |
163 | resnetv23_stage3_conv26_fwd | Convolution | [1,256,14,14] | 13490.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.33 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1355.65 | true | 0.124779;0.124726;0.124782;0.124743;0.124688 | 140083200;140083200;140083200;140083200;140083200 | |
163 | resnetv23_stage3_conv26_fwd | Convolution | [1,256,14,14] | 13490.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 80.67 | 3801088 | 0.00 | 0.00 | 49.70 | 0.00 | 47.12 | true | 0.499229;0.493829;0.496792;0.502025;0.494436 | 3801088;3801088;3801088;3801088;3801088 | |
164 | resnetv23_stage3_batchnorm26_fwd | BatchNorm | [1,256,14,14] | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.80 | 0.00 | 35.27 | true | 0.419050;0.416992;0.418324;0.415604;0.419192 | 317440;317440;317440;317440;317440 | |
165 | resnetv23_stage3_activation26 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.363292;0.363992;0.364208;0.365067;0.366012 | 50176;50176;50176;50176;50176 | |
166 | resnetv23_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124768;0.124768;0.124700;0.124770;0.124763 | 134742016;134742016;134742016;134742016;134742016 | |
166 | resnetv23_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055455;0.054780;0.055065;0.055825;0.055458 | 0;0;0;0;0 | |
167 | resnetv23_stage3__plus8 | elemwise_add | [1,1024,14,14] | 75.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.60 | 0.00 | 15.44 | true | 0.783138;0.789470;0.781309;0.792370;0.784879 | 200704;200704;200704;200704;200704 | |
168 | resnetv23_stage3_batchnorm27_fwd | BatchNorm | [1,1024,14,14] | 47.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.50 | 0.00 | 79.36 | true | 0.755088;0.754426;0.756108;0.753174;0.756104 | 1269760;1269760;1269760;1269760;1269760 | |
169 | resnetv23_stage3_activation27 | Activation | [1,1024,14,14] | 26.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.421809;0.419446;0.420027;0.422021;0.422705 | 200704;200704;200704;200704;200704 | |
170 | resnetv23_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5875 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 501.93 | true | 0.124683;0.124576;0.124379;0.124379;0.124384 | 134348800;134348800;134348800;134348800;134348800 | |
170 | resnetv23_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5875 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055798;0.056052;0.056114;0.056544;0.056150 | 0;0;0;0;0 | |
171 | resnetv23_stage3_batchnorm28_fwd | BatchNorm | [1,256,14,14] | 87 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 38.09 | true | 0.414379;0.413646;0.413016;0.411119;0.410503 | 317440;317440;317440;317440;317440 | |
172 | resnetv23_stage3_activation28 | Activation | [1,256,14,14] | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.60 | 0.00 | 6.27 | true | 0.352452;0.356302;0.353399;0.357064;0.359872 | 50176;50176;50176;50176;50176 | |
173 | resnetv23_stage3_conv29_fwd | Convolution | [1,256,14,14] | 13507.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124784;0.124706;0.124731;0.124787;0.124789 | 140083200;140083200;140083200;140083200;140083200 | |
173 | resnetv23_stage3_conv29_fwd | Convolution | [1,256,14,14] | 13507.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.67 | 3801088 | 0.00 | 0.00 | 50.00 | 0.00 | 46.54 | true | 0.498298;0.501440;0.500591;0.499674;0.499981 | 3801088;3801088;3801088;3801088;3801088 | |
174 | resnetv23_stage3_batchnorm29_fwd | BatchNorm | [1,256,14,14] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.60 | 0.00 | 39.68 | true | 0.417309;0.413170;0.419289;0.416258;0.414061 | 317440;317440;317440;317440;317440 | |
175 | resnetv23_stage3_activation29 | Activation | [1,256,14,14] | 17.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.365066;0.364067;0.363207;0.362632;0.364812 | 50176;50176;50176;50176;50176 | |
176 | resnetv23_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5693.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1433.43 | true | 0.124759;0.124767;0.124763;0.124762;0.124769 | 134742016;134742016;134742016;134742016;134742016 | |
176 | resnetv23_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5693.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055556;0.055239;0.055083;0.054544;0.056026 | 0;0;0;0;0 | |
177 | resnetv23_stage3__plus9 | elemwise_add | [1,1024,14,14] | 76 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.70 | 0.00 | 15.44 | true | 0.791505;0.786713;0.802198;0.783903;0.781206 | 200704;200704;200704;200704;200704 | |
178 | resnetv23_stage3_batchnorm30_fwd | BatchNorm | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.10 | 0.00 | 79.36 | true | 0.748820;0.759215;0.753803;0.748492;0.749317 | 1269760;1269760;1269760;1269760;1269760 | |
179 | resnetv23_stage3_activation30 | Activation | [1,1024,14,14] | 26.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.421605;0.420924;0.423041;0.422695;0.422531 | 200704;200704;200704;200704;200704 | |
180 | resnetv23_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 502.55 | true | 0.124575;0.124379;0.124690;0.124377;0.124382 | 134348800;134348800;134348800;134348800;134348800 | |
180 | resnetv23_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056079;0.056263;0.056424;0.055997;0.056862 | 0;0;0;0;0 | |
181 | resnetv23_stage3_batchnorm31_fwd | BatchNorm | [1,256,14,14] | 86.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.10 | 0.00 | 38.09 | true | 0.407860;0.412351;0.411536;0.411540;0.411301 | 317440;317440;317440;317440;317440 | |
182 | resnetv23_stage3_activation31 | Activation | [1,256,14,14] | 17.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.80 | 0.00 | 6.27 | true | 0.351148;0.362581;0.352130;0.362601;0.358830 | 50176;50176;50176;50176;50176 | |
183 | resnetv23_stage3_conv32_fwd | Convolution | [1,256,14,14] | 13475.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124787;0.124788;0.124788;0.124733;0.124785 | 140083200;140083200;140083200;140083200;140083200 | |
183 | resnetv23_stage3_conv32_fwd | Convolution | [1,256,14,14] | 13475.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.00 | 3801088 | 0.00 | 0.00 | 49.80 | 0.00 | 46.93 | true | 0.497284;0.499870;0.498189;0.501124;0.494963 | 3801088;3801088;3801088;3801088;3801088 | |
184 | resnetv23_stage3_batchnorm32_fwd | BatchNorm | [1,256,14,14] | 88.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.80 | 0.00 | 35.27 | true | 0.418586;0.416720;0.419607;0.417572;0.416831 | 317440;317440;317440;317440;317440 | |
185 | resnetv23_stage3_activation32 | Activation | [1,256,14,14] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.364084;0.365242;0.365635;0.364654;0.364664 | 50176;50176;50176;50176;50176 | |
186 | resnetv23_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5666.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 95.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1418.34 | true | 0.124763;0.124765;0.124760;0.124764;0.124769 | 134742016;134742016;134742016;134742016;134742016 | |
186 | resnetv23_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5666.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055484;0.055171;0.055507;0.055386;0.055325 | 0;0;0;0;0 | |
187 | resnetv23_stage3__plus10 | elemwise_add | [1,1024,14,14] | 74 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.60 | 0.00 | 15.44 | true | 0.783067;0.784354;0.790139;0.784455;0.792978 | 200704;200704;200704;200704;200704 | |
188 | resnetv23_stage3_batchnorm33_fwd | BatchNorm | [1,1024,14,14] | 53 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.33 | 1269760 | 0.00 | 0.00 | 73.90 | 0.00 | 82.81 | true | 0.735081;0.738546;0.740796;0.736946;0.743698 | 1269760;1269760;1269760;1269760;1269760 | |
189 | resnetv23_stage3_activation33 | Activation | [1,1024,14,14] | 26.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.422906;0.422857;0.421996;0.422591;0.421608 | 200704;200704;200704;200704;200704 | |
190 | resnetv23_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5858 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 268.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 500.06 | true | 0.124384;0.124385;0.124376;0.124682;0.124385 | 134348800;134348800;134348800;134348800;134348800 | |
190 | resnetv23_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5858 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056864;0.056179;0.056344;0.055765;0.055707 | 0;0;0;0;0 | |
191 | resnetv23_stage3_batchnorm34_fwd | BatchNorm | [1,256,14,14] | 85.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 38.09 | true | 0.411336;0.413591;0.413460;0.412858;0.410144 | 317440;317440;317440;317440;317440 | |
192 | resnetv23_stage3_activation34 | Activation | [1,256,14,14] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.80 | 0.00 | 6.27 | true | 0.352949;0.362811;0.354399;0.362461;0.357619 | 50176;50176;50176;50176;50176 | |
193 | resnetv23_stage3_conv35_fwd | Convolution | [1,256,14,14] | 13483.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124786;0.124722;0.124786;0.124788;0.124786 | 140083200;140083200;140083200;140083200;140083200 | |
193 | resnetv23_stage3_conv35_fwd | Convolution | [1,256,14,14] | 13483.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.67 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 45.98 | true | 0.491756;0.502017;0.499123;0.501347;0.496733 | 3801088;3801088;3801088;3801088;3801088 | |
194 | resnetv23_stage3_batchnorm35_fwd | BatchNorm | [1,256,14,14] | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.40 | 0.00 | 39.68 | true | 0.414408;0.411266;0.418861;0.414371;0.412956 | 317440;317440;317440;317440;317440 | |
195 | resnetv23_stage3_activation35 | Activation | [1,256,14,14] | 18.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.364935;0.365129;0.366407;0.364701;0.363682 | 50176;50176;50176;50176;50176 | |
196 | resnetv23_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5693 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 95.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1418.34 | true | 0.124706;0.124753;0.124758;0.124762;0.124757 | 134742016;134742016;134742016;134742016;134742016 | |
196 | resnetv23_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5693 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055748;0.055546;0.054266;0.054795;0.056127 | 0;0;0;0;0 | |
197 | resnetv23_stage3__plus11 | elemwise_add | [1,1024,14,14] | 77 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.40 | 0.00 | 15.44 | true | 0.786371;0.781537;0.800624;0.784812;0.778161 | 200704;200704;200704;200704;200704 | |
198 | resnetv23_stage3_batchnorm36_fwd | BatchNorm | [1,1024,14,14] | 50.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 74.30 | 0.00 | 84.65 | true | 0.737248;0.742058;0.742804;0.749235;0.745265 | 1269760;1269760;1269760;1269760;1269760 | |
199 | resnetv23_stage3_activation36 | Activation | [1,1024,14,14] | 26.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.421207;0.422604;0.422938;0.423891;0.421618 | 200704;200704;200704;200704;200704 | |
200 | resnetv23_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5859 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 499.44 | true | 0.124691;0.124666;0.124383;0.124374;0.124388 | 134348800;134348800;134348800;134348800;134348800 | |
200 | resnetv23_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5859 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056084;0.056533;0.056097;0.055683;0.055505 | 0;0;0;0;0 | |
201 | resnetv23_stage3_batchnorm37_fwd | BatchNorm | [1,256,14,14] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 35.27 | true | 0.411655;0.411960;0.412058;0.410290;0.412485 | 317440;317440;317440;317440;317440 | |
202 | resnetv23_stage3_activation37 | Activation | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.90 | 0.00 | 6.27 | true | 0.358187;0.361590;0.353142;0.360970;0.357839 | 50176;50176;50176;50176;50176 | |
203 | resnetv23_stage3_conv38_fwd | Convolution | [1,256,14,14] | 13491.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124782;0.124771;0.124785;0.124776;0.124649 | 140083200;140083200;140083200;140083200;140083200 | |
203 | resnetv23_stage3_conv38_fwd | Convolution | [1,256,14,14] | 13491.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.00 | 3801088 | 0.00 | 0.00 | 50.10 | 0.00 | 46.93 | true | 0.500194;0.500849;0.501897;0.497538;0.501996 | 3801088;3801088;3801088;3801088;3801088 | |
204 | resnetv23_stage3_batchnorm38_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 38.09 | true | 0.413442;0.410317;0.411930;0.418486;0.411746 | 317440;317440;317440;317440;317440 | |
205 | resnetv23_stage3_activation38 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.364294;0.367945;0.363114;0.363920;0.364030 | 50176;50176;50176;50176;50176 | |
206 | resnetv23_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5676 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124762;0.124767;0.124761;0.124759;0.124761 | 134742016;134742016;134742016;134742016;134742016 | |
206 | resnetv23_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5676 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055305;0.055143;0.055229;0.055453;0.055293 | 0;0;0;0;0 | |
207 | resnetv23_stage3__plus12 | elemwise_add | [1,1024,14,14] | 73.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.90 | 0.00 | 15.44 | true | 0.788537;0.782804;0.796514;0.785271;0.794489 | 200704;200704;200704;200704;200704 | |
208 | resnetv23_stage3_batchnorm39_fwd | BatchNorm | [1,1024,14,14] | 53.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.67 | 1269760 | 0.00 | 0.00 | 74.20 | 0.00 | 81.05 | true | 0.741282;0.745484;0.740822;0.744978;0.738809 | 1269760;1269760;1269760;1269760;1269760 | |
209 | resnetv23_stage3_activation39 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.425012;0.421198;0.422466;0.419982;0.423644 | 200704;200704;200704;200704;200704 | |
210 | resnetv23_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5875.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 270.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 496.98 | true | 0.124689;0.124371;0.124373;0.124370;0.124382 | 134348800;134348800;134348800;134348800;134348800 | |
210 | resnetv23_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5875.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056260;0.056140;0.055530;0.055966;0.056386 | 0;0;0;0;0 | |
211 | resnetv23_stage3_batchnorm40_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.80 | 0.00 | 39.68 | true | 0.408365;0.408605;0.408311;0.410025;0.404595 | 317440;317440;317440;317440;317440 | |
212 | resnetv23_stage3_activation40 | Activation | [1,256,14,14] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.60 | 0.00 | 6.27 | true | 0.354707;0.356154;0.355462;0.356837;0.361411 | 50176;50176;50176;50176;50176 | |
213 | resnetv23_stage3_conv41_fwd | Convolution | [1,256,14,14] | 13470 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124787;0.124672;0.124784;0.124727;0.124769 | 140083200;140083200;140083200;140083200;140083200 | |
213 | resnetv23_stage3_conv41_fwd | Convolution | [1,256,14,14] | 13470 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.33 | 3801088 | 0.00 | 0.00 | 50.20 | 0.00 | 46.17 | true | 0.502271;0.493960;0.501210;0.503913;0.501945 | 3801088;3801088;3801088;3801088;3801088 | |
214 | resnetv23_stage3_batchnorm41_fwd | BatchNorm | [1,256,14,14] | 82.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.40 | 0.00 | 38.09 | true | 0.416578;0.418106;0.412033;0.411482;0.413699 | 317440;317440;317440;317440;317440 | |
215 | resnetv23_stage3_activation41 | Activation | [1,256,14,14] | 17.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.364453;0.364466;0.366936;0.364265;0.363466 | 50176;50176;50176;50176;50176 | |
216 | resnetv23_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5689.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124761;0.124762;0.124759;0.124762;0.124671 | 134742016;134742016;134742016;134742016;134742016 | |
216 | resnetv23_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5689.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055210;0.054697;0.055093;0.055258;0.055086 | 0;0;0;0;0 | |
217 | resnetv23_stage3__plus13 | elemwise_add | [1,1024,14,14] | 75.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.60 | 0.00 | 15.44 | true | 0.805491;0.776576;0.779625;0.784326;0.793171 | 200704;200704;200704;200704;200704 | |
218 | resnetv23_stage3_batchnorm42_fwd | BatchNorm | [1,1024,14,14] | 50.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.20 | 0.00 | 79.36 | true | 0.748604;0.747637;0.758166;0.753972;0.754139 | 1269760;1269760;1269760;1269760;1269760 | |
219 | resnetv23_stage3_activation42 | Activation | [1,1024,14,14] | 26.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.424523;0.420556;0.422468;0.421690;0.423052 | 200704;200704;200704;200704;200704 | |
220 | resnetv23_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5865.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 271.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 495.75 | true | 0.124389;0.124370;0.124378;0.124679;0.124369 | 134348800;134348800;134348800;134348800;134348800 | |
220 | resnetv23_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5865.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055904;0.056033;0.056029;0.056346;0.056604 | 0;0;0;0;0 | |
221 | resnetv23_stage3_batchnorm43_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 317440 | 0.00 | 0.00 | 41.10 | 0.00 | 36.63 | true | 0.410171;0.411115;0.408312;0.413032;0.410818 | 317440;317440;317440;317440;317440 | |
222 | resnetv23_stage3_activation43 | Activation | [1,256,14,14] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.90 | 0.00 | 6.27 | true | 0.351105;0.362526;0.358304;0.363948;0.356830 | 50176;50176;50176;50176;50176 | |
223 | resnetv23_stage3_conv44_fwd | Convolution | [1,256,14,14] | 13485.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124788;0.124732;0.124789;0.124669;0.124727 | 140083200;140083200;140083200;140083200;140083200 | |
223 | resnetv23_stage3_conv44_fwd | Convolution | [1,256,14,14] | 13485.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.67 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 46.54 | true | 0.497475;0.495388;0.498919;0.499202;0.500060 | 3801088;3801088;3801088;3801088;3801088 | |
224 | resnetv23_stage3_batchnorm44_fwd | BatchNorm | [1,256,14,14] | 83 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.70 | 0.00 | 35.27 | true | 0.419946;0.414345;0.416407;0.418484;0.417513 | 317440;317440;317440;317440;317440 | |
225 | resnetv23_stage3_activation44 | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.30 | 0.00 | 5.58 | true | 0.362354;0.362785;0.363365;0.366113;0.364264 | 50176;50176;50176;50176;50176 | |
226 | resnetv23_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5670.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124756;0.124765;0.124761;0.124756;0.124757 | 134742016;134742016;134742016;134742016;134742016 | |
226 | resnetv23_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5670.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055453;0.055215;0.055149;0.055504;0.054970 | 0;0;0;0;0 | |
227 | resnetv23_stage3__plus14 | elemwise_add | [1,1024,14,14] | 73.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.80 | 0.00 | 15.44 | true | 0.786595;0.790466;0.787409;0.786741;0.801357 | 200704;200704;200704;200704;200704 | |
228 | resnetv23_stage3_batchnorm45_fwd | BatchNorm | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 74.30 | 0.00 | 84.65 | true | 0.743412;0.744576;0.741875;0.738258;0.745900 | 1269760;1269760;1269760;1269760;1269760 | |
229 | resnetv23_stage3_activation45 | Activation | [1,1024,14,14] | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.420944;0.422405;0.422342;0.419595;0.422351 | 200704;200704;200704;200704;200704 | |
230 | resnetv23_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 268.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 500.68 | true | 0.124684;0.124268;0.124382;0.124373;0.124530 | 134348800;134348800;134348800;134348800;134348800 | |
230 | resnetv23_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5865 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056542;0.056516;0.056181;0.056625;0.056399 | 0;0;0;0;0 | |
231 | resnetv23_stage3_batchnorm46_fwd | BatchNorm | [1,256,14,14] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 39.68 | true | 0.413022;0.412643;0.412330;0.411123;0.412005 | 317440;317440;317440;317440;317440 | |
232 | resnetv23_stage3_activation46 | Activation | [1,256,14,14] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.30 | 0.00 | 6.27 | true | 0.351867;0.357275;0.351114;0.357430;0.350807 | 50176;50176;50176;50176;50176 | |
233 | resnetv23_stage3_conv47_fwd | Convolution | [1,256,14,14] | 13464.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124788;0.124728;0.124782;0.124623;0.124787 | 140083200;140083200;140083200;140083200;140083200 | |
233 | resnetv23_stage3_conv47_fwd | Convolution | [1,256,14,14] | 13464.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.00 | 3801088 | 0.00 | 0.00 | 50.00 | 0.00 | 46.35 | true | 0.503365;0.498574;0.500945;0.498700;0.499842 | 3801088;3801088;3801088;3801088;3801088 | |
234 | resnetv23_stage3_batchnorm47_fwd | BatchNorm | [1,256,14,14] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.60 | 0.00 | 35.27 | true | 0.418645;0.416284;0.416912;0.416249;0.415349 | 317440;317440;317440;317440;317440 | |
235 | resnetv23_stage3_activation47 | Activation | [1,256,14,14] | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.365120;0.365387;0.363901;0.364487;0.363402 | 50176;50176;50176;50176;50176 | |
236 | resnetv23_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5721 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124764;0.124758;0.124766;0.124691;0.124764 | 134742016;134742016;134742016;134742016;134742016 | |
236 | resnetv23_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5721 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055358;0.055684;0.055301;0.055264;0.055077 | 0;0;0;0;0 | |
237 | resnetv23_stage3__plus15 | elemwise_add | [1,1024,14,14] | 78.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 79.30 | 0.00 | 15.44 | true | 0.785072;0.778376;0.808816;0.802798;0.790124 | 200704;200704;200704;200704;200704 | |
238 | resnetv23_stage3_batchnorm48_fwd | BatchNorm | [1,1024,14,14] | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.80 | 0.00 | 79.36 | true | 0.760578;0.757853;0.751585;0.755500;0.761435 | 1269760;1269760;1269760;1269760;1269760 | |
239 | resnetv23_stage3_activation48 | Activation | [1,1024,14,14] | 26.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.421892;0.422906;0.422803;0.419801;0.423689 | 200704;200704;200704;200704;200704 | |
240 | resnetv23_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5867.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 501.93 | true | 0.124388;0.124371;0.124384;0.124376;0.124489 | 134348800;134348800;134348800;134348800;134348800 | |
240 | resnetv23_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5867.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055830;0.056234;0.056518;0.056147;0.055933 | 0;0;0;0;0 | |
241 | resnetv23_stage3_batchnorm49_fwd | BatchNorm | [1,256,14,14] | 82.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.90 | 0.00 | 39.68 | true | 0.411735;0.405817;0.401987;0.411974;0.410747 | 317440;317440;317440;317440;317440 | |
242 | resnetv23_stage3_activation49 | Activation | [1,256,14,14] | 16.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.90 | 0.00 | 6.27 | true | 0.353227;0.361599;0.353283;0.362084;0.361105 | 50176;50176;50176;50176;50176 | |
243 | resnetv23_stage3_conv50_fwd | Convolution | [1,256,14,14] | 13484 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.33 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1355.65 | true | 0.124787;0.124785;0.124788;0.124731;0.124788 | 140083200;140083200;140083200;140083200;140083200 | |
243 | resnetv23_stage3_conv50_fwd | Convolution | [1,256,14,14] | 13484 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.33 | 3801088 | 0.00 | 0.00 | 50.10 | 0.00 | 46.17 | true | 0.499612;0.501168;0.503980;0.498893;0.502758 | 3801088;3801088;3801088;3801088;3801088 | |
244 | resnetv23_stage3_batchnorm50_fwd | BatchNorm | [1,256,14,14] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.40 | 0.00 | 39.68 | true | 0.413155;0.417803;0.413635;0.414177;0.415309 | 317440;317440;317440;317440;317440 | |
245 | resnetv23_stage3_activation50 | Activation | [1,256,14,14] | 18.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.363833;0.364300;0.363366;0.365516;0.363376 | 50176;50176;50176;50176;50176 | |
246 | resnetv23_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5662.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1433.43 | true | 0.124762;0.124768;0.124706;0.124764;0.124760 | 134742016;134742016;134742016;134742016;134742016 | |
246 | resnetv23_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5662.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055452;0.055224;0.055511;0.055227;0.054190 | 0;0;0;0;0 | |
247 | resnetv23_stage3__plus16 | elemwise_add | [1,1024,14,14] | 73.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.70 | 0.00 | 15.44 | true | 0.784257;0.784973;0.784964;0.791610;0.789863 | 200704;200704;200704;200704;200704 | |
248 | resnetv23_stage3_batchnorm51_fwd | BatchNorm | [1,1024,14,14] | 49.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.70 | 0.00 | 79.36 | true | 0.758461;0.757216;0.753916;0.756023;0.759940 | 1269760;1269760;1269760;1269760;1269760 | |
249 | resnetv23_stage3_activation51 | Activation | [1,1024,14,14] | 26.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.420458;0.424502;0.419995;0.422523;0.420542 | 200704;200704;200704;200704;200704 | |
250 | resnetv23_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5864.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.33 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 502.55 | true | 0.124683;0.124377;0.124382;0.124577;0.124375 | 134348800;134348800;134348800;134348800;134348800 | |
250 | resnetv23_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5864.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056012;0.056571;0.055846;0.056198;0.055834 | 0;0;0;0;0 | |
251 | resnetv23_stage3_batchnorm52_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.50 | 0.00 | 38.09 | true | 0.415948;0.415468;0.415437;0.413122;0.413073 | 317440;317440;317440;317440;317440 | |
252 | resnetv23_stage3_activation52 | Activation | [1,256,14,14] | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.50 | 0.00 | 6.27 | true | 0.353581;0.357188;0.351537;0.356456;0.355789 | 50176;50176;50176;50176;50176 | |
253 | resnetv23_stage3_conv53_fwd | Convolution | [1,256,14,14] | 13470.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124784;0.124722;0.124729;0.124789;0.124727 | 140083200;140083200;140083200;140083200;140083200 | |
253 | resnetv23_stage3_conv53_fwd | Convolution | [1,256,14,14] | 13470.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.00 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 46.93 | true | 0.501575;0.492478;0.498117;0.498747;0.499385 | 3801088;3801088;3801088;3801088;3801088 | |
254 | resnetv23_stage3_batchnorm53_fwd | BatchNorm | [1,256,14,14] | 88.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.70 | 0.00 | 39.68 | true | 0.415817;0.418023;0.417342;0.417050;0.413036 | 317440;317440;317440;317440;317440 | |
255 | resnetv23_stage3_activation53 | Activation | [1,256,14,14] | 17.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.364373;0.367363;0.363611;0.363316;0.365657 | 50176;50176;50176;50176;50176 | |
256 | resnetv23_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5709.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124758;0.124760;0.124761;0.124762;0.124760 | 134742016;134742016;134742016;134742016;134742016 | |
256 | resnetv23_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5709.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055541;0.055267;0.056118;0.054822;0.054775 | 0;0;0;0;0 | |
257 | resnetv23_stage3__plus17 | elemwise_add | [1,1024,14,14] | 78.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.00 | 0.00 | 15.44 | true | 0.781457;0.772806;0.789455;0.782666;0.776787 | 200704;200704;200704;200704;200704 | |
258 | resnetv23_stage3_batchnorm54_fwd | BatchNorm | [1,1024,14,14] | 48.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 1269760 | 0.00 | 0.00 | 74.20 | 0.00 | 84.65 | true | 0.745253;0.741007;0.743970;0.738057;0.741692 | 1269760;1269760;1269760;1269760;1269760 | |
259 | resnetv23_stage3_activation54 | Activation | [1,1024,14,14] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.422489;0.422203;0.423714;0.420890;0.420113 | 200704;200704;200704;200704;200704 | |
260 | resnetv23_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 5854.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 501.93 | true | 0.124385;0.124378;0.124371;0.124382;0.124579 | 134348800;134348800;134348800;134348800;134348800 | |
260 | resnetv23_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 5854.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055605;0.056470;0.056495;0.056114;0.056561 | 0;0;0;0;0 | |
261 | resnetv23_stage3_batchnorm55_fwd | BatchNorm | [1,256,14,14] | 95.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.40 | 0.00 | 35.27 | true | 0.412344;0.414914;0.415093;0.415190;0.412543 | 317440;317440;317440;317440;317440 | |
262 | resnetv23_stage3_activation55 | Activation | [1,256,14,14] | 16.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.60 | 0.00 | 6.27 | true | 0.353525;0.361199;0.351346;0.355754;0.358965 | 50176;50176;50176;50176;50176 | |
263 | resnetv23_stage3_conv56_fwd | Convolution | [1,256,14,14] | 13582.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124787;0.124699;0.124785;0.124705;0.124760 | 140083200;140083200;140083200;140083200;140083200 | |
263 | resnetv23_stage3_conv56_fwd | Convolution | [1,256,14,14] | 13582.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.00 | 3801088 | 0.00 | 0.00 | 49.70 | 0.00 | 46.93 | true | 0.497623;0.496037;0.497829;0.493294;0.499566 | 3801088;3801088;3801088;3801088;3801088 | |
264 | resnetv23_stage3_batchnorm56_fwd | BatchNorm | [1,256,14,14] | 90.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 42.00 | 0.00 | 35.27 | true | 0.418657;0.420321;0.416967;0.420351;0.420009 | 317440;317440;317440;317440;317440 | |
265 | resnetv23_stage3_activation56 | Activation | [1,256,14,14] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.363189;0.362428;0.364215;0.363200;0.365502 | 50176;50176;50176;50176;50176 | |
266 | resnetv23_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5658 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1433.43 | true | 0.124761;0.124763;0.124768;0.124773;0.124761 | 134742016;134742016;134742016;134742016;134742016 | |
266 | resnetv23_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5658 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055344;0.054916;0.055122;0.055263;0.056318 | 0;0;0;0;0 | |
267 | resnetv23_stage3__plus18 | elemwise_add | [1,1024,14,14] | 79.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.20 | 0.00 | 15.44 | true | 0.778732;0.777001;0.780448;0.786558;0.801911 | 200704;200704;200704;200704;200704 | |
268 | resnetv23_stage3_batchnorm57_fwd | BatchNorm | [1,1024,14,14] | 53.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.67 | 1269760 | 0.00 | 0.00 | 74.70 | 0.00 | 81.05 | true | 0.752709;0.747735;0.738209;0.744915;0.747960 | 1269760;1269760;1269760;1269760;1269760 | |
269 | resnetv23_stage3_activation57 | Activation | [1,1024,14,14] | 27.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.422278;0.421652;0.423031;0.420328;0.420765 | 200704;200704;200704;200704;200704 | |
270 | resnetv23_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5872.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 267.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 501.93 | true | 0.124540;0.124374;0.124379;0.124370;0.124314 | 134348800;134348800;134348800;134348800;134348800 | |
270 | resnetv23_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5872.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056289;0.056097;0.055829;0.056295;0.056272 | 0;0;0;0;0 | |
271 | resnetv23_stage3_batchnorm58_fwd | BatchNorm | [1,256,14,14] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.20 | 0.00 | 39.68 | true | 0.414591;0.412872;0.413664;0.410146;0.409295 | 317440;317440;317440;317440;317440 | |
272 | resnetv23_stage3_activation58 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.50 | 0.00 | 6.27 | true | 0.353678;0.353846;0.353745;0.358593;0.357136 | 50176;50176;50176;50176;50176 | |
273 | resnetv23_stage3_conv59_fwd | Convolution | [1,256,14,14] | 13466.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 102.67 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1364.44 | true | 0.124787;0.124680;0.124786;0.124785;0.124733 | 140083200;140083200;140083200;140083200;140083200 | |
273 | resnetv23_stage3_conv59_fwd | Convolution | [1,256,14,14] | 13466.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 82.33 | 3801088 | 0.00 | 0.00 | 49.90 | 0.00 | 46.17 | true | 0.500970;0.501661;0.495776;0.494516;0.500149 | 3801088;3801088;3801088;3801088;3801088 | |
274 | resnetv23_stage3_batchnorm59_fwd | BatchNorm | [1,256,14,14] | 92 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 41.50 | 0.00 | 38.09 | true | 0.414972;0.420182;0.412031;0.417043;0.414119 | 317440;317440;317440;317440;317440 | |
275 | resnetv23_stage3_activation59 | Activation | [1,256,14,14] | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.363343;0.364396;0.364901;0.365069;0.363957 | 50176;50176;50176;50176;50176 | |
276 | resnetv23_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5700 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124703;0.124731;0.124768;0.124755;0.124705 | 134742016;134742016;134742016;134742016;134742016 | |
276 | resnetv23_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5700 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.33 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055011;0.055297;0.055131;0.055339;0.055753 | 0;0;0;0;0 | |
277 | resnetv23_stage3__plus19 | elemwise_add | [1,1024,14,14] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.90 | 0.00 | 15.44 | true | 0.802232;0.778040;0.788440;0.802003;0.776987 | 200704;200704;200704;200704;200704 | |
278 | resnetv23_stage3_batchnorm60_fwd | BatchNorm | [1,1024,14,14] | 51.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.67 | 1269760 | 0.00 | 0.00 | 74.20 | 0.00 | 81.05 | true | 0.743271;0.737614;0.747082;0.744632;0.738231 | 1269760;1269760;1269760;1269760;1269760 | |
279 | resnetv23_stage3_activation60 | Activation | [1,1024,14,14] | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.420452;0.421350;0.422782;0.422716;0.421623 | 200704;200704;200704;200704;200704 | |
280 | resnetv23_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 5829.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 499.44 | true | 0.124388;0.124377;0.124687;0.124376;0.124379 | 134348800;134348800;134348800;134348800;134348800 | |
280 | resnetv23_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 5829.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056138;0.055779;0.055997;0.056082;0.055924 | 0;0;0;0;0 | |
281 | resnetv23_stage3_batchnorm61_fwd | BatchNorm | [1,256,14,14] | 92.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.33 | 317440 | 0.00 | 0.00 | 40.70 | 0.00 | 38.09 | true | 0.407778;0.412611;0.406635;0.406881;0.407244 | 317440;317440;317440;317440;317440 | |
282 | resnetv23_stage3_activation61 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.80 | 0.00 | 6.27 | true | 0.357241;0.360871;0.351249;0.356962;0.359560 | 50176;50176;50176;50176;50176 | |
283 | resnetv23_stage3_conv62_fwd | Convolution | [1,256,14,14] | 13407.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124730;0.124662;0.124788;0.124786;0.124727 | 140083200;140083200;140083200;140083200;140083200 | |
283 | resnetv23_stage3_conv62_fwd | Convolution | [1,256,14,14] | 13407.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 80.67 | 3801088 | 0.00 | 0.00 | 49.70 | 0.00 | 47.12 | true | 0.496080;0.495800;0.497878;0.500882;0.491116 | 3801088;3801088;3801088;3801088;3801088 | |
284 | resnetv23_stage3_batchnorm62_fwd | BatchNorm | [1,256,14,14] | 95.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 39.68 | true | 0.412708;0.412417;0.414623;0.414188;0.412520 | 317440;317440;317440;317440;317440 | |
285 | resnetv23_stage3_activation62 | Activation | [1,256,14,14] | 15.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.50 | 0.00 | 5.58 | true | 0.363021;0.364646;0.362271;0.367100;0.365892 | 50176;50176;50176;50176;50176 | |
286 | resnetv23_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5675 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.33 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1428.37 | true | 0.124765;0.124768;0.124764;0.124764;0.124762 | 134742016;134742016;134742016;134742016;134742016 | |
286 | resnetv23_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5675 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055193;0.055484;0.055174;0.055644;0.055685 | 0;0;0;0;0 | |
287 | resnetv23_stage3__plus20 | elemwise_add | [1,1024,14,14] | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.80 | 0.00 | 15.44 | true | 0.787132;0.784969;0.782338;0.790416;0.794426 | 200704;200704;200704;200704;200704 | |
288 | resnetv23_stage3_batchnorm63_fwd | BatchNorm | [1,1024,14,14] | 50.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.67 | 1269760 | 0.00 | 0.00 | 74.60 | 0.00 | 81.05 | true | 0.745007;0.746711;0.745438;0.746941;0.743333 | 1269760;1269760;1269760;1269760;1269760 | |
289 | resnetv23_stage3_activation63 | Activation | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.30 | 0.00 | 18.25 | true | 0.421871;0.423218;0.423595;0.423985;0.421000 | 200704;200704;200704;200704;200704 | |
290 | resnetv23_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5842 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.00 | 134348800 | 0.00 | 0.00 | 12.50 | 0.00 | 499.44 | true | 0.124549;0.124583;0.124683;0.124383;0.124572 | 134348800;134348800;134348800;134348800;134348800 | |
290 | resnetv23_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5842 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.056214;0.056263;0.056495;0.056305;0.056324 | 0;0;0;0;0 | |
291 | resnetv23_stage3_batchnorm64_fwd | BatchNorm | [1,256,14,14] | 95.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.70 | 0.00 | 39.68 | true | 0.409712;0.407154;0.402366;0.416443;0.404289 | 317440;317440;317440;317440;317440 | |
292 | resnetv23_stage3_activation64 | Activation | [1,256,14,14] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.50 | 0.00 | 6.27 | true | 0.352181;0.357153;0.354650;0.353591;0.360379 | 50176;50176;50176;50176;50176 | |
293 | resnetv23_stage3_conv65_fwd | Convolution | [1,256,14,14] | 13474.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124783;0.124722;0.124780;0.124790;0.124725 | 140083200;140083200;140083200;140083200;140083200 | |
293 | resnetv23_stage3_conv65_fwd | Convolution | [1,256,14,14] | 13474.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 81.33 | 3801088 | 0.00 | 0.00 | 50.00 | 0.00 | 46.73 | true | 0.505389;0.493039;0.505442;0.497003;0.496546 | 3801088;3801088;3801088;3801088;3801088 | |
294 | resnetv23_stage3_batchnorm65_fwd | BatchNorm | [1,256,14,14] | 90 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 317440 | 0.00 | 0.00 | 41.90 | 0.00 | 35.27 | true | 0.418594;0.419950;0.420494;0.418023;0.417404 | 317440;317440;317440;317440;317440 | |
295 | resnetv23_stage3_activation65 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.366061;0.362201;0.364400;0.363159;0.364161 | 50176;50176;50176;50176;50176 | |
296 | resnetv23_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5688 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 95.00 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1418.34 | true | 0.124757;0.124767;0.124759;0.124766;0.124763 | 134742016;134742016;134742016;134742016;134742016 | |
296 | resnetv23_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5688 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.055938;0.055468;0.055258;0.055159;0.055468 | 0;0;0;0;0 | |
297 | resnetv23_stage3__plus21 | elemwise_add | [1,1024,14,14] | 81.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 78.10 | 0.00 | 15.44 | true | 0.783617;0.785846;0.780495;0.780175;0.769802 | 200704;200704;200704;200704;200704 | |
298 | resnetv23_stage3_batchnorm66_fwd | BatchNorm | [1,1024,14,14] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.00 | 0.00 | 79.36 | true | 0.748723;0.751044;0.743635;0.753139;0.751241 | 1269760;1269760;1269760;1269760;1269760 | |
299 | resnetv23_stage3_activation66 | Activation | [1,1024,14,14] | 32.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.20 | 0.00 | 18.25 | true | 0.420444;0.421690;0.422210;0.420680;0.423588 | 200704;200704;200704;200704;200704 | |
300 | resnetv23_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5866.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 269.67 | 134348800 | 0.00 | 0.00 | 12.40 | 0.00 | 498.20 | true | 0.124385;0.124376;0.124384;0.124374;0.124376 | 134348800;134348800;134348800;134348800;134348800 | |
300 | resnetv23_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5866.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055761;0.056376;0.056141;0.055805;0.056289 | 0;0;0;0;0 | |
301 | resnetv23_stage3_batchnorm67_fwd | BatchNorm | [1,256,14,14] | 91.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 40.70 | 0.00 | 39.68 | true | 0.402094;0.412038;0.403680;0.415910;0.404923 | 317440;317440;317440;317440;317440 | |
302 | resnetv23_stage3_activation67 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 50176 | 0.00 | 0.00 | 35.90 | 0.00 | 6.27 | true | 0.351919;0.362716;0.357259;0.361121;0.358191 | 50176;50176;50176;50176;50176 | |
303 | resnetv23_stage3_conv68_fwd | Convolution | [1,256,14,14] | 13477.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 103.00 | 140083200 | 0.00 | 0.00 | 12.50 | 0.00 | 1360.03 | true | 0.124786;0.124761;0.124785;0.124653;0.124786 | 140083200;140083200;140083200;140083200;140083200 | |
303 | resnetv23_stage3_conv68_fwd | Convolution | [1,256,14,14] | 13477.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 80.67 | 3801088 | 0.00 | 0.00 | 49.70 | 0.00 | 47.12 | true | 0.494260;0.498320;0.495412;0.498120;0.500147 | 3801088;3801088;3801088;3801088;3801088 | |
304 | resnetv23_stage3_batchnorm68_fwd | BatchNorm | [1,256,14,14] | 89.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 317440 | 0.00 | 0.00 | 41.30 | 0.00 | 39.68 | true | 0.412812;0.414042;0.413525;0.415351;0.412154 | 317440;317440;317440;317440;317440 | |
305 | resnetv23_stage3_activation68 | Activation | [1,256,14,14] | 16.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 9.00 | 50176 | 0.00 | 0.00 | 36.40 | 0.00 | 5.58 | true | 0.363472;0.365059;0.362873;0.363272;0.365458 | 50176;50176;50176;50176;50176 | |
306 | resnetv23_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5670 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 94.67 | 134742016 | 0.00 | 0.00 | 12.50 | 0.00 | 1423.33 | true | 0.124734;0.124692;0.124756;0.124757;0.124758 | 134742016;134742016;134742016;134742016;134742016 | |
306 | resnetv23_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5670 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 5.50 | 0.00 | 0.00 | true | 0.054432;0.055301;0.055356;0.055453;0.055217 | 0;0;0;0;0 | |
307 | resnetv23_stage3__plus22 | elemwise_add | [1,1024,14,14] | 74.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 13.00 | 200704 | 0.00 | 0.00 | 79.20 | 0.00 | 15.44 | true | 0.786262;0.793458;0.794834;0.797369;0.787797 | 200704;200704;200704;200704;200704 | |
308 | resnetv23_stage4_batchnorm0_fwd | BatchNorm | [1,1024,14,14] | 49.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 16.00 | 1269760 | 0.00 | 0.00 | 75.70 | 0.00 | 79.36 | true | 0.756151;0.761727;0.754572;0.754605;0.759426 | 1269760;1269760;1269760;1269760;1269760 | |
309 | resnetv23_stage4_activation0 | Activation | [1,1024,14,14] | 27.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 11.00 | 200704 | 0.00 | 0.00 | 42.10 | 0.00 | 18.25 | true | 0.423488;0.419648;0.419892;0.420824;0.421421 | 200704;200704;200704;200704;200704 | |
310 | resnetv23_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 11418.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 273.00 | 268697600 | 0.00 | 0.00 | 12.50 | 0.00 | 984.24 | true | 0.124562;0.124559;0.124561;0.124562;0.124555 | 268697600;268697600;268697600;268697600;268697600 | |
310 | resnetv23_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 11418.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 4.00 | 0 | 0.00 | 0.00 | 5.60 | 0.00 | 0.00 | true | 0.055751;0.056343;0.056427;0.055926;0.055844 | 0;0;0;0;0 | |
311 | resnetv23_stage4_batchnorm1_fwd | BatchNorm | [1,512,14,14] | 101.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 11.00 | 634880 | 0.00 | 0.00 | 73.90 | 0.00 | 57.72 | true | 0.735786;0.733978;0.745748;0.751672;0.733641 | 634880;634880;634880;634880;634880 | |
312 | resnetv23_stage4_activation1 | Activation | [1,512,14,14] | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.00 | 0.00 | 12.54 | true | 0.422190;0.419795;0.420041;0.420056;0.420632 | 100352;100352;100352;100352;100352 | |
313 | resnetv23_stage4_conv1_fwd | Convolution | [1,512,14,14] | 16639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 601.00 | 302014976 | 0.00 | 0.00 | 6.20 | 0.00 | 502.52 | true | 0.062164;0.062183;0.062183;0.062162;0.062040 | 302014976;302014976;302014976;302014976;302014976 | |
313 | resnetv23_stage4_conv1_fwd | Convolution | [1,512,14,14] | 16639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 138.67 | 0 | 0.00 | 0.00 | 3.80 | 0.00 | 0.00 | true | 0.037963;0.038043;0.037984;0.037926;0.038088 | 0;0;0;0;0 | |
314 | resnetv23_stage4_batchnorm2_fwd | BatchNorm | [1,512,7,7] | 97 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 175616 | 0.00 | 0.00 | 66.50 | 0.00 | 21.95 | true | 0.670182;0.672606;0.657489;0.664886;0.658561 | 175616;175616;175616;175616;175616 | |
315 | resnetv23_stage4_activation2 | Activation | [1,512,7,7] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 7.00 | 25088 | 0.00 | 0.00 | 41.00 | 0.00 | 3.58 | true | 0.407964;0.407964;0.413060;0.409384;0.412278 | 25088;25088;25088;25088;25088 | |
316 | resnetv23_stage4_conv2_fwd | Convolution | [1,512,7,7] | 7016.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 155.33 | 268959744 | 0.00 | 0.00 | 12.50 | 0.00 | 1731.50 | true | 0.124649;0.124638;0.124754;0.124755;0.124641 | 268959744;268959744;268959744;268959744;268959744 | |
316 | resnetv23_stage4_conv2_fwd | Convolution | [1,512,7,7] | 7016.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 4.10 | 0.00 | 0.00 | true | 0.043524;0.040433;0.040376;0.040366;0.043816 | 0;0;0;0;0 | |
317 | resnetv23_stage4_conv3_fwd | Convolution | [1,1024,14,14] | 13981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 285.00 | 537395200 | 0.00 | 0.00 | 12.50 | 0.00 | 1885.60 | true | 0.124660;0.124660;0.124661;0.124662;0.124584 | 537395200;537395200;537395200;537395200;537395200 | |
317 | resnetv23_stage4_conv3_fwd | Convolution | [1,1024,14,14] | 13981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.00 | 0 | 0.00 | 0.00 | 4.70 | 0.00 | 0.00 | true | 0.046889;0.047531;0.047120;0.047080;0.048066 | 0;0;0;0;0 | |
318 | resnetv23_stage4__plus0 | elemwise_add | [1,2048,7,7] | 66.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 9.00 | 100352 | 0.00 | 0.00 | 78.90 | 0.00 | 11.15 | true | 0.773402;0.797371;0.796340;0.796081;0.765317 | 100352;100352;100352;100352;100352 | |
319 | resnetv23_stage4_batchnorm3_fwd | BatchNorm | [1,2048,7,7] | 52.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 702464 | 0.00 | 0.00 | 71.50 | 0.00 | 46.83 | true | 0.707160;0.714295;0.709747;0.719571;0.725035 | 702464;702464;702464;702464;702464 | |
320 | resnetv23_stage4_activation3 | Activation | [1,2048,7,7] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.00 | 0.00 | 12.54 | true | 0.419787;0.420196;0.419400;0.420760;0.422020 | 100352;100352;100352;100352;100352 | |
321 | resnetv23_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 7152.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 266.33 | 134242816 | 0.00 | 0.00 | 6.20 | 0.00 | 504.04 | true | 0.062196;0.062225;0.062195;0.062215;0.062222 | 134242816;134242816;134242816;134242816;134242816 | |
321 | resnetv23_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 7152.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 182.00 | 0 | 0.00 | 0.00 | 3.40 | 0.00 | 0.00 | true | 0.033570;0.033567;0.033632;0.033599;0.033562 | 0;0;0;0;0 | |
322 | resnetv23_stage4_batchnorm4_fwd | BatchNorm | [1,512,7,7] | 87 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 175616 | 0.00 | 0.00 | 66.20 | 0.00 | 21.95 | true | 0.652055;0.690432;0.662404;0.660563;0.663154 | 175616;175616;175616;175616;175616 | |
323 | resnetv23_stage4_activation4 | Activation | [1,512,7,7] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 7.00 | 25088 | 0.00 | 0.00 | 40.90 | 0.00 | 3.58 | true | 0.409040;0.410259;0.409491;0.408329;0.409760 | 25088;25088;25088;25088;25088 | |
324 | resnetv23_stage4_conv5_fwd | Convolution | [1,512,7,7] | 16681 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 334.67 | 15204352 | 0.00 | 0.00 | 51.80 | 0.00 | 45.43 | true | 0.517406;0.519093;0.516899;0.515328;0.519519 | 15204352;15204352;15204352;15204352;15204352 | |
324 | resnetv23_stage4_conv5_fwd | Convolution | [1,512,7,7] | 16681 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 199.33 | 279543808 | 0.00 | 0.00 | 12.50 | 0.00 | 1402.40 | true | 0.124677;0.124609;0.124676;0.124681;0.124599 | 279543808;279543808;279543808;279543808;279543808 | |
325 | resnetv23_stage4_batchnorm5_fwd | BatchNorm | [1,512,7,7] | 91 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 175616 | 0.00 | 0.00 | 68.20 | 0.00 | 21.95 | true | 0.693345;0.686497;0.679963;0.680920;0.677882 | 175616;175616;175616;175616;175616 | |
326 | resnetv23_stage4_activation5 | Activation | [1,512,7,7] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 7.00 | 25088 | 0.00 | 0.00 | 40.90 | 0.00 | 3.58 | true | 0.411343;0.410488;0.407084;0.409137;0.408700 | 25088;25088;25088;25088;25088 | |
327 | resnetv23_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7025 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 166.67 | 268959744 | 0.00 | 0.00 | 12.50 | 0.00 | 1613.76 | true | 0.124660;0.124660;0.124660;0.124662;0.124661 | 268959744;268959744;268959744;268959744;268959744 | |
327 | resnetv23_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7025 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 5.67 | 0 | 0.00 | 0.00 | 4.30 | 0.00 | 0.00 | true | 0.043302;0.042573;0.043458;0.042717;0.043360 | 0;0;0;0;0 | |
328 | resnetv23_stage4__plus1 | elemwise_add | [1,2048,7,7] | 69.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 9.00 | 100352 | 0.00 | 0.00 | 78.40 | 0.00 | 11.15 | true | 0.776035;0.799889;0.776188;0.775150;0.799605 | 100352;100352;100352;100352;100352 | |
329 | resnetv23_stage4_batchnorm6_fwd | BatchNorm | [1,2048,7,7] | 50.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 702464 | 0.00 | 0.00 | 72.10 | 0.00 | 46.83 | true | 0.726850;0.726853;0.714089;0.714083;0.723248 | 702464;702464;702464;702464;702464 | |
330 | resnetv23_stage4_activation6 | Activation | [1,2048,7,7] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.33 | 100352 | 0.00 | 0.00 | 42.00 | 0.00 | 12.04 | true | 0.420196;0.419671;0.419886;0.420830;0.420517 | 100352;100352;100352;100352;100352 | |
331 | resnetv23_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7132.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 270.67 | 134242816 | 0.00 | 0.00 | 6.20 | 0.00 | 495.97 | true | 0.062283;0.062223;0.062261;0.062261;0.062191 | 134242816;134242816;134242816;134242816;134242816 | |
331 | resnetv23_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7132.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 184.00 | 0 | 0.00 | 0.00 | 3.40 | 0.00 | 0.00 | true | 0.033570;0.033564;0.033491;0.033524;0.033542 | 0;0;0;0;0 | |
332 | resnetv23_stage4_batchnorm7_fwd | BatchNorm | [1,512,7,7] | 90.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 175616 | 0.00 | 0.00 | 67.30 | 0.00 | 21.95 | true | 0.688239;0.667656;0.682395;0.670163;0.647262 | 175616;175616;175616;175616;175616 | |
333 | resnetv23_stage4_activation7 | Activation | [1,512,7,7] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 7.00 | 25088 | 0.00 | 0.00 | 40.90 | 0.00 | 3.58 | true | 0.408807;0.408938;0.407778;0.409628;0.408029 | 25088;25088;25088;25088;25088 | |
334 | resnetv23_stage4_conv8_fwd | Convolution | [1,512,7,7] | 16623.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 334.67 | 15204352 | 0.00 | 0.00 | 51.60 | 0.00 | 45.43 | true | 0.517341;0.515348;0.520130;0.516221;0.509871 | 15204352;15204352;15204352;15204352;15204352 | |
334 | resnetv23_stage4_conv8_fwd | Convolution | [1,512,7,7] | 16623.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_winograd_128x128_ldg1_ldg4_tile148n_nt | 199.67 | 279543808 | 0.00 | 0.00 | 12.50 | 0.00 | 1400.05 | true | 0.124672;0.124679;0.124678;0.124680;0.124679 | 279543808;279543808;279543808;279543808;279543808 | |
335 | resnetv23_stage4_batchnorm8_fwd | BatchNorm | [1,512,7,7] | 92.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.00 | 175616 | 0.00 | 0.00 | 68.40 | 0.00 | 21.95 | true | 0.676755;0.689076;0.685566;0.678451;0.696535 | 175616;175616;175616;175616;175616 | |
336 | resnetv23_stage4_activation8 | Activation | [1,512,7,7] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 7.00 | 25088 | 0.00 | 0.00 | 41.10 | 0.00 | 3.58 | true | 0.411248;0.408817;0.410996;0.410224;0.413334 | 25088;25088;25088;25088;25088 | |
337 | resnetv23_stage4_conv9_fwd | Convolution | [1,512,7,7] | 7010.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | maxwell_scudnn_128x128_relu_interior_nn | 165.67 | 268959744 | 0.00 | 0.00 | 12.50 | 0.00 | 1623.50 | true | 0.124659;0.124665;0.124689;0.124660;0.124660 | 268959744;268959744;268959744;268959744;268959744 | |
337 | resnetv23_stage4_conv9_fwd | Convolution | [1,512,7,7] | 7010.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::maxwell::gemm::computeOffsetsKernel(cudnn::maxwell::gemm::ComputeOffsetsParams) | 6.00 | 0 | 0.00 | 0.00 | 4.30 | 0.00 | 0.00 | true | 0.043404;0.042827;0.043279;0.043531;0.043786 | 0;0;0;0;0 | |
338 | resnetv23_stage4__plus2 | elemwise_add | [1,2048,7,7] | 69 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 9.00 | 100352 | 0.00 | 0.00 | 79.40 | 0.00 | 11.15 | true | 0.792903;0.795630;0.792840;0.787894;0.803623 | 100352;100352;100352;100352;100352 | |
339 | resnetv23_batchnorm2_fwd | BatchNorm | [1,2048,7,7] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 15.00 | 702464 | 0.00 | 0.00 | 71.20 | 0.00 | 46.83 | true | 0.717291;0.711961;0.709923;0.712959;0.710880 | 702464;702464;702464;702464;702464 | |
340 | resnetv23_relu1_fwd | Activation | [1,2048,7,7] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::activation_fw_4d_kernel<float, float, 128, 1, 4, cudnn::detail::relu_func<float, (cudnnNanPropagation_t)0, false> >(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float, float, int, float) | 8.00 | 100352 | 0.00 | 0.00 | 42.00 | 0.00 | 12.54 | true | 0.419582;0.420619;0.419661;0.421264;0.419497 | 100352;100352;100352;100352;100352 | |
341 | resnetv23_pool1_fwd | Pooling | [1,2048,7,7] | 145.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 24.67 | 144598 | 0.00 | 0.00 | 11.90 | 0.00 | 5.86 | true | 0.118880;0.118504;0.118396;0.119000;0.118956 | 144598;144598;144598;144598;144598 | |
343 | resnetv23_dense0_fwd | FullyConnected | [1,2048] | 2262 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 2, 2, false, cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float> >(cublasGemvParams<cublasGemvTensor<float const>, cublasGemvTensor<float>, float>, float, float) | 87.67 | 4495000 | 0.00 | 0.00 | 48.50 | 0.00 | 51.27 | true | 0.485207;0.485335;0.485665;0.485245;0.484805 | 4495000;4495000;4495000;4495000;4495000 | |
343 | resnetv23_dense0_fwd | FullyConnected | [1,2048] | 2262 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 5.33 | 1000 | 0.00 | 0.00 | 12.40 | 0.00 | 0.19 | true | 0.124096;0.123905;0.123959;0.123976;0.123961 | 1000;1000;1000;1000;1000 |
Showing 1 to 446 of 446 entries