GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp | dram_write_bytes | dram_read_bytes |
---|
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp | dram_write_bytes | dram_read_bytes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mobilenet1_conv0_fwd | Convolution | [64,3,224,224] | 286915 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x32_relu_small_nn_v1 | 201.00 | 1490026496 | 38772906.67 | 79432682.67 | 24.00 | 12.61 | 7413.07 | true | 0.239907;0.239859;0.239881;0.239859;0.239905 | 1490026496;1490026496;1490026496;1490026496;1490026496 | 38812672;38745600;38766176;38770048;38782496 | 79435456;79433280;79432288;79432480;79418592 | |
0 | mobilenet1_conv0_fwd | Convolution | [64,3,224,224] | 286915 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 608.00 | 232960.00 | 7.40 | 0.00 | 0.00 | true | 0.073856;0.073874;0.073913;0.073881;0.074010 | 0;0;0;0;0 | 608;352;608;608;928 | 234880;232576;233728;232576;231872 | |
1 | mobilenet1_batchnorm0_fwd | BatchNorm | [64,24,112,112] | 24058.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 204.33 | 116391936 | 77071669.33 | 77283061.33 | 92.20 | 0.75 | 569.62 | true | 0.921732;0.922100;0.921319;0.922191;0.921712 | 116391936;116391936;116391936;116391936;116391936 | 77071584;77071840;77071584;77073376;77071584 | 77266592;77286304;77256768;77301664;77296288 | |
2 | mobilenet1_relu0_fwd | Activation | [64,24,112,112] | 4077.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 202.33 | 38535168 | 77070688.00 | 76851605.33 | 96.60 | 0.25 | 190.45 | true | 0.966349;0.966327;0.965825;0.966541;0.965622 | 38535168;38535168;38535168;38535168;38535168 | 76859712;76855616;76886720;76839488;76838976 | 77070688;77070688;77070688;77070688;77070688 | |
3 | mobilenet1_conv1_fwd | Convolution | [64,24,112,112] | 277431.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_c1_k1_nchw_hw_packed_kernel<float, float, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, int) | 295.33 | 404619264 | 78447370.67 | 77229088.00 | 47.20 | 2.60 | 1370.04 | true | 0.473458;0.471428;0.471986;0.471986;0.471592 | 404619264;404619264;404619264;404619264;404619264 | 78425952;78477216;78405728;78529440;78438944 | 77227488;77187552;77241120;77218656;77252128 | |
4 | mobilenet1_batchnorm1_fwd | BatchNorm | [64,24,112,112] | 24028 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 204.67 | 116391936 | 77071072.00 | 77115552.00 | 92.10 | 0.75 | 568.69 | true | 0.920523;0.921474;0.921817;0.921286;0.921618 | 116391936;116391936;116391936;116391936;116391936 | 77119776;77142688;77102976;77123904;77100256 | 77076192;77071072;77071072;77071072;77071072 | |
5 | mobilenet1_relu1_fwd | Activation | [64,24,112,112] | 4058.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 202.00 | 38535168 | 77070688.00 | 76867989.33 | 96.60 | 0.25 | 190.77 | true | 0.966297;0.965455;0.966843;0.966463;0.966050 | 38535168;38535168;38535168;38535168;38535168 | 77070688;77070944;77070688;77070688;77070688 | 76864960;76890816;76868672;76870336;76859840 | |
6 | mobilenet1_conv2_fwd | Convolution | [64,24,112,112] | 220314 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 358.00 | 2569011200 | 77857258.67 | 153803701.33 | 24.40 | 11.09 | 7176.01 | true | 0.244076;0.244188;0.244115;0.244014;0.244127 | 2569011200;2569011200;2569011200;2569011200;2569011200 | 77844672;77864000;77866944;77844672;77863104 | 153841568;153879904;153774368;153785888;153783648 | |
6 | mobilenet1_conv2_fwd | Convolution | [64,24,112,112] | 220314 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 4.00 | 0 | 96.00 | 101461.33 | 7.40 | 0.00 | 0.00 | true | 0.073889;0.074017;0.073913;0.073965;0.074007 | 0;0;0;0;0 | 96;96;96;96;96 | 103296;101632;101248;101504;101120 | |
7 | mobilenet1_batchnorm2_fwd | BatchNorm | [64,48,112,112] | 10348.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 402.00 | 232783872 | 152543178.67 | 154596000.00 | 93.60 | 0.76 | 579.06 | true | 0.937029;0.936158;0.935551;0.936843;0.936260 | 232783872;232783872;232783872;232783872;232783872 | 151739616;154148896;151740192;154148576;151740768 | 154586400;154561952;152249792;154650816;154639648 | |
8 | mobilenet1_relu2_fwd | Activation | [64,48,112,112] | 8108.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 403.67 | 77070336 | 153338208.00 | 153159658.67 | 98.90 | 0.25 | 190.93 | true | 0.989053;0.989170;0.988977;0.989086;0.989384 | 77070336;77070336;77070336;77070336;77070336 | 151555776;149126592;153956416;153966784;153969344 | 151732576;149324128;154141024;154141024;154141024 | |
9 | mobilenet1_conv3_fwd | Convolution | [64,48,112,112] | 152088.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 257.33 | 180983808 | 154143989.33 | 40192810.67 | 86.10 | 0.93 | 703.31 | true | 0.858734;0.860680;0.859666;0.862756;0.861388 | 180983808;180983808;180983808;180983808;180983808 | 154143776;154143520;154153760;154144352;154143840 | 40196000;40234016;40104480;40148416;40237344 | |
10 | mobilenet1_batchnorm3_fwd | BatchNorm | [64,48,56,56] | 2571.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 99.67 | 59375616 | 38538677.33 | 36940448.00 | 86.30 | 0.79 | 595.74 | true | 0.861850;0.862053;0.863872;0.862184;0.864822 | 59375616;59375616;59375616;59375616;59375616 | 38538272;38538720;38539104;38538336;38538976 | 36931744;36912800;37033536;36976800;36894880 | |
11 | mobilenet1_relu3_fwd | Activation | [64,48,56,56] | 2051.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 102.67 | 19267584 | 38535520.00 | 38466496.00 | 95.50 | 0.25 | 187.67 | true | 0.954931;0.953713;0.953371;0.955107;0.954889 | 19267584;19267584;19267584;19267584;19267584 | 38535520;38535520;38536032;38535520;38535520 | 38477376;38451008;38456640;38470208;38472640 | |
12 | mobilenet1_conv4_fwd | Convolution | [64,48,56,56] | 110443.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x32_relu_interior_nn_v1 | 279.00 | 1888223232 | 115703850.67 | 77040053.33 | 23.90 | 9.80 | 6767.83 | true | 0.239930;0.238925;0.239597;0.239206;0.239512 | 1888223232;1888223232;1888223232;1888223232;1888223232 | 77045440;77037184;77058112;77037536;77029312 | 115706688;115701440;115703424;115707968;115697088 | |
12 | mobilenet1_conv4_fwd | Convolution | [64,48,56,56] | 110443.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.00 | 0 | 96.00 | 26069.33 | 6.10 | 0.00 | 0.00 | true | 0.061172;0.061207;0.061213;0.061179;0.061181 | 0;0;0;0;0 | 25984;26112;25984;32896;26112 | 96;96;96;96;96 | |
13 | mobilenet1_batchnorm4_fwd | BatchNorm | [64,96,56,56] | 5226 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 196.00 | 118751232 | 77079498.67 | 77154474.67 | 87.10 | 0.77 | 605.87 | true | 0.869884;0.870564;0.869656;0.872062;0.871137 | 118751232;118751232;118751232;118751232;118751232 | 77078432;77080992;77078752;77079904;77079840 | 77152672;77158464;77142688;77161152;77152288 | |
14 | mobilenet1_relu4_fwd | Activation | [64,96,56,56] | 4088 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 202.33 | 38535168 | 77070688.00 | 77003797.33 | 96.60 | 0.25 | 190.45 | true | 0.966648;0.964861;0.965236;0.966537;0.966513 | 38535168;38535168;38535168;38535168;38535168 | 77005504;77008448;77002560;76996288;77003328 | 77070688;77070688;77070688;77070688;77070944 | |
15 | mobilenet1_conv5_fwd | Convolution | [64,96,56,56] | 280065.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_c1_k1_nchw_hw_packed_kernel<float, float, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, int) | 264.33 | 404619264 | 76779317.33 | 76953312.00 | 83.10 | 2.63 | 1530.72 | true | 0.829586;0.829368;0.831085;0.831223;0.831617 | 404619264;404619264;404619264;404619264;404619264 | 76929632;76965984;76979552;76964320;76929376 | 76775456;76778912;76783584;76791456;76770464 | |
16 | mobilenet1_batchnorm5_fwd | BatchNorm | [64,96,56,56] | 5242 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 196.67 | 118751232 | 76974922.67 | 77247744.00 | 87.00 | 0.77 | 603.82 | true | 0.869858;0.870472;0.869667;0.871094;0.869975 | 118751232;118751232;118751232;118751232;118751232 | 76969760;76972576;76982432;76988704;76959136 | 77271200;77235136;77232448;77236896;77277600 | |
17 | mobilenet1_relu5_fwd | Activation | [64,96,56,56] | 4096.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 202.00 | 38535168 | 77070688.00 | 77007978.67 | 96.70 | 0.25 | 190.77 | true | 0.967379;0.967148;0.966619;0.966636;0.966614 | 38535168;38535168;38535168;38535168;38535168 | 77011264;77009216;76996928;77010624;77004096 | 77070688;77070688;77070688;77070688;77070688 | |
18 | mobilenet1_conv6_fwd | Convolution | [64,96,56,56] | 218011.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 417.67 | 4983881728 | 143812458.67 | 72355328.00 | 24.20 | 23.06 | 11932.67 | false | 0.242525;0.242325;0.242652;0.242370;0.242372 | 4983881728;4983881728;4983881728;4983881728;4983881728 | 139802048;142210496;139791040;149424832;154255424 | 69977120;72347648;69918368;74741216;77171456 | |
18 | mobilenet1_conv6_fwd | Convolution | [64,96,56,56] | 218011.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 96.00 | 26240.00 | 6.10 | 0.00 | 0.00 | true | 0.061181;0.061201;0.061204;0.061185;0.061183 | 0;0;0;0;0 | 96;96;96;96;96 | 26240;32896;26240;25984;26240 | |
19 | mobilenet1_batchnorm6_fwd | BatchNorm | [64,96,56,56] | 5143 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 196.67 | 118751232 | 77079541.33 | 77024298.67 | 87.00 | 0.77 | 603.82 | true | 0.870158;0.870084;0.870979;0.870443;0.869737 | 118751232;118751232;118751232;118751232;118751232 | 77080032;77079008;77083552;77078176;77079584 | 76977088;77014720;77052064;77042336;77015840 | |
20 | mobilenet1_relu6_fwd | Activation | [64,96,56,56] | 4125.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 202.00 | 38535168 | 77070688.00 | 77006954.67 | 96.70 | 0.25 | 190.77 | true | 0.966698;0.967250;0.965201;0.966808;0.966246 | 38535168;38535168;38535168;38535168;38535168 | 77070688;77070688;77070688;77070688;77070688 | 77010752;77001536;76991680;77008576;77018688 | |
21 | mobilenet1_conv7_fwd | Convolution | [64,96,56,56] | 77674.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 132.33 | 89468928 | 77074144.00 | 20896522.67 | 83.70 | 0.91 | 676.09 | true | 0.836088;0.838028;0.837256;0.840498;0.833358 | 89468928;89468928;89468928;89468928;89468928 | 77074144;77074272;77074144;77074144;77074144 | 20848672;20868640;20909760;20952768;20911168 | |
22 | mobilenet1_batchnorm7_fwd | BatchNorm | [64,96,28,28] | 1325.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 52.00 | 30474240 | 19269344.00 | 17880010.67 | 81.60 | 0.82 | 586.04 | true | 0.816111;0.817380;0.815298;0.816477;0.814962 | 30474240;30474240;30474240;30474240;30474240 | 19269344;19269344;19269344;19269472;19269344 | 17928448;17909152;17873376;17830496;17857504 | |
23 | mobilenet1_relu7_fwd | Activation | [64,96,28,28] | 1045 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.00 | 9633792 | 19267936.00 | 19024149.33 | 94.40 | 0.25 | 181.77 | true | 0.945445;0.946103;0.944385;0.942613;0.941473 | 9633792;9633792;9633792;9633792;9633792 | 19276896;19267936;19267936;19267936;19267936 | 19032320;19022272;19014144;19020928;19029248 | |
24 | mobilenet1_conv8_fwd | Convolution | [64,96,28,28] | 100320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 169.67 | 1868955648 | 57754517.33 | 38749216.00 | 23.30 | 19.37 | 11015.43 | false | 0.232208;0.231901;0.233388;0.233302;0.232395 | 1868955648;1868955648;1868955648;1868955648;1868955648 | 57752128;57749120;57760512;57750912;57761920 | 38726464;38759968;38749376;38748384;38749888 | |
24 | mobilenet1_conv8_fwd | Convolution | [64,96,28,28] | 100320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 181.33 | 10538.67 | 5.90 | 0.00 | 0.00 | true | 0.059367;0.059370;0.059371;0.059392;0.059371 | 0;0;0;0;0 | 96;352;96;4448;96 | 16768;7424;7424;19968;7168 | |
25 | mobilenet1_batchnorm8_fwd | BatchNorm | [64,192,28,28] | 2702 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 101.00 | 60948480 | 38539061.33 | 38545098.67 | 83.10 | 0.79 | 603.45 | true | 0.830861;0.831735;0.832360;0.830888;0.831204 | 60948480;60948480;60948480;60948480;60948480 | 38538464;38539488;38538848;38541408;38538848 | 38557440;38539200;38547712;38526496;38548384 | |
26 | mobilenet1_relu8_fwd | Activation | [64,192,28,28] | 2092 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 101.67 | 19267584 | 38535520.00 | 38308480.00 | 95.50 | 0.25 | 189.52 | true | 0.954170;0.955095;0.954360;0.955171;0.954193 | 19267584;19267584;19267584;19267584;19267584 | 38535520;38535520;38535520;38535520;38535520 | 38301376;38309632;38312576;38310336;38305472 | |
27 | mobilenet1_conv9_fwd | Convolution | [64,192,28,28] | 143253 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_c1_k1_nchw_hw_packed_kernel<float, float, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, int) | 149.67 | 202309632 | 38204576.00 | 38832853.33 | 91.80 | 2.63 | 1351.73 | true | 0.919904;0.917293;0.917109;0.918451;0.918558 | 202309632;202309632;202309632;202309632;202309632 | 38202400;38201312;38210016;38213728;38194080 | 38818720;38829440;38827872;38843392;38841248 | |
28 | mobilenet1_batchnorm9_fwd | BatchNorm | [64,192,28,28] | 2714.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 100.67 | 60948480 | 38373280.00 | 38463509.33 | 83.10 | 0.79 | 605.45 | true | 0.832989;0.831077;0.831165;0.831394;0.831065 | 60948480;60948480;60948480;60948480;60948480 | 38472224;38470144;38462112;38453664;38458272 | 38377824;38374304;38373344;38372192;38367200 | |
29 | mobilenet1_relu9_fwd | Activation | [64,192,28,28] | 2127 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 102.00 | 19267584 | 38535520.00 | 38310314.67 | 95.40 | 0.25 | 188.90 | true | 0.953585;0.953515;0.952322;0.954337;0.953957 | 19267584;19267584;19267584;19267584;19267584 | 38535520;38535520;38535520;38535520;38535520 | 38303232;38304768;38317632;38308544;38321792 | |
30 | mobilenet1_conv10_fwd | Convolution | [64,192,28,28] | 199757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 312.33 | 3718643712 | 115759466.67 | 38840949.33 | 23.40 | 24.05 | 11906.02 | false | 0.233865;0.233767;0.235325;0.234504;0.234608 | 3718643712;3718643712;3718643712;3718643712;3718643712 | 115751680;115759552;115748160;115767168;115783360 | 38883552;38829920;38821088;38871840;38800512 | |
30 | mobilenet1_conv10_fwd | Convolution | [64,192,28,28] | 199757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 96.00 | 7338.67 | 5.90 | 0.00 | 0.00 | true | 0.059367;0.059406;0.059349;0.059388;0.059353 | 0;0;0;0;0 | 96;352;96;96;96 | 7424;7680;7168;7424;7168 | |
31 | mobilenet1_batchnorm10_fwd | BatchNorm | [64,192,28,28] | 2600.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 102.00 | 60948480 | 38538890.67 | 38454368.00 | 83.20 | 0.79 | 597.53 | true | 0.831673;0.831898;0.830287;0.831797;0.831635 | 60948480;60948480;60948480;60948480;60948480 | 38413248;38462688;38476448;38423968;38493792 | 38538976;38538848;38549344;38538848;38538720 | |
32 | mobilenet1_relu10_fwd | Activation | [64,192,28,28] | 2089 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 102.00 | 19267584 | 38535520.00 | 38306453.33 | 95.50 | 0.25 | 188.90 | true | 0.955683;0.953849;0.953799;0.955511;0.955497 | 19267584;19267584;19267584;19267584;19267584 | 38535520;38535520;38535520;38535520;38535520 | 38306048;38311296;38304576;38308736;38297152 | |
33 | mobilenet1_conv11_fwd | Convolution | [64,192,28,28] | 40900.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 69.00 | 43720704 | 38542432.00 | 11289258.67 | 80.10 | 0.88 | 633.63 | true | 0.797144;0.800168;0.800272;0.803063;0.802398 | 43720704;43720704;43720704;43720704;43720704 | 38545504;38542432;38542432;38542432;38542432 | 11306912;11281056;11271328;11282080;11304640 | |
34 | mobilenet1_batchnorm11_fwd | BatchNorm | [64,192,14,14] | 1694.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 29.67 | 15237120 | 9709557.33 | 8942176.00 | 75.40 | 0.82 | 513.61 | true | 0.745631;0.764468;0.750946;0.755909;0.755798 | 15237120;15237120;15237120;15237120;15237120 | 9707936;9713440;9709280;9711456;9705184 | 8929888;8944576;8943680;8949408;8938272 | |
35 | mobilenet1_relu11_fwd | Activation | [64,192,14,14] | 546.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 27.33 | 4816896 | 8766784.00 | 9620309.33 | 93.60 | 0.26 | 176.23 | true | 0.937623;0.939541;0.934239;0.935681;0.935686 | 4816896;4816896;4816896;4816896;4816896 | 8765696;8761184;8754688;8774112;8773472 | 9615424;9625248;9620768;9620800;9619360 | |
36 | mobilenet1_conv12_fwd | Convolution | [64,192,14,14] | 99048.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 172.00 | 1859321856 | 44824373.33 | 21035573.33 | 22.20 | 28.23 | 10810.01 | false | 0.221148;0.221346;0.221300;0.221876;0.222455 | 1859321856;1859321856;1859321856;1859321856;1859321856 | 44415296;44874784;42439808;45183040;45795840 | 20994656;21024288;21138048;21075232;21007200 | |
36 | mobilenet1_conv12_fwd | Convolution | [64,192,14,14] | 99048.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 4.00 | 0 | 96.00 | 2432.00 | 5.80 | 0.00 | 0.00 | true | 0.057974;0.058068;0.057914;0.058037;0.057896 | 0;0;0;0;0 | 96;96;96;608;96 | 2432;2432;2432;2944;2432 | |
37 | mobilenet1_batchnorm12_fwd | BatchNorm | [64,384,14,14] | 2821.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 57.00 | 30474240 | 19258474.67 | 20920981.33 | 84.30 | 0.76 | 534.64 | true | 0.841531;0.842314;0.842104;0.844206;0.845409 | 30474240;30474240;30474240;30474240;30474240 | 19241024;19230112;19335680;19304288;19179648 | 20918944;20930880;20863392;20923456;20920544 | |
38 | mobilenet1_relu12_fwd | Activation | [64,384,14,14] | 1026.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 54.00 | 9633792 | 19267946.67 | 19245141.33 | 94.20 | 0.25 | 178.40 | true | 0.944203;0.940485;0.942559;0.941604;0.942718 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19267936;19268000;19267968 | 19243616;19247424;19246272;19239584;19245536 | |
39 | mobilenet1_conv13_fwd | Convolution | [64,384,14,14] | 78795 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 93.33 | 83460096 | 19281760.00 | 19415701.33 | 87.10 | 2.16 | 894.22 | true | 0.871579;0.872403;0.871649;0.870970;0.870575 | 83460096;83460096;83460096;83460096;83460096 | 19281760;19281760;19281760;19281760;19281760 | 19415104;19418272;19414720;19407552;19417280 | |
40 | mobilenet1_batchnorm13_fwd | BatchNorm | [64,384,14,14] | 2478.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.00 | 30474240 | 19456032.00 | 20955701.33 | 84.40 | 0.75 | 544.18 | true | 0.843986;0.843613;0.846553;0.844901;0.844408 | 30474240;30474240;30474240;30474240;30474240 | 20940992;20959136;20956736;20966432;20951232 | 19451232;19456992;19455744;19459712;19455360 | |
41 | mobilenet1_relu13_fwd | Activation | [64,384,14,14] | 1039.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.33 | 9633792 | 19267936.00 | 19244000.00 | 94.30 | 0.25 | 180.63 | true | 0.942227;0.941815;0.945087;0.945324;0.942664 | 9633792;9633792;9633792;9633792;9633792 | 19267968;19267936;19267936;19267936;19267936 | 19249632;19242432;19243744;19242368;19245824 | |
42 | mobilenet1_conv14_fwd | Convolution | [64,384,14,14] | 198316 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 325.00 | 3709009920 | 104191914.67 | 21753429.33 | 22.40 | 29.45 | 11412.34 | false | 0.222776;0.224615;0.222764;0.224042;0.224704 | 3709009920;3709009920;3709009920;3709009920;3709009920 | 21649376;21822688;21434720;21788224;21826240 | 108722656;102457856;104745376;104410048;103420320 | |
42 | mobilenet1_conv14_fwd | Convolution | [64,384,14,14] | 198316 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.67 | 0 | 96.00 | 2688.00 | 5.80 | 0.00 | 0.00 | true | 0.057920;0.058102;0.057892;0.058082;0.057881 | 0;0;0;0;0 | 96;352;96;96;96 | 2688;3200;2688;2432;2688 | |
43 | mobilenet1_batchnorm14_fwd | BatchNorm | [64,384,14,14] | 2919 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.00 | 30474240 | 19398165.33 | 20081013.33 | 83.30 | 0.77 | 544.18 | true | 0.834568;0.833066;0.830324;0.837450;0.831364 | 30474240;30474240;30474240;30474240;30474240 | 19390144;19454528;19392032;19405696;19396768 | 20169472;19992256;20298656;20045280;20028288 | |
44 | mobilenet1_relu14_fwd | Activation | [64,384,14,14] | 1028.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 54.00 | 9633792 | 19267936.00 | 19247296.00 | 94.20 | 0.25 | 178.40 | true | 0.941798;0.941942;0.944484;0.942053;0.942598 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19267936;19267936;19267968 | 19239424;19243776;19253184;19249888;19248224 | |
45 | mobilenet1_conv15_fwd | Convolution | [64,384,14,14] | 78629.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 94.00 | 83460096 | 19282442.67 | 19417397.33 | 87.10 | 2.16 | 887.87 | true | 0.870722;0.872395;0.872701;0.870686;0.871325 | 83460096;83460096;83460096;83460096;83460096 | 19418144;19418176;19416512;19417536;19410720 | 19288416;19281760;19281760;19281760;19283808 | |
46 | mobilenet1_batchnorm15_fwd | BatchNorm | [64,384,14,14] | 2552.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.67 | 30474240 | 19442890.67 | 20949248.00 | 84.60 | 0.75 | 537.78 | true | 0.845106;0.845746;0.846695;0.845369;0.846350 | 30474240;30474240;30474240;30474240;30474240 | 19442912;19443840;19439488;19442944;19442816 | 20946848;20947136;20953760;20954400;20945664 | |
47 | mobilenet1_relu15_fwd | Activation | [64,384,14,14] | 1030 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.33 | 9633792 | 19267936.00 | 19245632.00 | 94.30 | 0.25 | 180.63 | true | 0.944217;0.939614;0.942313;0.945063;0.943093 | 9633792;9633792;9633792;9633792;9633792 | 19244224;19249248;19245408;19240192;19247264 | 19267968;19267936;19267936;19267936;19267936 | |
48 | mobilenet1_conv16_fwd | Convolution | [64,384,14,14] | 198565.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 325.33 | 3709009920 | 106765045.33 | 21696608.00 | 22.30 | 28.87 | 11400.66 | false | 0.223628;0.219941;0.222636;0.224401;0.221610 | 3709009920;3709009920;3709009920;3709009920;3709009920 | 109629152;107317344;102588160;107691616;105286176 | 21777056;21829664;21639776;21672992;21532192 | |
48 | mobilenet1_conv16_fwd | Convolution | [64,384,14,14] | 198565.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.67 | 0 | 96.00 | 2432.00 | 5.80 | 0.00 | 0.00 | true | 0.057903;0.058046;0.057902;0.058034;0.058223 | 0;0;0;0;0 | 96;96;96;352;96 | 2432;2432;2432;3328;2432 | |
49 | mobilenet1_batchnorm16_fwd | BatchNorm | [64,384,14,14] | 2710.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.00 | 30474240 | 19419189.33 | 20055520.00 | 83.70 | 0.77 | 544.18 | true | 0.839714;0.832797;0.835684;0.840529;0.835796 | 30474240;30474240;30474240;30474240;30474240 | 19411872;19412000;19446784;19433696;19382656 | 20020576;20015808;20062208;20083776;20170624 | |
50 | mobilenet1_relu16_fwd | Activation | [64,384,14,14] | 1032 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 54.00 | 9633792 | 19267936.00 | 19241408.00 | 94.40 | 0.25 | 178.40 | true | 0.942805;0.943467;0.944487;0.945325;0.945106 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19267936;19267936;19267936 | 19240640;19253152;19234272;19238336;19245248 | |
51 | mobilenet1_conv17_fwd | Convolution | [64,384,14,14] | 78617.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 94.00 | 83460096 | 19281760.00 | 19406613.33 | 87.10 | 2.16 | 887.87 | true | 0.870852;0.871507;0.871525;0.870569;0.871383 | 83460096;83460096;83460096;83460096;83460096 | 19282016;19281760;19281760;19281760;19281760 | 19401152;19399584;19412160;19407680;19411008 | |
52 | mobilenet1_batchnorm17_fwd | BatchNorm | [64,384,14,14] | 2419.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.67 | 30474240 | 19451360.00 | 20957013.33 | 84.50 | 0.75 | 537.78 | true | 0.845066;0.844163;0.845731;0.844128;0.846588 | 30474240;30474240;30474240;30474240;30474240 | 19451200;19453216;19445024;19450816;19452064 | 20966240;20953952;20958400;20955712;20956928 | |
53 | mobilenet1_relu17_fwd | Activation | [64,384,14,14] | 1052 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 54.00 | 9633792 | 19267936.00 | 19246261.33 | 94.50 | 0.25 | 178.40 | true | 0.946716;0.943705;0.942016;0.946076;0.945620 | 9633792;9633792;9633792;9633792;9633792 | 19243872;19250176;19244736;19247520;19246528 | 19267968;19267936;19267936;19267936;19267936 | |
54 | mobilenet1_conv18_fwd | Convolution | [64,384,14,14] | 198589.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 325.33 | 3709009920 | 105601728.00 | 21646773.33 | 22.40 | 29.15 | 11400.66 | false | 0.221708;0.225259;0.222879;0.223880;0.224111 | 3709009920;3709009920;3709009920;3709009920;3709009920 | 105720512;108564160;104074208;106153280;104931392 | 21627328;21608800;21704192;21733184;21544544 | |
54 | mobilenet1_conv18_fwd | Convolution | [64,384,14,14] | 198589.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 4.00 | 0 | 96.00 | 2688.00 | 5.80 | 0.00 | 0.00 | true | 0.058294;0.058082;0.057885;0.058083;0.057897 | 0;0;0;0;0 | 96;96;96;96;96 | 2432;2432;2944;2688;9856 | |
55 | mobilenet1_batchnorm18_fwd | BatchNorm | [64,384,14,14] | 2844 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.67 | 30474240 | 19342645.33 | 20118794.67 | 83.70 | 0.77 | 537.78 | true | 0.837406;0.836589;0.835665;0.836167;0.840367 | 30474240;30474240;30474240;30474240;30474240 | 20188320;20172000;20007584;20053024;20131360 | 19402112;19112416;19403712;19432096;19222112 | |
56 | mobilenet1_relu18_fwd | Activation | [64,384,14,14] | 1043.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.67 | 9633792 | 19267936.00 | 19245749.33 | 94.20 | 0.25 | 179.51 | true | 0.939329;0.944122;0.944725;0.939648;0.943147 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19267936;19267936;19267936 | 19246592;19242336;19239456;19248320;19249024 | |
57 | mobilenet1_conv19_fwd | Convolution | [64,384,14,14] | 78704 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 93.67 | 83460096 | 19281760.00 | 19411562.67 | 87.10 | 2.16 | 891.03 | true | 0.872803;0.871104;0.870602;0.871013;0.870486 | 83460096;83460096;83460096;83460096;83460096 | 19281760;19281760;19281760;19281760;19281760 | 19415488;19412672;19417792;19406528;19389856 | |
58 | mobilenet1_batchnorm19_fwd | BatchNorm | [64,384,14,14] | 2464 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.00 | 30474240 | 19457120.00 | 20957013.33 | 84.50 | 0.75 | 544.18 | true | 0.843739;0.843984;0.844777;0.846834;0.846797 | 30474240;30474240;30474240;30474240;30474240 | 19456608;19456672;19457600;19458816;19457088 | 20960128;20956352;20954560;20953696;20967232 | |
59 | mobilenet1_relu19_fwd | Activation | [64,384,14,14] | 1031 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.00 | 9633792 | 19267936.00 | 19242005.33 | 94.30 | 0.25 | 181.77 | true | 0.944904;0.946394;0.942055;0.941494;0.942177 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19267936;19267936;19267968 | 19251296;19242720;19244448;19237664;19238848 | |
60 | mobilenet1_conv20_fwd | Convolution | [64,384,14,14] | 198258 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 324.33 | 3709009920 | 105837728.00 | 21747690.67 | 22.20 | 29.07 | 11435.81 | false | 0.223786;0.219738;0.223457;0.219198;0.221739 | 3709009920;3709009920;3709009920;3709009920;3709009920 | 21603040;21703968;21873984;21875680;21665120 | 107971392;107105472;104862592;105545120;104703328 | |
60 | mobilenet1_conv20_fwd | Convolution | [64,384,14,14] | 198258 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.00 | 0 | 96.00 | 2432.00 | 5.80 | 0.00 | 0.00 | true | 0.057916;0.058094;0.057949;0.058058;0.057953 | 0;0;0;0;0 | 2432;2432;2432;2432;2432 | 96;96;96;96;96 | |
61 | mobilenet1_batchnorm20_fwd | BatchNorm | [64,384,14,14] | 2718.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.00 | 30474240 | 19387786.67 | 19997589.33 | 83.40 | 0.77 | 544.18 | true | 0.832938;0.836604;0.831287;0.835413;0.833469 | 30474240;30474240;30474240;30474240;30474240 | 19356224;19403968;19403168;19404032;19248320 | 20184064;20021504;19874688;19916320;20054944 | |
62 | mobilenet1_relu20_fwd | Activation | [64,384,14,14] | 1044.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.33 | 9633792 | 19267936.00 | 19247786.67 | 94.40 | 0.25 | 180.63 | true | 0.946256;0.942862;0.943053;0.941068;0.944947 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19267936;19268064;19267936;19267936 | 19246432;19257376;19248928;19248000;19244992 | |
63 | mobilenet1_conv21_fwd | Convolution | [64,384,14,14] | 78665.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 94.00 | 83460096 | 19281760.00 | 19411498.67 | 87.20 | 2.16 | 887.87 | true | 0.871516;0.871519;0.870684;0.871598;0.871705 | 83460096;83460096;83460096;83460096;83460096 | 19281760;19281760;19281760;19281760;19281760 | 19410368;19396544;19409184;19414944;19416512 | |
64 | mobilenet1_batchnorm21_fwd | BatchNorm | [64,384,14,14] | 2601.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.67 | 30474240 | 19456160.00 | 20949482.67 | 84.50 | 0.75 | 537.78 | true | 0.844045;0.845140;0.845252;0.845917;0.845410 | 30474240;30474240;30474240;30474240;30474240 | 20966880;20953120;20946368;20947584;20947744 | 19456032;19456768;19459296;19455680;19454208 | |
65 | mobilenet1_relu21_fwd | Activation | [64,384,14,14] | 1047 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.00 | 9633792 | 19267936.00 | 19246613.33 | 94.30 | 0.25 | 181.77 | true | 0.944471;0.941547;0.944446;0.944539;0.941451 | 9633792;9633792;9633792;9633792;9633792 | 19242848;19247392;19247712;19244736;19252128 | 19267936;19267936;19267936;19267968;19267936 | |
66 | mobilenet1_conv22_fwd | Convolution | [64,384,14,14] | 198255.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 325.33 | 3709009920 | 104726005.33 | 21716149.33 | 22.10 | 29.33 | 11400.66 | false | 0.219899;0.219889;0.223220;0.219972;0.224989 | 3709009920;3709009920;3709009920;3709009920;3709009920 | 105708736;103980736;103783680;105908640;104488544 | 21684288;21853984;21629280;21780352;21683808 | |
66 | mobilenet1_conv22_fwd | Convolution | [64,384,14,14] | 198255.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 96.00 | 2602.67 | 5.80 | 0.00 | 0.00 | true | 0.058148;0.058083;0.057934;0.058078;0.058563 | 0;0;0;0;0 | 96;96;96;96;96 | 2816;2432;2688;2432;2688 | |
67 | mobilenet1_batchnorm22_fwd | BatchNorm | [64,384,14,14] | 3027.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 56.33 | 30474240 | 19388736.00 | 20038442.67 | 83.60 | 0.77 | 540.97 | true | 0.835708;0.834466;0.836948;0.836715;0.825427 | 30474240;30474240;30474240;30474240;30474240 | 20084480;20032288;20179296;19998560;19993312 | 19388224;19397856;19374944;19413536;19380128 | |
68 | mobilenet1_relu22_fwd | Activation | [64,384,14,14] | 1038 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 53.67 | 9633792 | 19268629.33 | 19246944.00 | 94.40 | 0.25 | 179.51 | true | 0.943237;0.941392;0.943582;0.947330;0.945330 | 9633792;9633792;9633792;9633792;9633792 | 19267936;19269984;19267968;19274592;19267936 | 19245312;19246016;19235200;19258592;19249504 | |
69 | mobilenet1_conv23_fwd | Convolution | [64,384,14,14] | 26005.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 39.67 | 20865024 | 19282058.67 | 6545098.67 | 82.60 | 0.81 | 526.00 | true | 0.830947;0.823942;0.823337;0.823461;0.829235 | 20865024;20865024;20865024;20865024;20865024 | 6562208;6548768;6546208;6540320;6536000 | 19282144;19282080;19282144;19281824;19281952 | |
70 | mobilenet1_batchnorm23_fwd | BatchNorm | [64,384,7,7] | 1232.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 45.00 | 8429568 | 4823264.00 | 3844960.00 | 17.90 | 0.97 | 187.32 | true | 0.175848;0.177513;0.181047;0.180405;0.179024 | 8429568;8429568;8429568;8429568;8429568 | 4823264;4823264;4823264;4823520;4823264 | 3815936;3838592;3857152;3839136;3866432 | |
71 | mobilenet1_relu23_fwd | Activation | [64,384,7,7] | 299.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 11.00 | 2408448 | 1521184.00 | 4477045.33 | 89.00 | 0.40 | 218.95 | true | 0.891135;0.890220;0.890180;0.878854;0.891002 | 2408448;2408448;2408448;2408448;2408448 | 1526816;1535776;1515040;1521696;1513504 | 4481376;4487648;4434944;4470304;4479456 | |
72 | mobilenet1_conv24_fwd | Convolution | [64,384,7,7] | 121935.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 165.67 | 1892352000 | 4923989.33 | 11744661.33 | 20.70 | 113.53 | 11422.62 | false | 0.205993;0.207732;0.205616;0.208529;0.206175 | 1892352000;1892352000;1892352000;1892352000;1892352000 | 5410400;4156352;4965408;5237888;4568672 | 11796160;11704160;11800512;11614144;11733664 | |
72 | mobilenet1_conv24_fwd | Convolution | [64,384,7,7] | 121935.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 96.00 | 1322.67 | 4.90 | 0.00 | 0.00 | true | 0.048691;0.049074;0.048691;0.049067;0.048664 | 0;0;0;0;0 | 96;96;96;96;96 | 1280;1408;1152;1408;1280 | |
73 | mobilenet1_batchnorm24_fwd | BatchNorm | [64,768,7,7] | 1943 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 84.00 | 16859136 | 8310314.67 | 10812213.33 | 18.60 | 0.88 | 200.70 | true | 0.186316;0.182280;0.185187;0.186111;0.200421 | 16859136;16859136;16859136;16859136;16859136 | 8350624;8291808;8386336;8252928;8288512 | 10795584;10833824;10811552;10829504;10787008 | |
74 | mobilenet1_relu24_fwd | Activation | [64,768,7,7] | 526.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 27.67 | 4816896 | 8846218.67 | 9604309.33 | 93.90 | 0.26 | 174.10 | true | 0.940225;0.939387;0.937164;0.940055;0.937661 | 4816896;4816896;4816896;4816896;4816896 | 8857568;8812896;8845408;8855264;8837984 | 9611264;9601344;9585472;9600320;9612704 | |
75 | mobilenet1_conv25_fwd | Convolution | [64,768,7,7] | 50480.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void conv2d_grouped_direct_kernel<float, float, float, float, float, true, false, 0, 1, 3>(cudnnTensorStruct, float const*, cudnnFilterStruct, float const*, cudnnConvolutionStruct, cudnnTensorStruct, float*, float, float, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, cudnn::reduced_divisor, int, float const*, float const*, cudnnActivationStruct) | 49.67 | 37896192 | 9661792.00 | 9698784.00 | 86.50 | 1.96 | 763.01 | true | 0.864464;0.865221;0.864639;0.865126;0.864486 | 37896192;37896192;37896192;37896192;37896192 | 9661728;9661792;9661792;9661792;9663072 | 9698240;9699104;9713824;9699008;9694400 | |
76 | mobilenet1_batchnorm25_fwd | BatchNorm | [64,768,7,7] | 1235.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 84.00 | 16859136 | 9647669.33 | 10786058.67 | 18.90 | 0.83 | 200.70 | true | 0.184257;0.198000;0.200889;0.185663;0.184239 | 16859136;16859136;16859136;16859136;16859136 | 9646304;9646304;9651680;9646304;9650400 | 10788576;10823424;10742784;10754208;10815392 | |
77 | mobilenet1_relu25_fwd | Activation | [64,768,7,7] | 525.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 27.33 | 4816896 | 8828725.33 | 9584586.67 | 93.80 | 0.26 | 176.23 | true | 0.937156;0.935763;0.937502;0.941462;0.938175 | 4816896;4816896;4816896;4816896;4816896 | 8800736;8838240;8830944;8832608;8822624 | 9582144;9577792;9596832;9593824;9564384 | |
78 | mobilenet1_conv26_fwd | Convolution | [64,768,7,7] | 249793.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 318.67 | 3779788800 | 15847509.33 | 11816618.67 | 22.40 | 136.63 | 11861.25 | false | 0.223462;0.222942;0.224462;0.226121;0.224196 | 3779788800;3779788800;3779788800;3779788800;3779788800 | 13655904;16930560;15646432;16477888;15418208 | 11808416;11862048;11754080;11833696;11807744 | |
78 | mobilenet1_conv26_fwd | Convolution | [64,768,7,7] | 249793.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.33 | 0 | 96.00 | 1450.67 | 4.90 | 0.00 | 0.00 | true | 0.048555;0.049426;0.048568;0.049153;0.048614 | 0;0;0;0;0 | 6240;96;96;96;96 | 17152;1536;1280;1536;1280 | |
79 | mobilenet1_batchnorm26_fwd | BatchNorm | [64,768,7,7] | 1430 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 84.00 | 16859136 | 8380565.33 | 10715914.67 | 18.80 | 0.88 | 200.70 | true | 0.187054;0.189843;0.189190;0.186706;0.186235 | 16859136;16859136;16859136;16859136;16859136 | 8381696;8460096;8313856;8446144;8259296 | 10700000;10694144;10781056;10699360;10748384 | |
80 | mobilenet1_relu26_fwd | Activation | [64,768,7,7] | 524 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 27.33 | 4816896 | 8839648.00 | 9607925.33 | 93.90 | 0.26 | 176.23 | true | 0.938411;0.939915;0.938087;0.939161;0.936576 | 4816896;4816896;4816896;4816896;4816896 | 8837472;8834656;8846816;8815456;8854240 | 9611680;9593920;9606560;9605536;9619008 | |
81 | mobilenet1_pool0_fwd | Pooling | [64,768,7,7] | 3168.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 30.00 | 3136512 | 14867285.33 | 3142613.33 | 50.40 | 0.17 | 104.55 | true | 0.504062;0.505454;0.503258;0.505408;0.503150 | 3136512;3136512;3136512;3136512;3136512 | 14843264;14903488;14856576;14872512;14872768 | 3138976;3145792;3143072;3147584;3130048 | |
83 | mobilenet1_dense0_fwd | FullyConnected | [64,768] | 6163.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_sgemm_64x32_sliced1x4_tn | 23.67 | 101711872 | 3072949.33 | 4010.67 | 12.30 | 33.06 | 4297.62 | false | 0.123309;0.123247;0.123169;0.123108;0.123214 | 101711872;101711872;101711872;101711872;101711872 | 3074400;3073120;3072864;3072864;3072864 | 4992;3584;4224;4224;3200 | |
83 | mobilenet1_dense0_fwd | FullyConnected | [64,768] | 6163.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 4.00 | 64000 | 4736.00 | 0.00 | 34.40 | 13.51 | 16.00 | true | 0.352585;0.344097;0.343695;0.343362;0.343505 | 64000;64000;64000;64000;64000 | 4992;4480;4736;4736;4736 | 0;0;0;0;0 |
Showing 1 to 98 of 98 entries