GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp | dram_read_bytes | dram_write_bytes |
---|
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound | achieved_occupancy | flop_count_sp | dram_read_bytes | dram_write_bytes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | vgg_19/conv1/conv1_1/convolution-0-TransposeNHWCToNCHW-LayoutOptimizer | Transpose | [[1 3 224 224]] | 89.667 | 602112 | 602112 | 575874560 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::functor::SwapDimension1And2InTensor3UsingTiles<unsigned int, 1024, 1024, 2, false>(unsigned int const*, tensorflow::functor::Dimension<3>, unsigned int*) | 6.00 | 0 | 1472.00 | 0.00 | 59.00 | 0.00 | 0.00 | true | 0.590930;0.589718;0.588877;0.592356;0.590477 | 0;0;0;0;0 | 7104;1216;1472;1472;1472 | 0;0;0;0;0 |
2 | vgg_19/conv1/conv1_1/convolution | Conv2D | [[1 64 224 224]] | 264.667 | 12845056 | 13447168 | 588117504 | GPU_0_bfc | 602112 | 0 | 0 | 0 | volta_scudnn_128x64_relu_small_nn_v1 | 30.67 | 211943424 | 46048.00 | 10797354.67 | 20.40 | 19.55 | 6911.12 | true | 0.204468;0.204668;0.204095;0.204145;0.204535 | 211943424;211943424;211943424;211943424;211943424 | 10811264;10771072;10781824;10815872;10798976 | 42720;46304;49120;40032;58784 |
2 | vgg_19/conv1/conv1_1/convolution | Conv2D | [[1 64 224 224]] | 264.667 | 12845056 | 13447168 | 588117504 | GPU_0_bfc | 602112 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 3.33 | 0 | 7082.67 | 0.00 | 37.80 | 0.00 | 0.00 | true | 0.378455;0.378503;0.378470;0.378329;0.378253 | 0;0;0;0;0 | 7424;6912;6912;6912;9216 | 0;0;0;0;0 |
2 | vgg_19/conv1/conv1_1/convolution | Conv2D | [[1 64 224 224]] | 264.667 | 12845056 | 13447168 | 588117504 | GPU_0_bfc | 602112 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.00 | 0 | 0.00 | 0.00 | 20.50 | 0.00 | 0.00 | true | 0.205977;0.205208;0.201368;0.205640;0.203844 | 0;0;0;0;0 | 0;0;0;0;0 | 0;0;0;0;0 |
3 | vgg_19/conv1/conv1_1/BiasAdd | BiasAdd | [[1 64 224 224]] | 83.333 | 12845056 | 0 | 587515392 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 45.33 | 3211264 | 11132330.67 | 12758197.33 | 46.90 | 0.13 | 70.84 | true | 0.469424;0.471201;0.469314;0.469667;0.469182 | 3211264;3211264;3211264;3211264;3211264 | 11112320;11152640;11145600;11121920;11129472 | 12751200;12782656;12772320;12729632;12751072 |
4 | vgg_19/conv1/conv1_1/Relu | Relu | [[1 64 224 224]] | 61.333 | 12845056 | 0 | 587515392 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 35.00 | 0 | 12845216.00 | 12721952.00 | 88.80 | 0.00 | 0.00 | true | 0.885814;0.896447;0.888316;0.889778;0.882887 | 0;0;0;0;0 | 12845216;12845216;12845216;12845216;12845216 | 12711936;12725344;12736000;12700416;12728576 |
5 | vgg_19/conv1/conv1_2/convolution | Conv2D | [[1 64 224 224]] | 343.333 | 12845056 | 13402368 | 600360448 | GPU_0_bfc | 557312 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 168.00 | 1742110720 | 19745930.67 | 13633962.67 | 24.00 | 52.19 | 10369.71 | false | 0.240524;0.239528;0.240686;0.239892;0.238913 | 1742110720;1742110720;1742110720;1742110720;1742110720 | 19730848;19486368;20020576;20122144;19050336 | 13643904;13620768;13578560;13637216;13644800 |
5 | vgg_19/conv1/conv1_2/convolution | Conv2D | [[1 64 224 224]] | 343.333 | 12845056 | 13402368 | 600360448 | GPU_0_bfc | 557312 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 5.00 | 0 | 147648.00 | 334666.67 | 44.10 | 0.00 | 0.00 | true | 0.441580;0.442430;0.449483;0.440182;0.440382 | 0;0;0;0;0 | 340256;333856;325792;359968;329888 | 147648;147648;147648;147648;147648 |
5 | vgg_19/conv1/conv1_2/convolution | Conv2D | [[1 64 224 224]] | 343.333 | 12845056 | 13402368 | 600360448 | GPU_0_bfc | 557312 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.00 | 237568 | 1301.33 | 285824.00 | 6.20 | 0.83 | 79.19 | true | 0.062335;0.062342;0.062339;0.062381;0.062352 | 237568;237568;237568;237568;237568 | 448;1472;1216;1472;1216 | 283392;258304;288896;291072;285184 |
6 | vgg_19/conv1/conv1_2/BiasAdd | BiasAdd | [[1 64 224 224]] | 74.667 | 12845056 | 0 | 587515392 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 45.00 | 3211264 | 11796928.00 | 11645920.00 | 47.10 | 0.14 | 71.36 | true | 0.471305;0.471234;0.472330;0.470069;0.471928 | 3211264;3211264;3211264;3211264;3211264 | 11806592;11834176;11764672;11808832;11775360 | 11643872;11646752;11706400;11647136;11608192 |
7 | vgg_19/conv1/conv1_2/Relu | Relu | [[1 64 224 224]] | 58.333 | 12845056 | 0 | 587515392 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 35.00 | 0 | 12845216.00 | 12743530.67 | 89.60 | 0.00 | 0.00 | true | 0.901000;0.896508;0.897391;0.893398;0.888682 | 0;0;0;0;0 | 12707200;12753504;12755552;12721536;12774112 | 12847264;12845216;12845216;12845088;12845216 |
8 | vgg_19/pool1/MaxPool | MaxPool | [[1 64 112 112]] | 85 | 3211264 | 3211264 | 590726656 | GPU_0_bfc | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 28.33 | 802816 | 12727232.00 | 4665333.33 | 69.40 | 0.05 | 28.34 | true | 0.691872;0.694648;0.694635;0.691931;0.696301 | 802816;802816;802816;802816;802816 | 12724160;12758720;12692160;12729792;12727744 | 4684064;4687136;4635168;4676768;4609184 |
9 | vgg_19/conv2/conv2_1/convolution | Conv2D | [[1 128 112 112]] | 227.333 | 6422528 | 7537152 | 584304128 | GPU_0_bfc | 1114624 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 84.33 | 871055360 | 2563637.33 | 6559808.00 | 23.10 | 95.47 | 10328.76 | false | 0.229022;0.232973;0.228900;0.235287;0.231828 | 871055360;871055360;871055360;871055360;871055360 | 2617184;2549408;2482144;2622688;2524320 | 6557536;6492160;6515424;6606464;6625792 |
9 | vgg_19/conv2/conv2_1/convolution | Conv2D | [[1 128 112 112]] | 227.333 | 6422528 | 7537152 | 584304128 | GPU_0_bfc | 1114624 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 5.00 | 0 | 295104.00 | 181461.33 | 45.00 | 0.00 | 0.00 | true | 0.447794;0.448896;0.452448;0.450411;0.449953 | 0;0;0;0;0 | 183296;179200;181888;192512;155904 | 295104;295104;295104;295104;295104 |
9 | vgg_19/conv2/conv2_1/convolution | Conv2D | [[1 128 112 112]] | 227.333 | 6422528 | 7537152 | 584304128 | GPU_0_bfc | 1114624 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.00 | 475136 | 277.33 | 157952.00 | 6.20 | 3.00 | 158.38 | true | 0.062246;0.062268;0.062252;0.062252;0.062259 | 475136;475136;475136;475136;475136 | 192;192;448;192;448 | 148992;154752;170112;171136;131840 |
10 | vgg_19/conv2/conv2_1/BiasAdd | BiasAdd | [[1 128 112 112]] | 51.333 | 6422528 | 0 | 581092864 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 22.00 | 1605632 | 3881877.33 | 4796394.67 | 46.40 | 0.19 | 72.98 | true | 0.463282;0.464395;0.465062;0.464117;0.463552 | 1605632;1605632;1605632;1605632;1605632 | 3869696;3929600;3763008;3846336;3999744 | 4808384;4827616;4837536;4744896;4753184 |
11 | vgg_19/conv2/conv2_1/Relu | Relu | [[1 128 112 112]] | 33.667 | 6422528 | 0 | 581092864 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 12.00 | 0 | 1229493.33 | 5898890.67 | 79.30 | 0.00 | 0.00 | true | 0.800359;0.798590;0.789919;0.790087;0.776972 | 0;0;0;0;0 | 1242592;1245664;1222240;1096288;1223648 | 5871616;5933312;5874528;5888832;5964704 |
12 | vgg_19/conv2/conv2_2/convolution | Conv2D | [[1 128 112 112]] | 307.667 | 6422528 | 9633792 | 587515392 | GPU_0_bfc | 3211264 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 153.33 | 1725251584 | 10495050.67 | 7599477.33 | 23.40 | 95.35 | 11251.67 | false | 0.237364;0.232059;0.231818;0.234045;0.234557 | 1725251584;1725251584;1725251584;1725251584;1725251584 | 7511072;7627456;7504032;7859488;7659904 | 9978400;9997728;11227104;11080480;10406944 |
12 | vgg_19/conv2/conv2_2/convolution | Conv2D | [[1 128 112 112]] | 307.667 | 6422528 | 9633792 | 587515392 | GPU_0_bfc | 3211264 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 7.00 | 0 | 590016.00 | 1024362.67 | 45.10 | 0.00 | 0.00 | true | 0.450607;0.452654;0.450608;0.448919;0.453952 | 0;0;0;0;0 | 1037056;1018528;1038464;1017504;974496 | 590016;590016;590016;590016;590016 |
12 | vgg_19/conv2/conv2_2/convolution | Conv2D | [[1 128 112 112]] | 307.667 | 6422528 | 9633792 | 587515392 | GPU_0_bfc | 3211264 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 950272 | 149.33 | 857440.00 | 9.70 | 1.11 | 237.57 | true | 0.096830;0.096718;0.096973;0.097111;0.096831 | 950272;950272;950272;950272;950272 | 64;64;192;192;192 | 856064;854688;851328;861568;876224 |
13 | vgg_19/conv2/conv2_2/BiasAdd | BiasAdd | [[1 128 112 112]] | 50.333 | 6422528 | 0 | 581092864 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 22.67 | 1605632 | 4543829.33 | 4686485.33 | 46.50 | 0.17 | 70.84 | true | 0.464903;0.465234;0.462230;0.465595;0.464934 | 1605632;1605632;1605632;1605632;1605632 | 4341888;4554816;4605632;4887488;4471040 | 4756128;4676992;4816096;4492960;4626336 |
14 | vgg_19/conv2/conv2_2/Relu | Relu | [[1 128 112 112]] | 33 | 6422528 | 0 | 581092864 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 12.00 | 0 | 1094410.67 | 5937290.67 | 77.20 | 0.00 | 0.00 | true | 0.778135;0.791722;0.766993;0.769849;0.748194 | 0;0;0;0;0 | 1095776;1105888;1136480;966752;1081568 | 5950816;5938720;5966112;5922336;5902208 |
15 | vgg_19/pool2/MaxPool | MaxPool | [[1 128 56 56]] | 53 | 1605632 | 1605632 | 582698496 | GPU_0_bfc | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 10.00 | 401408 | 1664501.33 | 2465450.67 | 57.30 | 0.10 | 40.14 | true | 0.573886;0.573623;0.572002;0.567843;0.573936 | 401408;401408;401408;401408;401408 | 1662560;1632576;1605344;1706176;1698368 | 2431040;2505376;2466496;2432224;2497632 |
16 | vgg_19/conv3/conv3_1/convolution | Conv2D | [[1 256 56 56]] | 236.333 | 3211264 | 7668736 | 579487232 | GPU_0_bfc | 4457472 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 92.00 | 985858048 | 552821.33 | 4539456.00 | 22.60 | 193.60 | 10715.85 | false | 0.221112;0.233072;0.226187;0.228624;0.223212 | 985858048;985858048;985858048;985858048;985858048 | 564064;552096;518688;610784;542304 | 4536768;4474240;4560576;4521024;4585152 |
16 | vgg_19/conv3/conv3_1/convolution | Conv2D | [[1 256 56 56]] | 236.333 | 3211264 | 7668736 | 579487232 | GPU_0_bfc | 4457472 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 10.33 | 0 | 1179840.00 | 1230698.67 | 44.90 | 0.00 | 0.00 | true | 0.449174;0.452008;0.449719;0.449304;0.447351 | 0;0;0;0;0 | 1179840;1184704;1179840;1179840;1179840 | 1242880;1226496;1187552;1248384;1222720 |
16 | vgg_19/conv3/conv3_1/convolution | Conv2D | [[1 256 56 56]] | 236.333 | 3211264 | 7668736 | 579487232 | GPU_0_bfc | 4457472 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 5.00 | 1900544 | 192.00 | 1482176.00 | 16.10 | 1.28 | 380.11 | true | 0.161382;0.161121;0.161263;0.161263;0.161212 | 1900544;1900544;1900544;1900544;1900544 | 192;192;192;192;192 | 1483840;1470336;1520864;1458592;1492352 |
17 | vgg_19/conv3/conv3_1/BiasAdd | BiasAdd | [[1 256 56 56]] | 37.333 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 11.00 | 802816 | 513792.00 | 987626.67 | 45.70 | 0.53 | 72.98 | true | 0.455445;0.456718;0.460102;0.454929;0.460078 | 802816;802816;802816;802816;802816 | 514336;474880;580832;519104;507936 | 984768;968832;1009280;1045824;940640 |
18 | vgg_19/conv3/conv3_1/Relu | Relu | [[1 256 56 56]] | 26.667 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 5.00 | 0 | 96.00 | 1083189.33 | 76.20 | 0.00 | 0.00 | true | 0.764203;0.748652;0.764940;0.765378;0.755698 | 0;0;0;0;0 | 96;96;96;96;96 | 1092320;1089952;1035008;1069088;1090528 |
19 | vgg_19/conv3/conv3_2/convolution | Conv2D | [[1 256 56 56]] | 338 | 3211264 | 12125184 | 581092864 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 173.67 | 1962082304 | 10276832.00 | 5920000.00 | 21.50 | 121.14 | 11297.96 | false | 0.214136;0.214838;0.213977;0.215418;0.215560 | 1962082304;1962082304;1962082304;1962082304;1962082304 | 10290528;10285792;10254176;10215456;10301216 | 5665344;5809568;5998912;5973472;5976960 |
19 | vgg_19/conv3/conv3_2/convolution | Conv2D | [[1 256 56 56]] | 338 | 3211264 | 12125184 | 581092864 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 16.67 | 0 | 2359296.00 | 2579125.33 | 46.20 | 0.00 | 0.00 | true | 0.458634;0.461231;0.463754;0.463669;0.460149 | 0;0;0;0;0 | 2359296;2359360;2359296;2359296;2359296 | 2581568;2577280;2585920;2565216;2578528 |
19 | vgg_19/conv3/conv3_2/convolution | Conv2D | [[1 256 56 56]] | 338 | 3211264 | 12125184 | 581092864 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 8.00 | 3801088 | 67712.00 | 3611424.00 | 26.60 | 1.03 | 475.14 | true | 0.266088;0.262218;0.266608;0.267029;0.266217 | 3801088;3801088;3801088;3801088;3801088 | 64896;69504;61568;71680;68736 | 3631232;3591296;3612832;3625728;3595712 |
20 | vgg_19/conv3/conv3_2/BiasAdd | BiasAdd | [[1 256 56 56]] | 39.333 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 11.67 | 802816 | 2506698.67 | 302602.67 | 45.80 | 0.29 | 68.81 | true | 0.458408;0.457878;0.460559;0.451719;0.458491 | 802816;802816;802816;802816;802816 | 2274304;2369184;2590368;2594752;2560544 | 365536;378880;271872;264288;270400 |
21 | vgg_19/conv3/conv3_2/Relu | Relu | [[1 256 56 56]] | 27 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 5.00 | 0 | 96.00 | 1098880.00 | 76.30 | 0.00 | 0.00 | true | 0.762780;0.763966;0.764302;0.761835;0.753011 | 0;0;0;0;0 | 96;96;96;96;96 | 1019744;1069408;1113824;1123744;1113408 |
22 | vgg_19/conv3/conv3_3/convolution | Conv2D | [[1 256 56 56]] | 340 | 4816896 | 13730816 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 173.00 | 1962082304 | 10196917.33 | 5989205.33 | 21.60 | 121.22 | 11341.52 | false | 0.216684;0.214288;0.214718;0.216316;0.217134 | 1962082304;1962082304;1962082304;1962082304;1962082304 | 10190816;10210528;10203616;10196320;10149984 | 5711808;6007360;5980832;6012384;5979424 |
22 | vgg_19/conv3/conv3_3/convolution | Conv2D | [[1 256 56 56]] | 340 | 4816896 | 13730816 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 16.33 | 0 | 2359296.00 | 2472117.33 | 46.10 | 0.00 | 0.00 | true | 0.463264;0.459224;0.458041;0.463950;0.461475 | 0;0;0;0;0 | 2359296;2359296;2359296;2364672;2359296 | 2500672;2477056;2469184;2444320;2470112 |
22 | vgg_19/conv3/conv3_3/convolution | Conv2D | [[1 256 56 56]] | 340 | 4816896 | 13730816 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 8.00 | 3801088 | 60757.33 | 3612234.67 | 26.50 | 1.03 | 475.14 | true | 0.265327;0.265477;0.264325;0.268486;0.265648 | 3801088;3801088;3801088;3801088;3801088 | 62976;61568;49536;57728;66048 | 3611264;3593600;3604608;3621792;3620832 |
23 | vgg_19/conv3/conv3_3/BiasAdd | BiasAdd | [[1 256 56 56]] | 39 | 4816896 | 0 | 579487232 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 11.33 | 802816 | 2557760.00 | 277589.33 | 45.90 | 0.28 | 70.84 | true | 0.452278;0.458371;0.460322;0.460059;0.459341 | 802816;802816;802816;802816;802816 | 2192512;2552928;2565408;2554944;2592096 | 370240;279360;287040;265312;266368 |
24 | vgg_19/conv3/conv3_3/Relu | Relu | [[1 256 56 56]] | 26 | 4816896 | 0 | 579487232 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 5.00 | 0 | 96.00 | 1087424.00 | 76.60 | 0.00 | 0.00 | true | 0.765433;0.766529;0.764767;0.779498;0.765051 | 0;0;0;0;0 | 96;96;96;96;96 | 1084416;1091520;1089216;1028992;1088640 |
25 | vgg_19/conv3/conv3_4/convolution | Conv2D | [[1 256 56 56]] | 340.667 | 3211264 | 12125184 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 173.00 | 1962082304 | 10249354.67 | 5989248.00 | 21.50 | 120.83 | 11341.52 | false | 0.214315;0.214372;0.217210;0.213972;0.219509 | 1962082304;1962082304;1962082304;1962082304;1962082304 | 10135840;10315744;10239712;10267232;10241120 | 5983616;5994752;5987712;5985280;5995040 |
25 | vgg_19/conv3/conv3_4/convolution | Conv2D | [[1 256 56 56]] | 340.667 | 3211264 | 12125184 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 17.00 | 0 | 2359296.00 | 2511584.00 | 46.00 | 0.00 | 0.00 | true | 0.461034;0.458729;0.464210;0.460877;0.455609 | 0;0;0;0;0 | 2359296;2359296;2359296;2359296;2359360 | 2515584;2527104;2502112;2517056;2486240 |
25 | vgg_19/conv3/conv3_4/convolution | Conv2D | [[1 256 56 56]] | 340.667 | 3211264 | 12125184 | 582698496 | GPU_0_bfc | 8913920 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 8.00 | 3801088 | 62336.00 | 3593418.67 | 26.50 | 1.04 | 475.14 | true | 0.270365;0.265078;0.266068;0.263015;0.264641 | 3801088;3801088;3801088;3801088;3801088 | 3596832;3587392;3594240;3597760;3589184 | 67200;58752;61824;59264;65920 |
26 | vgg_19/conv3/conv3_4/BiasAdd | BiasAdd | [[1 256 56 56]] | 38.667 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 11.33 | 802816 | 2588949.33 | 270293.33 | 45.80 | 0.28 | 70.84 | true | 0.457363;0.456558;0.456413;0.459989;0.460218 | 802816;802816;802816;802816;802816 | 2588288;2587264;2590144;2588416;2595424 | 273792;280256;259968;261632;275456 |
27 | vgg_19/conv3/conv3_4/Relu | Relu | [[1 256 56 56]] | 26.333 | 3211264 | 0 | 577881600 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 5.00 | 0 | 96.00 | 1107424.00 | 76.40 | 0.00 | 0.00 | true | 0.762955;0.763899;0.741149;0.764122;0.764377 | 0;0;0;0;0 | 96;96;96;96;96 | 1106080;1096544;1126272;1118400;1097792 |
28 | vgg_19/pool3/MaxPool | MaxPool | [[1 256 28 28]] | 48 | 802816 | 802816 | 578684416 | GPU_0_bfc | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 5.00 | 200704 | 64.00 | 971829.33 | 49.60 | 0.21 | 40.14 | true | 0.492803;0.496937;0.503733;0.496024;0.494938 | 200704;200704;200704;200704;200704 | 961280;962688;976416;977248;976384 | 64;64;64;64;64 |
29 | vgg_19/conv4/conv4_1/convolution | Conv2D | [[1 512 28 28]] | 298.333 | 1605632 | 19433472 | 577078784 | GPU_0_bfc | 17827840 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 116.00 | 1121189888 | 14318016.00 | 3937525.33 | 21.30 | 61.42 | 9665.43 | false | 0.212512;0.213161;0.213869;0.213077;0.212765 | 1121189888;1121189888;1121189888;1121189888;1121189888 | 3974688;3885344;3921216;3964864;3926496 | 13931072;14059264;15465184;14248992;14645792 |
29 | vgg_19/conv4/conv4_1/convolution | Conv2D | [[1 512 28 28]] | 298.333 | 1605632 | 19433472 | 577078784 | GPU_0_bfc | 17827840 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 29.33 | 0 | 4722218.67 | 4818709.33 | 46.80 | 0.00 | 0.00 | true | 0.467340;0.467673;0.466645;0.467588;0.469144 | 0;0;0;0;0 | 4721920;4723264;4729216;4720320;4721472 | 4835488;4815168;4829504;4809472;4811456 |
29 | vgg_19/conv4/conv4_1/convolution | Conv2D | [[1 512 28 28]] | 298.333 | 1605632 | 19433472 | 577078784 | GPU_0_bfc | 17827840 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 17.67 | 7602176 | 2871829.33 | 7540970.67 | 44.80 | 0.73 | 430.30 | true | 0.447675;0.447950;0.455475;0.449484;0.441136 | 7602176;7602176;7602176;7602176;7602176 | 2877696;2877184;2878976;2850944;2860608 | 7495584;7594912;7538592;7535360;7548960 |
30 | vgg_19/conv4/conv4_1/BiasAdd | BiasAdd | [[1 512 28 28]] | 33.667 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 6.00 | 401408 | 89568.00 | 32.00 | 46.30 | 4.48 | 66.90 | true | 0.461538;0.462322;0.462060;0.464671;0.470423 | 401408;401408;401408;401408;401408 | 93536;84512;88608;96864;86560 | 160;0;0;64;32 |
31 | vgg_19/conv4/conv4_1/Relu | Relu | [[1 512 28 28]] | 26.333 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 4.00 | 0 | 96.00 | 0.00 | 66.50 | 0.00 | 0.00 | true | 0.673205;0.660884;0.661646;0.687880;0.660905 | 0;0;0;0;0 | 0;0;0;0;0 | 96;96;96;96;96 |
32 | vgg_19/conv4/conv4_2/convolution | Conv2D | [[1 512 28 28]] | 463 | 1605632 | 37259264 | 577881600 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 222.00 | 2236874752 | 37239061.33 | 4025717.33 | 21.40 | 54.21 | 10076.01 | false | 0.213766;0.213047;0.213942;0.213128;0.214011 | 2236874752;2236874752;2236874752;2236874752;2236874752 | 35859424;34843360;39077664;39643424;36780096 | 4042272;4039328;4006176;4026848;4010976 |
32 | vgg_19/conv4/conv4_2/convolution | Conv2D | [[1 512 28 28]] | 463 | 1605632 | 37259264 | 577881600 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 55.33 | 0 | 9640704.00 | 8598624.00 | 46.80 | 0.00 | 0.00 | true | 0.469626;0.469122;0.466734;0.466914;0.467487 | 0;0;0;0;0 | 9670400;9596800;9654848;9650944;9616320 | 8627488;8596736;8594592;8604544;8588704 |
32 | vgg_19/conv4/conv4_2/convolution | Conv2D | [[1 512 28 28]] | 463 | 1605632 | 37259264 | 577881600 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 35.00 | 15204352 | 8713088.00 | 15754005.33 | 57.60 | 0.62 | 434.41 | true | 0.574255;0.576681;0.579570;0.575586;0.577189 | 15204352;15204352;15204352;15204352;15204352 | 15719584;15751552;15797920;15719424;15790880 | 8728064;8706560;8721408;8711296;8695488 |
33 | vgg_19/conv4/conv4_2/BiasAdd | BiasAdd | [[1 512 28 28]] | 33 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 6.00 | 401408 | 369269.33 | 7104.00 | 46.40 | 1.07 | 66.90 | true | 0.465225;0.463616;0.461649;0.466988;0.463220 | 401408;401408;401408;401408;401408 | 7584;7360;5120;6528;7424 | 374752;367680;361376;366400;373728 |
34 | vgg_19/conv4/conv4_2/Relu | Relu | [[1 512 28 28]] | 24.667 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 4.00 | 0 | 96.00 | 0.00 | 65.80 | 0.00 | 0.00 | true | 0.656897;0.658265;0.649889;0.659422;0.657598 | 0;0;0;0;0 | 96;96;96;96;96 | 0;0;0;0;0 |
35 | vgg_19/conv4/conv4_3/convolution | Conv2D | [[1 512 28 28]] | 463.667 | 2408448 | 38062080 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 222.00 | 2236874752 | 36612757.33 | 4041930.67 | 21.40 | 55.02 | 10076.01 | false | 0.213729;0.214110;0.213124;0.214308;0.213010 | 2236874752;2236874752;2236874752;2236874752;2236874752 | 34233152;38502912;35477216;38147936;36213120 | 4044064;3970400;4038432;4074560;4043296 |
35 | vgg_19/conv4/conv4_3/convolution | Conv2D | [[1 512 28 28]] | 463.667 | 2408448 | 38062080 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 55.00 | 0 | 9596608.00 | 8598165.33 | 46.80 | 0.00 | 0.00 | true | 0.468075;0.467410;0.467500;0.470229;0.468568 | 0;0;0;0;0 | 9589440;9616448;9583936;9573504;9636032 | 8591904;8604672;8597920;8580352;8624544 |
35 | vgg_19/conv4/conv4_3/convolution | Conv2D | [[1 512 28 28]] | 463.667 | 2408448 | 38062080 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 34.67 | 15204352 | 8674752.00 | 15744149.33 | 57.60 | 0.62 | 438.58 | true | 0.578619;0.575174;0.575307;0.576118;0.576865 | 15204352;15204352;15204352;15204352;15204352 | 8651264;8681472;8679296;8663488;8721792 | 15749664;15760000;15747104;15735680;15720704 |
36 | vgg_19/conv4/conv4_3/BiasAdd | BiasAdd | [[1 512 28 28]] | 33.667 | 2408448 | 0 | 577078784 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 6.00 | 401408 | 373493.33 | 8682.67 | 46.20 | 1.05 | 66.90 | true | 0.462970;0.463513;0.460424;0.461720;0.462473 | 401408;401408;401408;401408;401408 | 370944;380672;374624;371936;373920 | 9248;8608;8672;8768;8320 |
37 | vgg_19/conv4/conv4_3/Relu | Relu | [[1 512 28 28]] | 25.667 | 2408448 | 0 | 577078784 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 4.00 | 0 | 96.00 | 0.00 | 65.90 | 0.00 | 0.00 | true | 0.659610;0.658881;0.655249;0.658593;0.659331 | 0;0;0;0;0 | 96;96;96;96;96 | 0;0;0;0;0 |
38 | vgg_19/conv4/conv4_4/convolution | Conv2D | [[1 512 28 28]] | 465.333 | 1605632 | 37259264 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 222.00 | 2236874752 | 37173760.00 | 4031050.67 | 21.40 | 54.29 | 10076.01 | false | 0.214129;0.213300;0.213835;0.213464;0.215062 | 2236874752;2236874752;2236874752;2236874752;2236874752 | 35459744;36674688;37934016;37780736;37065856 | 4039552;4090944;4021920;4031680;4019712 |
38 | vgg_19/conv4/conv4_4/convolution | Conv2D | [[1 512 28 28]] | 465.333 | 1605632 | 37259264 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 55.00 | 0 | 9601130.67 | 8646325.33 | 46.50 | 0.00 | 0.00 | true | 0.465936;0.464131;0.465092;0.464865;0.464984 | 0;0;0;0;0 | 9615680;9538816;9599168;9660928;9588544 | 8647680;8598432;8658560;8650144;8641152 |
38 | vgg_19/conv4/conv4_4/convolution | Conv2D | [[1 512 28 28]] | 465.333 | 1605632 | 37259264 | 578684416 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 34.33 | 15204352 | 8717546.67 | 15699349.33 | 57.40 | 0.62 | 442.85 | true | 0.571840;0.573379;0.573999;0.577947;0.575551 | 15204352;15204352;15204352;15204352;15204352 | 8735360;8704512;8740288;8712768;8700992 | 15724576;15703200;15702560;15681920;15692288 |
39 | vgg_19/conv4/conv4_4/BiasAdd | BiasAdd | [[1 512 28 28]] | 33 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 6.00 | 401408 | 371861.33 | 7082.67 | 46.10 | 1.06 | 66.90 | true | 0.458201;0.463515;0.462389;0.464587;0.454929 | 401408;401408;401408;401408;401408 | 354432;365056;375552;374976;376704 | 6496;6336;7520;7232;9056 |
40 | vgg_19/conv4/conv4_4/Relu | Relu | [[1 512 28 28]] | 25 | 1605632 | 0 | 576275968 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 4.00 | 0 | 96.00 | 0.00 | 65.80 | 0.00 | 0.00 | true | 0.657500;0.655917;0.659875;0.656280;0.669387 | 0;0;0;0;0 | 7008;96;96;96;96 | 0;0;0;0;0 |
41 | vgg_19/pool4/MaxPool | MaxPool | [[1 512 14 14]] | 48.667 | 401408 | 401408 | 576677376 | GPU_0_bfc | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 5.00 | 100352 | 64.00 | 0.00 | 31.20 | 1568.00 | 20.07 | false | 0.311525;0.311210;0.311684;0.311576;0.311952 | 100352;100352;100352;100352;100352 | 64;64;64;64;64 | 0;0;0;0;0 |
42 | vgg_19/conv5/conv5_1/convolution | Conv2D | [[1 512 14 14]] | 348.333 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 117.00 | 559218688 | 17178912.00 | 3488181.33 | 12.50 | 27.06 | 4779.65 | true | 0.124957;0.124960;0.124958;0.124958;0.124957 | 559218688;559218688;559218688;559218688;559218688 | 17178912;17180448;17178912;17178912;17178912 | 3508064;3487200;3500384;3438304;3476960 |
42 | vgg_19/conv5/conv5_1/convolution | Conv2D | [[1 512 14 14]] | 348.333 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 54.33 | 0 | 9590698.67 | 8895648.00 | 46.80 | 0.00 | 0.00 | true | 0.467960;0.467096;0.467054;0.467558;0.468801 | 0;0;0;0;0 | 9586112;9584768;9527424;9608320;9601216 | 8923136;8891488;8834592;8938880;8872320 |
42 | vgg_19/conv5/conv5_1/convolution | Conv2D | [[1 512 14 14]] | 348.333 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 36.00 | 15204352 | 8682005.33 | 15721013.33 | 58.10 | 0.62 | 422.34 | true | 0.582070;0.582742;0.579456;0.581708;0.579127 | 15204352;15204352;15204352;15204352;15204352 | 8687936;8681664;8669312;8698880;8676416 | 15670560;15710336;15759616;15716352;15736352 |
43 | vgg_19/conv5/conv5_1/BiasAdd | BiasAdd | [[1 512 14 14]] | 32 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 4.00 | 100352 | 2240.00 | 0.00 | 47.80 | 44.80 | 25.09 | false | 0.477529;0.477698;0.477716;0.477613;0.477945 | 100352;100352;100352;100352;100352 | 2240;2240;7360;2240;2240 | 0;0;0;0;0 |
44 | vgg_19/conv5/conv5_1/Relu | Relu | [[1 512 14 14]] | 24 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 96.00 | 0.00 | 54.30 | 0.00 | 0.00 | true | 0.539115;0.543970;0.543180;0.543192;0.542299 | 0;0;0;0;0 | 0;0;0;0;0 | 96;96;96;2144;96 |
45 | vgg_19/conv5/conv5_2/convolution | Conv2D | [[1 512 14 14]] | 353 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 117.00 | 559218688 | 17178912.00 | 3472480.00 | 12.50 | 27.08 | 4779.65 | true | 0.124959;0.124958;0.124960;0.124958;0.124958 | 559218688;559218688;559218688;559218688;559218688 | 17178912;17178912;17178912;17178912;17178912 | 3479008;3439328;3463776;3517792;3474656 |
45 | vgg_19/conv5/conv5_2/convolution | Conv2D | [[1 512 14 14]] | 353 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 54.33 | 0 | 9682944.00 | 7427776.00 | 46.40 | 0.00 | 0.00 | true | 0.464587;0.462757;0.463284;0.463903;0.463380 | 0;0;0;0;0 | 9693632;9691200;9709376;9616896;9664000 | 7440032;7421184;7422112;7397888;7478304 |
45 | vgg_19/conv5/conv5_2/convolution | Conv2D | [[1 512 14 14]] | 353 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 36.00 | 15204352 | 8704213.33 | 15711861.33 | 58.30 | 0.62 | 422.34 | true | 0.584800;0.583433;0.581102;0.580308;0.583720 | 15204352;15204352;15204352;15204352;15204352 | 8731328;8692928;8711552;8689088;8708160 | 15699744;15748480;15732640;15703200;15665920 |
46 | vgg_19/conv5/conv5_2/BiasAdd | BiasAdd | [[1 512 14 14]] | 30.333 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 4.00 | 100352 | 2240.00 | 0.00 | 47.80 | 44.80 | 25.09 | false | 0.477885;0.478644;0.477843;0.478440;0.477722 | 100352;100352;100352;100352;100352 | 2240;2240;2240;2240;2240 | 0;0;0;0;0 |
47 | vgg_19/conv5/conv5_2/Relu | Relu | [[1 512 14 14]] | 24.333 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 96.00 | 0.00 | 54.50 | 0.00 | 0.00 | true | 0.544158;0.544509;0.547165;0.544401;0.552663 | 0;0;0;0;0 | 96;96;96;96;96 | 0;0;0;0;0 |
48 | vgg_19/conv5/conv5_3/convolution | Conv2D | [[1 512 14 14]] | 348 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 117.00 | 559218688 | 17178912.00 | 3472394.67 | 12.50 | 27.08 | 4779.65 | true | 0.124958;0.124957;0.124958;0.124959;0.124958 | 559218688;559218688;559218688;559218688;559218688 | 17178912;17178912;17178912;17178912;17178912 | 3487456;3523424;3465312;3418976;3464416 |
48 | vgg_19/conv5/conv5_3/convolution | Conv2D | [[1 512 14 14]] | 348 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 54.00 | 0 | 9796160.00 | 7411040.00 | 46.50 | 0.00 | 0.00 | true | 0.465125;0.464364;0.465838;0.465155;0.465598 | 0;0;0;0;0 | 7394816;7402528;7415936;7417888;7414656 | 9757376;9798400;9806848;9783232;9815488 |
48 | vgg_19/conv5/conv5_3/convolution | Conv2D | [[1 512 14 14]] | 348 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 35.67 | 15204352 | 8679402.67 | 15737706.67 | 58.20 | 0.62 | 426.29 | true | 0.581552;0.583297;0.583428;0.581025;0.568086 | 15204352;15204352;15204352;15204352;15204352 | 15736736;15692416;15737216;15782176;15739168 | 8688128;8663808;8656960;8686272;8705344 |
49 | vgg_19/conv5/conv5_3/BiasAdd | BiasAdd | [[1 512 14 14]] | 30 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 4.00 | 100352 | 2240.00 | 0.00 | 47.80 | 44.80 | 25.09 | false | 0.477836;0.481614;0.477674;0.477817;0.477660 | 100352;100352;100352;100352;100352 | 2240;2240;2240;2240;2240 | 0;0;0;0;0 |
50 | vgg_19/conv5/conv5_3/Relu | Relu | [[1 512 14 14]] | 24 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 96.00 | 0.00 | 54.40 | 0.00 | 0.00 | true | 0.543206;0.544337;0.543698;0.543900;0.543870 | 0;0;0;0;0 | 0;0;0;0;0 | 96;96;96;96;5984 |
51 | vgg_19/conv5/conv5_4/convolution | Conv2D | [[1 512 14 14]] | 347 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 117.00 | 559218688 | 17178912.00 | 3490272.00 | 12.50 | 27.06 | 4779.65 | true | 0.124958;0.124958;0.124961;0.124958;0.124957 | 559218688;559218688;559218688;559218688;559218688 | 17178912;17178912;17178912;17178912;17178912 | 3537760;3516512;3463776;3422176;3490528 |
51 | vgg_19/conv5/conv5_4/convolution | Conv2D | [[1 512 14 14]] | 347 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 54.00 | 0 | 10009301.33 | 7489077.33 | 45.70 | 0.00 | 0.00 | true | 0.457766;0.456934;0.456982;0.455984;0.457703 | 0;0;0;0;0 | 7489152;7487776;7490304;7487520;7498624 | 9953216;10025984;9994432;10007488;10031040 |
51 | vgg_19/conv5/conv5_4/convolution | Conv2D | [[1 512 14 14]] | 347 | 401408 | 36055040 | 575473152 | GPU_0_bfc | 35653632 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 35.33 | 15204352 | 8736832.00 | 15636320.00 | 57.90 | 0.62 | 430.32 | true | 0.579045;0.579789;0.580987;0.578875;0.578959 | 15204352;15204352;15204352;15204352;15204352 | 15591840;15614336;15664896;15709216;15629728 | 8720064;8745216;8723904;8741376;8747840 |
52 | vgg_19/conv5/conv5_4/BiasAdd | BiasAdd | [[1 512 14 14]] | 30 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 4.00 | 100352 | 2240.00 | 0.00 | 47.80 | 44.80 | 25.09 | false | 0.478115;0.478487;0.477858;0.477930;0.477798 | 100352;100352;100352;100352;100352 | 2240;2240;2240;2240;2240 | 0;0;0;0;0 |
53 | vgg_19/conv5/conv5_4/Relu | Relu | [[1 512 14 14]] | 24.667 | 401408 | 0 | 575071744 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 96.00 | 0.00 | 54.40 | 0.00 | 0.00 | true | 0.543757;0.543952;0.555206;0.544014;0.543838 | 0;0;0;0;0 | 96;96;96;1632;96 | 0;0;0;0;0 |
54 | vgg_19/pool5/MaxPool | MaxPool | [[1 512 7 7]] | 45.333 | 100352 | 100352 | 575172096 | GPU_0_bfc | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 4.00 | 25088 | 64.00 | 0.00 | 10.80 | 392.00 | 6.27 | false | 0.108276;0.108300;0.108290;0.108227;0.108280 | 25088;25088;25088;25088;25088 | 64;64;64;64;64 | 0;0;0;0;0 |
55 | vgg_19/fc6/convolution | Conv2D | [[1 4096 1 1]] | 19463.667 | 16384 | 411058432 | 574787072 | GPU_0_bfc | 411042048 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 15176.67 | 0 | 84793728.00 | 53006218.67 | 25.00 | 0.00 | 0.00 | true | 0.250545;0.248954;0.249462;0.250053;0.250883 | 0;0;0;0;0 | 52926208;53047488;53044960;53548192;52883200 | 84494880;83437312;85150208;84744800;85141504 |
55 | vgg_19/fc6/convolution | Conv2D | [[1 4096 1 1]] | 19463.667 | 16384 | 411058432 | 574787072 | GPU_0_bfc | 411042048 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 3022.67 | 26307723264 | 533752480.00 | 94362272.00 | 6.00 | 41.88 | 8703.48 | false | 0.062334;0.062400;0.062395;0.055638;0.051994 | 26307723264;26307723264;26307723264;26307723264;26307723264 | 560696000;517369120;565975904;521025280;519536160 | 94245600;90340448;98612128;94941120;93900096 |
55 | vgg_19/fc6/convolution | Conv2D | [[1 4096 1 1]] | 19463.667 | 16384 | 411058432 | 574787072 | GPU_0_bfc | 411042048 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 3.00 | 0 | 537112896.00 | 98934250.67 | 31.30 | 0.00 | 0.00 | true | 0.313402;0.317233;0.312671;0.313300;0.305066 | 0;0;0;0;0 | 537375360;527868064;534940512;539022816;539533088 | 98008704;97598272;100017568;101056832;98776480 |
56 | vgg_19/fc6/BiasAdd | BiasAdd | [[1 4096 1 1]] | 36 | 16384 | 0 | 574686720 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 3.00 | 4096 | 533957216.00 | 94153045.33 | 47.10 | 0.00 | 1.37 | true | 0.471639;0.469971;0.471595;0.470867;0.470304 | 4096;4096;4096;4096;4096 | 531604800;519561472;526160576;563071456;544106272 | 88559680;90959392;93906240;98537792;97593504 |
57 | vgg_19/fc6/Relu | Relu | [[1 4096 1 1]] | 23.667 | 16384 | 0 | 574686720 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 74521162.67 | 405301.33 | 45.20 | 0.00 | 0.00 | true | 0.452083;0.451872;0.451726;0.451877;0.452178 | 0;0;0;0;0 | 50712544;59178496;92250944;90695584;73689408 | 346016;619904;367392;430592;417920 |
58 | vgg_19/fc7/convolution | Conv2D | [[1 4096 1 1]] | 937 | 16384 | 67125248 | 574703104 | GPU_0_bfc | 67108864 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 360.33 | 0 | 80497280.00 | 64782122.67 | 46.60 | 0.00 | 0.00 | true | 0.465666;0.465262;0.466545;0.467096;0.465341 | 0;0;0;0;0 | 79911360;80710208;77770368;80870272;81393664 | 64761728;64800768;64770432;64775168;64841472 |
58 | vgg_19/fc7/convolution | Conv2D | [[1 4096 1 1]] | 937 | 16384 | 67125248 | 574703104 | GPU_0_bfc | 67108864 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 360.33 | 1073745920 | 66727936.00 | 2353194.67 | 5.10 | 15.54 | 2979.87 | true | 0.051044;0.050850;0.050877;0.050902;0.051013 | 1073745920;1073745920;1073745920;1073745920;1073745920 | 66708864;66746240;66723200;66724864;66735744 | 2375424;2335872;2364544;2359168;2296320 |
59 | vgg_19/fc7/BiasAdd | BiasAdd | [[1 4096 1 1]] | 31.667 | 16384 | 0 | 574686720 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 3.00 | 4096 | 16960.00 | 0.00 | 47.00 | 0.24 | 1.37 | true | 0.470028;0.469842;0.465052;0.469326;0.471030 | 4096;4096;4096;4096;4096 | 16960;16832;17088;16704;17088 | 0;0;128;0;0 |
60 | vgg_19/fc7/Relu | Relu | [[1 4096 1 1]] | 23.667 | 16384 | 0 | 574686720 | GPU_0_bfc | 0 | 0 | 0 | 0 | void Eigen::internal::EigenMetaKernel<Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long>(Eigen::TensorEvaluator<Eigen::TensorAssignOp<Eigen::TensorMap<Eigen::Tensor<float, 1, 1, long>, 16, Eigen::MakePointer>, Eigen::TensorCwiseBinaryOp<Eigen::internal::scalar_max_op<float const, float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const, Eigen::TensorCwiseNullaryOp<Eigen::internal::scalar_constant_op<float const>, Eigen::TensorMap<Eigen::Tensor<float const, 1, 1, long>, 16, Eigen::MakePointer> const> const> const> const, Eigen::GpuDevice>, long) | 3.00 | 0 | 96.00 | 0.00 | 45.10 | 0.00 | 0.00 | true | 0.450688;0.456941;0.450745;0.451263;0.450757 | 0;0;0;0;0 | 96;96;96;96;96 | 0;0;0;0;0 |
61 | vgg_19/fc8/convolution | Conv2D | [[1 1000 1 1]] | 518.667 | 4096 | 16388096 | 574690816 | GPU_0_bfc | 16384000 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 281.00 | 268436456 | 15649792.00 | 2609770.67 | 3.10 | 14.70 | 955.29 | true | 0.031250;0.031250;0.031250;0.031250;0.031250 | 268436456;268436456;268436456;268436456;268436456 | 15636864;15659648;15641984;15665152;15647744 | 2600384;2611008;2608448;2609856;2629440 |
61 | vgg_19/fc8/convolution | Conv2D | [[1 1000 1 1]] | 518.667 | 4096 | 16388096 | 574690816 | GPU_0_bfc | 16384000 | 0 | 0 | 0 | void tensorflow::functor::ShuffleInTensor3Simple<float, 2, 1, 0, false>(int, float const*, tensorflow::functor::Dimension<3>, float*) | 93.33 | 0 | 16527616.00 | 13793941.33 | 46.10 | 0.00 | 0.00 | true | 0.461098;0.461913;0.461036;0.460488;0.462267 | 0;0;0;0;0 | 16529408;16530176;16525376;16528064;16524096 | 13803520;13792800;13795328;13793696;13774464 |
62 | vgg_19/fc8/BiasAdd | BiasAdd | [[1 1000 1 1]] | 30.333 | 4096 | 0 | 574674432 | GPU_0_bfc | 0 | 0 | 0 | 0 | void tensorflow::BiasNCHWKernel<float>(int, float const*, float const*, float*, int, int) | 3.00 | 1000 | 4224.00 | 0.00 | 46.40 | 0.24 | 0.33 | true | 0.464999;0.453182;0.464821;0.464376;0.463070 | 1000;1000;1000;1000;1000 | 4224;4224;4224;4224;4224 | 0;0;0;0;0 |
Showing 1 to 98 of 98 entries