GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | resnetv13_conv0_fwd | Convolution | [1,3,224,224] | 21320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_medium_nn_v1 | 42.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
0 | resnetv13_conv0_fwd | Convolution | [1,3,224,224] | 21320 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
1 | resnetv13_batchnorm0_fwd | BatchNorm | [1,64,112,112] | 207 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
2 | resnetv13_relu0_fwd | Activation | [1,64,112,112] | 154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 7.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
3 | resnetv13_pool0_fwd | Pooling | [1,64,112,112] | 2839.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 9.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | resnetv13_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1922.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | resnetv13_stage1_conv0_fwd | Convolution | [1,64,56,56] | 1922.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
5 | resnetv13_stage1_batchnorm0_fwd | BatchNorm | [1,64,56,56] | 78 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | resnetv13_stage1_relu0_fwd | Activation | [1,64,56,56] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | resnetv13_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14769.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 26.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | resnetv13_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14769.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
8 | resnetv13_stage1_batchnorm1_fwd | BatchNorm | [1,64,56,56] | 110.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
9 | resnetv13_stage1_relu1_fwd | Activation | [1,64,56,56] | 40.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv13_stage1_conv2_fwd | Convolution | [1,64,56,56] | 6208.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
11 | resnetv13_stage1_batchnorm2_fwd | BatchNorm | [1,256,56,56] | 455.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | resnetv13_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | resnetv13_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5981 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | resnetv13_stage1_batchnorm3_fwd | BatchNorm | [1,256,56,56] | 629.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | add_resnetv13_stage1_activation0 | add_relu | [1,256,56,56] | 189 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 15.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | resnetv13_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6343 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 27.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | resnetv13_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6343 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
16 | resnetv13_stage1_batchnorm4_fwd | BatchNorm | [1,64,56,56] | 29.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
17 | resnetv13_stage1_relu2_fwd | Activation | [1,64,56,56] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | resnetv13_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14156.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | resnetv13_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14156.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
19 | resnetv13_stage1_batchnorm5_fwd | BatchNorm | [1,64,56,56] | 98.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
20 | resnetv13_stage1_relu3_fwd | Activation | [1,64,56,56] | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv13_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
22 | resnetv13_stage1_batchnorm6_fwd | BatchNorm | [1,256,56,56] | 130.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
23 | add_resnetv13_stage1_activation1 | add_relu | [1,256,56,56] | 186 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv13_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6187.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 27.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv13_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6187.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
25 | resnetv13_stage1_batchnorm7_fwd | BatchNorm | [1,64,56,56] | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
26 | resnetv13_stage1_relu4_fwd | Activation | [1,64,56,56] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | resnetv13_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14295.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | resnetv13_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14295.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | resnetv13_stage1_batchnorm8_fwd | BatchNorm | [1,64,56,56] | 100.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | resnetv13_stage1_relu5_fwd | Activation | [1,64,56,56] | 38.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv13_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5945 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | resnetv13_stage1_batchnorm9_fwd | BatchNorm | [1,256,56,56] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 9.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
32 | add_resnetv13_stage1_activation2 | add_relu | [1,256,56,56] | 193.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 14.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | resnetv13_stage2_conv0_fwd | Convolution | [1,256,56,56] | 3757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 25.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | resnetv13_stage2_conv0_fwd | Convolution | [1,256,56,56] | 3757.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
34 | resnetv13_stage2_batchnorm0_fwd | BatchNorm | [1,128,28,28] | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
35 | resnetv13_stage2_relu0_fwd | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | resnetv13_stage2_conv1_fwd | Convolution | [1,128,28,28] | 12862.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | resnetv13_stage2_conv1_fwd | Convolution | [1,128,28,28] | 12862.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
37 | resnetv13_stage2_batchnorm1_fwd | BatchNorm | [1,128,28,28] | 83.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
38 | resnetv13_stage2_relu1_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv13_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5722.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
40 | resnetv13_stage2_batchnorm2_fwd | BatchNorm | [1,512,28,28] | 82.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | resnetv13_stage2_conv3_fwd | Convolution | [1,256,56,56] | 10510.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 38.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | resnetv13_stage2_conv3_fwd | Convolution | [1,256,56,56] | 10510.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
42 | resnetv13_stage2_batchnorm3_fwd | BatchNorm | [1,512,28,28] | 146.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | add_resnetv13_stage2_activation0 | add_relu | [1,512,28,28] | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv13_stage2_conv4_fwd | Convolution | [1,512,28,28] | 5990 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv13_stage2_conv4_fwd | Convolution | [1,512,28,28] | 5990 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
45 | resnetv13_stage2_batchnorm4_fwd | BatchNorm | [1,128,28,28] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
46 | resnetv13_stage2_relu2_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | resnetv13_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13075 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | resnetv13_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13075 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
48 | resnetv13_stage2_batchnorm5_fwd | BatchNorm | [1,128,28,28] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
49 | resnetv13_stage2_relu3_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv13_stage2_conv6_fwd | Convolution | [1,128,28,28] | 6016 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | resnetv13_stage2_batchnorm6_fwd | BatchNorm | [1,512,28,28] | 107.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
52 | add_resnetv13_stage2_activation1 | add_relu | [1,512,28,28] | 102.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | resnetv13_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6172.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | resnetv13_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6172.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
54 | resnetv13_stage2_batchnorm7_fwd | BatchNorm | [1,128,28,28] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
55 | resnetv13_stage2_relu4_fwd | Activation | [1,128,28,28] | 27.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | resnetv13_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | resnetv13_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12930.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | resnetv13_stage2_batchnorm8_fwd | BatchNorm | [1,128,28,28] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
58 | resnetv13_stage2_relu5_fwd | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv13_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5748.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
60 | resnetv13_stage2_batchnorm9_fwd | BatchNorm | [1,512,28,28] | 47.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
61 | add_resnetv13_stage2_activation2 | add_relu | [1,512,28,28] | 93.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv13_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6083 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv13_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6083 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
63 | resnetv13_stage2_batchnorm10_fwd | BatchNorm | [1,128,28,28] | 18.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
64 | resnetv13_stage2_relu6_fwd | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv13_stage2_conv11_fwd | Convolution | [1,128,28,28] | 12917.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 36.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv13_stage2_conv11_fwd | Convolution | [1,128,28,28] | 12917.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
66 | resnetv13_stage2_batchnorm11_fwd | BatchNorm | [1,128,28,28] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
67 | resnetv13_stage2_relu7_fwd | Activation | [1,128,28,28] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv13_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5857 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
69 | resnetv13_stage2_batchnorm12_fwd | BatchNorm | [1,512,28,28] | 54.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
70 | add_resnetv13_stage2_activation3 | add_relu | [1,512,28,28] | 104.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
71 | resnetv13_stage3_conv0_fwd | Convolution | [1,512,28,28] | 3516.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 32.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
71 | resnetv13_stage3_conv0_fwd | Convolution | [1,512,28,28] | 3516.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
72 | resnetv13_stage3_batchnorm0_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
73 | resnetv13_stage3_relu0_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
74 | resnetv13_stage3_conv1_fwd | Convolution | [1,256,14,14] | 12520.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
74 | resnetv13_stage3_conv1_fwd | Convolution | [1,256,14,14] | 12520.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
75 | resnetv13_stage3_batchnorm1_fwd | BatchNorm | [1,256,14,14] | 88.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
76 | resnetv13_stage3_relu1_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
77 | resnetv13_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5579.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
77 | resnetv13_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5579.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
78 | resnetv13_stage3_batchnorm2_fwd | BatchNorm | [1,1024,14,14] | 45.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
79 | resnetv13_stage3_conv3_fwd | Convolution | [1,512,28,28] | 10477 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 66.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
79 | resnetv13_stage3_conv3_fwd | Convolution | [1,512,28,28] | 10477 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
80 | resnetv13_stage3_batchnorm3_fwd | BatchNorm | [1,1024,14,14] | 136.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
81 | add_resnetv13_stage3_activation0 | add_relu | [1,1024,14,14] | 52 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv13_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5850 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv13_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5850 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
83 | resnetv13_stage3_batchnorm4_fwd | BatchNorm | [1,256,14,14] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
84 | resnetv13_stage3_relu2_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv13_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12447.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv13_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12447.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
86 | resnetv13_stage3_batchnorm5_fwd | BatchNorm | [1,256,14,14] | 82.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
87 | resnetv13_stage3_relu3_fwd | Activation | [1,256,14,14] | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
88 | resnetv13_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5849.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
88 | resnetv13_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5849.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
89 | resnetv13_stage3_batchnorm6_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
90 | add_resnetv13_stage3_activation1 | add_relu | [1,1024,14,14] | 50.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
91 | resnetv13_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5998.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
91 | resnetv13_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5998.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
92 | resnetv13_stage3_batchnorm7_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
93 | resnetv13_stage3_relu4_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
94 | resnetv13_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12426.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
94 | resnetv13_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12426.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
95 | resnetv13_stage3_batchnorm8_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
96 | resnetv13_stage3_relu5_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
97 | resnetv13_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5494 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
97 | resnetv13_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5494 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
98 | resnetv13_stage3_batchnorm9_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
99 | add_resnetv13_stage3_activation2 | add_relu | [1,1024,14,14] | 49.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
100 | resnetv13_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5860 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 60.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
100 | resnetv13_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5860 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
101 | resnetv13_stage3_batchnorm10_fwd | BatchNorm | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
102 | resnetv13_stage3_relu6_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
103 | resnetv13_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12286 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
103 | resnetv13_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12286 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
104 | resnetv13_stage3_batchnorm11_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
105 | resnetv13_stage3_relu7_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
106 | resnetv13_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5619 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
106 | resnetv13_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5619 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
107 | resnetv13_stage3_batchnorm12_fwd | BatchNorm | [1,1024,14,14] | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
108 | add_resnetv13_stage3_activation3 | add_relu | [1,1024,14,14] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
109 | resnetv13_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5883 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
109 | resnetv13_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5883 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
110 | resnetv13_stage3_batchnorm13_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
111 | resnetv13_stage3_relu8_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv13_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12371.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv13_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12371.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
113 | resnetv13_stage3_batchnorm14_fwd | BatchNorm | [1,256,14,14] | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
114 | resnetv13_stage3_relu9_fwd | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv13_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5585.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv13_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5585.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
116 | resnetv13_stage3_batchnorm15_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
117 | add_resnetv13_stage3_activation4 | add_relu | [1,1024,14,14] | 43.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
118 | resnetv13_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5832 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
118 | resnetv13_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5832 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
119 | resnetv13_stage3_batchnorm16_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
120 | resnetv13_stage3_relu10_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
121 | resnetv13_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12448.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
121 | resnetv13_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12448.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
122 | resnetv13_stage3_batchnorm17_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
123 | resnetv13_stage3_relu11_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
124 | resnetv13_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5658.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
124 | resnetv13_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5658.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
125 | resnetv13_stage3_batchnorm18_fwd | BatchNorm | [1,1024,14,14] | 32 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
126 | add_resnetv13_stage3_activation5 | add_relu | [1,1024,14,14] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
127 | resnetv13_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5920 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
127 | resnetv13_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5920 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
128 | resnetv13_stage3_batchnorm19_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
129 | resnetv13_stage3_relu12_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
130 | resnetv13_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12418 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
130 | resnetv13_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12418 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
131 | resnetv13_stage3_batchnorm20_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
132 | resnetv13_stage3_relu13_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv13_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5572 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv13_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5572 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
134 | resnetv13_stage3_batchnorm21_fwd | BatchNorm | [1,1024,14,14] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
135 | add_resnetv13_stage3_activation6 | add_relu | [1,1024,14,14] | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
136 | resnetv13_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5906.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
136 | resnetv13_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5906.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
137 | resnetv13_stage3_batchnorm22_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
138 | resnetv13_stage3_relu14_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
139 | resnetv13_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12450.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
139 | resnetv13_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12450.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
140 | resnetv13_stage3_batchnorm23_fwd | BatchNorm | [1,256,14,14] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
141 | resnetv13_stage3_relu15_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
142 | resnetv13_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5611.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
142 | resnetv13_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5611.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
143 | resnetv13_stage3_batchnorm24_fwd | BatchNorm | [1,1024,14,14] | 33.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
144 | add_resnetv13_stage3_activation7 | add_relu | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
145 | resnetv13_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5901.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
145 | resnetv13_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5901.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
146 | resnetv13_stage3_batchnorm25_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
147 | resnetv13_stage3_relu16_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
148 | resnetv13_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12361 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
148 | resnetv13_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12361 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
149 | resnetv13_stage3_batchnorm26_fwd | BatchNorm | [1,256,14,14] | 89.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
150 | resnetv13_stage3_relu17_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
151 | resnetv13_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5575.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
151 | resnetv13_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5575.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
152 | resnetv13_stage3_batchnorm27_fwd | BatchNorm | [1,1024,14,14] | 30.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
153 | add_resnetv13_stage3_activation8 | add_relu | [1,1024,14,14] | 46.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
154 | resnetv13_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
154 | resnetv13_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
155 | resnetv13_stage3_batchnorm28_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
156 | resnetv13_stage3_relu18_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
157 | resnetv13_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
157 | resnetv13_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
158 | resnetv13_stage3_batchnorm29_fwd | BatchNorm | [1,256,14,14] | 92.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
159 | resnetv13_stage3_relu19_fwd | Activation | [1,256,14,14] | 17.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
160 | resnetv13_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5622.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
160 | resnetv13_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5622.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
161 | resnetv13_stage3_batchnorm30_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
162 | add_resnetv13_stage3_activation9 | add_relu | [1,1024,14,14] | 46.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv13_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5892.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv13_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5892.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
164 | resnetv13_stage3_batchnorm31_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
165 | resnetv13_stage3_relu20_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
166 | resnetv13_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12363 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
166 | resnetv13_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12363 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
167 | resnetv13_stage3_batchnorm32_fwd | BatchNorm | [1,256,14,14] | 90.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
168 | resnetv13_stage3_relu21_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
169 | resnetv13_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5640 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
169 | resnetv13_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5640 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
170 | resnetv13_stage3_batchnorm33_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
171 | add_resnetv13_stage3_activation10 | add_relu | [1,1024,14,14] | 48.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
172 | resnetv13_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5930.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
172 | resnetv13_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5930.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
173 | resnetv13_stage3_batchnorm34_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
174 | resnetv13_stage3_relu22_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
175 | resnetv13_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12603 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
175 | resnetv13_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12603 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
176 | resnetv13_stage3_batchnorm35_fwd | BatchNorm | [1,256,14,14] | 84.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
177 | resnetv13_stage3_relu23_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
178 | resnetv13_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5640.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
178 | resnetv13_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5640.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
179 | resnetv13_stage3_batchnorm36_fwd | BatchNorm | [1,1024,14,14] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
180 | add_resnetv13_stage3_activation11 | add_relu | [1,1024,14,14] | 48.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
181 | resnetv13_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5979.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
181 | resnetv13_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5979.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
182 | resnetv13_stage3_batchnorm37_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
183 | resnetv13_stage3_relu24_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
184 | resnetv13_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12564.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
184 | resnetv13_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12564.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
185 | resnetv13_stage3_batchnorm38_fwd | BatchNorm | [1,256,14,14] | 85.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
186 | resnetv13_stage3_relu25_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
187 | resnetv13_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5725 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
187 | resnetv13_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5725 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
188 | resnetv13_stage3_batchnorm39_fwd | BatchNorm | [1,1024,14,14] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
189 | add_resnetv13_stage3_activation12 | add_relu | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
190 | resnetv13_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5942.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
190 | resnetv13_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5942.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
191 | resnetv13_stage3_batchnorm40_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
192 | resnetv13_stage3_relu26_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv13_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12365.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv13_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12365.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
194 | resnetv13_stage3_batchnorm41_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
195 | resnetv13_stage3_relu27_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
196 | resnetv13_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
196 | resnetv13_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5635.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
197 | resnetv13_stage3_batchnorm42_fwd | BatchNorm | [1,1024,14,14] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
198 | add_resnetv13_stage3_activation13 | add_relu | [1,1024,14,14] | 44.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
199 | resnetv13_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5901 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
199 | resnetv13_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5901 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
200 | resnetv13_stage3_batchnorm43_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
201 | resnetv13_stage3_relu28_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
202 | resnetv13_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12379 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
202 | resnetv13_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12379 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
203 | resnetv13_stage3_batchnorm44_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
204 | resnetv13_stage3_relu29_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
205 | resnetv13_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5618.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
205 | resnetv13_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5618.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
206 | resnetv13_stage3_batchnorm45_fwd | BatchNorm | [1,1024,14,14] | 30.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
207 | add_resnetv13_stage3_activation14 | add_relu | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
208 | resnetv13_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5907 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
208 | resnetv13_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5907 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
209 | resnetv13_stage3_batchnorm46_fwd | BatchNorm | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
210 | resnetv13_stage3_relu30_fwd | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
211 | resnetv13_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
211 | resnetv13_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
212 | resnetv13_stage3_batchnorm47_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
213 | resnetv13_stage3_relu31_fwd | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
214 | resnetv13_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5609 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
214 | resnetv13_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5609 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
215 | resnetv13_stage3_batchnorm48_fwd | BatchNorm | [1,1024,14,14] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
216 | add_resnetv13_stage3_activation15 | add_relu | [1,1024,14,14] | 45.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
217 | resnetv13_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
217 | resnetv13_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5883.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
218 | resnetv13_stage3_batchnorm49_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
219 | resnetv13_stage3_relu32_fwd | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
220 | resnetv13_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
220 | resnetv13_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
221 | resnetv13_stage3_batchnorm50_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
222 | resnetv13_stage3_relu33_fwd | Activation | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv13_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5592 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv13_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5592 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
224 | resnetv13_stage3_batchnorm51_fwd | BatchNorm | [1,1024,14,14] | 37.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
225 | add_resnetv13_stage3_activation16 | add_relu | [1,1024,14,14] | 47 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
226 | resnetv13_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5963.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
226 | resnetv13_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5963.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
227 | resnetv13_stage3_batchnorm52_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
228 | resnetv13_stage3_relu34_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
229 | resnetv13_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12543 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
229 | resnetv13_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12543 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
230 | resnetv13_stage3_batchnorm53_fwd | BatchNorm | [1,256,14,14] | 94.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
231 | resnetv13_stage3_relu35_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
232 | resnetv13_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
232 | resnetv13_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5639 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
233 | resnetv13_stage3_batchnorm54_fwd | BatchNorm | [1,1024,14,14] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
234 | add_resnetv13_stage3_activation17 | add_relu | [1,1024,14,14] | 47.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
235 | resnetv13_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 6065.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
235 | resnetv13_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 6065.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
236 | resnetv13_stage3_batchnorm55_fwd | BatchNorm | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
237 | resnetv13_stage3_relu36_fwd | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
238 | resnetv13_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12673.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
238 | resnetv13_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12673.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
239 | resnetv13_stage3_batchnorm56_fwd | BatchNorm | [1,256,14,14] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
240 | resnetv13_stage3_relu37_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
241 | resnetv13_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5756.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
241 | resnetv13_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5756.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
242 | resnetv13_stage3_batchnorm57_fwd | BatchNorm | [1,1024,14,14] | 33.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
243 | add_resnetv13_stage3_activation18 | add_relu | [1,1024,14,14] | 60.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
244 | resnetv13_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5978 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
244 | resnetv13_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5978 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
245 | resnetv13_stage3_batchnorm58_fwd | BatchNorm | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
246 | resnetv13_stage3_relu38_fwd | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
247 | resnetv13_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
247 | resnetv13_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12720.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
248 | resnetv13_stage3_batchnorm59_fwd | BatchNorm | [1,256,14,14] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
249 | resnetv13_stage3_relu39_fwd | Activation | [1,256,14,14] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
250 | resnetv13_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
250 | resnetv13_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
251 | resnetv13_stage3_batchnorm60_fwd | BatchNorm | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
252 | add_resnetv13_stage3_activation19 | add_relu | [1,1024,14,14] | 69.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv13_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 6015.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv13_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 6015.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
254 | resnetv13_stage3_batchnorm61_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
255 | resnetv13_stage3_relu40_fwd | Activation | [1,256,14,14] | 14.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
256 | resnetv13_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12655.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
256 | resnetv13_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12655.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
257 | resnetv13_stage3_batchnorm62_fwd | BatchNorm | [1,256,14,14] | 93 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
258 | resnetv13_stage3_relu41_fwd | Activation | [1,256,14,14] | 13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
259 | resnetv13_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5697.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
259 | resnetv13_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5697.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
260 | resnetv13_stage3_batchnorm63_fwd | BatchNorm | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
261 | add_resnetv13_stage3_activation20 | add_relu | [1,1024,14,14] | 68.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
262 | resnetv13_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5994.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
262 | resnetv13_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5994.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
263 | resnetv13_stage3_batchnorm64_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
264 | resnetv13_stage3_relu42_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
265 | resnetv13_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
265 | resnetv13_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
266 | resnetv13_stage3_batchnorm65_fwd | BatchNorm | [1,256,14,14] | 85 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
267 | resnetv13_stage3_relu43_fwd | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
268 | resnetv13_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5728.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
268 | resnetv13_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5728.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
269 | resnetv13_stage3_batchnorm66_fwd | BatchNorm | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
270 | add_resnetv13_stage3_activation21 | add_relu | [1,1024,14,14] | 56.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
271 | resnetv13_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5934.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
271 | resnetv13_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5934.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
272 | resnetv13_stage3_batchnorm67_fwd | BatchNorm | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
273 | resnetv13_stage3_relu44_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
274 | resnetv13_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12433.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 62.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
274 | resnetv13_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12433.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
275 | resnetv13_stage3_batchnorm68_fwd | BatchNorm | [1,256,14,14] | 92.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
276 | resnetv13_stage3_relu45_fwd | Activation | [1,256,14,14] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
277 | resnetv13_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5643.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
277 | resnetv13_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5643.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
278 | resnetv13_stage3_batchnorm69_fwd | BatchNorm | [1,1024,14,14] | 32.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
279 | add_resnetv13_stage3_activation22 | add_relu | [1,1024,14,14] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
280 | resnetv13_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 4019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
280 | resnetv13_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 4019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
281 | resnetv13_stage4_batchnorm0_fwd | BatchNorm | [1,512,7,7] | 15.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
282 | resnetv13_stage4_relu0_fwd | Activation | [1,512,7,7] | 9.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv13_stage4_conv1_fwd | Convolution | [1,512,7,7] | 15021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv13_stage4_conv1_fwd | Convolution | [1,512,7,7] | 15021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 53.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
284 | resnetv13_stage4_batchnorm1_fwd | BatchNorm | [1,512,7,7] | 86.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
285 | resnetv13_stage4_relu1_fwd | Activation | [1,512,7,7] | 7.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
286 | resnetv13_stage4_conv2_fwd | Convolution | [1,512,7,7] | 6763.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
286 | resnetv13_stage4_conv2_fwd | Convolution | [1,512,7,7] | 6763.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
287 | resnetv13_stage4_batchnorm2_fwd | BatchNorm | [1,2048,7,7] | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
288 | resnetv13_stage4_conv3_fwd | Convolution | [1,1024,14,14] | 12869.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 87.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
289 | resnetv13_stage4_batchnorm3_fwd | BatchNorm | [1,2048,7,7] | 106 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
290 | add_resnetv13_stage4_activation0 | add_relu | [1,2048,7,7] | 25.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
291 | resnetv13_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 6971 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 111.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
291 | resnetv13_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 6971 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
292 | resnetv13_stage4_batchnorm4_fwd | BatchNorm | [1,512,7,7] | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
293 | resnetv13_stage4_relu2_fwd | Activation | [1,512,7,7] | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
294 | resnetv13_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15201.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
294 | resnetv13_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15201.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 51.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
295 | resnetv13_stage4_batchnorm5_fwd | BatchNorm | [1,512,7,7] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
296 | resnetv13_stage4_relu3_fwd | Activation | [1,512,7,7] | 8.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
297 | resnetv13_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7021 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
297 | resnetv13_stage4_conv6_fwd | Convolution | [1,512,7,7] | 7021 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
298 | resnetv13_stage4_batchnorm6_fwd | BatchNorm | [1,2048,7,7] | 28.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
299 | add_resnetv13_stage4_activation1 | add_relu | [1,2048,7,7] | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
300 | resnetv13_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7204 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 112.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
300 | resnetv13_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7204 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
301 | resnetv13_stage4_batchnorm7_fwd | BatchNorm | [1,512,7,7] | 13.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
302 | resnetv13_stage4_relu4_fwd | Activation | [1,512,7,7] | 9.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv13_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15224.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 120.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv13_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15224.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 50.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
304 | resnetv13_stage4_batchnorm8_fwd | BatchNorm | [1,512,7,7] | 91.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
305 | resnetv13_stage4_relu5_fwd | Activation | [1,512,7,7] | 8.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
306 | resnetv13_stage4_conv9_fwd | Convolution | [1,512,7,7] | 6906.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 46.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
306 | resnetv13_stage4_conv9_fwd | Convolution | [1,512,7,7] | 6906.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
307 | resnetv13_stage4_batchnorm9_fwd | BatchNorm | [1,2048,7,7] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
308 | add_resnetv13_stage4_activation2 | add_relu | [1,2048,7,7] | 27.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::AddReluKernel, float*, float*, float*, mxnet::OpReqType>(int, float*, float*, float*, mxnet::OpReqType) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
309 | resnetv13_pool1_fwd | Pooling | [1,2048,7,7] | 155.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
310 | resnetv13_dense0_fwd | FullyConnected | [1,2048,1,1] | 2078.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 4, 4, false, cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float> >(cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>, float, float) | 20.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
310 | resnetv13_dense0_fwd | FullyConnected | [1,2048,1,1] | 2078.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
Showing 1 to 422 of 422 entries