GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | resnetv24_batchnorm0_fwd | BatchNorm | [1,3,224,224] | 185.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 23.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
0 | resnetv24_batchnorm0_fwd | BatchNorm | [1,3,224,224] | 185.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 1, float>, float>, mshadow::expr::Plan<mshadow::expr::ScalarExp<float>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 1, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::ScalarExp<float>, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
1 | resnetv24_conv0_fwd | Convolution | [1,3,224,224] | 20242.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_medium_nn_v1 | 42.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
1 | resnetv24_conv0_fwd | Convolution | [1,3,224,224] | 20242.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
2 | resnetv24_batchnorm1_fwd | BatchNorm | [1,64,112,112] | 916.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
3 | resnetv24_relu0_fwd | Activation | [1,64,112,112] | 207.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | resnetv24_pool0_fwd | Pooling | [1,64,112,112] | 2844 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
5 | resnetv24_stage1_batchnorm0_fwd | BatchNorm | [1,64,56,56] | 238 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | resnetv24_stage1_activation0 | Activation | [1,64,56,56] | 59.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | resnetv24_stage1_conv0_fwd | Convolution | [1,64,56,56] | 2070.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
8 | resnetv24_stage1_batchnorm1_fwd | BatchNorm | [1,64,56,56] | 43 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
9 | resnetv24_stage1_activation1 | Activation | [1,64,56,56] | 59 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv24_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14897.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | resnetv24_stage1_conv1_fwd | Convolution | [1,64,56,56] | 14897.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
11 | resnetv24_stage1_batchnorm2_fwd | BatchNorm | [1,64,56,56] | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | resnetv24_stage1_activation2 | Activation | [1,64,56,56] | 41.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | resnetv24_stage1_conv2_fwd | Convolution | [1,64,56,56] | 5991.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | resnetv24_stage1_conv2_fwd | Convolution | [1,64,56,56] | 5991.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | resnetv24_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5079.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 21.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | resnetv24_stage1_conv3_fwd | Convolution | [1,64,56,56] | 5079.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | resnetv24_stage1__plus0 | elemwise_add | [1,256,56,56] | 210 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
16 | resnetv24_stage1_batchnorm3_fwd | BatchNorm | [1,256,56,56] | 95 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 10.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
17 | resnetv24_stage1_activation3 | Activation | [1,256,56,56] | 166.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | resnetv24_stage1_conv4_fwd | Convolution | [1,256,56,56] | 6211 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 28.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
19 | resnetv24_stage1_batchnorm4_fwd | BatchNorm | [1,64,56,56] | 105 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
20 | resnetv24_stage1_activation4 | Activation | [1,64,56,56] | 56.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv24_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 25.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | resnetv24_stage1_conv5_fwd | Convolution | [1,64,56,56] | 14360.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
22 | resnetv24_stage1_batchnorm5_fwd | BatchNorm | [1,64,56,56] | 104 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
23 | resnetv24_stage1_activation5 | Activation | [1,64,56,56] | 53.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv24_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5948.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | resnetv24_stage1_conv6_fwd | Convolution | [1,64,56,56] | 5948.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
25 | resnetv24_stage1__plus1 | elemwise_add | [1,256,56,56] | 243 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
26 | resnetv24_stage1_batchnorm6_fwd | BatchNorm | [1,256,56,56] | 119.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | resnetv24_stage1_activation6 | Activation | [1,256,56,56] | 214.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 7.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | resnetv24_stage1_conv7_fwd | Convolution | [1,256,56,56] | 6280.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 28.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | resnetv24_stage1_batchnorm7_fwd | BatchNorm | [1,64,56,56] | 106.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | resnetv24_stage1_activation7 | Activation | [1,64,56,56] | 56 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | resnetv24_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14242 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 26.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | resnetv24_stage1_conv8_fwd | Convolution | [1,64,56,56] | 14242 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
32 | resnetv24_stage1_batchnorm8_fwd | BatchNorm | [1,64,56,56] | 93.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | resnetv24_stage1_activation8 | Activation | [1,64,56,56] | 40.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
34 | resnetv24_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5717 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
34 | resnetv24_stage1_conv9_fwd | Convolution | [1,64,56,56] | 5717 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
35 | resnetv24_stage1__plus2 | elemwise_add | [1,256,56,56] | 214.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | resnetv24_stage2_batchnorm0_fwd | BatchNorm | [1,256,56,56] | 107.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 10.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
37 | resnetv24_stage2_activation0 | Activation | [1,256,56,56] | 189.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
38 | resnetv24_stage2_conv0_fwd | Convolution | [1,256,56,56] | 10987 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 35.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
38 | resnetv24_stage2_conv0_fwd | Convolution | [1,256,56,56] | 10987 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | resnetv24_stage2_batchnorm1_fwd | BatchNorm | [1,128,56,56] | 117 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
40 | resnetv24_stage2_activation1 | Activation | [1,128,56,56] | 87.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | resnetv24_stage2_conv1_fwd | Convolution | [1,128,56,56] | 12921.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 97.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
42 | resnetv24_stage2_batchnorm2_fwd | BatchNorm | [1,128,28,28] | 97.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | resnetv24_stage2_activation2 | Activation | [1,128,28,28] | 28.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv24_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5879.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | resnetv24_stage2_conv2_fwd | Convolution | [1,128,28,28] | 5879.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
45 | resnetv24_stage2_conv3_fwd | Convolution | [1,256,56,56] | 9777.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 38.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
45 | resnetv24_stage2_conv3_fwd | Convolution | [1,256,56,56] | 9777.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
46 | resnetv24_stage2__plus0 | elemwise_add | [1,512,28,28] | 154.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | resnetv24_stage2_batchnorm3_fwd | BatchNorm | [1,512,28,28] | 59.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
48 | resnetv24_stage2_activation3 | Activation | [1,512,28,28] | 102.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
49 | resnetv24_stage2_conv4_fwd | Convolution | [1,512,28,28] | 6120.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 44.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | resnetv24_stage2_batchnorm4_fwd | BatchNorm | [1,128,28,28] | 110 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | resnetv24_stage2_activation4 | Activation | [1,128,28,28] | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
52 | resnetv24_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13190.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
52 | resnetv24_stage2_conv5_fwd | Convolution | [1,128,28,28] | 13190.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | resnetv24_stage2_batchnorm5_fwd | BatchNorm | [1,128,28,28] | 94.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
54 | resnetv24_stage2_activation5 | Activation | [1,128,28,28] | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
55 | resnetv24_stage2_conv6_fwd | Convolution | [1,128,28,28] | 5962.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
55 | resnetv24_stage2_conv6_fwd | Convolution | [1,128,28,28] | 5962.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | resnetv24_stage2__plus1 | elemwise_add | [1,512,28,28] | 147.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | resnetv24_stage2_batchnorm6_fwd | BatchNorm | [1,512,28,28] | 55 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
58 | resnetv24_stage2_activation6 | Activation | [1,512,28,28] | 83 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | resnetv24_stage2_conv7_fwd | Convolution | [1,512,28,28] | 6149 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
60 | resnetv24_stage2_batchnorm7_fwd | BatchNorm | [1,128,28,28] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
61 | resnetv24_stage2_activation7 | Activation | [1,128,28,28] | 32.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv24_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12921 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 36.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | resnetv24_stage2_conv8_fwd | Convolution | [1,128,28,28] | 12921 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
63 | resnetv24_stage2_batchnorm8_fwd | BatchNorm | [1,128,28,28] | 93.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
64 | resnetv24_stage2_activation8 | Activation | [1,128,28,28] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv24_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5671.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
65 | resnetv24_stage2_conv9_fwd | Convolution | [1,128,28,28] | 5671.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
66 | resnetv24_stage2__plus2 | elemwise_add | [1,512,28,28] | 147.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
67 | resnetv24_stage2_batchnorm9_fwd | BatchNorm | [1,512,28,28] | 53 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
68 | resnetv24_stage2_activation9 | Activation | [1,512,28,28] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
69 | resnetv24_stage2_conv10_fwd | Convolution | [1,512,28,28] | 6000.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
70 | resnetv24_stage2_batchnorm10_fwd | BatchNorm | [1,128,28,28] | 92.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
71 | resnetv24_stage2_activation10 | Activation | [1,128,28,28] | 23.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
72 | resnetv24_stage2_conv11_fwd | Convolution | [1,128,28,28] | 13021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 36.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
72 | resnetv24_stage2_conv11_fwd | Convolution | [1,128,28,28] | 13021.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
73 | resnetv24_stage2_batchnorm11_fwd | BatchNorm | [1,128,28,28] | 93.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
74 | resnetv24_stage2_activation11 | Activation | [1,128,28,28] | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
75 | resnetv24_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5788.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
75 | resnetv24_stage2_conv12_fwd | Convolution | [1,128,28,28] | 5788.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
76 | resnetv24_stage2__plus3 | elemwise_add | [1,512,28,28] | 154.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
77 | resnetv24_stage2_batchnorm12_fwd | BatchNorm | [1,512,28,28] | 54.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
78 | resnetv24_stage2_activation12 | Activation | [1,512,28,28] | 81.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
79 | resnetv24_stage2_conv13_fwd | Convolution | [1,512,28,28] | 6028 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
80 | resnetv24_stage2_batchnorm13_fwd | BatchNorm | [1,128,28,28] | 93 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
81 | resnetv24_stage2_activation13 | Activation | [1,128,28,28] | 23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv24_stage2_conv14_fwd | Convolution | [1,128,28,28] | 12862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
82 | resnetv24_stage2_conv14_fwd | Convolution | [1,128,28,28] | 12862.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
83 | resnetv24_stage2_batchnorm14_fwd | BatchNorm | [1,128,28,28] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
84 | resnetv24_stage2_activation14 | Activation | [1,128,28,28] | 21.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv24_stage2_conv15_fwd | Convolution | [1,128,28,28] | 5754.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
85 | resnetv24_stage2_conv15_fwd | Convolution | [1,128,28,28] | 5754.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
86 | resnetv24_stage2__plus4 | elemwise_add | [1,512,28,28] | 141.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
87 | resnetv24_stage2_batchnorm15_fwd | BatchNorm | [1,512,28,28] | 53.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
88 | resnetv24_stage2_activation15 | Activation | [1,512,28,28] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
89 | resnetv24_stage2_conv16_fwd | Convolution | [1,512,28,28] | 6130.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
90 | resnetv24_stage2_batchnorm16_fwd | BatchNorm | [1,128,28,28] | 99.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
91 | resnetv24_stage2_activation16 | Activation | [1,128,28,28] | 23.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
92 | resnetv24_stage2_conv17_fwd | Convolution | [1,128,28,28] | 13316.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
92 | resnetv24_stage2_conv17_fwd | Convolution | [1,128,28,28] | 13316.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
93 | resnetv24_stage2_batchnorm17_fwd | BatchNorm | [1,128,28,28] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
94 | resnetv24_stage2_activation17 | Activation | [1,128,28,28] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
95 | resnetv24_stage2_conv18_fwd | Convolution | [1,128,28,28] | 5829 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
95 | resnetv24_stage2_conv18_fwd | Convolution | [1,128,28,28] | 5829 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
96 | resnetv24_stage2__plus5 | elemwise_add | [1,512,28,28] | 146.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
97 | resnetv24_stage2_batchnorm18_fwd | BatchNorm | [1,512,28,28] | 55 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
98 | resnetv24_stage2_activation18 | Activation | [1,512,28,28] | 86.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
99 | resnetv24_stage2_conv19_fwd | Convolution | [1,512,28,28] | 6109.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
100 | resnetv24_stage2_batchnorm19_fwd | BatchNorm | [1,128,28,28] | 96.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
101 | resnetv24_stage2_activation19 | Activation | [1,128,28,28] | 20.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
102 | resnetv24_stage2_conv20_fwd | Convolution | [1,128,28,28] | 12859 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 36.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
102 | resnetv24_stage2_conv20_fwd | Convolution | [1,128,28,28] | 12859 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
103 | resnetv24_stage2_batchnorm20_fwd | BatchNorm | [1,128,28,28] | 89 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
104 | resnetv24_stage2_activation20 | Activation | [1,128,28,28] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
105 | resnetv24_stage2_conv21_fwd | Convolution | [1,128,28,28] | 5743.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
105 | resnetv24_stage2_conv21_fwd | Convolution | [1,128,28,28] | 5743.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
106 | resnetv24_stage2__plus6 | elemwise_add | [1,512,28,28] | 145 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
107 | resnetv24_stage2_batchnorm21_fwd | BatchNorm | [1,512,28,28] | 51 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
108 | resnetv24_stage2_activation21 | Activation | [1,512,28,28] | 69.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
109 | resnetv24_stage2_conv22_fwd | Convolution | [1,512,28,28] | 6001.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
110 | resnetv24_stage2_batchnorm22_fwd | BatchNorm | [1,128,28,28] | 83.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
111 | resnetv24_stage2_activation22 | Activation | [1,128,28,28] | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv24_stage2_conv23_fwd | Convolution | [1,128,28,28] | 12766.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 35.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
112 | resnetv24_stage2_conv23_fwd | Convolution | [1,128,28,28] | 12766.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
113 | resnetv24_stage2_batchnorm23_fwd | BatchNorm | [1,128,28,28] | 88.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
114 | resnetv24_stage2_activation23 | Activation | [1,128,28,28] | 21.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv24_stage2_conv24_fwd | Convolution | [1,128,28,28] | 5633.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 22.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
115 | resnetv24_stage2_conv24_fwd | Convolution | [1,128,28,28] | 5633.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
116 | resnetv24_stage2__plus7 | elemwise_add | [1,512,28,28] | 144.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
117 | resnetv24_stage3_batchnorm0_fwd | BatchNorm | [1,512,28,28] | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
118 | resnetv24_stage3_activation0 | Activation | [1,512,28,28] | 64.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
119 | resnetv24_stage3_conv0_fwd | Convolution | [1,512,28,28] | 10447.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 50.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
120 | resnetv24_stage3_batchnorm1_fwd | BatchNorm | [1,256,28,28] | 103.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
121 | resnetv24_stage3_activation1 | Activation | [1,256,28,28] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
122 | resnetv24_stage3_conv1_fwd | Convolution | [1,256,28,28] | 12475.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 130.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
123 | resnetv24_stage3_batchnorm2_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
124 | resnetv24_stage3_activation2 | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
125 | resnetv24_stage3_conv2_fwd | Convolution | [1,256,14,14] | 5709 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
126 | resnetv24_stage3_conv3_fwd | Convolution | [1,512,28,28] | 9869.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_128x64_relu_interior_nn_v1 | 65.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
126 | resnetv24_stage3_conv3_fwd | Convolution | [1,512,28,28] | 9869.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | cudnn::gemm::computeOffsetsKernel(cudnn::gemm::ComputeOffsetsParams) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
127 | resnetv24_stage3__plus0 | elemwise_add | [1,1024,14,14] | 121.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
128 | resnetv24_stage3_batchnorm3_fwd | BatchNorm | [1,1024,14,14] | 39 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
129 | resnetv24_stage3_activation3 | Activation | [1,1024,14,14] | 42.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
130 | resnetv24_stage3_conv4_fwd | Convolution | [1,1024,14,14] | 5981.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
131 | resnetv24_stage3_batchnorm4_fwd | BatchNorm | [1,256,14,14] | 99 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
132 | resnetv24_stage3_activation4 | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv24_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12638.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
133 | resnetv24_stage3_conv5_fwd | Convolution | [1,256,14,14] | 12638.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
134 | resnetv24_stage3_batchnorm5_fwd | BatchNorm | [1,256,14,14] | 107.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
135 | resnetv24_stage3_activation5 | Activation | [1,256,14,14] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
136 | resnetv24_stage3_conv6_fwd | Convolution | [1,256,14,14] | 5756.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
137 | resnetv24_stage3__plus1 | elemwise_add | [1,1024,14,14] | 101 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
138 | resnetv24_stage3_batchnorm6_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
139 | resnetv24_stage3_activation6 | Activation | [1,1024,14,14] | 37.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
140 | resnetv24_stage3_conv7_fwd | Convolution | [1,1024,14,14] | 5801 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
141 | resnetv24_stage3_batchnorm7_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
142 | resnetv24_stage3_activation7 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
143 | resnetv24_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12355.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
143 | resnetv24_stage3_conv8_fwd | Convolution | [1,256,14,14] | 12355.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
144 | resnetv24_stage3_batchnorm8_fwd | BatchNorm | [1,256,14,14] | 74.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
145 | resnetv24_stage3_activation8 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
146 | resnetv24_stage3_conv9_fwd | Convolution | [1,256,14,14] | 5463.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
147 | resnetv24_stage3__plus2 | elemwise_add | [1,1024,14,14] | 95.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
148 | resnetv24_stage3_batchnorm9_fwd | BatchNorm | [1,1024,14,14] | 37.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
149 | resnetv24_stage3_activation9 | Activation | [1,1024,14,14] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
150 | resnetv24_stage3_conv10_fwd | Convolution | [1,1024,14,14] | 5776.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
151 | resnetv24_stage3_batchnorm10_fwd | BatchNorm | [1,256,14,14] | 87 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
152 | resnetv24_stage3_activation10 | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
153 | resnetv24_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12386 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
153 | resnetv24_stage3_conv11_fwd | Convolution | [1,256,14,14] | 12386 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
154 | resnetv24_stage3_batchnorm11_fwd | BatchNorm | [1,256,14,14] | 83.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
155 | resnetv24_stage3_activation11 | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
156 | resnetv24_stage3_conv12_fwd | Convolution | [1,256,14,14] | 5540 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
157 | resnetv24_stage3__plus3 | elemwise_add | [1,1024,14,14] | 105.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
158 | resnetv24_stage3_batchnorm12_fwd | BatchNorm | [1,1024,14,14] | 40.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
159 | resnetv24_stage3_activation12 | Activation | [1,1024,14,14] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
160 | resnetv24_stage3_conv13_fwd | Convolution | [1,1024,14,14] | 5818.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
161 | resnetv24_stage3_batchnorm13_fwd | BatchNorm | [1,256,14,14] | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
162 | resnetv24_stage3_activation13 | Activation | [1,256,14,14] | 13.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv24_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
163 | resnetv24_stage3_conv14_fwd | Convolution | [1,256,14,14] | 12452.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
164 | resnetv24_stage3_batchnorm14_fwd | BatchNorm | [1,256,14,14] | 99.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
165 | resnetv24_stage3_activation14 | Activation | [1,256,14,14] | 12.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
166 | resnetv24_stage3_conv15_fwd | Convolution | [1,256,14,14] | 5519.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
167 | resnetv24_stage3__plus4 | elemwise_add | [1,1024,14,14] | 114 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
168 | resnetv24_stage3_batchnorm15_fwd | BatchNorm | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
169 | resnetv24_stage3_activation15 | Activation | [1,1024,14,14] | 38.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
170 | resnetv24_stage3_conv16_fwd | Convolution | [1,1024,14,14] | 5971.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
171 | resnetv24_stage3_batchnorm16_fwd | BatchNorm | [1,256,14,14] | 102.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
172 | resnetv24_stage3_activation16 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
173 | resnetv24_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12667.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
173 | resnetv24_stage3_conv17_fwd | Convolution | [1,256,14,14] | 12667.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
174 | resnetv24_stage3_batchnorm17_fwd | BatchNorm | [1,256,14,14] | 90.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
175 | resnetv24_stage3_activation17 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
176 | resnetv24_stage3_conv18_fwd | Convolution | [1,256,14,14] | 5517 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
177 | resnetv24_stage3__plus5 | elemwise_add | [1,1024,14,14] | 113.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
178 | resnetv24_stage3_batchnorm18_fwd | BatchNorm | [1,1024,14,14] | 38 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
179 | resnetv24_stage3_activation18 | Activation | [1,1024,14,14] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
180 | resnetv24_stage3_conv19_fwd | Convolution | [1,1024,14,14] | 5815 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
181 | resnetv24_stage3_batchnorm19_fwd | BatchNorm | [1,256,14,14] | 79.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
182 | resnetv24_stage3_activation19 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
183 | resnetv24_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12408.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
183 | resnetv24_stage3_conv20_fwd | Convolution | [1,256,14,14] | 12408.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
184 | resnetv24_stage3_batchnorm20_fwd | BatchNorm | [1,256,14,14] | 100.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
185 | resnetv24_stage3_activation20 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
186 | resnetv24_stage3_conv21_fwd | Convolution | [1,256,14,14] | 5585.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
187 | resnetv24_stage3__plus6 | elemwise_add | [1,1024,14,14] | 113.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
188 | resnetv24_stage3_batchnorm21_fwd | BatchNorm | [1,1024,14,14] | 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
189 | resnetv24_stage3_activation21 | Activation | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
190 | resnetv24_stage3_conv22_fwd | Convolution | [1,1024,14,14] | 5836 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
191 | resnetv24_stage3_batchnorm22_fwd | BatchNorm | [1,256,14,14] | 86.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
192 | resnetv24_stage3_activation22 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv24_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12367 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
193 | resnetv24_stage3_conv23_fwd | Convolution | [1,256,14,14] | 12367 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
194 | resnetv24_stage3_batchnorm23_fwd | BatchNorm | [1,256,14,14] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
195 | resnetv24_stage3_activation23 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
196 | resnetv24_stage3_conv24_fwd | Convolution | [1,256,14,14] | 5529.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
197 | resnetv24_stage3__plus7 | elemwise_add | [1,1024,14,14] | 103.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
198 | resnetv24_stage3_batchnorm24_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
199 | resnetv24_stage3_activation24 | Activation | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
200 | resnetv24_stage3_conv25_fwd | Convolution | [1,1024,14,14] | 5824.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
201 | resnetv24_stage3_batchnorm25_fwd | BatchNorm | [1,256,14,14] | 81.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
202 | resnetv24_stage3_activation25 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
203 | resnetv24_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12431.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
203 | resnetv24_stage3_conv26_fwd | Convolution | [1,256,14,14] | 12431.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
204 | resnetv24_stage3_batchnorm26_fwd | BatchNorm | [1,256,14,14] | 80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
205 | resnetv24_stage3_activation26 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
206 | resnetv24_stage3_conv27_fwd | Convolution | [1,256,14,14] | 5576.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
207 | resnetv24_stage3__plus8 | elemwise_add | [1,1024,14,14] | 117 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
208 | resnetv24_stage3_batchnorm27_fwd | BatchNorm | [1,1024,14,14] | 37.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
209 | resnetv24_stage3_activation27 | Activation | [1,1024,14,14] | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
210 | resnetv24_stage3_conv28_fwd | Convolution | [1,1024,14,14] | 5841.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
211 | resnetv24_stage3_batchnorm28_fwd | BatchNorm | [1,256,14,14] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
212 | resnetv24_stage3_activation28 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
213 | resnetv24_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12399 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
213 | resnetv24_stage3_conv29_fwd | Convolution | [1,256,14,14] | 12399 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
214 | resnetv24_stage3_batchnorm29_fwd | BatchNorm | [1,256,14,14] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
215 | resnetv24_stage3_activation29 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
216 | resnetv24_stage3_conv30_fwd | Convolution | [1,256,14,14] | 5542.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
217 | resnetv24_stage3__plus9 | elemwise_add | [1,1024,14,14] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
218 | resnetv24_stage3_batchnorm30_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
219 | resnetv24_stage3_activation30 | Activation | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
220 | resnetv24_stage3_conv31_fwd | Convolution | [1,1024,14,14] | 5827.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
221 | resnetv24_stage3_batchnorm31_fwd | BatchNorm | [1,256,14,14] | 78.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
222 | resnetv24_stage3_activation31 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv24_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12305 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
223 | resnetv24_stage3_conv32_fwd | Convolution | [1,256,14,14] | 12305 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
224 | resnetv24_stage3_batchnorm32_fwd | BatchNorm | [1,256,14,14] | 75.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
225 | resnetv24_stage3_activation32 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
226 | resnetv24_stage3_conv33_fwd | Convolution | [1,256,14,14] | 5496.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
227 | resnetv24_stage3__plus10 | elemwise_add | [1,1024,14,14] | 92 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
228 | resnetv24_stage3_batchnorm33_fwd | BatchNorm | [1,1024,14,14] | 37 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
229 | resnetv24_stage3_activation33 | Activation | [1,1024,14,14] | 33.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
230 | resnetv24_stage3_conv34_fwd | Convolution | [1,1024,14,14] | 5792.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
231 | resnetv24_stage3_batchnorm34_fwd | BatchNorm | [1,256,14,14] | 80.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
232 | resnetv24_stage3_activation34 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
233 | resnetv24_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12323.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
233 | resnetv24_stage3_conv35_fwd | Convolution | [1,256,14,14] | 12323.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
234 | resnetv24_stage3_batchnorm35_fwd | BatchNorm | [1,256,14,14] | 83 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
235 | resnetv24_stage3_activation35 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
236 | resnetv24_stage3_conv36_fwd | Convolution | [1,256,14,14] | 5504 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
237 | resnetv24_stage3__plus11 | elemwise_add | [1,1024,14,14] | 103 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
238 | resnetv24_stage3_batchnorm36_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
239 | resnetv24_stage3_activation36 | Activation | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
240 | resnetv24_stage3_conv37_fwd | Convolution | [1,1024,14,14] | 5801 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
241 | resnetv24_stage3_batchnorm37_fwd | BatchNorm | [1,256,14,14] | 91.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
242 | resnetv24_stage3_activation37 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
243 | resnetv24_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12312.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
243 | resnetv24_stage3_conv38_fwd | Convolution | [1,256,14,14] | 12312.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
244 | resnetv24_stage3_batchnorm38_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
245 | resnetv24_stage3_activation38 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
246 | resnetv24_stage3_conv39_fwd | Convolution | [1,256,14,14] | 5541 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
247 | resnetv24_stage3__plus12 | elemwise_add | [1,1024,14,14] | 104.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
248 | resnetv24_stage3_batchnorm39_fwd | BatchNorm | [1,1024,14,14] | 37.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
249 | resnetv24_stage3_activation39 | Activation | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
250 | resnetv24_stage3_conv40_fwd | Convolution | [1,1024,14,14] | 5938 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
251 | resnetv24_stage3_batchnorm40_fwd | BatchNorm | [1,256,14,14] | 85.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
252 | resnetv24_stage3_activation40 | Activation | [1,256,14,14] | 16.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv24_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12349.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
253 | resnetv24_stage3_conv41_fwd | Convolution | [1,256,14,14] | 12349.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
254 | resnetv24_stage3_batchnorm41_fwd | BatchNorm | [1,256,14,14] | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
255 | resnetv24_stage3_activation41 | Activation | [1,256,14,14] | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
256 | resnetv24_stage3_conv42_fwd | Convolution | [1,256,14,14] | 5565.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
257 | resnetv24_stage3__plus13 | elemwise_add | [1,1024,14,14] | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
258 | resnetv24_stage3_batchnorm42_fwd | BatchNorm | [1,1024,14,14] | 37.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
259 | resnetv24_stage3_activation42 | Activation | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
260 | resnetv24_stage3_conv43_fwd | Convolution | [1,1024,14,14] | 5839.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
261 | resnetv24_stage3_batchnorm43_fwd | BatchNorm | [1,256,14,14] | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
262 | resnetv24_stage3_activation43 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
263 | resnetv24_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12350.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
263 | resnetv24_stage3_conv44_fwd | Convolution | [1,256,14,14] | 12350.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
264 | resnetv24_stage3_batchnorm44_fwd | BatchNorm | [1,256,14,14] | 90.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
265 | resnetv24_stage3_activation44 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
266 | resnetv24_stage3_conv45_fwd | Convolution | [1,256,14,14] | 5465 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
267 | resnetv24_stage3__plus14 | elemwise_add | [1,1024,14,14] | 103.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
268 | resnetv24_stage3_batchnorm45_fwd | BatchNorm | [1,1024,14,14] | 38 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
269 | resnetv24_stage3_activation45 | Activation | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
270 | resnetv24_stage3_conv46_fwd | Convolution | [1,1024,14,14] | 5808.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
271 | resnetv24_stage3_batchnorm46_fwd | BatchNorm | [1,256,14,14] | 84.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
272 | resnetv24_stage3_activation46 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
273 | resnetv24_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12340 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
273 | resnetv24_stage3_conv47_fwd | Convolution | [1,256,14,14] | 12340 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
274 | resnetv24_stage3_batchnorm47_fwd | BatchNorm | [1,256,14,14] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
275 | resnetv24_stage3_activation47 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
276 | resnetv24_stage3_conv48_fwd | Convolution | [1,256,14,14] | 5491.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
277 | resnetv24_stage3__plus15 | elemwise_add | [1,1024,14,14] | 96.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
278 | resnetv24_stage3_batchnorm48_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
279 | resnetv24_stage3_activation48 | Activation | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
280 | resnetv24_stage3_conv49_fwd | Convolution | [1,1024,14,14] | 5810 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
281 | resnetv24_stage3_batchnorm49_fwd | BatchNorm | [1,256,14,14] | 79 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
282 | resnetv24_stage3_activation49 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv24_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12341.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
283 | resnetv24_stage3_conv50_fwd | Convolution | [1,256,14,14] | 12341.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
284 | resnetv24_stage3_batchnorm50_fwd | BatchNorm | [1,256,14,14] | 79.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
285 | resnetv24_stage3_activation50 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
286 | resnetv24_stage3_conv51_fwd | Convolution | [1,256,14,14] | 5491 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
287 | resnetv24_stage3__plus16 | elemwise_add | [1,1024,14,14] | 97 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
288 | resnetv24_stage3_batchnorm51_fwd | BatchNorm | [1,1024,14,14] | 37.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
289 | resnetv24_stage3_activation51 | Activation | [1,1024,14,14] | 34.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
290 | resnetv24_stage3_conv52_fwd | Convolution | [1,1024,14,14] | 5814 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
291 | resnetv24_stage3_batchnorm52_fwd | BatchNorm | [1,256,14,14] | 77.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
292 | resnetv24_stage3_activation52 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
293 | resnetv24_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12520 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
293 | resnetv24_stage3_conv53_fwd | Convolution | [1,256,14,14] | 12520 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
294 | resnetv24_stage3_batchnorm53_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
295 | resnetv24_stage3_activation53 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
296 | resnetv24_stage3_conv54_fwd | Convolution | [1,256,14,14] | 5504.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
297 | resnetv24_stage3__plus17 | elemwise_add | [1,1024,14,14] | 101 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
298 | resnetv24_stage3_batchnorm54_fwd | BatchNorm | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
299 | resnetv24_stage3_activation54 | Activation | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
300 | resnetv24_stage3_conv55_fwd | Convolution | [1,1024,14,14] | 5795.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
301 | resnetv24_stage3_batchnorm55_fwd | BatchNorm | [1,256,14,14] | 79.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
302 | resnetv24_stage3_activation55 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv24_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12312.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
303 | resnetv24_stage3_conv56_fwd | Convolution | [1,256,14,14] | 12312.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
304 | resnetv24_stage3_batchnorm56_fwd | BatchNorm | [1,256,14,14] | 78.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
305 | resnetv24_stage3_activation56 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
306 | resnetv24_stage3_conv57_fwd | Convolution | [1,256,14,14] | 5503.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
307 | resnetv24_stage3__plus18 | elemwise_add | [1,1024,14,14] | 108.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
308 | resnetv24_stage3_batchnorm57_fwd | BatchNorm | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
309 | resnetv24_stage3_activation57 | Activation | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
310 | resnetv24_stage3_conv58_fwd | Convolution | [1,1024,14,14] | 5763 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
311 | resnetv24_stage3_batchnorm58_fwd | BatchNorm | [1,256,14,14] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
312 | resnetv24_stage3_activation58 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
313 | resnetv24_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12309 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
313 | resnetv24_stage3_conv59_fwd | Convolution | [1,256,14,14] | 12309 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
314 | resnetv24_stage3_batchnorm59_fwd | BatchNorm | [1,256,14,14] | 80.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
315 | resnetv24_stage3_activation59 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
316 | resnetv24_stage3_conv60_fwd | Convolution | [1,256,14,14] | 5508 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
317 | resnetv24_stage3__plus19 | elemwise_add | [1,1024,14,14] | 99.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
318 | resnetv24_stage3_batchnorm60_fwd | BatchNorm | [1,1024,14,14] | 38.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
319 | resnetv24_stage3_activation60 | Activation | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
320 | resnetv24_stage3_conv61_fwd | Convolution | [1,1024,14,14] | 5835 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
321 | resnetv24_stage3_batchnorm61_fwd | BatchNorm | [1,256,14,14] | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
322 | resnetv24_stage3_activation61 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
323 | resnetv24_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12306 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
323 | resnetv24_stage3_conv62_fwd | Convolution | [1,256,14,14] | 12306 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
324 | resnetv24_stage3_batchnorm62_fwd | BatchNorm | [1,256,14,14] | 83 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
325 | resnetv24_stage3_activation62 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
326 | resnetv24_stage3_conv63_fwd | Convolution | [1,256,14,14] | 5503 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
327 | resnetv24_stage3__plus20 | elemwise_add | [1,1024,14,14] | 98.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
328 | resnetv24_stage3_batchnorm63_fwd | BatchNorm | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
329 | resnetv24_stage3_activation63 | Activation | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
330 | resnetv24_stage3_conv64_fwd | Convolution | [1,1024,14,14] | 5808.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
331 | resnetv24_stage3_batchnorm64_fwd | BatchNorm | [1,256,14,14] | 82.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
332 | resnetv24_stage3_activation64 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
333 | resnetv24_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12788.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
333 | resnetv24_stage3_conv65_fwd | Convolution | [1,256,14,14] | 12788.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
334 | resnetv24_stage3_batchnorm65_fwd | BatchNorm | [1,256,14,14] | 92.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
335 | resnetv24_stage3_activation65 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
336 | resnetv24_stage3_conv66_fwd | Convolution | [1,256,14,14] | 5608.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
337 | resnetv24_stage3__plus21 | elemwise_add | [1,1024,14,14] | 103.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
338 | resnetv24_stage3_batchnorm66_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
339 | resnetv24_stage3_activation66 | Activation | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
340 | resnetv24_stage3_conv67_fwd | Convolution | [1,1024,14,14] | 5825.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
341 | resnetv24_stage3_batchnorm67_fwd | BatchNorm | [1,256,14,14] | 91 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
342 | resnetv24_stage3_activation67 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
343 | resnetv24_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12365 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
343 | resnetv24_stage3_conv68_fwd | Convolution | [1,256,14,14] | 12365 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
344 | resnetv24_stage3_batchnorm68_fwd | BatchNorm | [1,256,14,14] | 84.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
345 | resnetv24_stage3_activation68 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
346 | resnetv24_stage3_conv69_fwd | Convolution | [1,256,14,14] | 5505.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
347 | resnetv24_stage3__plus22 | elemwise_add | [1,1024,14,14] | 98 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
348 | resnetv24_stage3_batchnorm69_fwd | BatchNorm | [1,1024,14,14] | 37 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
349 | resnetv24_stage3_activation69 | Activation | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
350 | resnetv24_stage3_conv70_fwd | Convolution | [1,1024,14,14] | 5804.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
351 | resnetv24_stage3_batchnorm70_fwd | BatchNorm | [1,256,14,14] | 85 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
352 | resnetv24_stage3_activation70 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
353 | resnetv24_stage3_conv71_fwd | Convolution | [1,256,14,14] | 12488 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
353 | resnetv24_stage3_conv71_fwd | Convolution | [1,256,14,14] | 12488 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
354 | resnetv24_stage3_batchnorm71_fwd | BatchNorm | [1,256,14,14] | 85.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
355 | resnetv24_stage3_activation71 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
356 | resnetv24_stage3_conv72_fwd | Convolution | [1,256,14,14] | 5525.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
357 | resnetv24_stage3__plus23 | elemwise_add | [1,1024,14,14] | 109 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
358 | resnetv24_stage3_batchnorm72_fwd | BatchNorm | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
359 | resnetv24_stage3_activation72 | Activation | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
360 | resnetv24_stage3_conv73_fwd | Convolution | [1,1024,14,14] | 5883 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
361 | resnetv24_stage3_batchnorm73_fwd | BatchNorm | [1,256,14,14] | 84 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
362 | resnetv24_stage3_activation73 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
363 | resnetv24_stage3_conv74_fwd | Convolution | [1,256,14,14] | 12382 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
363 | resnetv24_stage3_conv74_fwd | Convolution | [1,256,14,14] | 12382 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
364 | resnetv24_stage3_batchnorm74_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
365 | resnetv24_stage3_activation74 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
366 | resnetv24_stage3_conv75_fwd | Convolution | [1,256,14,14] | 5536 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
367 | resnetv24_stage3__plus24 | elemwise_add | [1,1024,14,14] | 101.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
368 | resnetv24_stage3_batchnorm75_fwd | BatchNorm | [1,1024,14,14] | 38.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
369 | resnetv24_stage3_activation75 | Activation | [1,1024,14,14] | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
370 | resnetv24_stage3_conv76_fwd | Convolution | [1,1024,14,14] | 5819.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
371 | resnetv24_stage3_batchnorm76_fwd | BatchNorm | [1,256,14,14] | 88.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
372 | resnetv24_stage3_activation76 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
373 | resnetv24_stage3_conv77_fwd | Convolution | [1,256,14,14] | 12802.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
373 | resnetv24_stage3_conv77_fwd | Convolution | [1,256,14,14] | 12802.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
374 | resnetv24_stage3_batchnorm77_fwd | BatchNorm | [1,256,14,14] | 95 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
375 | resnetv24_stage3_activation77 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
376 | resnetv24_stage3_conv78_fwd | Convolution | [1,256,14,14] | 5648.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
377 | resnetv24_stage3__plus25 | elemwise_add | [1,1024,14,14] | 115 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
378 | resnetv24_stage3_batchnorm78_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
379 | resnetv24_stage3_activation78 | Activation | [1,1024,14,14] | 39.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
380 | resnetv24_stage3_conv79_fwd | Convolution | [1,1024,14,14] | 5843.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
381 | resnetv24_stage3_batchnorm79_fwd | BatchNorm | [1,256,14,14] | 87.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
382 | resnetv24_stage3_activation79 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
383 | resnetv24_stage3_conv80_fwd | Convolution | [1,256,14,14] | 12305.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
383 | resnetv24_stage3_conv80_fwd | Convolution | [1,256,14,14] | 12305.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
384 | resnetv24_stage3_batchnorm80_fwd | BatchNorm | [1,256,14,14] | 77.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
385 | resnetv24_stage3_activation80 | Activation | [1,256,14,14] | 10.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
386 | resnetv24_stage3_conv81_fwd | Convolution | [1,256,14,14] | 5531.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
387 | resnetv24_stage3__plus26 | elemwise_add | [1,1024,14,14] | 98.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
388 | resnetv24_stage3_batchnorm81_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
389 | resnetv24_stage3_activation81 | Activation | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
390 | resnetv24_stage3_conv82_fwd | Convolution | [1,1024,14,14] | 5787.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
391 | resnetv24_stage3_batchnorm82_fwd | BatchNorm | [1,256,14,14] | 80.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
392 | resnetv24_stage3_activation82 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
393 | resnetv24_stage3_conv83_fwd | Convolution | [1,256,14,14] | 12363.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
393 | resnetv24_stage3_conv83_fwd | Convolution | [1,256,14,14] | 12363.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
394 | resnetv24_stage3_batchnorm83_fwd | BatchNorm | [1,256,14,14] | 79.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
395 | resnetv24_stage3_activation83 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
396 | resnetv24_stage3_conv84_fwd | Convolution | [1,256,14,14] | 5506.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
397 | resnetv24_stage3__plus27 | elemwise_add | [1,1024,14,14] | 95.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
398 | resnetv24_stage3_batchnorm84_fwd | BatchNorm | [1,1024,14,14] | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
399 | resnetv24_stage3_activation84 | Activation | [1,1024,14,14] | 34.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
400 | resnetv24_stage3_conv85_fwd | Convolution | [1,1024,14,14] | 5815.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
401 | resnetv24_stage3_batchnorm85_fwd | BatchNorm | [1,256,14,14] | 80.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
402 | resnetv24_stage3_activation85 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
403 | resnetv24_stage3_conv86_fwd | Convolution | [1,256,14,14] | 12328.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
403 | resnetv24_stage3_conv86_fwd | Convolution | [1,256,14,14] | 12328.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
404 | resnetv24_stage3_batchnorm86_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
405 | resnetv24_stage3_activation86 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
406 | resnetv24_stage3_conv87_fwd | Convolution | [1,256,14,14] | 5535 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
407 | resnetv24_stage3__plus28 | elemwise_add | [1,1024,14,14] | 101 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
408 | resnetv24_stage3_batchnorm87_fwd | BatchNorm | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
409 | resnetv24_stage3_activation87 | Activation | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
410 | resnetv24_stage3_conv88_fwd | Convolution | [1,1024,14,14] | 5810.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
411 | resnetv24_stage3_batchnorm88_fwd | BatchNorm | [1,256,14,14] | 81.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
412 | resnetv24_stage3_activation88 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
413 | resnetv24_stage3_conv89_fwd | Convolution | [1,256,14,14] | 12484 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
413 | resnetv24_stage3_conv89_fwd | Convolution | [1,256,14,14] | 12484 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
414 | resnetv24_stage3_batchnorm89_fwd | BatchNorm | [1,256,14,14] | 88.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
415 | resnetv24_stage3_activation89 | Activation | [1,256,14,14] | 10.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
416 | resnetv24_stage3_conv90_fwd | Convolution | [1,256,14,14] | 5630.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
417 | resnetv24_stage3__plus29 | elemwise_add | [1,1024,14,14] | 111.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
418 | resnetv24_stage3_batchnorm90_fwd | BatchNorm | [1,1024,14,14] | 39.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
419 | resnetv24_stage3_activation90 | Activation | [1,1024,14,14] | 34.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
420 | resnetv24_stage3_conv91_fwd | Convolution | [1,1024,14,14] | 5913 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 58.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
421 | resnetv24_stage3_batchnorm91_fwd | BatchNorm | [1,256,14,14] | 91 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
422 | resnetv24_stage3_activation91 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
423 | resnetv24_stage3_conv92_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
423 | resnetv24_stage3_conv92_fwd | Convolution | [1,256,14,14] | 12482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
424 | resnetv24_stage3_batchnorm92_fwd | BatchNorm | [1,256,14,14] | 89 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
425 | resnetv24_stage3_activation92 | Activation | [1,256,14,14] | 11.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
426 | resnetv24_stage3_conv93_fwd | Convolution | [1,256,14,14] | 5555 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
427 | resnetv24_stage3__plus30 | elemwise_add | [1,1024,14,14] | 112.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
428 | resnetv24_stage3_batchnorm93_fwd | BatchNorm | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
429 | resnetv24_stage3_activation93 | Activation | [1,1024,14,14] | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
430 | resnetv24_stage3_conv94_fwd | Convolution | [1,1024,14,14] | 5855 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
431 | resnetv24_stage3_batchnorm94_fwd | BatchNorm | [1,256,14,14] | 89 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
432 | resnetv24_stage3_activation94 | Activation | [1,256,14,14] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
433 | resnetv24_stage3_conv95_fwd | Convolution | [1,256,14,14] | 12412.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
433 | resnetv24_stage3_conv95_fwd | Convolution | [1,256,14,14] | 12412.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
434 | resnetv24_stage3_batchnorm95_fwd | BatchNorm | [1,256,14,14] | 84.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
435 | resnetv24_stage3_activation95 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
436 | resnetv24_stage3_conv96_fwd | Convolution | [1,256,14,14] | 5518.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 31.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
437 | resnetv24_stage3__plus31 | elemwise_add | [1,1024,14,14] | 101.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
438 | resnetv24_stage3_batchnorm96_fwd | BatchNorm | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
439 | resnetv24_stage3_activation96 | Activation | [1,1024,14,14] | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
440 | resnetv24_stage3_conv97_fwd | Convolution | [1,1024,14,14] | 5813.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
441 | resnetv24_stage3_batchnorm97_fwd | BatchNorm | [1,256,14,14] | 93 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
442 | resnetv24_stage3_activation97 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
443 | resnetv24_stage3_conv98_fwd | Convolution | [1,256,14,14] | 12388.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
443 | resnetv24_stage3_conv98_fwd | Convolution | [1,256,14,14] | 12388.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 11.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
444 | resnetv24_stage3_batchnorm98_fwd | BatchNorm | [1,256,14,14] | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
445 | resnetv24_stage3_activation98 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
446 | resnetv24_stage3_conv99_fwd | Convolution | [1,256,14,14] | 5564.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
447 | resnetv24_stage3__plus32 | elemwise_add | [1,1024,14,14] | 114.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
448 | resnetv24_stage3_batchnorm99_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
449 | resnetv24_stage3_activation99 | Activation | [1,1024,14,14] | 35.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
450 | resnetv24_stage3_conv100_fwd | Convolution | [1,1024,14,14] | 5870 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
451 | resnetv24_stage3_batchnorm100_fwd | BatchNorm | [1,256,14,14] | 89.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
452 | resnetv24_stage3_activation100 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
453 | resnetv24_stage3_conv101_fwd | Convolution | [1,256,14,14] | 12389.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
453 | resnetv24_stage3_conv101_fwd | Convolution | [1,256,14,14] | 12389.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
454 | resnetv24_stage3_batchnorm101_fwd | BatchNorm | [1,256,14,14] | 93 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
455 | resnetv24_stage3_activation101 | Activation | [1,256,14,14] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
456 | resnetv24_stage3_conv102_fwd | Convolution | [1,256,14,14] | 5628 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
457 | resnetv24_stage3__plus33 | elemwise_add | [1,1024,14,14] | 107.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
458 | resnetv24_stage3_batchnorm102_fwd | BatchNorm | [1,1024,14,14] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
459 | resnetv24_stage3_activation102 | Activation | [1,1024,14,14] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
460 | resnetv24_stage3_conv103_fwd | Convolution | [1,1024,14,14] | 5887.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
461 | resnetv24_stage3_batchnorm103_fwd | BatchNorm | [1,256,14,14] | 91 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
462 | resnetv24_stage3_activation103 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
463 | resnetv24_stage3_conv104_fwd | Convolution | [1,256,14,14] | 12449.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
463 | resnetv24_stage3_conv104_fwd | Convolution | [1,256,14,14] | 12449.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
464 | resnetv24_stage3_batchnorm104_fwd | BatchNorm | [1,256,14,14] | 80 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
465 | resnetv24_stage3_activation104 | Activation | [1,256,14,14] | 11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
466 | resnetv24_stage3_conv105_fwd | Convolution | [1,256,14,14] | 5556 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
467 | resnetv24_stage3__plus34 | elemwise_add | [1,1024,14,14] | 103 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
468 | resnetv24_stage3_batchnorm105_fwd | BatchNorm | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
469 | resnetv24_stage3_activation105 | Activation | [1,1024,14,14] | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
470 | resnetv24_stage3_conv106_fwd | Convolution | [1,1024,14,14] | 5809.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 59.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
471 | resnetv24_stage3_batchnorm106_fwd | BatchNorm | [1,256,14,14] | 82.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
472 | resnetv24_stage3_activation106 | Activation | [1,256,14,14] | 11.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
473 | resnetv24_stage3_conv107_fwd | Convolution | [1,256,14,14] | 12373.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 63.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
473 | resnetv24_stage3_conv107_fwd | Convolution | [1,256,14,14] | 12373.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
474 | resnetv24_stage3_batchnorm107_fwd | BatchNorm | [1,256,14,14] | 77 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
475 | resnetv24_stage3_activation107 | Activation | [1,256,14,14] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
476 | resnetv24_stage3_conv108_fwd | Convolution | [1,256,14,14] | 5503.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 30.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
477 | resnetv24_stage3__plus35 | elemwise_add | [1,1024,14,14] | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
478 | resnetv24_stage4_batchnorm0_fwd | BatchNorm | [1,1024,14,14] | 37 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
479 | resnetv24_stage4_activation0 | Activation | [1,1024,14,14] | 35.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
480 | resnetv24_stage4_conv0_fwd | Convolution | [1,1024,14,14] | 10602.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 84.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
481 | resnetv24_stage4_batchnorm1_fwd | BatchNorm | [1,512,14,14] | 89 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
482 | resnetv24_stage4_activation1 | Activation | [1,512,14,14] | 19.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
483 | resnetv24_stage4_conv1_fwd | Convolution | [1,512,14,14] | 15019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::explicit_convolve_sgemm<float, int, 128, 5, 5, 3, 3, 3, 0, true>(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, int, int, float, float, int, float*, float*) | 269.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
483 | resnetv24_stage4_conv1_fwd | Convolution | [1,512,14,14] | 15019 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void im2col4d_kernel<float, int>(im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*, int) | 39.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
484 | resnetv24_stage4_batchnorm2_fwd | BatchNorm | [1,512,7,7] | 84.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
485 | resnetv24_stage4_activation2 | Activation | [1,512,7,7] | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
486 | resnetv24_stage4_conv2_fwd | Convolution | [1,512,7,7] | 6845.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 46.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
487 | resnetv24_stage4_conv3_fwd | Convolution | [1,1024,14,14] | 12087 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 86.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
488 | resnetv24_stage4__plus0 | elemwise_add | [1,2048,7,7] | 90.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
489 | resnetv24_stage4_batchnorm3_fwd | BatchNorm | [1,2048,7,7] | 36.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
490 | resnetv24_stage4_activation3 | Activation | [1,2048,7,7] | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
491 | resnetv24_stage4_conv4_fwd | Convolution | [1,2048,7,7] | 7106.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 112.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
492 | resnetv24_stage4_batchnorm4_fwd | BatchNorm | [1,512,7,7] | 90 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
493 | resnetv24_stage4_activation4 | Activation | [1,512,7,7] | 8.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
494 | resnetv24_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15623.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 121.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
494 | resnetv24_stage4_conv5_fwd | Convolution | [1,512,7,7] | 15623.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 51.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
495 | resnetv24_stage4_batchnorm5_fwd | BatchNorm | [1,512,7,7] | 94.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
496 | resnetv24_stage4_activation5 | Activation | [1,512,7,7] | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
497 | resnetv24_stage4_conv6_fwd | Convolution | [1,512,7,7] | 6892.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 46.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
498 | resnetv24_stage4__plus1 | elemwise_add | [1,2048,7,7] | 100 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
499 | resnetv24_stage4_batchnorm6_fwd | BatchNorm | [1,2048,7,7] | 37.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
500 | resnetv24_stage4_activation6 | Activation | [1,2048,7,7] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
501 | resnetv24_stage4_conv7_fwd | Convolution | [1,2048,7,7] | 7040 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 112.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
502 | resnetv24_stage4_batchnorm7_fwd | BatchNorm | [1,512,7,7] | 97 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
503 | resnetv24_stage4_activation7 | Activation | [1,512,7,7] | 8.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
504 | resnetv24_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15326.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 121.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
504 | resnetv24_stage4_conv8_fwd | Convolution | [1,512,7,7] | 15326.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 50.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
505 | resnetv24_stage4_batchnorm8_fwd | BatchNorm | [1,512,7,7] | 93.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
506 | resnetv24_stage4_activation8 | Activation | [1,512,7,7] | 7.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 3.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
507 | resnetv24_stage4_conv9_fwd | Convolution | [1,512,7,7] | 6889.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 45.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
508 | resnetv24_stage4__plus2 | elemwise_add | [1,2048,7,7] | 106 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::plus, 1>, float*, float*, float*>(int, float*, float*, float*) | 3.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
509 | resnetv24_batchnorm2_fwd | BatchNorm | [1,2048,7,7] | 36.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::bn_fw_inf_1C11_kernel_NCHW<float, float, true, 1>(float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnTensorStruct, float const*, float const*, float const*, float const*, float) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
510 | resnetv24_relu1_fwd | Activation | [1,2048,7,7] | 20.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
511 | resnetv24_pool1_fwd | Pooling | [1,2048,7,7] | 156.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
513 | resnetv24_dense0_fwd | FullyConnected | [1,2048] | 2077.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void gemv2T_kernel_val<int, int, float, float, float, 128, 16, 4, 4, false, cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float> >(cublasGemvParams<cublasGemvTensorStridedBatched<float const>, cublasGemvTensorStridedBatched<float>, float>, float, float) | 20.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
513 | resnetv24_dense0_fwd | FullyConnected | [1,2048] | 2077.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, 8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float> >(mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 2, float>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, 1, float>, float, 2, 1>, float>) | 2.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
Showing 1 to 579 of 579 entries