GPU Kernel Information
layer_index | layer_name | layer_type | layer_shape | layer_duration (us) | layer_allocated_bytes | layer_peak_allocated_bytes | layer_allocator_bytes_in_use | layer_allocator_name | layer_host_temp_mem_bytes | layer_device_temp_mem_bytes | layer_host_persistent_mem_bytes | layer_device_persistent_mem_bytes | kernel_name | kernel_duration (us) | kernel_flops | kernel_dram_read_bytes | kernel_dram_write_bytes | kernel_achieved_occupancy (%) | kernel_arithmetic_intensity (flops/byte) | kernel_arithmetic_throughput (GFlops) | kernel_memory_bound |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | squeezenet1_conv0_fwd | Convolution | [1,3,224,224] | 10344 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 22.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
0 | squeezenet1_conv0_fwd | Convolution | [1,3,224,224] | 10344 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 10.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
1 | squeezenet1_relu0_fwd | Activation | [1,64,111,111] | 163.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 6.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
2 | squeezenet1_pool0_fwd | Pooling | [1,64,111,111] | 2508.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 9.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
3 | squeezenet1_conv1_fwd | Convolution | [1,64,55,55] | 724.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 12.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
3 | squeezenet1_conv1_fwd | Convolution | [1,64,55,55] | 724.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
4 | squeezenet1_relu1_fwd | Activation | [1,16,55,55] | 25.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
5 | squeezenet1_conv2_fwd | Convolution | [1,16,55,55] | 784.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 9.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
5 | squeezenet1_conv2_fwd | Convolution | [1,16,55,55] | 784.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | squeezenet1_conv3_fwd | Convolution | [1,16,55,55] | 3707.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 20.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | squeezenet1_conv3_fwd | Convolution | [1,16,55,55] | 3707.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
6 | squeezenet1_conv3_fwd | Convolution | [1,16,55,55] | 3707.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | squeezenet1_relu2_fwd | Activation | [1,64,55,55] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 20.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | squeezenet1_relu2_fwd | Activation | [1,64,55,55] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
7 | squeezenet1_relu2_fwd | Activation | [1,64,55,55] | 49 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
8 | squeezenet1_relu3_fwd | Activation | [1,64,55,55] | 45 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
9 | squeezenet1_concat0 | Concat | [1,64,55,55] | 65 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 5.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
9 | squeezenet1_concat0 | Concat | [1,64,55,55] | 65 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 5.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | squeezenet1_conv4_fwd | Convolution | [1,128,55,55] | 1410.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 17.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
10 | squeezenet1_conv4_fwd | Convolution | [1,128,55,55] | 1410.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
11 | squeezenet1_relu4_fwd | Activation | [1,16,55,55] | 29.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | squeezenet1_conv5_fwd | Convolution | [1,16,55,55] | 778 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 9.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
12 | squeezenet1_conv5_fwd | Convolution | [1,16,55,55] | 778 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | squeezenet1_conv6_fwd | Convolution | [1,16,55,55] | 3716.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 20.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | squeezenet1_conv6_fwd | Convolution | [1,16,55,55] | 3716.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
13 | squeezenet1_conv6_fwd | Convolution | [1,16,55,55] | 3716.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | squeezenet1_relu5_fwd | Activation | [1,64,55,55] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 20.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | squeezenet1_relu5_fwd | Activation | [1,64,55,55] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.50 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
14 | squeezenet1_relu5_fwd | Activation | [1,64,55,55] | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
15 | squeezenet1_relu6_fwd | Activation | [1,64,55,55] | 45.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
16 | squeezenet1_concat1 | Concat | [1,64,55,55] | 61 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 5.43 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
16 | squeezenet1_concat1 | Concat | [1,64,55,55] | 61 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 5.29 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
17 | squeezenet1_pool1_fwd | Pooling | [1,128,55,55] | 1246.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | squeezenet1_conv7_fwd | Convolution | [1,128,27,27] | 650.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 12.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
18 | squeezenet1_conv7_fwd | Convolution | [1,128,27,27] | 650.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
19 | squeezenet1_relu7_fwd | Activation | [1,32,27,27] | 21.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
20 | squeezenet1_conv8_fwd | Convolution | [1,32,27,27] | 652.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 128, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 10.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
20 | squeezenet1_conv8_fwd | Convolution | [1,32,27,27] | 652.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | squeezenet1_conv9_fwd | Convolution | [1,32,27,27] | 3395.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 14.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | squeezenet1_conv9_fwd | Convolution | [1,32,27,27] | 3395.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | squeezenet1_conv9_fwd | Convolution | [1,32,27,27] | 3395.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
21 | squeezenet1_conv9_fwd | Convolution | [1,32,27,27] | 3395.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
22 | squeezenet1_relu8_fwd | Activation | [1,128,27,27] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
22 | squeezenet1_relu8_fwd | Activation | [1,128,27,27] | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
23 | squeezenet1_relu9_fwd | Activation | [1,128,27,27] | 29 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | squeezenet1_concat2 | Concat | [1,128,27,27] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
24 | squeezenet1_concat2 | Concat | [1,128,27,27] | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
25 | squeezenet1_conv10_fwd | Convolution | [1,256,27,27] | 948.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 20.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
25 | squeezenet1_conv10_fwd | Convolution | [1,256,27,27] | 948.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
26 | squeezenet1_relu10_fwd | Activation | [1,32,27,27] | 10.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | squeezenet1_conv11_fwd | Convolution | [1,32,27,27] | 661 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 128, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 10.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
27 | squeezenet1_conv11_fwd | Convolution | [1,32,27,27] | 661 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | squeezenet1_conv12_fwd | Convolution | [1,32,27,27] | 3404 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 14.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | squeezenet1_conv12_fwd | Convolution | [1,32,27,27] | 3404 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | squeezenet1_conv12_fwd | Convolution | [1,32,27,27] | 3404 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
28 | squeezenet1_conv12_fwd | Convolution | [1,32,27,27] | 3404 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | squeezenet1_relu11_fwd | Activation | [1,128,27,27] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 14.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | squeezenet1_relu11_fwd | Activation | [1,128,27,27] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
29 | squeezenet1_relu11_fwd | Activation | [1,128,27,27] | 31.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
30 | squeezenet1_relu12_fwd | Activation | [1,128,27,27] | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | squeezenet1_concat3 | Concat | [1,128,27,27] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
31 | squeezenet1_concat3 | Concat | [1,128,27,27] | 31.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.14 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
32 | squeezenet1_pool2_fwd | Pooling | [1,256,27,27] | 598 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::maxpooling_func<float, (cudnnNanPropagation_t)0>, 0, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 6.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | squeezenet1_conv13_fwd | Convolution | [1,256,13,13] | 378 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 21.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
33 | squeezenet1_conv13_fwd | Convolution | [1,256,13,13] | 378 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
34 | squeezenet1_relu13_fwd | Activation | [1,48,13,13] | 8.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
35 | squeezenet1_conv14_fwd | Convolution | [1,48,13,13] | 267.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 9.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
35 | squeezenet1_conv14_fwd | Convolution | [1,48,13,13] | 267.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | squeezenet1_conv15_fwd | Convolution | [1,48,13,13] | 2388 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 17.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | squeezenet1_conv15_fwd | Convolution | [1,48,13,13] | 2388 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | squeezenet1_conv15_fwd | Convolution | [1,48,13,13] | 2388 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
36 | squeezenet1_conv15_fwd | Convolution | [1,48,13,13] | 2388 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
37 | squeezenet1_relu14_fwd | Activation | [1,192,13,13] | 19.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
38 | squeezenet1_relu15_fwd | Activation | [1,192,13,13] | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | squeezenet1_concat4 | Concat | [1,192,13,13] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
39 | squeezenet1_concat4 | Concat | [1,192,13,13] | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
40 | squeezenet1_conv16_fwd | Convolution | [1,384,13,13] | 562.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 28.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
40 | squeezenet1_conv16_fwd | Convolution | [1,384,13,13] | 562.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
41 | squeezenet1_relu16_fwd | Activation | [1,48,13,13] | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
42 | squeezenet1_conv17_fwd | Convolution | [1,48,13,13] | 268 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 8.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
42 | squeezenet1_conv17_fwd | Convolution | [1,48,13,13] | 268 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | squeezenet1_conv18_fwd | Convolution | [1,48,13,13] | 2386.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 17.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | squeezenet1_conv18_fwd | Convolution | [1,48,13,13] | 2386.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | squeezenet1_conv18_fwd | Convolution | [1,48,13,13] | 2386.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
43 | squeezenet1_conv18_fwd | Convolution | [1,48,13,13] | 2386.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | squeezenet1_relu17_fwd | Activation | [1,192,13,13] | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 4.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
44 | squeezenet1_relu17_fwd | Activation | [1,192,13,13] | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
45 | squeezenet1_relu18_fwd | Activation | [1,192,13,13] | 12.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
46 | squeezenet1_concat5 | Concat | [1,192,13,13] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 3.43 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
46 | squeezenet1_concat5 | Concat | [1,192,13,13] | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 3.43 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | squeezenet1_conv19_fwd | Convolution | [1,384,13,13] | 731 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 28.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
47 | squeezenet1_conv19_fwd | Convolution | [1,384,13,13] | 731 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
48 | squeezenet1_relu19_fwd | Activation | [1,64,13,13] | 9 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
49 | squeezenet1_conv20_fwd | Convolution | [1,64,13,13] | 482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 9.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
49 | squeezenet1_conv20_fwd | Convolution | [1,64,13,13] | 482.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | squeezenet1_conv21_fwd | Convolution | [1,64,13,13] | 4149.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 21.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | squeezenet1_conv21_fwd | Convolution | [1,64,13,13] | 4149.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | squeezenet1_conv21_fwd | Convolution | [1,64,13,13] | 4149.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
50 | squeezenet1_conv21_fwd | Convolution | [1,64,13,13] | 4149.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | squeezenet1_relu20_fwd | Activation | [1,256,13,13] | 22.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 20.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | squeezenet1_relu20_fwd | Activation | [1,256,13,13] | 22.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 5.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
51 | squeezenet1_relu20_fwd | Activation | [1,256,13,13] | 22.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
52 | squeezenet1_relu21_fwd | Activation | [1,256,13,13] | 15.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | squeezenet1_concat6 | Concat | [1,256,13,13] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 3.86 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
53 | squeezenet1_concat6 | Concat | [1,256,13,13] | 20.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 3.86 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
54 | squeezenet1_conv22_fwd | Convolution | [1,512,13,13] | 989.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 35.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
54 | squeezenet1_conv22_fwd | Convolution | [1,512,13,13] | 989.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
55 | squeezenet1_relu22_fwd | Activation | [1,64,13,13] | 9 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | squeezenet1_conv23_fwd | Convolution | [1,64,13,13] | 481.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 9.33 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
56 | squeezenet1_conv23_fwd | Convolution | [1,64,13,13] | 481.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | squeezenet1_conv24_fwd | Convolution | [1,64,13,13] | 4149.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1 | 20.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | squeezenet1_conv24_fwd | Convolution | [1,64,13,13] | 4149.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | squeezenet1_conv24_fwd | Convolution | [1,64,13,13] | 4149.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
57 | squeezenet1_conv24_fwd | Convolution | [1,64,13,13] | 4149.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
58 | squeezenet1_relu23_fwd | Activation | [1,256,13,13] | 20.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams<float, float>) | 5.67 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
58 | squeezenet1_relu23_fwd | Activation | [1,256,13,13] | 20.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
59 | squeezenet1_relu24_fwd | Activation | [1,256,13,13] | 14.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
60 | squeezenet1_concat7 | Concat | [1,256,13,13] | 19.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
60 | squeezenet1_concat7 | Concat | [1,256,13,13] | 19.333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mshadow::cuda::MapPlanKernel<mshadow::sv::saveto, 8, mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float> >(mshadow::expr::Plan<mshadow::expr::SliceExp<mshadow::Tensor<mshadow::gpu, 3, float>, mshadow::gpu, float, 3, 2>, float>, int, mshadow::Shape<2>, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, 3, float>, float>) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
61 | squeezenet1_dropout0_fwd | Dropout | [1,512,13,13] | 23.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void mxnet::op::mxnet_op::mxnet_generic_kernel<mxnet::op::mxnet_op::op_with_req<mxnet::op::mshadow_op::identity, 1>, float*, float*>(int, float*, float*) | 4.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | squeezenet1_conv25_fwd | Convolution | [1,512,13,13] | 13492 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::implicit_convolve_sgemm<float, float, 1024, 5, 5, 3, 3, 3, 1, true, false, true>(int, int, int, float const*, int, float*, float*, kernel_conv_params, int, float, float, int, float*, float*, int, int) | 54.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
62 | squeezenet1_conv25_fwd | Convolution | [1,512,13,13] | 13492 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)0, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 0>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
63 | squeezenet1_relu25_fwd | Activation | [1,1000,13,13] | 43.667 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void op_generic_tensor_kernel<2, float, float, float, 256, (cudnnGenericOp_t)8, (cudnnNanPropagation_t)0, (cudnnDimOrder_t)0, 1>(cudnnTensorStruct, float*, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, float, float, float, float, dimArray, reducedDivisorArray, bool) | 5.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true | |
64 | squeezenet1_pool3_fwd | Pooling | [1,1000,13,13] | 207 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | void cudnn::detail::pooling_fw_4d_kernel<float, float, cudnn::detail::averpooling_func<float>, 1, false>(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, cudnnPoolingStruct, float, float, int, cudnn::reduced_divisor, cudnn::reduced_divisor) | 8.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | true |
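In this capture the raw kernel counters (kernel_flops, kernel_dram_read_bytes, kernel_dram_write_bytes) are all zero, so the derived columns show 0.00 and every kernel is flagged memory_bound, which is consistent with a roofline test applied to zero arithmetic intensity. The sketch below is a minimal illustration, not the profiler's actual code, of how these derived columns are conventionally computed when the counters are populated. The column names mirror the table header; the peak FLOP rate and bandwidth figures are assumed example values for a Volta-class GPU, not measurements from this run.

```python
# Minimal sketch (assumptions noted above) of deriving the kernel roofline
# columns from the raw counters in a table like the one shown here.
import pandas as pd

def add_derived_kernel_metrics(df: pd.DataFrame,
                               peak_gflops: float = 14_900.0,  # assumed FP32 peak
                               peak_gbps: float = 900.0        # assumed DRAM bandwidth
                               ) -> pd.DataFrame:
    """Compute arithmetic intensity, achieved throughput, and a
    roofline-style memory-bound flag for each kernel row."""
    bytes_moved = df["kernel_dram_read_bytes"] + df["kernel_dram_write_bytes"]
    seconds = df["kernel_duration (us)"] * 1e-6

    # flops per byte of DRAM traffic; zero counters (as in this capture) yield NA
    df["kernel_arithmetic_intensity (flops/byte)"] = (
        df["kernel_flops"] / bytes_moved.replace(0, pd.NA)
    )
    # achieved GFLOP/s over the kernel's wall-clock duration
    df["kernel_arithmetic_throughput (GFlops)"] = df["kernel_flops"] / seconds / 1e9

    # roofline test: below the machine balance point, DRAM bandwidth is the limiter
    machine_balance = peak_gflops / peak_gbps  # flops per byte
    df["kernel_memory_bound"] = (
        df["kernel_arithmetic_intensity (flops/byte)"] < machine_balance
    )
    return df
```

As a usage note, summing kernel_duration (us) over rows that share a layer_index gives the total measured kernel time per layer, which can then be compared against layer_duration (us) to see how much of a layer's wall-clock time was spent inside GPU kernels.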